## imports

In [30]:
!pip install holidays==0.14.2
!pip install prophet==1.1



In [31]:
from prophet import Prophet

In [32]:
import pandas as pd

In [33]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split

## leitura dos dados

In [34]:
df = pd.read_csv('/content/ma_lga_12345.csv')
df

Unnamed: 0,saledate,MA,type,bedrooms
0,30/09/2007,441854,house,2
1,31/12/2007,441854,house,2
2,31/03/2008,441854,house,2
3,30/06/2008,441854,house,2
4,30/09/2008,451583,house,2
...,...,...,...,...
342,30/09/2018,603281,unit,3
343,31/12/2018,601167,unit,3
344,31/03/2019,605637,unit,3
345,30/06/2019,599339,unit,3


In [35]:
df['saledate'] = pd.to_datetime(df['saledate'], format='%d/%m/%Y')
df

Unnamed: 0,saledate,MA,type,bedrooms
0,2007-09-30,441854,house,2
1,2007-12-31,441854,house,2
2,2008-03-31,441854,house,2
3,2008-06-30,441854,house,2
4,2008-09-30,451583,house,2
...,...,...,...,...
342,2018-09-30,603281,unit,3
343,2018-12-31,601167,unit,3
344,2019-03-31,605637,unit,3
345,2019-06-30,599339,unit,3


In [36]:
df.type.unique()

array(['house', 'unit'], dtype=object)

In [37]:
df_encoded = pd.get_dummies(df, columns=['type'], drop_first=True)
df_encoded

Unnamed: 0,saledate,MA,bedrooms,type_unit
0,2007-09-30,441854,2,0
1,2007-12-31,441854,2,0
2,2008-03-31,441854,2,0
3,2008-06-30,441854,2,0
4,2008-09-30,451583,2,0
...,...,...,...,...
342,2018-09-30,603281,3,1
343,2018-12-31,601167,3,1
344,2019-03-31,605637,3,1
345,2019-06-30,599339,3,1


## predição com prophet

In [38]:
df_prophet = df_encoded[['saledate', 'MA', 'bedrooms', 'type_unit']].rename(columns={'saledate': 'ds', 'MA': 'y'})
df_prophet

Unnamed: 0,ds,y,bedrooms,type_unit
0,2007-09-30,441854,2,0
1,2007-12-31,441854,2,0
2,2008-03-31,441854,2,0
3,2008-06-30,441854,2,0
4,2008-09-30,451583,2,0
...,...,...,...,...
342,2018-09-30,603281,3,1
343,2018-12-31,601167,3,1
344,2019-03-31,605637,3,1
345,2019-06-30,599339,3,1


In [39]:
model = Prophet()
model.add_regressor('bedrooms')
model.add_regressor('type_unit')

<prophet.forecaster.Prophet at 0x7834adca2ce0>

In [40]:
model.fit(df_prophet)

INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpcl5x_odw/6sumq8wg.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpcl5x_odw/kzzs_zqg.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=75687', 'data', 'file=/tmp/tmpcl5x_odw/6sumq8wg.json', 'init=/tmp/tmpcl5x_odw/kzzs_zqg.json', 'output', 'file=/tmp/tmp2r0cnf2u/prophet_model-20241007141500.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:15:00 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:15:00 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


<prophet.forecaster.Prophet at 0x7834adca2ce0>

In [41]:
future_dates = model.make_future_dataframe(periods=5, freq='M')
future_dates['bedrooms'] = df_encoded['bedrooms'].iloc[-1]
future_dates['type_unit'] = df_encoded['type_unit'].iloc[-1]

In [42]:
forecast = model.predict(future_dates)

In [43]:
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(10))

           ds           yhat     yhat_lower     yhat_upper
46 2018-09-30  631424.355978  571746.772447  685641.326579
47 2018-12-31  637834.457534  578217.579769  697103.273168
48 2019-03-31  635980.003901  576930.881006  694654.005360
49 2019-06-30  639563.395575  579606.191218  699036.812451
50 2019-09-30  642965.889201  588776.465622  699109.750360
51 2019-10-31  588909.522694  526684.653657  649324.046695
52 2019-11-30  601743.839491  544976.257798  661283.778264
53 2019-12-31  651807.145881  591291.990957  707717.738956
54 2020-01-31  611197.226769  554107.445172  668458.185675
55 2020-02-29  624800.180606  568342.174973  683742.655505


## predição com LSTM

In [44]:
features = ['MA', 'bedrooms', 'type_unit']
data = df_encoded[features].values

In [45]:
scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = scaler.fit_transform(data)

In [46]:
def create_sequences(data, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:i + sequence_length, 1:])
        y.append(data[i + sequence_length, 0])
    return np.array(X), np.array(y)

In [47]:
sequence_length = 5
X, y = create_sequences(data_scaled, sequence_length)

In [48]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [49]:
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

  super().__init__(**kwargs)


In [50]:
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 105ms/step - loss: 0.1703 - val_loss: 0.0231
Epoch 2/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 0.0396 - val_loss: 0.0027
Epoch 3/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.0248 - val_loss: 0.0182
Epoch 4/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.0208 - val_loss: 0.0226
Epoch 5/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.0203 - val_loss: 0.0099
Epoch 6/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - loss: 0.0211 - val_loss: 0.0075
Epoch 7/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - loss: 0.0158 - val_loss: 0.0130
Epoch 8/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - loss: 0.0156 - val_loss: 0.0110
Epoch 9/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

<keras.src.callbacks.history.History at 0x7834affc2da0>

In [51]:
y_pred_scaled = model.predict(X_test)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 225ms/step


In [52]:
y_pred_combined = np.concatenate((y_pred_scaled, X_test[:, -1, 0].reshape(-1, 1), X_test[:, -1, 1].reshape(-1, 1)), axis=1)
y_pred = scaler.inverse_transform(y_pred_combined)[:, 0]

y_test_combined = np.concatenate((y_test.reshape(-1, 1), X_test[:, -1, 0].reshape(-1, 1), X_test[:, -1, 1].reshape(-1, 1)), axis=1)
y_test_original = scaler.inverse_transform(y_test_combined)[:, 0]

for i in range(5):
    print(f"valor predito: {y_pred[i]:.2f}, valor verdadeiro: {y_test_original[i]:.2f}")

valor predito: 411546.01, valor verdadeiro: 429832.00
valor predito: 411546.01, valor verdadeiro: 431567.00
valor predito: 411546.01, valor verdadeiro: 432730.00
valor predito: 411546.01, valor verdadeiro: 432791.00
valor predito: 411546.01, valor verdadeiro: 432801.00
