In [5]:
import pandas as pd 
import matplotlib.pyplot as plt
import random
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
import numpy as np
import warnings

warnings.filterwarnings("ignore")
random.seed(30)

df = pd.read_csv("data/us_change.csv", index_col=0)
print(df.columns)  # Lista todas as colunas disponíveis
df

Index(['Quarter', 'Consumption', 'Income', 'Production', 'Savings',
       'Unemployment'],
      dtype='object')


Unnamed: 0,Quarter,Consumption,Income,Production,Savings,Unemployment
1,1970 Q1,0.618566,1.044801,-2.452486,5.299014,0.9
2,1970 Q2,0.451984,1.225647,-0.551459,7.789894,0.5
3,1970 Q3,0.872872,1.585154,-0.358652,7.403984,0.5
4,1970 Q4,-0.271848,-0.239545,-2.185691,1.169898,0.7
5,1971 Q1,1.901345,1.975925,1.909764,3.535667,-0.1
...,...,...,...,...,...,...
194,2018 Q2,0.983112,0.661825,1.117424,-2.723974,0.0
195,2018 Q3,0.853181,0.806271,1.256722,-0.085686,-0.3
196,2018 Q4,0.356512,0.695142,0.948148,5.031337,0.2
197,2019 Q1,0.282885,1.100753,-0.488206,9.760287,-0.1


In [6]:
def convert_to_period(quarter):
    """
    Function to convert a string to a pandas period object. 
    Checks if the input is a string in the form '2019 Q1' or already a Period.
    """
    if isinstance(quarter, pd.Period):
        return quarter
    year, quarter_str = quarter.split()
    year = int(year)
    quarter = int(quarter_str[-1])
    return pd.Period(year=year, quarter=quarter, freq='Q')

df['Quarter'] = df['Quarter'].apply(convert_to_period)


In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import r2_score

# Simulando o DataFrame com dados fictícios
np.random.seed(42)

# Normalizando os dados
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df.drop(columns=["Quarter"]))

# Criando a função para gerar as sequências para a LSTM
def create_sequences(data, sequence_length):
    sequences = []
    labels = []
    for i in range(len(data) - sequence_length):
        sequences.append(data[i:i + sequence_length, :-1])
        labels.append(data[i + sequence_length, 0])  # "Consumption" como target
    return np.array(sequences), np.array(labels)

# Definindo os parâmetros
sequence_length = 5  # Número de passos no histórico
X, y = create_sequences(scaled_data, sequence_length)

# Dividindo os dados em treino e teste
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Construindo o modelo LSTM
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(50, return_sequences=False),
    Dense(25, activation="relu"),
    Dense(1)
])

# Compilando o modelo
model.compile(optimizer=Adam(learning_rate=0.001), loss="mean_squared_error")

# Treinando o modelo
history = model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test), verbose=1)

# Avaliando o modelo no conjunto de teste
loss = model.evaluate(X_test, y_test)
loss


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


0.004648719914257526

In [8]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

def mean_absolute_scaled_error(y_true, y_pred, y_train):
    naive_forecast_errors = np.abs(np.diff(y_train))  # Diferença do forecast ingênuo
    mae_naive = np.mean(naive_forecast_errors)  # MAE do modelo ingênuo
    mae_model = mean_absolute_error(y_true, y_pred)  # MAE do modelo
    return mae_model / mae_naive

y_pred = model.predict(X_test)

# aqui estamos revertendo a escala porque a utilizamos MinMaxScaler na LSTM
y_test_original = scaler.inverse_transform(np.hstack([np.zeros((y_test.shape[0], scaled_data.shape[1] - 1)), y_test.reshape(-1, 1)]))[:, -1]
y_pred_original = scaler.inverse_transform(np.hstack([np.zeros((y_pred.shape[0], scaled_data.shape[1] - 1)), y_pred]))[:, -1]


mae_original = mean_absolute_error(y_test_original, y_pred_original)
rmse_original = np.sqrt(mean_squared_error(y_test_original, y_pred_original))
mase_original = mean_absolute_scaled_error(y_test_original, y_pred_original, scaler.inverse_transform(np.hstack([np.zeros((y_train.shape[0], scaled_data.shape[1] - 1)), y_train.reshape(-1, 1)]))[:, -1])
r2 = r2_score(y_test_original, y_pred_original)

print(f"Mean Absolute Error (MAE) na escala original: {mae_original}")
print(f"Root Mean Squared Error (RMSE) na escala original: {rmse_original}")
print(f"Mean Absolute Scaled Error (MASE): {mase_original}")
print(f"R²: {r2}")

Mean Absolute Error (MAE) na escala original: 0.12019681245042772
Root Mean Squared Error (RMSE) na escala original: 0.15681750748948123
Mean Absolute Scaled Error (MASE): 0.3987874387362746
R²: 0.009399558478579206


Aplicando o modelo

Plotando os resultados: