In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import mlflow
import mlflow.pytorch


#### Verificação de valores e criação de features

In [2]:
df_acao_bruto = pd.read_csv('base_historica\\AAPL_7anos.csv')
df_acao_bruto['Date'] = pd.to_datetime(df_acao_bruto['Date'])
df_acao_bruto.info()

df_acao = df_acao_bruto[['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]
df_acao['Weekday'] = df_acao_bruto['Date'].dt.weekday
df_acao['Month'] = df_acao_bruto['Date'].dt.month
df_acao['Year'] = df_acao_bruto['Date'].dt.year
df_acao['day_sin'] = np.sin(2 * np.pi * df_acao_bruto['Date'].dt.dayofyear / 365)
df_acao['day_cos'] = np.cos(2 * np.pi * df_acao_bruto['Date'].dt.dayofyear / 365)

df_acao.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1760 entries, 0 to 1759
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       1760 non-null   datetime64[ns]
 1   Open       1760 non-null   float64       
 2   High       1760 non-null   float64       
 3   Low        1760 non-null   float64       
 4   Close      1760 non-null   float64       
 5   Adj Close  1760 non-null   float64       
 6   Volume     1760 non-null   int64         
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 96.4 KB


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Weekday,Month,Year,day_sin,day_cos
0,2017-01-03,28.950001,29.0825,28.690001,29.0375,26.89196,115127600,1,1,2017,0.05162,0.998667
1,2017-01-04,28.9625,29.127501,28.9375,29.004999,26.861864,84472400,2,1,2017,0.068802,0.99763
2,2017-01-05,28.98,29.215,28.952499,29.1525,26.998463,88774400,3,1,2017,0.085965,0.996298
3,2017-01-06,29.195,29.540001,29.1175,29.477501,27.299456,127007600,4,1,2017,0.103102,0.994671
4,2017-01-09,29.487499,29.8575,29.485001,29.747499,27.549498,134247600,0,1,2017,0.154309,0.988023


#### Normalização dos dados 

In [3]:
cols_norm = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Weekday', 'Month', 'Year']

scaler = MinMaxScaler(feature_range=(-1, 1))
df_acao[cols_norm] = scaler.fit_transform(df_acao[cols_norm])

df_acao.head()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Weekday,Month,Year,day_sin,day_cos
0,2017-01-03,-1.0,-1.0,-1.0,-0.999616,-0.999647,-0.570271,-0.5,-1.0,-1.0,0.05162,0.998667
1,2017-01-04,-0.999852,-0.999472,-0.997059,-1.0,-1.0,-0.714908,0.0,-1.0,-1.0,0.068802,0.99763
2,2017-01-05,-0.999645,-0.998446,-0.996881,-0.998256,-0.998396,-0.69461,0.5,-1.0,-1.0,0.085965,0.996298
3,2017-01-06,-0.997102,-0.994635,-0.99492,-0.994412,-0.99486,-0.514219,1.0,-1.0,-1.0,0.103102,0.994671
4,2017-01-09,-0.993642,-0.990911,-0.990553,-0.991218,-0.991924,-0.480059,-1.0,-1.0,-1.0,0.154309,0.988023


#### Salvar arquivo tratado

In [4]:
df_acao.to_csv(f"AAPL_7_years_data_norm.csv", index=False)

# Modelo 

In [5]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm1 = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, output_size)
        self.lstm2 = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        
        # Forward propagate LSTM
        out, _ = self.lstm1(x, (h0, c0))
        out = self.fc1(out[:, -1, :])
        out, _ = self.lstm2(x, (h0, c0))
        out = self.fc2(out[:, -1, :])
        return out



def evaluate_model2(model, criterion):
    model.eval()
    test_loss = 0.0
    with torch.no_grad():
        for sequences, labels in test_loader:
            sequences, labels = sequences.to(device), labels.to(device)
            outputs = model(sequences)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

    average_test_loss = test_loss / len(test_loader)
    print(f"Test Loss: {average_test_loss:.4f}")
    mlflow.log_metric("test_loss", average_test_loss)
    


def train_model():
    model = LSTM(input_size, hidden_size, num_layers, output_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()
    mlflow.set_experiment("LSTM Artificial Data Regression")
    with mlflow.start_run():
        # Log model parameters
        mlflow.log_params({
        "input_size": input_size,
        "hidden_size": hidden_size,
        "num_layers": num_layers,
        "output_size": output_size,
        "sequence_length": sequence_length,
        "batch_size": batch_size,
        "learning_rate": learning_rate,
        "num_epochs": num_epochs
        })

        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0
            
            for i, (sequences, labels) in enumerate(train_loader):
                sequences, labels = sequences.to(device), labels.to(device)

                # Forward pass
                outputs = model(sequences)
                loss = criterion(outputs, labels)

                # Backward pass and optimization
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                
                # Log metrics every 100 batches
                if i % 100 == 0:
                    print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}")
                    mlflow.log_metric("train_loss", running_loss / (i+1), step=epoch * len(train_loader) + i)

        # Save the model
        example_input = torch.randn(1, sequence_length, input_size).to(device)
        example_input_np = example_input.cpu().numpy()
        mlflow.pytorch.log_model(model, "lstm_artificial_data_model", input_example=example_input_np)
        # evitar warning
    
    return model


In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

data = df_acao[['Open', 'High', 'Low', 'Close', 'Volume', 'Weekday', 'Month', 'Year', 'day_sin', 'day_cos']].values
targets = df_acao[['Close']].values

input_size = data.shape[1]  # Número de recursos
hidden_size = 50
num_layers = 2
output_size = 1
# num_epochs = 50 reduzido de 50 para 30. Já na época 30 a loss se manteve.
num_epochs = 30
batch_size = 64
learning_rate = 0.001
sequence_length = 20

pre_X, pre_y = [], []
for i in range(len(data) - sequence_length):
    pre_X.append(data[i:i+sequence_length])
    pre_y.append(targets[i+sequence_length])

X = torch.tensor(np.array(pre_X), dtype=torch.float32)
y = torch.tensor(np.array(pre_y), dtype=torch.float32)

train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, shuffle=False)

train_dataset = TensorDataset(train_X, train_y)
test_dataset = TensorDataset(test_X, test_y)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

model = train_model()


Epoch [1/30], Step [1/22], Loss: 0.3203
Epoch [2/30], Step [1/22], Loss: 0.0391
Epoch [3/30], Step [1/22], Loss: 0.0055
Epoch [4/30], Step [1/22], Loss: 0.0020
Epoch [5/30], Step [1/22], Loss: 0.0028
Epoch [6/30], Step [1/22], Loss: 0.0013
Epoch [7/30], Step [1/22], Loss: 0.0024
Epoch [8/30], Step [1/22], Loss: 0.0017
Epoch [9/30], Step [1/22], Loss: 0.0017
Epoch [10/30], Step [1/22], Loss: 0.0017
Epoch [11/30], Step [1/22], Loss: 0.0016
Epoch [12/30], Step [1/22], Loss: 0.0024
Epoch [13/30], Step [1/22], Loss: 0.0010
Epoch [14/30], Step [1/22], Loss: 0.0010
Epoch [15/30], Step [1/22], Loss: 0.0010
Epoch [16/30], Step [1/22], Loss: 0.0004
Epoch [17/30], Step [1/22], Loss: 0.0012
Epoch [18/30], Step [1/22], Loss: 0.0007
Epoch [19/30], Step [1/22], Loss: 0.0011
Epoch [20/30], Step [1/22], Loss: 0.0012
Epoch [21/30], Step [1/22], Loss: 0.0010
Epoch [22/30], Step [1/22], Loss: 0.0008
Epoch [23/30], Step [1/22], Loss: 0.0014
Epoch [24/30], Step [1/22], Loss: 0.0014
Epoch [25/30], Step [1/22

#### Validação do modelo

In [7]:
evaluate_model2(model, nn.MSELoss())

Test Loss: 0.0054
