# Importa√ß√£o das depend√™ncias

In [1]:
import os
import json
import platform
import shutil
import numpy as np
import pandas as pd
import torch
import glob
import joblib
from datetime import datetime
import pytorch_lightning as pl
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint, LearningRateMonitor

torch.__version__

'2.9.1'

In [2]:
torch.manual_seed(42)

<torch._C.Generator at 0x111bdc170>

# Configura√ß√£o Inicial

In [3]:
curr_ticker = "VALE3"

In [4]:
class Config:
    def __init__(self, ticker: str):
        # Carregar do JSON
        config_path = f'configs/{ticker}.json'
        with open(config_path, 'r') as f:
            data = json.load(f)
        
        # Atribuir todos os valores do JSON
        for key, value in data.items():
            setattr(self, key, value)
        
        # Valores calculados/din√¢micos/Fixos (n√£o v√£o no JSON)
        self.TICKER = ticker
        self.INPUT_SIZE = len(self.FEATURE_COLS)
        self.CHECKPOINT_DIR = "checkpoints"
        self.MIN_LOSS = np.inf
        self.NUM_WORKERS = 0 if platform.system() == "Darwin" else 2
        self.ACCELERATOR = torch.device(
            "cuda" if torch.cuda.is_available()
            else "mps" if torch.backends.mps.is_available()
            else "cpu"
        )

In [5]:
config = Config(curr_ticker)

print(f"‚úÖ Config carregado: {config.TICKER}")
print(f"   Features: {len(config.FEATURE_COLS)}")
print(f"   Sequence length: {config.SEQUENCE_LENGTH}")
print(f"   Batch size: {config.BATCH_SIZE}")
print(f"   Accelerator: {config.ACCELERATOR}")

‚úÖ Config carregado: VALE3
   Features: 23
   Sequence length: 70
   Batch size: 16
   Accelerator: mps


In [6]:
def drop_columns(df):
    return df.drop(columns=config.DROP_COLUMNS, errors='ignore')

In [7]:
pd.set_option('display.max_columns', None)
full_df = pd.read_parquet(f'data/train/{config.TICKER}.parquet')
full_df.shape

(1928, 30)

In [8]:
full_df = full_df.tail(config.DF_SIZE).reset_index(drop=True)

# Tratamento dos dados.

In [9]:
class SequenceDataset(Dataset):
    """
    Dataset com FEATURES E TARGET normalizados
    """

    def __init__(self, df, sequence_length, feature_cols, target_col, feature_scaler=None, target_scaler=None, fit_scalers=True):
        self.sequence_length = sequence_length
        self.feature_cols = feature_cols
        self.target_col = target_col

        # Normalizar FEATURES
        if fit_scalers:
            self.feature_scaler = MinMaxScaler()
            scaled_features = self.feature_scaler.fit_transform(df[feature_cols])
        else:
            self.feature_scaler = feature_scaler
            scaled_features = self.feature_scaler.transform(df[feature_cols])

        # Normalizar TARGET
        if fit_scalers:
            self.target_scaler = MinMaxScaler()
            scaled_targets = self.target_scaler.fit_transform(df[[target_col]])
        else:
            self.target_scaler = target_scaler
            scaled_targets = self.target_scaler.transform(df[[target_col]])

        self.features = scaled_features
        self.targets = scaled_targets.squeeze()

        # Criar sequ√™ncias
        self.X, self.y = self._create_sequences()

    def _create_sequences(self):
        X, y = [], []
        for i in range(self.sequence_length, len(self.features)):
            X.append(self.features[i - self.sequence_length:i])
            y.append(self.targets[i])
        return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx]), torch.tensor(self.y[idx])

    def get_scalers(self):
        return self.feature_scaler, self.target_scaler


print("Dataset criado com sucesso!")

Dataset criado com sucesso!


In [10]:
class StocksDataModule(pl.LightningDataModule):
    @staticmethod
    def drop_columns(df, columns):
        return df.drop(columns=columns, errors='ignore')

    def __init__(self, df, config):
        super().__init__()
        self.config = config
        self.df = self.drop_columns(df, self.config.DROP_COLUMNS)

        # Scalers a serem ajustados no setup
        self.feature_scaler = None
        self.target_scaler = None

        # Datasets
        self.train_dataset = None
        self.val_dataset = None
        self.test_dataset = None
    
    def setup(self, stage=None):
        config = self.config
        df = self.df

        print(f"\n{'='*60}")
        print(f"PREPARANDO DADOS")
        print(f"{'='*60}")
        
        # Calcular √≠ndices de split (temporal, n√£o aleat√≥rio!)
        n_samples = len(df) - config.SEQUENCE_LENGTH
        train_end = int(n_samples * config.TRAIN_RATIO)
        val_end = int(n_samples * (config.TRAIN_RATIO + config.VAL_RATIO))

        print(f"\nüìä Dataset original: {len(df)} registros")
        print(f"   Ap√≥s criar sequ√™ncias: {n_samples} samples")
        print(f"   Train: {train_end} samples ({config.TRAIN_RATIO*100:.0f}%)")
        print(f"   Val: {val_end - train_end} samples ({config.VAL_RATIO*100:.0f}%)")

        print(f"\nüìä Splits: Train={train_end}, Val={val_end-train_end}, Test={n_samples-val_end}")

        # Criar dataset de TREINO
        self.train_dataset = SequenceDataset(
            df=df,
            sequence_length=config.SEQUENCE_LENGTH,
            feature_cols=config.FEATURE_COLS,
            target_col=config.TARGET_COL,
            feature_scaler=None,
            target_scaler=None,
            fit_scalers=True
        )

        # Salvar os scalers
        self.feature_scaler, self.target_scaler = self.train_dataset.get_scalers()

        # Criar datasets de VAL e TEST (usam os mesmos scalers)
        self.val_dataset = SequenceDataset(
            df=df,
            sequence_length=config.SEQUENCE_LENGTH,
            feature_cols=config.FEATURE_COLS,
            target_col=config.TARGET_COL,
            feature_scaler=self.feature_scaler,
            target_scaler=self.target_scaler,
            fit_scalers=False
        )

        self.test_dataset = SequenceDataset(
            df=df,
            sequence_length=config.SEQUENCE_LENGTH,
            feature_cols=config.FEATURE_COLS,
            target_col=config.TARGET_COL,
            feature_scaler=self.feature_scaler,
            target_scaler=self.target_scaler,
            fit_scalers=False
        )

        # Split temporal
        self.train_dataset.X = self.train_dataset.X[:train_end]
        self.train_dataset.y = self.train_dataset.y[:train_end]
        
        self.val_dataset.X = self.val_dataset.X[train_end:val_end]
        self.val_dataset.y = self.val_dataset.y[train_end:val_end]
        
        self.test_dataset.X = self.test_dataset.X[val_end:]
        self.test_dataset.y = self.test_dataset.y[val_end:]
        
        print(f"‚úÖ Datasets prontos:")
        print(f"   Train: {len(self.train_dataset)}")
        print(f"   Val: {len(self.val_dataset)}")
        print(f"   Test: {len(self.test_dataset)}")
        

    def train_dataloader(self):
        return DataLoader(
            self.train_dataset,
            batch_size=self.config.BATCH_SIZE,
            shuffle=True,
            num_workers=self.config.NUM_WORKERS
        )

    def val_dataloader(self):
        return DataLoader(
            self.val_dataset,
            batch_size=self.config.BATCH_SIZE,
            shuffle=False,
            num_workers=self.config.NUM_WORKERS
        )

    def test_dataloader(self):
        return DataLoader(
            self.test_dataset,
            batch_size=self.config.BATCH_SIZE,
            shuffle=False,
            num_workers=self.config.NUM_WORKERS
        )

# Constru√ß√£o da LSTM

In [11]:
class StocksLSTM(pl.LightningModule):
  def __init__(self, config):
    super().__init__()

    self.config = config

    self.lstm1 = nn.LSTM(input_size=len(config.FEATURE_COLS), hidden_size=config.INIT_HIDDEN_SIZE)
    self.lstm2 = nn.LSTM(config.INIT_HIDDEN_SIZE, config.SECOND_HIDDEN_SIZE)
    self.lstm3 = nn.LSTM(config.SECOND_HIDDEN_SIZE, config.SECOND_HIDDEN_SIZE, dropout=config.DROPOUT_VALUE, num_layers=config.NUM_LAYERS)
    self.dropout = nn.Dropout(p = config.DROPOUT_VALUE)
    if hasattr(config, 'USE_FC_LAYERS') and config.USE_FC_LAYERS:
      self.fc = nn.Sequential(
          nn.Linear(config.SECOND_HIDDEN_SIZE, config.FC_HIDDEN_1),
          nn.ReLU(),
          nn.Dropout(config.FC_DROPOUT),
          nn.Linear(config.FC_HIDDEN_1, config.FC_HIDDEN_2),
          nn.ReLU(),
          nn.Dropout(config.FC_DROPOUT),
          nn.Linear(config.FC_HIDDEN_2, 1)
      )
    else:
      # Manter original (1 camada)
      self.fc = nn.Linear(config.SECOND_HIDDEN_SIZE, 1)

  def forward(self, x):
    #(batch_size, timesteps) -> (timesteps, batch_size, dimensions)
    # x = x.permute(1, 0).unsqueeze(2)

    x = x.permute(1, 0, 2) # -> Mesma coisa da linha debaixo, de forma "contra√≠da"
    x, _ = self.lstm1(x)
    x = self.dropout(x)
    x, _ = self.lstm2(x)
    x = self.dropout(x)
    x, _ = self.lstm3(x)

    x = x[-1]
    x = self.dropout(x)
    x = self.fc(x)

    return x

  
  def training_step(self, batch, batch_idx):
    inputs, targets = batch

    outputs = self(inputs).flatten() # -> self(inputs) -> chama o forward
    loss = nn.functional.mse_loss(outputs, targets)

    self.log("train_loss", loss, prog_bar=True)
    return loss
  
  def validation_step(self, batch, batch_idx):
    inputs, targets = batch

    outputs = self(inputs).flatten()
    loss = nn.functional.mse_loss(outputs, targets)

    self.log("val_loss", loss, prog_bar=True)
    return loss
  
  def test_step(self, batch, batch_idx):
    inputs, targets = batch

    outputs = self(inputs).flatten()
    loss = nn.functional.mse_loss(outputs, targets)

    self.log("test_loss", loss, prog_bar=True)
    return loss
  
  def predict_step(self, batch, batch_idx):
    inputs, _ = batch
    return self(inputs).flatten()

  def configure_optimizers(self):
    optimizer = optim.Adam(
      self.parameters(),
      lr=self.config.LEARNING_RATE,
      weight_decay=self.config.WEIGHT_DECAY
    )

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
      optimizer,
      mode='min',
      factor = self.config.RLR_FACTOR,
      patience=self.config.RLR_PATIENCE
    )

    return {
      'optimizer': optimizer,
      'lr_scheduler': {
        'scheduler': scheduler,
        'monitor': 'val_loss' # M√©trica que o scheduler observa
      }
    }

# Treinamento

In [12]:
# Early Stopping - para se val_los n√£o melhorar
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=config.ES_PATIENCE,
    min_delta=config.ES_MIN_DELTA,
    mode='min',
    verbose=True
)

# Model Checkpoint - Salva o melhor modelo.
checkpoint = ModelCheckpoint(
    dirpath=config.CHECKPOINT_DIR,
    filename='best-{epoch:02d}-{val_loss:.4f}',
    monitor='val_loss',
    mode='min',
    save_top_k=1, # -> Salva o melhor modelo
    verbose=True
)

# LR Monitor - mostra o LR atual no progresso
lr_monitor = LearningRateMonitor(logging_interval='epoch')

print("‚úÖ Callbacks configurados!")

‚úÖ Callbacks configurados!


In [13]:
# Criar o Trainer
trainer = pl.Trainer(
    max_epochs=config.EPOCHS,
    accelerator='auto', # Detecta GPU/MPS/CPU automaticamente
    callbacks=[
        early_stopping,
        checkpoint,
        lr_monitor
    ],
    log_every_n_steps=config.LOG_EVERY_N_STEPS,
    gradient_clip_val=config.GRADIENT_CLIP_VAL,
)

print("‚úÖ Trainer configurado!")

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores


‚úÖ Trainer configurado!


In [14]:
model = StocksLSTM(config)
datamodule = StocksDataModule(df=full_df, config=config)

if os.path.exists(config.CHECKPOINT_DIR):
    shutil.rmtree(config.CHECKPOINT_DIR)
os.makedirs(config.CHECKPOINT_DIR, exist_ok=True)
print("‚úÖ Pasta de checkpoints limpa!")

# Treinar o modelo
trainer.fit(model, datamodule=datamodule)


  | Name    | Type       | Params | Mode  | FLOPs
-------------------------------------------------------
0 | lstm1   | LSTM       | 287 K  | train | 0    
1 | lstm2   | LSTM       | 197 K  | train | 0    
2 | lstm3   | LSTM       | 264 K  | train | 0    
3 | dropout | Dropout    | 0      | train | 0    
4 | fc      | Sequential | 10.4 K | train | 0    
-------------------------------------------------------
759 K     Trainable params
0         Non-trainable params
759 K     Total params
3.040     Total estimated model params size (MB)
12        Modules in train mode
0         Modules in eval mode
0         Total Flops


‚úÖ Pasta de checkpoints limpa!

PREPARANDO DADOS

üìä Dataset original: 1500 registros
   Ap√≥s criar sequ√™ncias: 1430 samples
   Train: 1072 samples (75%)
   Val: 215 samples (15%)

üìä Splits: Train=1072, Val=215, Test=143
‚úÖ Datasets prontos:
   Train: 1072
   Val: 215
   Test: 143
                                                                           

/Users/trebert/src/python/tech-challenge-04/notebooks/.venv/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/Users/trebert/src/python/tech-challenge-04/notebooks/.venv/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Epoch 0: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:02<00:00, 26.96it/s, v_num=15, train_loss=0.0545, val_loss=0.00477]

Metric val_loss improved. New best score: 0.005
Epoch 0, global step 67: 'val_loss' reached 0.00477 (best 0.00477), saving model to '/Users/trebert/src/python/tech-challenge-04/notebooks/checkpoints/best-epoch=00-val_loss=0.0048.ckpt' as top 1


Epoch 1: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 44.63it/s, v_num=15, train_loss=0.0633, val_loss=0.00488]

Epoch 1, global step 134: 'val_loss' was not in top 1


Epoch 2: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 43.81it/s, v_num=15, train_loss=0.011, val_loss=0.00834]  

Epoch 2, global step 201: 'val_loss' was not in top 1


Epoch 3: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 45.32it/s, v_num=15, train_loss=0.00557, val_loss=0.00552]

Epoch 3, global step 268: 'val_loss' was not in top 1


Epoch 4: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 45.21it/s, v_num=15, train_loss=0.0059, val_loss=0.00222] 

Metric val_loss improved by 0.003 >= min_delta = 0.0001. New best score: 0.002
Epoch 4, global step 335: 'val_loss' reached 0.00222 (best 0.00222), saving model to '/Users/trebert/src/python/tech-challenge-04/notebooks/checkpoints/best-epoch=04-val_loss=0.0022.ckpt' as top 1


Epoch 5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 41.98it/s, v_num=15, train_loss=0.0052, val_loss=0.00379] 

Epoch 5, global step 402: 'val_loss' was not in top 1


Epoch 6: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 44.94it/s, v_num=15, train_loss=0.00474, val_loss=0.000768]

Metric val_loss improved by 0.001 >= min_delta = 0.0001. New best score: 0.001
Epoch 6, global step 469: 'val_loss' reached 0.00077 (best 0.00077), saving model to '/Users/trebert/src/python/tech-challenge-04/notebooks/checkpoints/best-epoch=06-val_loss=0.0008.ckpt' as top 1


Epoch 7: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 44.32it/s, v_num=15, train_loss=0.0035, val_loss=0.00161]  

Epoch 7, global step 536: 'val_loss' was not in top 1


Epoch 8: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 45.48it/s, v_num=15, train_loss=0.00549, val_loss=0.00153]

Epoch 8, global step 603: 'val_loss' was not in top 1


Epoch 9: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 44.29it/s, v_num=15, train_loss=0.00253, val_loss=0.00141]

Epoch 9, global step 670: 'val_loss' was not in top 1


Epoch 10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 45.30it/s, v_num=15, train_loss=0.00279, val_loss=0.000611]

Metric val_loss improved by 0.000 >= min_delta = 0.0001. New best score: 0.001
Epoch 10, global step 737: 'val_loss' reached 0.00061 (best 0.00061), saving model to '/Users/trebert/src/python/tech-challenge-04/notebooks/checkpoints/best-epoch=10-val_loss=0.0006.ckpt' as top 1


Epoch 11: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 44.96it/s, v_num=15, train_loss=0.00232, val_loss=0.000651] 

Epoch 11, global step 804: 'val_loss' was not in top 1


Epoch 12: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 43.89it/s, v_num=15, train_loss=0.00402, val_loss=0.000607]

Epoch 12, global step 871: 'val_loss' reached 0.00061 (best 0.00061), saving model to '/Users/trebert/src/python/tech-challenge-04/notebooks/checkpoints/best-epoch=12-val_loss=0.0006.ckpt' as top 1


Epoch 13: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 44.87it/s, v_num=15, train_loss=0.00414, val_loss=0.000583]

Epoch 13, global step 938: 'val_loss' reached 0.00058 (best 0.00058), saving model to '/Users/trebert/src/python/tech-challenge-04/notebooks/checkpoints/best-epoch=13-val_loss=0.0006.ckpt' as top 1


Epoch 14: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 43.36it/s, v_num=15, train_loss=0.00204, val_loss=0.000698] 

Epoch 14, global step 1005: 'val_loss' was not in top 1


Epoch 15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 45.60it/s, v_num=15, train_loss=0.00818, val_loss=0.000759]

Epoch 15, global step 1072: 'val_loss' was not in top 1


Epoch 16: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 45.36it/s, v_num=15, train_loss=0.00419, val_loss=0.00164]  

Epoch 16, global step 1139: 'val_loss' was not in top 1


Epoch 17: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 44.09it/s, v_num=15, train_loss=0.00379, val_loss=0.000631]

Epoch 17, global step 1206: 'val_loss' was not in top 1


Epoch 18: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 45.29it/s, v_num=15, train_loss=0.00513, val_loss=0.00082]  

Epoch 18, global step 1273: 'val_loss' was not in top 1


Epoch 19: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 44.09it/s, v_num=15, train_loss=0.00464, val_loss=0.000655]

Epoch 19, global step 1340: 'val_loss' was not in top 1


Epoch 20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 44.68it/s, v_num=15, train_loss=0.00278, val_loss=0.000534] 

Epoch 20, global step 1407: 'val_loss' reached 0.00053 (best 0.00053), saving model to '/Users/trebert/src/python/tech-challenge-04/notebooks/checkpoints/best-epoch=20-val_loss=0.0005.ckpt' as top 1


Epoch 21: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 43.56it/s, v_num=15, train_loss=0.00129, val_loss=0.000552] 

Epoch 21, global step 1474: 'val_loss' was not in top 1


Epoch 22: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 43.04it/s, v_num=15, train_loss=0.00302, val_loss=0.000682] 

Epoch 22, global step 1541: 'val_loss' was not in top 1


Epoch 23: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 43.28it/s, v_num=15, train_loss=0.00377, val_loss=0.000761] 

Epoch 23, global step 1608: 'val_loss' was not in top 1


Epoch 24: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 44.76it/s, v_num=15, train_loss=0.00298, val_loss=0.00375] 

Epoch 24, global step 1675: 'val_loss' was not in top 1


Epoch 25: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 44.15it/s, v_num=15, train_loss=0.00219, val_loss=0.000539]

Monitored metric val_loss did not improve in the last 15 records. Best score: 0.001. Signaling Trainer to stop.
Epoch 25, global step 1742: 'val_loss' was not in top 1


Epoch 25: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:01<00:00, 44.08it/s, v_num=15, train_loss=0.00219, val_loss=0.000539]


In [15]:
# Encontrar o melhor checkpoint salvo
checkpoints = glob.glob(f"{config.CHECKPOINT_DIR}/*.ckpt")
print(f"üìÅ Checkpoints encontrados:")
for ckpt in checkpoints:
    print(f"   {ckpt}")

# Pegar o mais recente (ou voc√™ pode especificar o caminho)
best_checkpoint = checkpoint.best_model_path  # Pega do callback
print(f"\n‚úÖ Melhor checkpoint: {best_checkpoint}")

# Carregar o melhor modelo
model = StocksLSTM.load_from_checkpoint(best_checkpoint, config=config)
print(f"‚úÖ Modelo carregado do checkpoint!")

üìÅ Checkpoints encontrados:
   checkpoints/best-epoch=20-val_loss=0.0005.ckpt

‚úÖ Melhor checkpoint: /Users/trebert/src/python/tech-challenge-04/notebooks/checkpoints/best-epoch=20-val_loss=0.0005.ckpt
‚úÖ Modelo carregado do checkpoint!


# Validando e Testando o Modelo

In [16]:
# Obten√ß√£o dos dataloaders do datamodule
val_loader = datamodule.val_dataloader()
test_loader = datamodule.test_dataloader()

# Obter os targets
y_val = datamodule.val_dataset.y
y_test = datamodule.test_dataset.y

# Obter os Scalers 
target_scaler = datamodule.target_scaler

print(f"‚úÖ Dados prontos para avalia√ß√£o!")
print(f"   Val samples: {len(y_val)}")
print(f"   Test samples: {len(y_test)}")

‚úÖ Dados prontos para avalia√ß√£o!
   Val samples: 215
   Test samples: 143


In [17]:
trainer.validate(model, datamodule)

/Users/trebert/src/python/tech-challenge-04/notebooks/.venv/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



PREPARANDO DADOS

üìä Dataset original: 1500 registros
   Ap√≥s criar sequ√™ncias: 1430 samples
   Train: 1072 samples (75%)
   Val: 215 samples (15%)

üìä Splits: Train=1072, Val=215, Test=143
‚úÖ Datasets prontos:
   Train: 1072
   Val: 215
   Test: 143
Validation DataLoader 0: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 14/14 [00:00<00:00, 117.75it/s]
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
     Validate metric           DataLoader 0
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î

[{'val_loss': 0.0005340442876331508}]

In [18]:
trainer.test(model, datamodule)

/Users/trebert/src/python/tech-challenge-04/notebooks/.venv/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



PREPARANDO DADOS

üìä Dataset original: 1500 registros
   Ap√≥s criar sequ√™ncias: 1430 samples
   Train: 1072 samples (75%)
   Val: 215 samples (15%)

üìä Splits: Train=1072, Val=215, Test=143
‚úÖ Datasets prontos:
   Train: 1072
   Val: 215
   Test: 143
Testing DataLoader 0: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9/9 [00:00<00:00, 14.24it/s] 
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
       Test metric             DataLoader 0
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚

[{'test_loss': 0.0005022024852223694}]

In [19]:
# Gerar previs√µes usando o padr√£o Lightning
predictions_val = trainer.predict(model, dataloaders=val_loader)
predictions_test = trainer.predict(model, dataloaders=test_loader)

# O trainer.predict() retorna uma LISTA de tensores (um por batch)
# Precisamos concatenar em um √∫nico array
predictions_val = torch.cat(predictions_val).numpy().reshape(-1, 1)
predictions_test = torch.cat(predictions_test).numpy().reshape(-1, 1)

print(f"\n{'='*60}")
print(f"‚úÖ Previs√µes geradas!")
print(f"{'='*60}")
print(f"   Val shape: {predictions_val.shape}")
print(f"   Test shape: {predictions_test.shape}")

/Users/trebert/src/python/tech-challenge-04/notebooks/.venv/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



PREPARANDO DADOS

üìä Dataset original: 1500 registros
   Ap√≥s criar sequ√™ncias: 1430 samples
   Train: 1072 samples (75%)
   Val: 215 samples (15%)

üìä Splits: Train=1072, Val=215, Test=143
‚úÖ Datasets prontos:
   Train: 1072
   Val: 215
   Test: 143
Predicting DataLoader 0: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 14/14 [00:00<00:00, 78.09it/s]

PREPARANDO DADOS

üìä Dataset original: 1500 registros
   Ap√≥s criar sequ√™ncias: 1430 samples
   Train: 1072 samples (75%)
   Val: 215 samples (15%)

üìä Splits: Train=1072, Val=215, Test=143
‚úÖ Datasets prontos:
   Train: 1072
   Val: 215
   Test: 143


/Users/trebert/src/python/tech-challenge-04/notebooks/.venv/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9/9 [00:00<00:00, 99.55it/s] 

‚úÖ Previs√µes geradas!
   Val shape: (215, 1)
   Test shape: (143, 1)


In [20]:
predictions_val_real = target_scaler.inverse_transform(predictions_val)
predictions_test_real = target_scaler.inverse_transform(predictions_test)

# Converter targets para valores reais tamb√©m
y_val_real = target_scaler.inverse_transform(y_val.reshape(-1, 1))
y_test_real = target_scaler.inverse_transform(y_test.reshape(-1, 1))

print(f"\n{'='*60}")
print(f"‚úÖ Valores convertidos para escala real!")
print(f"{'='*60}")
print(f"\nüìä Previs√µes (teste):")
print(f"   Min: R$ {predictions_test_real.min():.2f}")
print(f"   Max: R$ {predictions_test_real.max():.2f}")
print(f"\nüìä Valores reais (teste):")
print(f"   Min: R$ {y_test_real.min():.2f}")
print(f"   Max: R$ {y_test_real.max():.2f}")


‚úÖ Valores convertidos para escala real!

üìä Previs√µes (teste):
   Min: R$ 45.58
   Max: R$ 62.16

üìä Valores reais (teste):
   Min: R$ 48.23
   Max: R$ 63.81


# Avalia√ß√£o do Modelo

In [21]:
def calcular_metricas(y_real, predictions, nome_conjunto):
    """Calcula e exibe m√©tricas para um conjunto de dados."""
    mae = mean_absolute_error(y_real, predictions)
    mse = mean_squared_error(y_real, predictions)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_real, predictions)
    
    # MAPE - Mean Absolute Percentage Error
    # Evita divis√£o por zero usando np.where
    mape = np.mean(np.abs((y_real - predictions) / np.where(y_real != 0, y_real, 1))) * 100
    
    # Acur√°cia Direcional
    mean_y = np.mean(y_real)
    real_direction = np.sign(y_real - mean_y)
    pred_direction = np.sign(predictions - mean_y)
    directional_accuracy = (real_direction == pred_direction).mean()
    
    print(f"\nüìä {nome_conjunto}:")
    print(f"   MAE:  R$ {mae:.3f}")
    print(f"   MSE:  R$ {mse:.3f}")
    print(f"   RMSE: R$ {rmse:3f}")
    print(f"   MAPE: {mape:.2f}%")
    print(f"   R¬≤:   {r2:.3f} ({r2*100:.2f}%)")
    print(f"   Acur√°cia Direcional: {directional_accuracy*100:.2f}%")
    
    return {'mae': mae, 'rmse': rmse, 'mape': mape, 'r2': r2, 'da': directional_accuracy}

In [22]:
print(f"\n{'='*60}")
print(f"AVALIA√á√ÉO DO MODELO - {config.TICKER}")
print(f"{'='*60}")

metricas_val = calcular_metricas(y_val_real, predictions_val_real, "VALIDA√á√ÉO")
metricas_test = calcular_metricas(y_test_real, predictions_test_real, "TESTE")

print(f"\n{'='*60}")

print(f"\n{'='*60}")
print("OBSERVA√á√ïES")
print(f"{'='*60}")
print(f"Se teste ‚âà valida√ß√£o ‚Üí Modelo generaliza bem ‚úÖ")
print(f"Se teste << valida√ß√£o ‚Üí Poss√≠vel overfitting ‚ö†Ô∏è")


AVALIA√á√ÉO DO MODELO - VALE3

üìä VALIDA√á√ÉO:
   MAE:  R$ 0.993
   MSE:  R$ 1.731
   RMSE: R$ 1.315861
   MAPE: 1.86%
   R¬≤:   0.719 (71.89%)
   Acur√°cia Direcional: 76.74%

üìä TESTE:
   MAE:  R$ 0.930
   MSE:  R$ 1.628
   RMSE: R$ 1.276030
   MAPE: 1.73%
   R¬≤:   0.862 (86.24%)
   Acur√°cia Direcional: 95.10%


OBSERVA√á√ïES
Se teste ‚âà valida√ß√£o ‚Üí Modelo generaliza bem ‚úÖ
Se teste << valida√ß√£o ‚Üí Poss√≠vel overfitting ‚ö†Ô∏è


## Graf√≠cos de Valida√ß√£o e Teste

In [None]:
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Gr√°fico 1: Previs√µes vs Reais (Teste) - Linha do tempo
ax1 = axes[0, 0]
ax1.plot(y_test_real, label='Real', color='blue', alpha=0.7)
ax1.plot(predictions_test_real, label='Previs√£o', color='red', alpha=0.7)
ax1.set_title(f'Previs√µes vs Reais - Teste ({config.TICKER})')
ax1.set_xlabel('Amostras')
ax1.set_ylabel('Pre√ßo (R$)')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Gr√°fico 2: Previs√µes vs Reais (Valida√ß√£o) - Linha do tempo
ax2 = axes[0, 1]
ax2.plot(y_val_real, label='Real', color='blue', alpha=0.7)
ax2.plot(predictions_val_real, label='Previs√£o', color='red', alpha=0.7)
ax2.set_title(f'Previs√µes vs Reais - Valida√ß√£o ({config.TICKER})')
ax2.set_xlabel('Amostras')
ax2.set_ylabel('Pre√ßo (R$)')
ax2.legend()
ax2.grid(True, alpha=0.3)

# Gr√°fico 3: Scatter plot (Teste)
ax3 = axes[1, 0]
ax3.scatter(y_test_real, predictions_test_real, alpha=0.5, color='green')
min_val = min(y_test_real.min(), predictions_test_real.min())
max_val = max(y_test_real.max(), predictions_test_real.max())
ax3.plot([min_val, max_val], [min_val, max_val], 'k--', label='Ideal (y=x)')
ax3.set_title('Dispers√£o - Teste')
ax3.set_xlabel('Valor Real (R$)')
ax3.set_ylabel('Previs√£o (R$)')
ax3.legend()
ax3.grid(True, alpha=0.3)

# Gr√°fico 4: Scatter plot (Valida√ß√£o)
ax4 = axes[1, 1]
ax4.scatter(y_val_real, predictions_val_real, alpha=0.5, color='orange')
min_val = min(y_val_real.min(), predictions_val_real.min())
max_val = max(y_val_real.max(), predictions_val_real.max())
ax4.plot([min_val, max_val], [min_val, max_val], 'k--', label='Ideal (y=x)')
ax4.set_title('Dispers√£o - Valida√ß√£o')
ax4.set_xlabel('Valor Real (R$)')
ax4.set_ylabel('Previs√£o (R$)')
ax4.legend()
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Analise da baixa Performance do modelo

In [None]:
# ============================================================
# DIAGN√ìSTICO: Por que R¬≤ est√° negativo?
# ============================================================

print("üìä AN√ÅLISE DO SCALER (TARGET)")
print("="*60)

# Range que o scaler aprendeu no TREINO
train_min = target_scaler.data_min_[0]
train_max = target_scaler.data_max_[0]
print(f"\nüîµ Range do TREINO (usado para normalizar):")
print(f"   Min: R$ {train_min:.2f}")
print(f"   Max: R$ {train_max:.2f}")

print(f"\nüü† Range da VALIDA√á√ÉO (real):")
print(f"   Min: R$ {y_val_real.min():.2f}")
print(f"   Max: R$ {y_val_real.max():.2f}")

print(f"\nüî¥ Range do TESTE (real):")
print(f"   Min: R$ {y_test_real.min():.2f}")
print(f"   Max: R$ {y_test_real.max():.2f}")

# Verificar se est√° fora do range
val_fora = (y_val_real.min() < train_min) or (y_val_real.max() > train_max)
test_fora = (y_test_real.min() < train_min) or (y_test_real.max() > train_max)

print(f"\n‚ö†Ô∏è  PROBLEMA DETECTADO:")
print(f"   Valida√ß√£o fora do range de treino: {'SIM ‚ùå' if val_fora else 'N√ÉO ‚úÖ'}")
print(f"   Teste fora do range de treino: {'SIM ‚ùå' if test_fora else 'N√ÉO ‚úÖ'}")

# Verificar output do modelo
print(f"\nüìà Output do modelo (normalizado 0-1):")
print(f"   Previs√µes Val - Min: {predictions_val.min():.4f}, Max: {predictions_val.max():.4f}")
print(f"   Previs√µes Test - Min: {predictions_test.min():.4f}, Max: {predictions_test.max():.4f}")

# O que o sigmoid limita
print(f"\nüîí Sigmoid limita output entre 0 e 1")
print(f"   Se target normalizado > 1, modelo NUNCA vai acertar!")

# Calcular target normalizado
y_test_normalized = datamodule.test_dataset.y
print(f"\nüìä Target TESTE (normalizado):")
print(f"   Min: {y_test_normalized.min():.4f}")
print(f"   Max: {y_test_normalized.max():.4f}")

if y_test_normalized.max() > 1 or y_test_normalized.min() < 0:
    print(f"\n‚ùå PROBLEMA: Target de teste est√° FORA do range 0-1!")
    print(f"   O scaler foi ajustado no treino, mas teste tem valores diferentes.")
    print(f"   Sigmoid n√£o consegue produzir valores > 1 ou < 0.")


# ============================================================
# DEBUG: VERIFICAR DISTRIBUI√á√ÉO DOS DADOS
# ============================================================

print("\nüìä AN√ÅLISE DE DISTRIBUI√á√ÉO")
print("="*60)

# Range do TARGET no treino vs teste
train_min = datamodule.train_dataset.targets.min()
train_max = datamodule.train_dataset.targets.max()
test_min = datamodule.test_dataset.targets.min()
test_max = datamodule.test_dataset.targets.max()

print(f"\nüîµ TARGET NORMALIZADO (0-1):")
print(f"   Train: [{train_min:.4f}, {train_max:.4f}]")
print(f"   Test:  [{test_min:.4f}, {test_max:.4f}]")

if test_min < 0 or test_max > 1:
    print(f"\n‚ùå PROBLEMA: Teste est√° FORA do range 0-1!")
    print(f"   Scaler foi ajustado no treino, mas teste tem valores diferentes.")

# Range do TARGET em valores reais
train_real_min = datamodule.target_scaler.data_min_[0]
train_real_max = datamodule.target_scaler.data_max_[0]
test_real_min = y_test_real.min()
test_real_max = y_test_real.max()

print(f"\nüü† TARGET REAL (R$):")
print(f"   Train: [R$ {train_real_min:.2f}, R$ {train_real_max:.2f}]")
print(f"   Test:  [R$ {test_real_min:.2f}, R$ {test_real_max:.2f}]")

if test_real_min < train_real_min or test_real_max > train_real_max:
    print(f"\n‚ö†Ô∏è  ALERTA: Teste tem valores FORA do range de treino!")
    print(f"   Diferen√ßa Min: R$ {test_real_min - train_real_min:.2f}")
    print(f"   Diferen√ßa Max: R$ {test_real_max - train_real_max:.2f}")


In [None]:
len(full_df)