In [1]:
import sys, os
from pathlib import Path

# Resolve the project root so that `src.*` imports work whether the kernel was
# started in the project root or inside its `notebooks/` sub-folder.
ROOT = Path.cwd()
if ROOT.name.lower() == "notebooks":
    ROOT = ROOT.parent

# Put the root first on sys.path. Any earlier copy is dropped before inserting
# so that re-running this cell does not pile up duplicate entries, while the
# root still keeps top import priority.
_root_str = str(ROOT)
sys.path[:] = [entry for entry in sys.path if entry != _root_str]
sys.path.insert(0, _root_str)
print("Project root on sys.path:", ROOT)

Project root on sys.path: C:\Users\saita\Yahoo Stock Forecasting


In [2]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

from src.config import (
    LSTM_LOOKBACK,
    LSTM_HIDDEN_SIZE,
    LSTM_NUM_LAYERS,
    LSTM_DROPOUT,
    LSTM_BATCH_SIZE,
    LSTM_NUM_EPOCHS,
    LSTM_LR,
    MODEL_DIR,
)
from src.models_lstm import LSTMRegressor

In [3]:
# Load the pre-built LSTM arrays. `np.load` on an .npz archive returns a lazy
# NpzFile that keeps the file open, so read everything inside a `with` block
# (which closes the archive) and keep `data` as a plain dict of arrays.
with np.load("lstm_data.npz") as npz:
    data = {key: npz[key] for key in npz.files}

X_train = data["X_train"]
y_train = data["y_train"]
X_val   = data["X_val"]
y_val   = data["y_val"]
X_test  = data["X_test"]
y_test  = data["y_test"]

# Fail early, with a readable message, if a split is internally inconsistent
# or if the splits disagree on the number of features fed to the model.
for _name, _X, _y in (
    ("train", X_train, y_train),
    ("val", X_val, y_val),
    ("test", X_test, y_test),
):
    assert len(_X) == len(_y), f"{_name}: {len(_X)} inputs vs {len(_y)} targets"
    assert _X.shape[-1] == X_train.shape[-1], f"{_name}: feature count differs from train"

print("X_train:", X_train.shape)
print("y_train:", y_train.shape)
print("X_val  :", X_val.shape)
print("y_val  :", y_val.shape)
print("X_test :", X_test.shape)
print("y_test :", y_test.shape)

# Axis 1 is reported as the sequence length and the last axis as the feature
# count; `num_features` is later passed to the model constructor.
num_features = X_train.shape[-1]
seq_len = X_train.shape[1]
print("Sequence length:", seq_len, "| Num features:", num_features)

X_train: (1217, 60, 6)
y_train: (1217, 1)
X_val  : (213, 60, 6)
y_val  : (213, 1)
X_test : (215, 60, 6)
y_test : (215, 1)
Sequence length: 60 | Num features: 6


In [4]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)


def _float32_dataset(features, targets):
    """Wrap an (inputs, targets) array pair as a TensorDataset of float32 tensors."""
    tensors = [torch.tensor(array, dtype=torch.float32) for array in (features, targets)]
    return TensorDataset(*tensors)


train_ds = _float32_dataset(X_train, y_train)
val_ds = _float32_dataset(X_val, y_val)

# Both loaders share the batch size; only the training loader shuffles.
_loader_kwargs = {"batch_size": LSTM_BATCH_SIZE}
train_dl = DataLoader(train_ds, shuffle=True, **_loader_kwargs)
val_dl = DataLoader(val_ds, shuffle=False, **_loader_kwargs)

Using device: cpu


In [5]:
# Seed PyTorch's global RNG before the model is built so that weight
# initialisation, and the shuffling/dropout of a training run started right
# after this cell, are repeatable across "Restart & Run All".
# NOTE(review): bit-exact reproducibility on CUDA may need additional
# determinism settings — confirm if GPU runs must match exactly.
SEED = 42
torch.manual_seed(SEED)

model = LSTMRegressor(
    num_features=num_features,
    hidden_size=LSTM_HIDDEN_SIZE,
    num_layers=LSTM_NUM_LAYERS,
    dropout=LSTM_DROPOUT,
).to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LSTM_LR)

# Best validation loss seen so far; the training cell lowers it whenever it
# saves a new checkpoint to `best_path`.
best_val_loss = float("inf")

# Make sure the checkpoint directory exists so the first `torch.save` in the
# training loop cannot fail on a missing folder.
os.makedirs(MODEL_DIR, exist_ok=True)
best_path = os.path.join(MODEL_DIR, "lstm_model_best.pth")

PATIENCE = 7        # consecutive non-improving epochs tolerated before stopping
MIN_DELTA = 1e-4    # val loss must drop by more than this to count as an improvement

print(model)
print("Model will be saved to:", best_path)
print(f"Early stopping: patience={PATIENCE}, min_delta={MIN_DELTA}")

LSTMRegressor(
  (lstm): LSTM(6, 64, num_layers=2, batch_first=True, dropout=0.2)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)
Model will be saved to: C:\Users\saita\Yahoo Stock Forecasting\models\lstm_model_best.pth
Early stopping: patience=7, min_delta=0.0001


In [6]:
# Train with early stopping on the validation loss. The best checkpoint is
# written to `best_path` during training and reloaded into `model` once the
# loop ends, so later cells use the best weights rather than the last epoch's.
epochs_no_improve = 0

for epoch in range(1, LSTM_NUM_EPOCHS + 1):
    # ---- Training ----
    model.train()
    train_loss = 0.0

    for xb, yb in train_dl:
        xb = xb.to(device)
        yb = yb.to(device)

        optimizer.zero_grad()
        preds = model(xb)          # (batch, 1)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()

        # The criterion returns a per-batch mean; weight it by the batch size
        # so the division below yields a true per-sample mean even when the
        # final batch is smaller than the others.
        train_loss += loss.item() * xb.size(0)

    train_loss /= len(train_dl.dataset)

    # ---- Validation ----
    model.eval()
    val_loss = 0.0

    with torch.no_grad():
        for xb, yb in val_dl:
            xb = xb.to(device)
            yb = yb.to(device)

            preds = model(xb)
            loss = criterion(preds, yb)

            val_loss += loss.item() * xb.size(0)

    val_loss /= len(val_dl.dataset)

    print(f"Epoch {epoch:03d} | train_loss={train_loss:.6f} | val_loss={val_loss:.6f}")

    # ---- Early stopping logic ----
    # An epoch only counts as an improvement (and is only checkpointed) when
    # it beats the best validation loss by more than MIN_DELTA.
    if val_loss + MIN_DELTA < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0
        torch.save(model.state_dict(), best_path)
        print(f"  -> New best model saved (val_loss={val_loss:.6f})")
    else:
        epochs_no_improve += 1
        print(f"  -> No improvement for {epochs_no_improve} epoch(s)")

        if epochs_no_improve >= PATIENCE:
            print(f"\nEarly stopping triggered after {epoch} epochs.")
            break

# ---- Restore the best checkpoint ----
# At this point `model` still holds the weights of the last epoch that ran,
# which early stopping has just judged worse than the best one. A finite
# `best_val_loss` means a checkpoint was written alongside it, so reload it.
if best_val_loss < float("inf"):
    model.load_state_dict(torch.load(best_path, map_location=device))
    model.eval()
    print(f"Restored best weights (val_loss={best_val_loss:.6f}) from: {best_path}")

Epoch 001 | train_loss=0.190906 | val_loss=0.125987
  -> New best model saved (val_loss=0.125987)
Epoch 002 | train_loss=0.023672 | val_loss=0.028719
  -> New best model saved (val_loss=0.028719)
Epoch 003 | train_loss=0.006938 | val_loss=0.003209
  -> New best model saved (val_loss=0.003209)
Epoch 004 | train_loss=0.002119 | val_loss=0.000996
  -> New best model saved (val_loss=0.000996)
Epoch 005 | train_loss=0.002315 | val_loss=0.001970
  -> No improvement for 1 epoch(s)
Epoch 006 | train_loss=0.001548 | val_loss=0.001831
  -> No improvement for 2 epoch(s)
Epoch 007 | train_loss=0.001422 | val_loss=0.002170
  -> No improvement for 3 epoch(s)
Epoch 008 | train_loss=0.001404 | val_loss=0.002392
  -> No improvement for 4 epoch(s)
Epoch 009 | train_loss=0.001327 | val_loss=0.003132
  -> No improvement for 5 epoch(s)
Epoch 010 | train_loss=0.001370 | val_loss=0.006955
  -> No improvement for 6 epoch(s)
Epoch 011 | train_loss=0.001287 | val_loss=0.003963
  -> No improvement for 7 epoch(s)

In [8]:
# Shape sanity check: pull a single batch from the training loader and run it
# through the network in eval mode, without recording gradients.
xb, yb = next(iter(train_dl))
xb = xb.to(device)

model.eval()
with torch.no_grad():
    preds = model(xb)

print("Input batch shape :", xb.shape)
print("Preds batch shape :", preds.shape)

Input batch shape : torch.Size([64, 60, 6])
Preds batch shape : torch.Size([64, 1])
