In [1]:
import sys, os
from pathlib import Path

ROOT = Path.cwd()
if ROOT.name.lower() == "notebooks":  
    ROOT = ROOT.parent

sys.path.insert(0, str(ROOT))
print("Project root on sys.path:", ROOT)

Project root on sys.path: C:\Users\saita\Yahoo Stock Forecasting


In [2]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

from src.models_tft import TimeSeriesTransformer
from src.config import MODEL_DIR, LSTM_BATCH_SIZE, LSTM_NUM_EPOCHS, LSTM_LR


In [3]:
data = np.load("lstm_data.npz")

X_train = data["X_train"]  # (N_train, seq_len, num_features)
y_train = data["y_train"]  # (N_train, 1)
X_val   = data["X_val"]
y_val   = data["y_val"]
X_test  = data["X_test"]
y_test  = data["y_test"]

print("X_train:", X_train.shape)
print("y_train:", y_train.shape)
print("X_val  :", X_val.shape)
print("y_val  :", y_val.shape)
print("X_test :", X_test.shape)
print("y_test :", y_test.shape)

num_features = X_train.shape[-1]
seq_len = X_train.shape[1]
print("Sequence length:", seq_len, "| Num features:", num_features)

X_train: (1217, 60, 6)
y_train: (1217, 1)
X_val  : (213, 60, 6)
y_val  : (213, 1)
X_test : (215, 60, 6)
y_test : (215, 1)
Sequence length: 60 | Num features: 6


In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

train_ds = TensorDataset(
    torch.tensor(X_train, dtype=torch.float32),
    torch.tensor(y_train, dtype=torch.float32),
)

val_ds = TensorDataset(
    torch.tensor(X_val, dtype=torch.float32),
    torch.tensor(y_val, dtype=torch.float32),
)

train_dl = DataLoader(train_ds, batch_size=LSTM_BATCH_SIZE, shuffle=True)
val_dl   = DataLoader(val_ds,   batch_size=LSTM_BATCH_SIZE, shuffle=False)

Using device: cpu


In [5]:
# Transformer hyperparameters â€“ you can tweak
D_MODEL = 64
NHEAD = 4
NUM_LAYERS = 2
DIM_FEEDFORWARD = 128
DROPOUT = 0.1

model = TimeSeriesTransformer(
    num_features=num_features,
    d_model=D_MODEL,
    nhead=NHEAD,
    num_layers=NUM_LAYERS,
    dim_feedforward=DIM_FEEDFORWARD,
    dropout=DROPOUT,
).to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LSTM_LR)

best_val_loss = float("inf")
best_path = os.path.join(MODEL_DIR, "tft_model.pth")

# Early stopping params
PATIENCE = 7
MIN_DELTA = 1e-4

print(model)
print("Model will be saved to:", best_path)
print(f"Early stopping: patience={PATIENCE}, min_delta={MIN_DELTA}")

TimeSeriesTransformer(
  (input_proj): Linear(in_features=6, out_features=64, bias=True)
  (pos_encoder): PositionalEncoding()
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-1): 2 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
        )
        (linear1): Linear(in_features=64, out_features=128, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=128, out_features=64, bias=True)
        (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (fc_out): Linear(in_features=64, out_features=1, bias=True)
)
Model will be saved to: C:\Users\saita\Yahoo Stock Forecasting\models\tft_model.pth
Early stopping

In [6]:
epochs_no_improve = 0

for epoch in range(1, LSTM_NUM_EPOCHS + 1):
    # ---- Training ----
    model.train()
    train_loss = 0.0

    for xb, yb in train_dl:
        xb = xb.to(device)
        yb = yb.to(device)

        optimizer.zero_grad()
        preds = model(xb)          # (batch, 1)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * xb.size(0)

    train_loss /= len(train_dl.dataset)

    # ---- Validation ----
    model.eval()
    val_loss = 0.0

    with torch.no_grad():
        for xb, yb in val_dl:
            xb = xb.to(device)
            yb = yb.to(device)

            preds = model(xb)
            loss = criterion(preds, yb)

            val_loss += loss.item() * xb.size(0)

    val_loss /= len(val_dl.dataset)

    print(f"Epoch {epoch:03d} | train_loss={train_loss:.6f} | val_loss={val_loss:.6f}")

    # ---- Early stopping logic ----
    if val_loss + MIN_DELTA < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0
        torch.save(model.state_dict(), best_path)
        print(f"  -> New best model saved (val_loss={val_loss:.6f})")
    else:
        epochs_no_improve += 1
        print(f"  -> No improvement for {epochs_no_improve} epoch(s)")

        if epochs_no_improve >= PATIENCE:
            print(f"\nEarly stopping triggered after {epoch} epochs.")
            break

Epoch 001 | train_loss=0.081418 | val_loss=0.091790
  -> New best model saved (val_loss=0.091790)
Epoch 002 | train_loss=0.014715 | val_loss=0.012822
  -> New best model saved (val_loss=0.012822)
Epoch 003 | train_loss=0.008129 | val_loss=0.015312
  -> No improvement for 1 epoch(s)
Epoch 004 | train_loss=0.007851 | val_loss=0.015960
  -> No improvement for 2 epoch(s)
Epoch 005 | train_loss=0.006058 | val_loss=0.021678
  -> No improvement for 3 epoch(s)
Epoch 006 | train_loss=0.005742 | val_loss=0.004860
  -> New best model saved (val_loss=0.004860)
Epoch 007 | train_loss=0.005734 | val_loss=0.009761
  -> No improvement for 1 epoch(s)
Epoch 008 | train_loss=0.003503 | val_loss=0.010600
  -> No improvement for 2 epoch(s)
Epoch 009 | train_loss=0.002979 | val_loss=0.007575
  -> No improvement for 3 epoch(s)
Epoch 010 | train_loss=0.002621 | val_loss=0.006355
  -> No improvement for 4 epoch(s)
Epoch 011 | train_loss=0.002788 | val_loss=0.003180
  -> New best model saved (val_loss=0.003180)

In [7]:
model.eval()
with torch.no_grad():
    xb, yb = next(iter(train_dl))
    xb = xb.to(device)
    preds = model(xb)
    print("Input batch shape :", xb.shape)      
    print("Preds batch shape :", preds.shape)  

Input batch shape : torch.Size([64, 60, 6])
Preds batch shape : torch.Size([64, 1])
