# Notebook 06 – GRU on Daily + 4h Intraday Features (SPY)

Goal: Train a GRU sequence model on daily SPY data augmented with 4-hour intraday
summary features, and compare performance to the price-only GRU and MLP models.


In [1]:
import math
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler

ROOT = Path("..").resolve()
DATA_PROC = ROOT / "data" / "processed"

print(ROOT, DATA_PROC)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


C:\Users\KDP only\Documents\ANN_Final_Project\spy-ann C:\Users\KDP only\Documents\ANN_Final_Project\spy-ann\data\processed
Using device: cuda


In [2]:
DATA_PATH = DATA_PROC / "daily_with_4h.parquet"

df = pd.read_parquet(DATA_PATH)
df["date"] = pd.to_datetime(df["date"])

print("=== DATA PREVIEW ===")
display(df.head())
print("\nShape:", df.shape)
print("Date range:", df["date"].min(), "→", df["date"].max())
print("\nLabel distribution (proportion):")
print(df["label_up"].value_counts(normalize=True))


=== DATA PREVIEW ===


Unnamed: 0,date,Close,High,Low,Open,Volume,ret_1d,log_ret_1d,ma_close_5,ma_close_20,...,label_up,intraday_mean_ret_4h,intraday_std_ret_4h,intraday_n_up_4h,intraday_n_candles_4h,intraday_high_max,intraday_low_min,intraday_frac_up_4h,intraday_range_4h,intraday_last_ret_4h
0,2023-12-18,461.944824,462.93338,459.909019,460.975862,70375300,0.005625,0.005609,458.495721,448.292444,...,1,-0.00109,,1.0,2.0,472.980011,470.799988,0.5,2.180023,-0.00109
1,2023-12-19,464.753906,464.832224,462.414688,462.492975,55761800,0.006081,0.006063,460.964941,449.389354,...,0,0.002973,0.001668,2.0,2.0,474.920013,472.450012,1.0,2.470001,0.001793
2,2023-12-20,458.31366,465.791362,457.883003,463.892568,102921000,-0.013857,-0.013954,460.898364,450.212508,...,1,-0.00719,0.012402,1.0,2.0,475.894989,467.820007,0.5,8.074982,-0.01596
3,2023-12-21,462.659332,462.933384,458.881307,461.318408,86667500,0.009482,0.009437,461.406531,451.16765,...,1,0.00502,0.000708,1.0,2.0,472.975006,468.839996,0.5,4.13501,0.00552
4,2023-12-22,463.589111,465.282375,461.68055,463.794642,67160400,0.00201,0.002008,462.252167,452.155632,...,1,0.000988,0.004317,1.0,2.0,475.380005,471.700012,0.5,3.679993,-0.002065



Shape: (260, 24)
Date range: 2023-12-18 00:00:00 → 2024-12-30 00:00:00

Label distribution (proportion):
label_up
1    0.592308
0    0.407692
Name: proportion, dtype: float64


In [3]:
drop_cols = ["date", "future_price", "future_ret_1d", "label_up"]
feature_cols = [c for c in df.columns if c not in drop_cols]

print("\nFeature columns:")
print(feature_cols)
print("Number of features:", len(feature_cols))

X_all = df[feature_cols].values.astype("float32")
y_all = df["label_up"].values.astype("float32")



Feature columns:
['Close', 'High', 'Low', 'Open', 'Volume', 'ret_1d', 'log_ret_1d', 'ma_close_5', 'ma_close_20', 'vol_5', 'vol_20', 'intraday_mean_ret_4h', 'intraday_std_ret_4h', 'intraday_n_up_4h', 'intraday_n_candles_4h', 'intraday_high_max', 'intraday_low_min', 'intraday_frac_up_4h', 'intraday_range_4h', 'intraday_last_ret_4h']
Number of features: 20


In [4]:
n = len(df)
train_end = int(n * 0.7)
val_end   = int(n * 0.85)

train_idx = np.arange(0, train_end)
val_idx   = np.arange(train_end, val_end)
test_idx  = np.arange(val_end, n)

print("Indices:")
print("Train:", train_idx[0], "→", train_idx[-1], "| count:", len(train_idx))
print("Val  :", val_idx[0],   "→", val_idx[-1],   "| count:", len(val_idx))
print("Test :", test_idx[0],  "→", test_idx[-1],  "| count:", len(test_idx))

X_train = X_all[train_idx]
y_train = y_all[train_idx]

X_val = X_all[val_idx]
y_val = y_all[val_idx]

X_test = X_all[test_idx]
y_test = y_all[test_idx]

print("\nRaw split sizes:")
print("Train:", X_train.shape[0])
print("Val  :", X_val.shape[0])
print("Test :", X_test.shape[0])


Indices:
Train: 0 → 181 | count: 182
Val  : 182 → 220 | count: 39
Test : 221 → 259 | count: 39

Raw split sizes:
Train: 182
Val  : 39
Test : 39


In [5]:
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_val_scaled   = scaler.transform(X_val)
X_test_scaled  = scaler.transform(X_test)


In [6]:
def build_windows(X, y, window_size: int):
    """
    X: (N, F), y: (N,)
    Returns:
        X_seq: (N_windows, W, F)
        y_seq: (N_windows,)
    Each window uses rows [i, ..., i+W-1], target is y[i+W]
    """
    X = np.asarray(X)
    y = np.asarray(y)

    n = len(X)
    max_start = n - window_size - 1  # need i+W to exist

    if max_start < 0:
        return np.empty((0, window_size, X.shape[1]), dtype=X.dtype), np.empty((0,), dtype=y.dtype)

    seqs = []
    targets = []
    for i in range(max_start + 1):
        seq = X[i : i + window_size]     # (W, F)
        target = y[i + window_size]      # scalar
        seqs.append(seq)
        targets.append(target)

    X_seq = np.stack(seqs, axis=0)
    y_seq = np.array(targets, dtype=y.dtype)
    return X_seq, y_seq


In [7]:
class SeqDataset(Dataset):
    def __init__(self, X_seq: np.ndarray, y_seq: np.ndarray):
        self.X = X_seq
        self.y = y_seq

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        x = torch.tensor(self.X[idx], dtype=torch.float32)  # (W, F)
        y = torch.tensor(self.y[idx], dtype=torch.float32)  # scalar 0/1
        return x, y


In [8]:
class GRUNet(nn.Module):
    def __init__(self, input_dim: int, hidden_dim: int = 32, num_layers: int = 1, dropout: float = 0.0):
        super().__init__()
        self.gru = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """
        x: (batch, seq_len, input_dim)
        """
        out, h_n = self.gru(x)       # h_n: (num_layers, batch, hidden_dim)
        last_hidden = h_n[-1]        # (batch, hidden_dim)
        logits = self.fc(last_hidden).squeeze(-1)  # (batch,)
        return logits


In [9]:
def train_gru_price_plus_4h(
    window_size: int,
    hidden_dim: int,
    num_layers: int = 1,
    batch_size: int = 32,
    num_epochs: int = 25,
    lr: float = 1e-3,
    verbose: bool = True,
):
    # Build windowed datasets
    Xtr_seq, ytr_seq = build_windows(X_train_scaled, y_train, window_size)
    Xval_seq, yval_seq = build_windows(X_val_scaled, y_val, window_size)
    Xte_seq, yte_seq = build_windows(X_test_scaled, y_test, window_size)

    print(f"\n=== GRU price+4h: W={window_size}, hidden={hidden_dim}, layers={num_layers} ===")
    print("Train windows:", Xtr_seq.shape[0], "Val windows:", Xval_seq.shape[0], "Test windows:", Xte_seq.shape[0])

    if Xtr_seq.shape[0] == 0 or Xval_seq.shape[0] == 0 or Xte_seq.shape[0] == 0:
        print("Not enough data for this window size; skipping.")
        return {
            "window_size": window_size,
            "hidden_dim": hidden_dim,
            "num_layers": num_layers,
            "best_val_acc": float("nan"),
            "test_acc": float("nan"),
        }

    train_ds = SeqDataset(Xtr_seq, ytr_seq)
    val_ds   = SeqDataset(Xval_seq, yval_seq)
    test_ds  = SeqDataset(Xte_seq, yte_seq)

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_loader   = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_loader  = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

    input_dim = Xtr_seq.shape[2]
    model = GRUNet(input_dim=input_dim, hidden_dim=hidden_dim, num_layers=num_layers).to(device)

    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    best_val_acc = 0.0
    best_state = None

    for epoch in range(1, num_epochs + 1):
        # ---- Train ----
        model.train()
        total_loss = 0.0
        total_correct = 0
        total_examples = 0

        for Xb, yb in train_loader:
            Xb = Xb.to(device)
            yb = yb.to(device)

            optimizer.zero_grad()
            logits = model(Xb)
            loss = criterion(logits, yb)
            loss.backward()
            optimizer.step()

            total_loss += loss.item() * yb.size(0)
            probs = torch.sigmoid(logits)
            preds = (probs >= 0.5).float()
            total_correct += (preds == yb).float().sum().item()
            total_examples += yb.size(0)

        train_loss = total_loss / total_examples
        train_acc  = total_correct / total_examples

        # ---- Val ----
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_examples = 0

        with torch.no_grad():
            for Xb, yb in val_loader:
                Xb = Xb.to(device)
                yb = yb.to(device)

                logits = model(Xb)
                loss = criterion(logits, yb)

                val_loss += loss.item() * yb.size(0)
                probs = torch.sigmoid(logits)
                preds = (probs >= 0.5).float()
                val_correct += (preds == yb).float().sum().item()
                val_examples += yb.size(0)

        val_loss /= val_examples
        val_acc  = val_correct / val_examples

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_state = model.state_dict()

        if verbose:
            print(
                f"Epoch {epoch:02d} | "
                f"train_loss={train_loss:.4f} acc={train_acc:.4f} | "
                f"val_loss={val_loss:.4f} acc={val_acc:.4f}"
            )

    # Load best state
    if best_state is not None:
        model.load_state_dict(best_state)

    # ---- Test ----
    model.eval()
    test_correct = 0
    test_examples = 0

    with torch.no_grad():
        for Xb, yb in test_loader:
            Xb = Xb.to(device)
            yb = yb.to(device)

            logits = model(Xb)
            probs = torch.sigmoid(logits)
            preds = (probs >= 0.5).float()
            test_correct += (preds == yb).float().sum().item()
            test_examples += yb.size(0)

    test_acc = test_correct / test_examples if test_examples > 0 else float("nan")
    print(f"Best val acc={best_val_acc:.4f} | Test acc={test_acc:.4f}")

    return {
        "window_size": window_size,
        "hidden_dim": hidden_dim,
        "num_layers": num_layers,
        "best_val_acc": best_val_acc,
        "test_acc": test_acc,
    }


In [10]:
experiment_configs = [
    {"window_size": 20, "hidden_dim": 32, "num_layers": 1},
    {"window_size": 20, "hidden_dim": 64, "num_layers": 1},
    {"window_size": 30, "hidden_dim": 32, "num_layers": 1},
    {"window_size": 30, "hidden_dim": 64, "num_layers": 1},
]

results = []

for cfg in experiment_configs:
    res = train_gru_price_plus_4h(
        window_size=cfg["window_size"],
        hidden_dim=cfg["hidden_dim"],
        num_layers=cfg["num_layers"],
        batch_size=32,
        num_epochs=20,
        lr=1e-3,
        verbose=False,  # flip to True if you want full logs
    )
    print(
        f"W={res['window_size']}, hidden={res['hidden_dim']} "
        f"| best_val_acc={res['best_val_acc']:.4f}, test_acc={res['test_acc']:.4f}"
    )
    results.append(res)

results_df = pd.DataFrame(results)
results_df



=== GRU price+4h: W=20, hidden=32, layers=1 ===
Train windows: 162 Val windows: 19 Test windows: 19
Best val acc=0.4737 | Test acc=0.5789
W=20, hidden=32 | best_val_acc=0.4737, test_acc=0.5789

=== GRU price+4h: W=20, hidden=64, layers=1 ===
Train windows: 162 Val windows: 19 Test windows: 19
Best val acc=0.4737 | Test acc=0.5789
W=20, hidden=64 | best_val_acc=0.4737, test_acc=0.5789

=== GRU price+4h: W=30, hidden=32, layers=1 ===
Train windows: 152 Val windows: 9 Test windows: 9
Best val acc=0.5556 | Test acc=0.5556
W=30, hidden=32 | best_val_acc=0.5556, test_acc=0.5556

=== GRU price+4h: W=30, hidden=64, layers=1 ===
Train windows: 152 Val windows: 9 Test windows: 9
Best val acc=0.5556 | Test acc=0.5556
W=30, hidden=64 | best_val_acc=0.5556, test_acc=0.5556


Unnamed: 0,window_size,hidden_dim,num_layers,best_val_acc,test_acc
0,20,32,1,0.473684,0.578947
1,20,64,1,0.473684,0.578947
2,30,32,1,0.555556,0.555556
3,30,64,1,0.555556,0.555556
