# Notebook 08 – Price + Macro Indicators (Daily SPY)

Goal: Augment the daily SPY feature set with macro / market-wide indicators
(VIX, yield curve etc.), and evaluate whether they improve next-day direction
prediction compared to price-only MLP/GRU baselines.


In [26]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
import yfinance as yf

ROOT = Path("..").resolve()
DATA_PROC = ROOT / "data" / "processed"

print("ROOT:", ROOT)
print("DATA_PROC:", DATA_PROC)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


ROOT: C:\Users\KDP only\Documents\ANN_Final_Project\spy-ann
DATA_PROC: C:\Users\KDP only\Documents\ANN_Final_Project\spy-ann\data\processed
Using device: cuda


In [27]:
daily_path = DATA_PROC / "daily_merged.parquet"
df = pd.read_parquet(daily_path)
df["date"] = pd.to_datetime(df["date"])

print("=== DAILY SPY (BASE) ===")
display(df.head())
print("Shape:", df.shape)
print("Date range:", df["date"].min(), "→", df["date"].max())
print("\nLabel distribution:")
print(df["label_up"].value_counts(normalize=True))


=== DAILY SPY (BASE) ===


Unnamed: 0,date,Close,High,Low,Open,Volume,ret_1d,log_ret_1d,ma_close_5,ma_close_20,vol_5,vol_20,future_price,future_ret_1d,label_up
0,2010-02-02,83.059364,83.217386,81.930636,82.216584,216327900,0.012104,0.012031,82.055548,84.347997,0.012653,0.010585,82.645493,-0.004983,0
1,2010-02-03,82.645493,83.134609,82.404697,82.683113,172730700,-0.004983,-0.004995,82.055548,84.205024,0.012873,0.010574,80.094604,-0.030865,0
2,2010-02-04,80.094604,82.04354,80.079552,82.005919,356715700,-0.030865,-0.031352,81.734995,83.931498,0.018783,0.012403,80.260124,0.002067,1
3,2010-02-05,80.260124,80.425666,78.694953,80.184871,493585800,0.002067,0.002064,81.625131,83.648186,0.018457,0.012344,79.68071,-0.007219,0
4,2010-02-08,79.68071,80.764291,79.62051,80.320322,224166900,-0.007219,-0.007245,81.148059,83.321606,0.015917,0.01227,80.681526,0.01256,1


Shape: (3753, 15)
Date range: 2010-02-02 00:00:00 → 2024-12-30 00:00:00

Label distribution:
label_up
1    0.552625
0    0.447375
Name: proportion, dtype: float64


In [28]:
start_date = df["date"].min()
end_date = df["date"].max()

macro_tickers = ["^VIX", "^TNX", "^IRX"]
print("Downloading:", macro_tickers)

data_macro = yf.download(
    tickers=" ".join(macro_tickers),
    start=start_date,
    end=end_date,
    interval="1d",
    auto_adjust=True,
)

# data_macro has columns like ('Adj Close', '^VIX'), ('Adj Close', '^TNX'), etc.
print("=== RAW MACRO DATA ===")
display(data_macro.head())
print("Shape:", data_macro.shape)


[*********************100%***********************]  3 of 3 completed

Downloading: ['^VIX', '^TNX', '^IRX']
=== RAW MACRO DATA ===





Price,Close,Close,Close,High,High,High,Low,Low,Low,Open,Open,Open,Volume,Volume,Volume
Ticker,^IRX,^TNX,^VIX,^IRX,^TNX,^VIX,^IRX,^TNX,^VIX,^IRX,^TNX,^VIX,^IRX,^TNX,^VIX
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2010-02-02,0.09,3.635,21.48,0.095,3.656,22.99,0.09,3.633,21.08,0.09,3.65,22.59,0.0,0.0,0
2010-02-03,0.09,3.703,21.6,0.1,3.709,22.110001,0.09,3.666,21.33,0.09,3.679,22.110001,0.0,0.0,0
2010-02-04,0.085,3.61,26.08,0.095,3.683,26.32,0.085,3.594,22.629999,0.09,3.683,22.629999,0.0,0.0,0
2010-02-05,0.085,3.546,26.110001,0.085,3.645,29.219999,0.075,3.537,25.370001,0.075,3.585,25.690001,0.0,0.0,0
2010-02-08,0.09,3.592,26.51,0.1,3.602,27.110001,0.08,3.571,25.48,0.09,3.577,26.110001,0.0,0.0,0


Shape: (3752, 15)


In [29]:
# Keep only Adj Close level
adj_close = data_macro["Close"].copy()
adj_close = adj_close.rename(columns={
    "^VIX": "vix",
    "^TNX": "tnx_10y",
    "^IRX": "irx_3m",
})

# Index is DatetimeIndex; reset to column 'date'
adj_close = adj_close.reset_index().rename(columns={"Date": "date"})
adj_close["date"] = pd.to_datetime(adj_close["date"])

print("=== MACRO ADJ CLOSE ===")
display(adj_close.head())

# Build macro features
macro = adj_close.copy()
macro["term_spread"] = macro["tnx_10y"] - macro["irx_3m"]
macro["log_vix"] = np.log(macro["vix"].replace(0, np.nan))
macro["vix_change_1d"] = macro["vix"].pct_change()

print("=== MACRO FEATURES PREVIEW ===")
display(macro.head())
print("Shape:", macro.shape)


=== MACRO ADJ CLOSE ===


Ticker,date,irx_3m,tnx_10y,vix
0,2010-02-02,0.09,3.635,21.48
1,2010-02-03,0.09,3.703,21.6
2,2010-02-04,0.085,3.61,26.08
3,2010-02-05,0.085,3.546,26.110001
4,2010-02-08,0.09,3.592,26.51


=== MACRO FEATURES PREVIEW ===


Ticker,date,irx_3m,tnx_10y,vix,term_spread,log_vix,vix_change_1d
0,2010-02-02,0.09,3.635,21.48,3.545,3.067122,
1,2010-02-03,0.09,3.703,21.6,3.613,3.072693,0.005587
2,2010-02-04,0.085,3.61,26.08,3.525,3.261169,0.207407
3,2010-02-05,0.085,3.546,26.110001,3.461,3.262318,0.00115
4,2010-02-08,0.09,3.592,26.51,3.502,3.277522,0.01532


Shape: (3752, 7)


In [30]:
df_merged = df.merge(macro, on="date", how="left")

# forward-fill macro columns
macro_cols = ["vix", "tnx_10y", "irx_3m", "term_spread", "log_vix", "vix_change_1d"]
df_merged[macro_cols] = df_merged[macro_cols].ffill()

print("=== DAILY + MACRO MERGED ===")
display(df_merged.head())
print("Shape:", df_merged.shape)
print("Date range:", df_merged["date"].min(), "→", df_merged["date"].max())
print("\nLabel distribution:")
print(df_merged["label_up"].value_counts(normalize=True))

out_path = DATA_PROC / "daily_with_macro.parquet"
df_merged.to_parquet(out_path)
print("Saved:", out_path)



=== DAILY + MACRO MERGED ===


Unnamed: 0,date,Close,High,Low,Open,Volume,ret_1d,log_ret_1d,ma_close_5,ma_close_20,...,vol_20,future_price,future_ret_1d,label_up,irx_3m,tnx_10y,vix,term_spread,log_vix,vix_change_1d
0,2010-02-02,83.059364,83.217386,81.930636,82.216584,216327900,0.012104,0.012031,82.055548,84.347997,...,0.010585,82.645493,-0.004983,0,0.09,3.635,21.48,3.545,3.067122,
1,2010-02-03,82.645493,83.134609,82.404697,82.683113,172730700,-0.004983,-0.004995,82.055548,84.205024,...,0.010574,80.094604,-0.030865,0,0.09,3.703,21.6,3.613,3.072693,0.005587
2,2010-02-04,80.094604,82.04354,80.079552,82.005919,356715700,-0.030865,-0.031352,81.734995,83.931498,...,0.012403,80.260124,0.002067,1,0.085,3.61,26.08,3.525,3.261169,0.207407
3,2010-02-05,80.260124,80.425666,78.694953,80.184871,493585800,0.002067,0.002064,81.625131,83.648186,...,0.012344,79.68071,-0.007219,0,0.085,3.546,26.110001,3.461,3.262318,0.00115
4,2010-02-08,79.68071,80.764291,79.62051,80.320322,224166900,-0.007219,-0.007245,81.148059,83.321606,...,0.01227,80.681526,0.01256,1,0.09,3.592,26.51,3.502,3.277522,0.01532


Shape: (3753, 21)
Date range: 2010-02-02 00:00:00 → 2024-12-30 00:00:00

Label distribution:
label_up
1    0.552625
0    0.447375
Name: proportion, dtype: float64
Saved: C:\Users\KDP only\Documents\ANN_Final_Project\spy-ann\data\processed\daily_with_macro.parquet


In [31]:
dfm = df_merged.copy()

drop_cols = ["date", "future_price", "future_ret_1d", "label_up"]
feature_cols = [c for c in dfm.columns if c not in drop_cols]

print("Feature columns:")
print(feature_cols)
print("Number of features:", len(feature_cols))

X_all = dfm[feature_cols].values.astype("float32")
y_all = dfm["label_up"].values.astype("float32")

train_end_date = pd.Timestamp("2018-12-31")
val_end_date   = pd.Timestamp("2021-12-31")

train_mask = dfm["date"] <= train_end_date
val_mask   = (dfm["date"] > train_end_date) & (dfm["date"] <= val_end_date)
test_mask  = dfm["date"] > val_end_date

X_train = X_all[train_mask.values]
y_train = y_all[train_mask.values]

X_val = X_all[val_mask.values]
y_val = y_all[val_mask.values]

X_test = X_all[test_mask.values]
y_test = y_all[test_mask.values]

print("Raw split sizes:")
print("Train:", X_train.shape[0])
print("Val  :", X_val.shape[0])
print("Test :", X_test.shape[0])

print("\nLabel distribution (train/val/test):")
for name, mask in [("Train", train_mask), ("Val", val_mask), ("Test", test_mask)]:
    print(name)
    print(dfm.loc[mask, "label_up"].value_counts(normalize=True))
    print()


Feature columns:
['Close', 'High', 'Low', 'Open', 'Volume', 'ret_1d', 'log_ret_1d', 'ma_close_5', 'ma_close_20', 'vol_5', 'vol_20', 'irx_3m', 'tnx_10y', 'vix', 'term_spread', 'log_vix', 'vix_change_1d']
Number of features: 17
Raw split sizes:
Train: 2244
Val  : 757
Test : 752

Label distribution (train/val/test):
Train
label_up
1    0.550802
0    0.449198
Name: proportion, dtype: float64

Val
label_up
1    0.583884
0    0.416116
Name: proportion, dtype: float64

Test
label_up
1    0.526596
0    0.473404
Name: proportion, dtype: float64



In [32]:
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_val_scaled   = scaler.transform(X_val)
X_test_scaled  = scaler.transform(X_test)


In [33]:
def build_windows(X, y, window_size: int):
    """
    X: (N, F), y: (N,)
    Returns:
        X_seq: (N_windows, W, F)
        y_seq: (N_windows,)
    Window uses rows [i, ..., i+W-1], target is y[i+W]
    """
    X = np.asarray(X)
    y = np.asarray(y)

    n = len(X)
    max_start = n - window_size - 1

    if max_start < 0:
        return np.empty((0, window_size, X.shape[1]), dtype=X.dtype), np.empty((0,), dtype=y.dtype)

    seqs, targets = [], []
    for i in range(max_start + 1):
        seq = X[i : i + window_size]
        target = y[i + window_size]
        seqs.append(seq)
        targets.append(target)

    X_seq = np.stack(seqs, axis=0)
    y_seq = np.array(targets, dtype=y.dtype)
    return X_seq, y_seq


class SeqDataset(Dataset):
    def __init__(self, X_seq: np.ndarray, y_seq: np.ndarray):
        self.X = X_seq
        self.y = y_seq

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        x = torch.tensor(self.X[idx], dtype=torch.float32)
        y = torch.tensor(self.y[idx], dtype=torch.float32)
        return x, y


In [34]:
class MLPWindow(nn.Module):
    def __init__(self, input_dim: int, hidden_sizes=(64, 32)):
        super().__init__()
        layers = []
        prev = input_dim
        for h in hidden_sizes:
            layers.append(nn.Linear(prev, h))
            layers.append(nn.ReLU())
            prev = h
        layers.append(nn.Linear(prev, 1))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        # x: (batch, W, F) → flatten
        b, w, f = x.shape
        x = x.view(b, w * f)
        logits = self.net(x).squeeze(-1)
        return logits


def train_mlp_macro(
    window_size: int,
    hidden_sizes=(64, 32),
    batch_size: int = 64,
    num_epochs: int = 25,
    lr: float = 1e-3,
    use_scaled: bool = True,
    verbose: bool = True,
):
    """
    Train MLP on price+macro features, with option to use scaled or raw inputs.

    Assumes you have:
        X_train, X_val, X_test (raw)
        X_train_scaled, X_val_scaled, X_test_scaled (StandardScaler)
        y_train, y_val, y_test
    defined in the notebook scope.
    """

    if use_scaled:
        Xtr_base, Xval_base, Xte_base = X_train_scaled, X_val_scaled, X_test_scaled
        scale_mode = "scaled"
    else:
        Xtr_base, Xval_base, Xte_base = X_train, X_val, X_test
        scale_mode = "raw"

    Xtr_seq, ytr_seq = build_windows(Xtr_base, y_train, window_size)
    Xval_seq, yval_seq = build_windows(Xval_base, y_val, window_size)
    Xte_seq, yte_seq = build_windows(Xte_base, y_test, window_size)

    print(
        f"\n=== MLP price+macro ({scale_mode}) "
        f"W={window_size}, hidden={hidden_sizes} ==="
    )
    print("Train windows:", Xtr_seq.shape[0], "Val:", Xval_seq.shape[0], "Test:", Xte_seq.shape[0])

    if Xtr_seq.shape[0] == 0 or Xval_seq.shape[0] == 0 or Xte_seq.shape[0] == 0:
        print("Not enough data for this window size; skipping.")
        return {
            "model": "MLP",
            "features": "price+macro",
            "window_size": window_size,
            "hidden_desc": str(hidden_sizes),
            "scaled": use_scaled,
            "best_val_acc": float("nan"),
            "test_acc": float("nan"),
        }

    train_ds = SeqDataset(Xtr_seq, ytr_seq)
    val_ds   = SeqDataset(Xval_seq, yval_seq)
    test_ds  = SeqDataset(Xte_seq, yte_seq)

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_loader   = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_loader  = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

    input_dim = Xtr_seq.shape[1] * Xtr_seq.shape[2]
    model = MLPWindow(input_dim=input_dim, hidden_sizes=hidden_sizes).to(device)

    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    best_val_acc = 0.0
    best_state = None

    for epoch in range(1, num_epochs + 1):
        # ---- Train ----
        model.train()
        total_loss = 0.0
        total_correct = 0
        total_examples = 0

        for Xb, yb in train_loader:
            Xb = Xb.to(device)
            yb = yb.to(device)

            optimizer.zero_grad()
            logits = model(Xb)
            loss = criterion(logits, yb)
            loss.backward()
            optimizer.step()

            total_loss += loss.item() * yb.size(0)
            preds = (torch.sigmoid(logits) >= 0.5).float()
            total_correct += (preds == yb).float().sum().item()
            total_examples += yb.size(0)

        train_loss = total_loss / total_examples
        train_acc = total_correct / total_examples

        # ---- Val ----
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_examples = 0

        with torch.no_grad():
            for Xb, yb in val_loader:
                Xb = Xb.to(device)
                yb = yb.to(device)

                logits = model(Xb)
                loss = criterion(logits, yb)

                val_loss += loss.item() * yb.size(0)
                preds = (torch.sigmoid(logits) >= 0.5).float()
                val_correct += (preds == yb).float().sum().item()
                val_examples += yb.size(0)

        val_loss /= val_examples
        val_acc = val_correct / val_examples

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_state = model.state_dict()

        if verbose:
            print(
                f"Epoch {epoch:02d} | "
                f"train_loss={train_loss:.4f} acc={train_acc:.4f} | "
                f"val_loss={val_loss:.4f} acc={val_acc:.4f}"
            )

    if best_state is not None:
        model.load_state_dict(best_state)

    # ---- Test ----
    model.eval()
    test_correct = 0
    test_examples = 0

    with torch.no_grad():
        for Xb, yb in test_loader:
            Xb = Xb.to(device)
            yb = yb.to(device)

            logits = model(Xb)
            preds = (torch.sigmoid(logits) >= 0.5).float()
            test_correct += (preds == yb).float().sum().item()
            test_examples += yb.size(0)

    test_acc = test_correct / test_examples if test_examples > 0 else float("nan")
    print(f"[{scale_mode}] Best val acc={best_val_acc:.4f} | Test acc={test_acc:.4f}")

    return {
        "model": "MLP",
        "features": "price+macro",
        "window_size": window_size,
        "hidden_desc": str(hidden_sizes),
        "scaled": use_scaled,
        "best_val_acc": best_val_acc,
        "test_acc": test_acc,
    }


In [35]:
class GRUNet(nn.Module):
    def __init__(self, input_dim: int, hidden_dim: int = 32, num_layers: int = 1, dropout: float = 0.0):
        super().__init__()
        self.gru = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        # x: (batch, W, F)
        out, h_n = self.gru(x)
        last_hidden = h_n[-1]
        logits = self.fc(last_hidden).squeeze(-1)
        return logits


def train_gru_macro(
    window_size: int,
    hidden_dim: int,
    num_layers: int = 1,
    batch_size: int = 64,
    num_epochs: int = 25,
    lr: float = 1e-3,
    use_scaled: bool = True,
    verbose: bool = True,
):
    """
    Train GRU on price+macro features, with option to use scaled or raw inputs.

    Assumes you have:
        X_train, X_val, X_test (raw)
        X_train_scaled, X_val_scaled, X_test_scaled (StandardScaler)
        y_train, y_val, y_test
    defined in the notebook scope.
    """

    if use_scaled:
        Xtr_base, Xval_base, Xte_base = X_train_scaled, X_val_scaled, X_test_scaled
        scale_mode = "scaled"
    else:
        Xtr_base, Xval_base, Xte_base = X_train, X_val, X_test
        scale_mode = "raw"

    Xtr_seq, ytr_seq = build_windows(Xtr_base, y_train, window_size)
    Xval_seq, yval_seq = build_windows(Xval_base, y_val, window_size)
    Xte_seq, yte_seq = build_windows(Xte_base, y_test, window_size)

    print(
        f"\n=== GRU price+macro ({scale_mode}) "
        f"W={window_size}, hidden={hidden_dim}, layers={num_layers} ==="
    )
    print("Train windows:", Xtr_seq.shape[0], "Val:", Xval_seq.shape[0], "Test:", Xte_seq.shape[0])

    if Xtr_seq.shape[0] == 0 or Xval_seq.shape[0] == 0 or Xte_seq.shape[0] == 0:
        print("Not enough data for this window size; skipping.")
        return {
            "model": "GRU",
            "features": "price+macro",
            "window_size": window_size,
            "hidden_desc": f"hidden={hidden_dim}, layers={num_layers}",
            "scaled": use_scaled,
            "best_val_acc": float("nan"),
            "test_acc": float("nan"),
        }

    train_ds = SeqDataset(Xtr_seq, ytr_seq)
    val_ds   = SeqDataset(Xval_seq, yval_seq)
    test_ds  = SeqDataset(Xte_seq, yte_seq)

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_loader   = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_loader  = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

    input_dim = Xtr_seq.shape[2]
    model = GRUNet(input_dim=input_dim, hidden_dim=hidden_dim, num_layers=num_layers).to(device)

    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    best_val_acc = 0.0
    best_state = None

    for epoch in range(1, num_epochs + 1):
        # ---- Train ----
        model.train()
        total_loss = 0.0
        total_correct = 0
        total_examples = 0

        for Xb, yb in train_loader:
            Xb = Xb.to(device)
            yb = yb.to(device)

            optimizer.zero_grad()
            logits = model(Xb)
            loss = criterion(logits, yb)
            loss.backward()
            optimizer.step()

            total_loss += loss.item() * yb.size(0)
            preds = (torch.sigmoid(logits) >= 0.5).float()
            total_correct += (preds == yb).float().sum().item()
            total_examples += yb.size(0)

        train_loss = total_loss / total_examples
        train_acc = total_correct / total_examples

        # ---- Val ----
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_examples = 0

        with torch.no_grad():
            for Xb, yb in val_loader:
                Xb = Xb.to(device)
                yb = yb.to(device)

                logits = model(Xb)
                loss = criterion(logits, yb)

                val_loss += loss.item() * yb.size(0)
                preds = (torch.sigmoid(logits) >= 0.5).float()
                val_correct += (preds == yb).float().sum().item()
                val_examples += yb.size(0)

        val_loss /= val_examples
        val_acc = val_correct / val_examples

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_state = model.state_dict()

        if verbose:
            print(
                f"Epoch {epoch:02d} | "
                f"train_loss={train_loss:.4f} acc={train_acc:.4f} | "
                f"val_loss={val_loss:.4f} acc={val_acc:.4f}"
            )

    if best_state is not None:
        model.load_state_dict(best_state)

    # ---- Test ----
    model.eval()
    test_correct = 0
    test_examples = 0

    with torch.no_grad():
        for Xb, yb in test_loader:
            Xb = Xb.to(device)
            yb = yb.to(device)

            logits = model(Xb)
            preds = (torch.sigmoid(logits) >= 0.5).float()
            test_correct += (preds == yb).float().sum().item()
            test_examples += yb.size(0)

    test_acc = test_correct / test_examples if test_examples > 0 else float("nan")
    print(f"[{scale_mode}] Best val acc={best_val_acc:.4f} | Test acc={test_acc:.4f}")

    return {
        "model": "GRU",
        "features": "price+macro",
        "window_size": window_size,
        "hidden_desc": f"hidden={hidden_dim}, layers={num_layers}",
        "scaled": use_scaled,
        "best_val_acc": best_val_acc,
        "test_acc": test_acc,
    }


In [40]:
class LSTMNet(nn.Module):
    def __init__(self, input_dim: int, hidden_dim: int = 32, num_layers: int = 1, dropout: float = 0.0):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        # x: (batch, W, F)
        out, (h_n, c_n) = self.lstm(x)
        last_hidden = h_n[-1]  # (batch, hidden_dim)
        logits = self.fc(last_hidden).squeeze(-1)
        return logits


def train_lstm_macro(
    window_size: int,
    hidden_dim: int,
    num_layers: int = 1,
    batch_size: int = 64,
    num_epochs: int = 25,
    lr: float = 1e-3,
    use_scaled: bool = True,
    verbose: bool = True,
):
    """
    Train LSTM on price+macro features, with option to use scaled or raw inputs.

    Assumes:
        X_train, X_val, X_test (raw)
        X_train_scaled, X_val_scaled, X_test_scaled (scaled)
        y_train, y_val, y_test
    exist in the notebook.
    """

    if use_scaled:
        Xtr_base, Xval_base, Xte_base = X_train_scaled, X_val_scaled, X_test_scaled
        scale_mode = "scaled"
    else:
        Xtr_base, Xval_base, Xte_base = X_train, X_val, X_test
        scale_mode = "raw"

    Xtr_seq, ytr_seq = build_windows(Xtr_base, y_train, window_size)
    Xval_seq, yval_seq = build_windows(Xval_base, y_val, window_size)
    Xte_seq, yte_seq = build_windows(Xte_base, y_test, window_size)

    print(
        f"\n=== LSTM price+macro ({scale_mode}) "
        f"W={window_size}, hidden={hidden_dim}, layers={num_layers} ==="
    )
    print("Train windows:", Xtr_seq.shape[0], "Val:", Xval_seq.shape[0], "Test:", Xte_seq.shape[0])

    if Xtr_seq.shape[0] == 0 or Xval_seq.shape[0] == 0 or Xte_seq.shape[0] == 0:
        print("Not enough data for this window size; skipping.")
        return {
            "model": "LSTM",
            "features": "price+macro",
            "window_size": window_size,
            "hidden_desc": f"hidden={hidden_dim}, layers={num_layers}",
            "scaled": use_scaled,
            "best_val_acc": float("nan"),
            "test_acc": float("nan"),
        }

    train_ds = SeqDataset(Xtr_seq, ytr_seq)
    val_ds   = SeqDataset(Xval_seq, yval_seq)
    test_ds  = SeqDataset(Xte_seq, yte_seq)

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_loader   = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_loader  = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

    input_dim = Xtr_seq.shape[2]
    model = LSTMNet(input_dim=input_dim, hidden_dim=hidden_dim, num_layers=num_layers).to(device)

    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    best_val_acc = 0.0
    best_state = None

    for epoch in range(1, num_epochs + 1):
        # ---- Train ----
        model.train()
        total_loss = 0.0
        total_correct = 0
        total_examples = 0

        for Xb, yb in train_loader:
            Xb = Xb.to(device)
            yb = yb.to(device)

            optimizer.zero_grad()
            logits = model(Xb)
            loss = criterion(logits, yb)
            loss.backward()
            optimizer.step()

            total_loss += loss.item() * yb.size(0)
            preds = (torch.sigmoid(logits) >= 0.5).float()
            total_correct += (preds == yb).float().sum().item()
            total_examples += yb.size(0)

        train_loss = total_loss / total_examples
        train_acc = total_correct / total_examples

        # ---- Val ----
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_examples = 0

        with torch.no_grad():
            for Xb, yb in val_loader:
                Xb = Xb.to(device)
                yb = yb.to(device)

                logits = model(Xb)
                loss = criterion(logits, yb)

                val_loss += loss.item() * yb.size(0)
                preds = (torch.sigmoid(logits) >= 0.5).float()
                val_correct += (preds == yb).float().sum().item()
                val_examples += yb.size(0)

        val_loss /= val_examples
        val_acc = val_correct / val_examples

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_state = model.state_dict()

        if verbose:
            print(
                f"Epoch {epoch:02d} | "
                f"train_loss={train_loss:.4f} acc={train_acc:.4f} | "
                f"val_loss={val_loss:.4f} acc={val_acc:.4f}"
            )

    if best_state is not None:
        model.load_state_dict(best_state)

    # ---- Test ----
    model.eval()
    test_correct = 0
    test_examples = 0
    
    all_preds = []
    all_true = []

    with torch.no_grad():
        for Xb, yb in test_loader:
            Xb = Xb.to(device)
            yb = yb.to(device)

            logits = model(Xb)
            preds = (torch.sigmoid(logits) >= 0.5).float()
            test_correct += (preds == yb).float().sum().item()
            test_examples += yb.size(0)
            
            all_preds.append(preds.cpu().numpy())
            all_true.append(yb.cpu().numpy())
            
    all_preds = np.concatenate(all_preds)
    all_true = np.concatenate(all_true)
    print("Test mean(true):", all_true.mean())
    print("Test mean(pred):", all_preds.mean())

    test_acc = test_correct / test_examples if test_examples > 0 else float("nan")
    print(f"[{scale_mode}] Best val acc={best_val_acc:.4f} | Test acc={test_acc:.4f}")

    return {
        "model": "LSTM",
        "features": "price+macro",
        "window_size": window_size,
        "hidden_desc": f"hidden={hidden_dim}, layers={num_layers}",
        "scaled": use_scaled,
        "best_val_acc": best_val_acc,
        "test_acc": test_acc,
    }


In [41]:
results = []

mlp_configs = [
    {"window_size": 30, "hidden_sizes": (32, 16)},
    {"window_size": 30, "hidden_sizes": (64, 32)},
    {"window_size": 60, "hidden_sizes": (64, 32)},
    {"window_size": 60, "hidden_sizes": (128, 64)},
    {"window_size": 90, "hidden_sizes": (64, 32)},
]

gru_configs = [
    {"window_size": 30, "hidden_dim": 16, "num_layers": 1},
    {"window_size": 30, "hidden_dim": 32, "num_layers": 1},
    {"window_size": 60, "hidden_dim": 32, "num_layers": 1},
    {"window_size": 60, "hidden_dim": 64, "num_layers": 1},
    {"window_size": 90, "hidden_dim": 32, "num_layers": 2},
]

lstm_configs = [
    {"window_size": 30, "hidden_dim": 32, "num_layers": 1},
    {"window_size": 60, "hidden_dim": 32, "num_layers": 1},
    {"window_size": 60, "hidden_dim": 64, "num_layers": 1},
]
results = []

for use_scaled in [True, False]:
    print("\n==============================")
    print("use_scaled =", use_scaled)
    print("==============================")

    # --- MLPs ---
    for cfg in mlp_configs:
        res = train_mlp_macro(
            window_size=cfg["window_size"],
            hidden_sizes=cfg["hidden_sizes"],
            batch_size=64,
            num_epochs=20,
            lr=1e-3,
            use_scaled=use_scaled,
            verbose=False,
        )
        print(res)
        results.append(res)

    # --- GRUs ---
    for cfg in gru_configs:
        res = train_gru_macro(
            window_size=cfg["window_size"],
            hidden_dim=cfg["hidden_dim"],
            num_layers=cfg["num_layers"],
            batch_size=64,
            num_epochs=20,
            lr=1e-3,
            use_scaled=use_scaled,
            verbose=False,
        )
        print(res)
        results.append(res)

    # --- LSTMs ---
    for cfg in lstm_configs:
        res = train_lstm_macro(
            window_size=cfg["window_size"],
            hidden_dim=cfg["hidden_dim"],
            num_layers=cfg["num_layers"],
            batch_size=64,
            num_epochs=20,
            lr=1e-3,
            use_scaled=use_scaled,
            verbose=False,
        )
        print(res)
        results.append(res)

results_df = pd.DataFrame(results)
results_df


use_scaled = True

=== MLP price+macro (scaled) W=30, hidden=(32, 16) ===
Train windows: 2214 Val: 727 Test: 722
[scaled] Best val acc=0.4223 | Test acc=0.4681
{'model': 'MLP', 'features': 'price+macro', 'window_size': 30, 'hidden_desc': '(32, 16)', 'scaled': True, 'best_val_acc': 0.422283356258597, 'test_acc': 0.46814404432132967}

=== MLP price+macro (scaled) W=30, hidden=(64, 32) ===
Train windows: 2214 Val: 727 Test: 722
[scaled] Best val acc=0.4223 | Test acc=0.4681
{'model': 'MLP', 'features': 'price+macro', 'window_size': 30, 'hidden_desc': '(64, 32)', 'scaled': True, 'best_val_acc': 0.422283356258597, 'test_acc': 0.46814404432132967}

=== MLP price+macro (scaled) W=60, hidden=(64, 32) ===
Train windows: 2184 Val: 697 Test: 692
[scaled] Best val acc=0.4204 | Test acc=0.4653
{'model': 'MLP', 'features': 'price+macro', 'window_size': 60, 'hidden_desc': '(64, 32)', 'scaled': True, 'best_val_acc': 0.42037302725968434, 'test_acc': 0.4653179190751445}

=== MLP price+macro (scaled) W=

Unnamed: 0,model,features,window_size,hidden_desc,scaled,best_val_acc,test_acc
0,MLP,price+macro,30,"(32, 16)",True,0.422283,0.468144
1,MLP,price+macro,30,"(64, 32)",True,0.422283,0.468144
2,MLP,price+macro,60,"(64, 32)",True,0.420373,0.465318
3,MLP,price+macro,60,"(128, 64)",True,0.420373,0.465318
4,MLP,price+macro,90,"(64, 32)",True,0.41979,0.460725
5,GRU,price+macro,30,"hidden=16, layers=1",True,0.422283,0.468144
6,GRU,price+macro,30,"hidden=32, layers=1",True,0.422283,0.468144
7,GRU,price+macro,60,"hidden=32, layers=1",True,0.420373,0.465318
8,GRU,price+macro,60,"hidden=64, layers=1",True,0.420373,0.465318
9,GRU,price+macro,90,"hidden=32, layers=2",True,0.41979,0.460725


In [38]:
print("Train label_up mean:", dfm.loc[train_mask, "label_up"].mean())
print("Val   label_up mean:", dfm.loc[val_mask, "label_up"].mean())
print("Test  label_up mean:", dfm.loc[test_mask, "label_up"].mean())

Train label_up mean: 0.5508021390374331
Val   label_up mean: 0.583883751651255
Test  label_up mean: 0.526595744680851


In [39]:
# After training one of the models:
all_preds = []
all_true = []

with torch.no_grad():
    for Xb, yb in test_loader:
        Xb = Xb.to(device)
        yb = yb.to(device)
        logits = model(Xb)
        probs = torch.sigmoid(logits)
        preds = (probs >= 0.5).float()

        all_preds.append(preds.cpu().numpy())
        all_true.append(yb.cpu().numpy())

all_preds = np.concatenate(all_preds)
all_true = np.concatenate(all_true)

print("Test mean(true):", all_true.mean())
print("Test mean(pred):", all_preds.mean())


NameError: name 'test_loader' is not defined