In [None]:
# ============================================================
# Minimal DNN + R2OOS baselines (naive / cs_yhat / condmean)
# ============================================================
import os
import sys
import warnings
import numpy as np
import pandas as pd

warnings.filterwarnings("ignore")

# framework entrypoint (Machine must have the new R2OOS signature)
from rolling_framework import Machine

# --------------------- Paths & dates ---------------------
# All input CSVs are resolved relative to a single data directory.
DATA_DIR = "data"
Y_FILE = os.path.join(DATA_DIR, "exrets.csv")
SLOPE_FILE = os.path.join(DATA_DIR, "slope.csv")
# precomputed CS baseline, consumed by the cs_yhat / condmean R2OOS calls
CS_YHAT_FILE = os.path.join(DATA_DIR, "cs_yhat.csv")

# Burn-in window and overall sample period handed to Machine.
# NOTE(review): the strings look like YYYYMM stamps — confirm against Machine.
BURN_START = "197108"
BURN_END = "199001"
PERIOD_START = "197108"
PERIOD_END = "202312"
# Passed to Machine as forecast_horizon.
HORIZON = 12

# columns must exist in exrets.csv (and cs_yhat.csv for CS baselines)
MATURITIES = ["xr_2", "xr_3", "xr_5", "xr_7", "xr_10"]

# --------------------- Helpers ---------------------
def _load_csv(path, name):
    """Read the CSV at *path* into a DataFrame indexed by its ``Time`` column.

    Args:
        path: Location of the CSV file.
        name: Short label for the dataset, used only in the error message.

    Returns:
        The parsed DataFrame.

    Raises:
        SystemExit: If the file does not exist; the exit message names the
            dataset and the missing file.
    """
    try:
        frame = pd.read_csv(path, index_col="Time")
    except FileNotFoundError as err:
        # A missing input is fatal for the script, so stop with a clear message.
        sys.exit(f"[ERROR] missing {name} → {err.filename}")
    else:
        return frame

def _align_time(*dfs):
    """Restrict every frame to the index labels they all share.

    Args:
        *dfs: Any number of pandas objects exposing ``.index`` and ``.loc``.

    Returns:
        A list with one entry per input, in input order, each reduced to the
        common index labels and sorted by index. Empty when no inputs are given.
    """
    if not dfs:
        return []

    first, *others = dfs
    common = first.index
    for frame in others:
        common = common.intersection(frame.index)

    aligned = []
    for frame in dfs:
        aligned.append(frame.loc[common].sort_index())
    return aligned

def _mean_scalar(x):
    """Collapse *x* to a single float.

    Series, DataFrames, ndarrays, lists and tuples are converted to a float
    array and averaged with NaNs ignored; an empty container yields NaN.
    Anything else is passed to ``float()``, and NaN is returned when that
    conversion fails.
    """
    container_types = (pd.Series, pd.DataFrame, np.ndarray, list, tuple)

    if not isinstance(x, container_types):
        # Scalar path: best-effort conversion, NaN when it cannot be a float.
        try:
            return float(x)
        except Exception:
            return float("nan")

    values = np.asarray(x, dtype=float)
    return float(np.nanmean(values)) if values.size else float("nan")

# --------------------- Load data ---------------------
y = _load_csv(Y_FILE, "exrets")
X = _load_csv(SLOPE_FILE, "slope")  # simple features = slope only

# Restrict y to the maturities of interest, keeping the MATURITIES order.
y_cols = [c for c in MATURITIES if c in y.columns]
if not y_cols:
    sys.exit("[ERROR] MATURITIES not found in exrets.csv")

# A partial match would otherwise shrink the target set without any notice.
# Reported with print because Python warnings are silenced in this script.
missing_cols = [c for c in MATURITIES if c not in y.columns]
if missing_cols:
    print(f"[WARN] maturities missing from exrets.csv (skipped): {missing_cols}")
y = y[y_cols]

# hard align (no shifting inside Machine)
y, X = _align_time(y, X)

# With no shared Time labels there is nothing to train on; stop here with a
# clear message instead of handing empty frames to Machine.
if y.empty:
    sys.exit("[ERROR] exrets.csv and slope.csv share no common Time values")

print("✓ shapes:", {"y": y.shape, "X": X.shape})

# --------------------- Minimal DNN config ---------------------
# Base configuration handed to Machine through ``option``.
opt = {
    "scaler": "standard",
    "hidden": (32, 16),
    "dropout": 0.1,
    "lr": 1e-3,
    "wd": 1e-4,  # L2 (weight decay)
    "bs": 32,
    "epochs": 150,
    "patience": 20,
    "seed": 0,
}

# Tiny grid (still goes through the unified grid path): one candidate per
# hyperparameter, taken from ``opt`` so the two cannot drift apart.
grid = {
    "dnn__module__hidden": [opt["hidden"]],
    "dnn__module__dropout": [opt["dropout"]],
    "dnn__optimizer__lr": [opt["lr"]],
    "dnn__optimizer__weight_decay": [opt["wd"]],
}

# --------------------- Run Machine ---------------------
# Keyword settings for the framework entrypoint, gathered in one place.
machine_kwargs = {
    "option": opt,
    "params_grid": grid,
    "burn_in_start": BURN_START,
    "burn_in_end": BURN_END,
    "period": [PERIOD_START, PERIOD_END],
    "forecast_horizon": HORIZON,
}

# Features X, targets y and the model tag "DNN" are passed positionally.
m = Machine(X, y, "DNN", **machine_kwargs)

print("\n▶ Training DNN (features = slope)")
m.training()

# --------------------- R2OOS with three baselines ---------------------
# Three R2OOS variants, all reported per maturity; they differ in the baseline
# used for the denominator.

# 1) naive: denominator = ||y||^2
r2_naive = m.R2OOS(baseline="naive", per_maturity=True)

# The two CS-based baselines read the same precomputed file.
cs_kwargs = {"cs_path": CS_YHAT_FILE, "per_maturity": True}

# 2) cs_yhat: denominator = ||y - cs_yhat||^2
r2_cs = m.R2OOS(baseline="cs_yhat", **cs_kwargs)

# 3) condmean: denominator = ||y - mean_j(cs_yhat_tj)||^2 (cross-sectional mean each t)
r2_cond = m.R2OOS(baseline="condmean", **cs_kwargs)

# --------------------- Print results ---------------------
# One mapping drives both the per-maturity table and the averages below.
r2_by_baseline = {
    "R2OOS_naive": r2_naive,
    "R2OOS_cs_yhat": r2_cs,
    "R2OOS_condmean": r2_cond,
}
res = pd.DataFrame(r2_by_baseline).reindex(columns=list(r2_by_baseline))

print("\n=== R2OOS by maturity ===")
print(res.round(4))

print("\n=== Averages across maturities ===")
averages = pd.Series(
    {f"avg_{label}": _mean_scalar(values) for label, values in r2_by_baseline.items()}
)
print(averages.round(4))

# Also show which params were effectively used
bp = getattr(m, "best_params", None)
print("\n=== Used hyperparameters ===")
# Report best_params when the Machine exposes a non-empty one; otherwise fall
# back to opt (the grid did not override anything).
used_params = bp if bp else opt
for k, v in used_params.items():
    print(f"  {k}: {v}")

▶ OLS-SL_nonDNN


OLS rolling:  93%|█████████▎| 485/520 [00:08<00:00, 59.08it/s]