In [2]:
# ============================================================
# Ridge / Lasso / ElasticNet / RF / ExtraTrees / XGB
# X = fwds
# Baselines: naive / condmean / cs_yhat
# Console output only
# ============================================================

import os, sys, warnings
import numpy as np
import pandas as pd
warnings.filterwarnings("ignore")

from rolling_framework import Machine

# ---------------- Paths ----------------
DATA_DIR = "data"
Y_FILE = os.path.join(DATA_DIR, "exrets.csv")
FWD_FILE = os.path.join(DATA_DIR, "fwds.csv")
CS_YHAT_FILE = os.path.join(DATA_DIR, "cs_yhat.csv")

BURN_START, BURN_END = "197108", "199001"
PERIOD_START, PERIOD_END = "197108", "202312"
HORIZON = 12
MATURITIES = ["xr_2", "xr_3", "xr_5", "xr_7", "xr_10"]

# ---------------- Helpers ----------------
def _load_csv(path):
    try:
        return pd.read_csv(path, index_col="Time")
    except FileNotFoundError:
        sys.exit(f"Missing file: {path}")

def _align_time(*dfs):
    idx = None
    for d in dfs:
        idx = d.index if idx is None else idx.intersection(d.index)
    return [d.loc[idx].sort_index() for d in dfs]

def _to_series(x, index):
    if isinstance(x, pd.Series):
        return x.reindex(index)
    if isinstance(x, pd.DataFrame):
        return x.squeeze().reindex(index)
    try:
        return pd.Series(float(x), index=index)
    except Exception:
        return pd.Series(np.nan, index=index)

def _print_r2_block(title, m, cs_path=None):
    print(f"\n==== {title} ====")
    r2_naive = m.R2OOS(baseline="naive")
    r2_cond  = m.R2OOS(baseline="condmean")
    r2_cs    = m.R2OOS(baseline="cs_yhat", cs_path=cs_path) if cs_path else np.nan

    cols = getattr(m, "targets", getattr(m, "y", pd.DataFrame()).columns)
    s_naive, s_cond, s_cs = map(lambda x: _to_series(x, cols), [r2_naive, r2_cond, r2_cs])

    tbl = pd.DataFrame({
        "R2OOS_naive": s_naive,
        "R2OOS_condmean": s_cond,
        "R2OOS_cs_yhat": s_cs
    })
    print(tbl.round(4))
    print("Mean:", tbl.mean(axis=0, skipna=True).round(4).to_dict())

def run_model(model_type, option, grid, X, y, cs_path, title):
    m = Machine(
        X, y,
        model_type=model_type, option=option, params_grid=grid,
        burn_in_start=BURN_START, burn_in_end=BURN_END,
        period=[PERIOD_START, PERIOD_END],
        forecast_horizon=HORIZON,
    )
    print(f"\n▶ {title}")
    m.training()
    _print_r2_block(title, m, cs_path=cs_path)

# ---------------- Data ----------------
y = _load_csv(Y_FILE)
fwd = _load_csv(FWD_FILE)

y_cols = [c for c in MATURITIES if c in y.columns]
if not y_cols:
    sys.exit("MATURITIES not found in exrets.csv")
y = y[y_cols]

y, fwd = _align_time(y, fwd)
X = fwd.copy()

# Clean cs_yhat
cs = _load_csv(CS_YHAT_FILE)
cs = cs[~cs.index.duplicated(keep="last")]
cs = cs.reindex(columns=y.columns)
CS_CLEAN_FILE = os.path.join(DATA_DIR, "cs_yhat.csv")
cs.to_csv(CS_CLEAN_FILE)

print("✓ data:", {"X": X.shape, "y": y.shape, "cs_yhat": cs.shape})

# ---------------- Penalized Models ----------------
grid_lasso = {"reg__alpha": np.logspace(-3, 2, 5)}
grid_ridge = {"reg__alpha": np.logspace(-3, 2, 5)}
grid_enet  = {"reg__alpha": np.logspace(-3, 2, 5), "reg__l1_ratio": [0.1, 0.3, 0.5]}

run_model("Penalized", "lasso",      grid_lasso, X, y, CS_CLEAN_FILE, "Lasso")
run_model("Penalized", "ridge",      grid_ridge, X, y, CS_CLEAN_FILE, "Ridge")
run_model("Penalized", "elasticnet", grid_enet,  X, y, CS_CLEAN_FILE, "ElasticNet")

# ---------------- Tree Models ----------------
grid_rf = {
    "model__estimator__n_estimators": [300],
    "model__estimator__max_depth": [2, 8],
    "model__estimator__min_samples_split": [2, 4],
    "model__estimator__min_samples_leaf": [1, 2, 4],
    "model__estimator__max_features": [0.25, 0.5, 1],
}
grid_et = {
    "model__estimator__n_estimators": [300],
    "model__estimator__max_depth": [2, 8],
    "model__estimator__min_samples_split": [2, 4],
    "model__estimator__min_samples_leaf": [1, 2, 4],
    "model__estimator__max_features": [0.25, 0.5, 1],
}
grid_xgb = {
    "model__estimator__n_estimators": [300],
    "model__estimator__max_depth": [2, 4],
    "model__estimator__learning_rate": [0.01],
    "model__estimator__subsample": [0.7, 0.5],
    "model__estimator__reg_lambda": [0.1, 1.0],
}

run_model("Tree", "rf",  grid_rf,  X, y, CS_CLEAN_FILE, "RandomForest")
run_model("Tree", "et",  grid_et,  X, y, CS_CLEAN_FILE, "ExtraTrees")
run_model("Tree", "xgb", grid_xgb, X, y, CS_CLEAN_FILE, "XGBoost")

print("\n✓ done")

✓ data: {'X': (629, 10), 'y': (629, 5), 'cs_yhat': (612, 5)}

▶ Lasso


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c


==== Lasso ====
       R2OOS_naive  R2OOS_condmean  R2OOS_cs_yhat
xr_2        0.0927         -0.0315         0.0656
xr_3        0.0887         -0.0539        -0.0082
xr_5        0.0813         -0.0804        -0.0714
xr_7        0.0712         -0.0896        -0.1076
xr_10       0.0381         -0.1172        -0.1883
Mean: {'R2OOS_naive': 0.0744, 'R2OOS_condmean': -0.0745, 'R2OOS_cs_yhat': -0.062}

▶ Ridge


Penalized rolling: 100%|██████████| 408/408 [00:08<00:00, 50.93it/s]



==== Ridge ====
       R2OOS_naive  R2OOS_condmean  R2OOS_cs_yhat
xr_2       -0.2131         -0.3782        -0.2493
xr_3       -0.1923         -0.3782        -0.3191
xr_5       -0.1612         -0.3649        -0.3542
xr_7       -0.1445         -0.3422        -0.3648
xr_10      -0.1159         -0.2956        -0.3785
Mean: {'R2OOS_naive': -0.1654, 'R2OOS_condmean': -0.3518, 'R2OOS_cs_yhat': -0.3332}

▶ ElasticNet


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c


==== ElasticNet ====
       R2OOS_naive  R2OOS_condmean  R2OOS_cs_yhat
xr_2        0.0830         -0.0425         0.0556
xr_3        0.0810         -0.0628        -0.0167
xr_5        0.0772         -0.0853        -0.0762
xr_7        0.0624         -0.1000        -0.1181
xr_10       0.0287         -0.1281        -0.1998
Mean: {'R2OOS_naive': 0.0665, 'R2OOS_condmean': -0.0837, 'R2OOS_cs_yhat': -0.071}

▶ RandomForest


Tree rolling: 100%|██████████| 408/408 [1:09:43<00:00, 10.25s/it]



==== RandomForest ====
       R2OOS_naive  R2OOS_condmean  R2OOS_cs_yhat
xr_2       -0.0709         -0.2152        -0.1028
xr_3       -0.0703         -0.2360        -0.1842
xr_5       -0.0497         -0.2331        -0.2241
xr_7       -0.0548         -0.2364        -0.2579
xr_10      -0.0571         -0.2267        -0.3059
Mean: {'R2OOS_naive': -0.0606, 'R2OOS_condmean': -0.2295, 'R2OOS_cs_yhat': -0.215}

▶ ExtraTrees


Tree rolling: 100%|██████████| 408/408 [40:19<00:00,  5.93s/it]



==== ExtraTrees ====
       R2OOS_naive  R2OOS_condmean  R2OOS_cs_yhat
xr_2        0.0159         -0.1178        -0.0135
xr_3       -0.0035         -0.1598        -0.1102
xr_5       -0.0080         -0.1848        -0.1755
xr_7       -0.0250         -0.2021        -0.2224
xr_10      -0.0566         -0.2268        -0.3053
Mean: {'R2OOS_naive': -0.0154, 'R2OOS_condmean': -0.1783, 'R2OOS_cs_yhat': -0.1654}

▶ XGBoost


Tree rolling: 100%|██████████| 408/408 [04:11<00:00,  1.62it/s]



==== XGBoost ====
       R2OOS_naive  R2OOS_condmean  R2OOS_cs_yhat
xr_2       -0.1356         -0.2912        -0.1695
xr_3       -0.1729         -0.3568        -0.2976
xr_5       -0.1632         -0.3682        -0.3565
xr_7       -0.1599         -0.3610        -0.3831
xr_10      -0.1366         -0.3203        -0.4040
Mean: {'R2OOS_naive': -0.1536, 'R2OOS_condmean': -0.3395, 'R2OOS_cs_yhat': -0.3222}

✓ done
