# Ridge

In [10]:
# ============================================================
# (1) FWD-only Ridge
# - Model: Ridge(X = fwd_*, y = xr_*)
# - CV: simple tscv on alpha
# - NO MSE weighting; tqdm progress
# ============================================================
import os
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from rolling_framework import ExpandingRunner, make_strategy

# -------------------- USER CONFIG ---------------------------
DATA_DIR    = ""
DATA_FILE   = os.path.join(DATA_DIR, "dataset.csv")  # index='Time'
Y_COLS      = ["xr_2","xr_3","xr_5","xr_7","xr_10"]   # regression targets
FWD_PREFIX  = "fwd_"                                  # predictor columns are selected by this prefix
SLOPE_PREFIX= "s_"                                    # per-target slope column is SLOPE_PREFIX + target suffix

# Passed unchanged to ExpandingRunner below; the labels look like YYYYMM
# strings -- TODO confirm against the dataset.
PERIOD      = ["197108", "202312"]
BURN_IN_END = "200609"
HORIZON     = 12

# -------------------- LOAD & PREP ---------------------------
df = pd.read_csv(DATA_FILE, index_col="Time")
# Work with string labels so they compare consistently with PERIOD / BURN_IN_END.
df.index = df.index.astype(str)

# CS OLS baseline (strict per-maturity: xr_j ~ s_j)
# Maps each target "xr_<j>" to its slope regressor "s_<j>".
slope_map = {yc: f"{SLOPE_PREFIX}{yc.split('_',1)[1]}" for yc in Y_COLS}

# Fail fast: the slope columns are only read by build_cs_baseline, i.e. after
# the (slow) walk-forward fit, so verify every required column before fitting.
_missing_cols = [c for c in [*Y_COLS, *slope_map.values()] if c not in df.columns]
if _missing_cols:
    raise KeyError(f"Columns missing from {DATA_FILE!r}: {_missing_cols}")

y = df[Y_COLS].copy()
fwd_cols = [c for c in df.columns if c.startswith(FWD_PREFIX)]
# Explicit raise instead of `assert`, which is removed when Python runs with -O.
if not fwd_cols:
    raise ValueError("No fwd_* columns found.")

X = df[fwd_cols].copy()
def build_cs_baseline(runner):
    """Build the per-maturity OLS benchmark forecasts (xr_j ~ s_j).

    For every label ``t`` in ``runner.test_times`` a separate one-regressor
    ``LinearRegression`` is fitted for each target in ``Y_COLS`` on all
    labels of ``runner.times`` that compare strictly less than ``t``, and
    then evaluated at ``t``.

    Reads the module-level ``df``, ``y``, ``Y_COLS`` and ``slope_map``.

    Parameters
    ----------
    runner : object exposing ``times`` and ``test_times`` (iterables of
        index labels present in ``df`` / ``y``).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``runner.test_times`` with columns ``Y_COLS``; a test
        label without any earlier label is left as a NaN row by the reindex.
    """
    def _one_step_forecast(history, when, target):
        # Univariate regression of the target on its own slope column.
        slope = slope_map[target]
        y_hist = y.loc[history, target].astype(float).values
        x_hist = df.loc[history, [slope]].astype(float).values
        x_now = [[float(df.loc[when, slope])]]
        fitted = LinearRegression().fit(x_hist, y_hist)
        return float(fitted.predict(x_now).ravel()[0])

    forecasts = []
    for when in runner.test_times:
        # Expanding window: everything strictly before the test label.
        history = [stamp for stamp in runner.times if stamp < when]
        if history:
            per_target = {
                target: _one_step_forecast(history, when, target)
                for target in Y_COLS
            }
            forecasts.append(pd.Series(per_target, name=when))

    table = pd.DataFrame(forecasts)
    return table.reindex(index=runner.test_times, columns=Y_COLS)

# -------------------- Strategy & Runner ---------------------
# Time-series CV over the ridge penalty only.
_ridge_cv = {
    "mode": "tscv",
    "n_splits": 10,
    "grid": {"alpha": [1e-4, 1e-3, 1e-2, 1e-1, 1.0, 10.0]},
}
ridge_fwd = make_strategy(
    "Ridge",
    target_cols=Y_COLS,
    params={"random_state": 0},
    scale=True,
    cv=_ridge_cv,
)

runner = ExpandingRunner(
    X=X,
    y=y,
    strategy=ridge_fwd,
    period=PERIOD,
    burn_in_end=BURN_IN_END,
    horizon=HORIZON,
)
runner.fit_walk(progress=True, desc="(1) FWD-only Ridge")

# -------------------- Metrics -------------------------------
# Out-of-sample R2 against the two built-in baselines and against the
# per-maturity OLS benchmark produced by build_cs_baseline.
r2_naive = runner.R2OOS(baseline="naive")
r2_cond  = runner.R2OOS(baseline="condmean")
cs_bench = build_cs_baseline(runner)
r2_cs    = runner.R2OOS(baseline="custom", benchmark=cs_bench)

print("\n=== (1) FWD-only Ridge ===")
for _title, _scores in (
    ("R2OOS vs naive:\n", r2_naive),
    ("R2OOS vs condmean:\n", r2_cond),
    ("R2OOS vs CS OLS:\n", r2_cs),
):
    print(_title, _scores.round(4))

(1) FWD-only Ridge: 100%|██████████| 196/196 [05:41<00:00,  1.74s/step, t=202312 | train=628]



=== (1) FWD-only Ridge ===
R2OOS vs naive:
 xr_2     0.0076
xr_3     0.0942
xr_5     0.1852
xr_7     0.2312
xr_10    0.2722
dtype: float64
R2OOS vs condmean:
 xr_2     0.0053
xr_3     0.0683
xr_5     0.1304
xr_7     0.1778
xr_10    0.2289
dtype: float64
R2OOS vs CS OLS:
 xr_2     0.0808
xr_3     0.0557
xr_5     0.0259
xr_7     0.0292
xr_10    0.0214
dtype: float64


# DNN

In [None]:
# ============================================================
# (2) CSARM + Residual DNN(MLP) on FWD
# - Base: per-maturity CS OLS (xr_j ~ s_j)
# - Residual: MLP on fwd_* with very large L2 (alpha)
# - Grid: hidden_layer_sizes in {(16,), (16,8)}, alpha in {1e3, 1e5, 1e7}
# - CV: simple tscv on residual only; tqdm progress
# ============================================================
import os
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from rolling_framework import ExpandingRunner, make_strategy

# -------------------- USER CONFIG ---------------------------
DATA_DIR    = ""
DATA_FILE   = os.path.join(DATA_DIR, "dataset.csv")  # index='Time'
Y_COLS      = ["xr_2","xr_3","xr_5","xr_7","xr_10"]   # regression targets
SLOPE_PREFIX= "s_"                                    # per-target slope column is SLOPE_PREFIX + target suffix
FWD_PREFIX  = "fwd_"                                  # residual-model inputs are selected by this prefix

# Passed unchanged to ExpandingRunner below; the labels look like YYYYMM
# strings -- TODO confirm against the dataset.
PERIOD      = ["197108", "202312"]
BURN_IN_END = "200609"
HORIZON     = 12

# -------------------- LOAD & PREP ---------------------------
df = pd.read_csv(DATA_FILE, index_col="Time")
# Work with string labels so they compare consistently with PERIOD / BURN_IN_END.
df.index = df.index.astype(str)

# Maps each target "xr_<j>" to its slope regressor "s_<j>".
slope_map  = {yc: f"{SLOPE_PREFIX}{yc.split('_',1)[1]}" for yc in Y_COLS}
slope_cols = list(slope_map.values())

# Fail fast with the full list of absent columns before any fitting starts.
_missing_cols = [c for c in [*Y_COLS, *slope_cols] if c not in df.columns]
if _missing_cols:
    raise KeyError(f"Columns missing from {DATA_FILE!r}: {_missing_cols}")

y = df[Y_COLS].copy()
fwd_cols   = [c for c in df.columns if c.startswith(FWD_PREFIX)]
# Explicit raise instead of `assert`, which is removed when Python runs with -O.
if not fwd_cols:
    raise ValueError("No fwd_* columns found.")

# Slope columns feed the base regression, fwd_* columns feed the residual model.
X = df[slope_cols + fwd_cols].copy()

# -------------------- Strategy: CSARM (residual = MLP) ------
# NOTE: "CSARM" must be registered in strategies.py / make_strategy.

# Fixed settings handed to the residual MLP.
_mlp_params = {
    "random_state": 0,
    "max_iter": 2000,
    "early_stopping": True,
    "learning_rate_init": 0.001,
    "tol": 1e-5,
}

# Search grid for the residual model.
_mlp_grid = {
    # Requirement: layer layouts (16,) and (16, 8)
    "hidden_layer_sizes": [(16,), (16, 8)],
    # Requirement: very large L2 penalty (alpha)
    "alpha": [1e3, 1e5, 1e7],
}

csarm_mlp = make_strategy(
    "CSARM",
    target_cols=Y_COLS,
    slope_map=slope_map,          # base: per-maturity s_j only
    feature_cols=fwd_cols,        # residual inputs: fwd_*
    residual_kind="mlp",
    residual_params=_mlp_params,
    scale_res=True,
    res_cv={"mode": "tscv", "n_splits": 8, "grid": _mlp_grid},
)

runner = ExpandingRunner(
    X=X,
    y=y,
    strategy=csarm_mlp,
    period=PERIOD,
    burn_in_end=BURN_IN_END,
    horizon=HORIZON,
)
runner.fit_walk(progress=True, desc="(2) CSARM + Residual MLP(FWD)")

# -------------------- Metrics (vs strict CS OLS) ------------
def build_cs_baseline(runner, df, y, Y_COLS, slope_map):
    """Build the per-maturity OLS benchmark forecasts (xr_j ~ s_j).

    For every label ``t`` in ``runner.test_times`` and every target in
    ``Y_COLS``, a one-regressor ``LinearRegression`` is fitted on the labels
    of ``runner.times`` that compare strictly less than ``t`` and is then
    evaluated at ``t``.

    Parameters
    ----------
    runner : object exposing ``times`` and ``test_times``.
    df : DataFrame holding the slope columns named in ``slope_map``.
    y : DataFrame holding the target columns ``Y_COLS``.
    Y_COLS : sequence of target column names.
    slope_map : mapping from target column name to its slope column name.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``runner.test_times`` with columns ``Y_COLS``; a test
        label without any earlier label is left as a NaN row by the reindex.
    """
    collected = []
    for test_label in runner.test_times:
        # Expanding window: everything strictly before the test label.
        past = [label for label in runner.times if label < test_label]
        if len(past) == 0:
            continue

        preds = {}
        for target in Y_COLS:
            regressor = slope_map[target]
            train_y = y.loc[past, target].astype(float).values
            train_x = df.loc[past, [regressor]].astype(float).values
            query_x = [[float(df.loc[test_label, regressor])]]
            ols = LinearRegression().fit(train_x, train_y)
            preds[target] = float(ols.predict(query_x).ravel()[0])

        collected.append(pd.Series(preds, name=test_label))

    benchmark = pd.DataFrame(collected)
    return benchmark.reindex(index=runner.test_times, columns=Y_COLS)

# Out-of-sample R2 against the two built-in baselines and against the
# per-maturity OLS benchmark produced by build_cs_baseline.
r2_naive = runner.R2OOS(baseline="naive")
r2_cond  = runner.R2OOS(baseline="condmean")
cs_bench = build_cs_baseline(runner, df, y, Y_COLS, slope_map)
r2_cs    = runner.R2OOS(baseline="custom", benchmark=cs_bench)

print("\n=== (2) CSARM + Residual MLP(FWD) ===")
for _title, _scores in (
    ("R2OOS vs naive:\n", r2_naive),
    ("R2OOS vs condmean:\n", r2_cond),
    ("R2OOS vs CS OLS:\n", r2_cs),
):
    print(_title, _scores.round(4))