# Ridge

In [None]:
# ---------- QUIET MODE (put these lines at very top) ----------
# Silences progress-bar and scikit-learn warnings before anything else is imported.
import os
import sys
import types
import warnings

# Intended to keep tqdm.auto from switching to notebook mode.
os.environ["TQDM_NOTEBOOK"] = "0"

# Suppress the tqdm IProgress warning: register a stand-in "tqdm.auto" module
# whose progress-bar entry points are the standard (console) tqdm ones.
try:
    import tqdm as _tqdm
    _auto = types.ModuleType("tqdm.auto")
    _auto.tqdm = _tqdm.tqdm
    # Expose trange as well so `from tqdm.auto import trange` still resolves
    # against the stand-in module.
    _auto.trange = _tqdm.trange
    sys.modules["tqdm.auto"] = _auto
except (ImportError, AttributeError):
    # Best effort only: if tqdm is missing or lacks these names, skip the shim.
    pass

from sklearn.exceptions import DataConversionWarning, ConvergenceWarning

warnings.filterwarnings("ignore", category=DataConversionWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", message=".*IProgress not found.*")
# Key item: suppress the LinearRegression feature-names warning.
warnings.filterwarnings("ignore", message=".*X does not have valid feature names.*", category=UserWarning)
# -------------------------------------------------------------

import os
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from rolling_framework import ExpandingRunner, make_strategy

# ---------------- CONFIG ----------------
# Input CSV; its 'Time' column is used as the index.
DATA_FILE = os.path.join("", "dataset.csv")

# Target columns and the column-name prefixes used to pick regressors.
Y_COLS = ["xr_2", "xr_3", "xr_5", "xr_7", "xr_10"]
SLOPE_PREFIX = "s_"
FWD_PREFIX = "fwd_"
MACRO_PREFIX = "F"

# Settings handed to ExpandingRunner / fit_walk in the run section.
PERIOD = ["197108", "202312"]
BURN_IN_END = "200609"
HORIZON = 12
SHOW_PROGRESS = True

# Ridge estimator parameters and the cross-validation spec (alpha grid).
RIDGE_PARAMS = {"random_state": 0}
RIDGE_CV = dict(
    mode="tscv",
    n_splits=10,
    grid=dict(alpha=[1e-4, 1e-3, 1e-2, 1e-1, 1.0, 10.0]),
)

# ------------- mini utils -------------
def read_df(path: str) -> pd.DataFrame:
    """Load the CSV at *path*, indexed by its 'Time' column as strings.

    Args:
        path: Location of the CSV file; it must contain a 'Time' column.

    Returns:
        The parsed frame whose index labels have been converted to ``str``.
    """
    table = pd.read_csv(path, index_col="Time")
    # String labels let dates be looked up and compared as text downstream.
    table.index = table.index.astype(str)
    return table

def features(df: pd.DataFrame, *, use_fwd: bool = True, use_macro: bool = True) -> pd.DataFrame:
    """Select the regressor columns of *df* by column-name prefix.

    Args:
        df: Frame containing the candidate regressor columns.
        use_fwd: Include the columns whose name starts with ``FWD_PREFIX``.
        use_macro: Include the columns whose name starts with ``MACRO_PREFIX``.

    Returns:
        The selected columns concatenated side by side (fwd group first,
        then macro group).

    Raises:
        ValueError: If both groups are switched off, or if the selected
            prefixes match no column of *df*.
    """
    prefixes = []
    if use_fwd:
        prefixes.append(FWD_PREFIX)
    if use_macro:
        prefixes.append(MACRO_PREFIX)
    if not prefixes:
        raise ValueError("No features selected (fwd/macro).")

    parts = [df.loc[:, df.columns.str.startswith(prefix)] for prefix in prefixes]
    selected = pd.concat(parts, axis=1)
    # Fail early with a clear message instead of handing an empty design
    # matrix to the estimator.
    if selected.shape[1] == 0:
        raise ValueError(f"No columns start with any of the selected prefixes: {prefixes}")
    return selected

def cs_baseline(runner: ExpandingRunner, df: pd.DataFrame, y: pd.DataFrame) -> pd.DataFrame:
    """Build per-maturity slope-regression (CS) benchmark forecasts.

    For every date ``t`` in ``runner.test_times`` and every target ``xr_j`` in
    ``Y_COLS``, a ``LinearRegression`` of ``xr_j`` on the single slope column
    ``s_j`` is fitted on all dates of ``runner.times`` strictly before ``t``
    and then evaluated at ``t``.

    Args:
        runner: Object exposing ``times`` and ``test_times``.
        df: Frame holding the slope columns, indexed by the date labels.
        y: Frame holding the target columns, indexed by the date labels.

    Returns:
        Forecasts indexed by ``runner.test_times`` with columns ``Y_COLS``;
        test dates that have no earlier date are left as NaN.
    """
    # NOTE(review): the training window is every s < t -- confirm the targets
    # dated s are already observable at t given the forecast horizon.

    def _forecast(history, when, target):
        # Fit xr_j ~ s_j on the history and predict the single row at `when`.
        slope_col = f"{SLOPE_PREFIX}{target.split('_', 1)[1]}"
        # Inputs stay DataFrames so the column name is kept for fit/predict.
        train_x = df.loc[history, [slope_col]].astype(float)
        train_y = y.loc[history, target].astype(float).values
        model = LinearRegression().fit(train_x, train_y)
        test_x = df.loc[[when], [slope_col]].astype(float)
        return float(model.predict(test_x)[0])

    forecasts = []
    for when in runner.test_times:
        history = [stamp for stamp in runner.times if stamp < when]
        if history:
            per_target = {target: _forecast(history, when, target) for target in Y_COLS}
            forecasts.append(pd.Series(per_target, name=when))
    return pd.DataFrame(forecasts).reindex(index=runner.test_times, columns=Y_COLS)

# ---------------- RUN -------------------
# Script entry point: fit the Ridge strategy walk-forward, build the CS OLS
# benchmark, print the R2OOS tables and export the results.
if __name__ == "__main__":
    df = read_df(DATA_FILE)
    y  = df[Y_COLS].copy()

    # X = [fwd, macro]
    X  = features(df, use_fwd=True, use_macro=True)

    # Strategy object built by rolling_framework from the config constants.
    ridge = make_strategy("Ridge",
        target_cols=Y_COLS, params=RIDGE_PARAMS, scale=True, cv=RIDGE_CV)

    # NOTE(review): fit_walk presumably runs the expanding-window fit/predict
    # loop -- its exact semantics live in rolling_framework.
    runner = ExpandingRunner(X=X, y=y, strategy=ridge,
                             period=PERIOD, burn_in_end=BURN_IN_END, horizon=HORIZON)
    runner.fit_walk(progress=SHOW_PROGRESS, desc="Ridge [fwd, macro]")

    # Custom benchmark: per-maturity slope regressions over the runner's test dates.
    y_cs_hat = cs_baseline(runner, df, y)

    # Report R2OOS against three baselines, rounded to 4 decimals.
    print("\n=== Ridge [fwd, macro] ===")
    print("R2OOS vs naive:\n",    runner.R2OOS(baseline="naive").round(4))
    print("R2OOS vs condmean:\n", runner.R2OOS(baseline="condmean").round(4))
    print("R2OOS vs CS OLS:\n",   runner.R2OOS(baseline="custom", benchmark=y_cs_hat).round(4))

    # NOTE(review): presumably writes the results to a MATLAB .mat file -- confirm in rolling_framework.
    runner.to_mat("ridge_fwd_macro.mat", baseline="custom", benchmark=y_cs_hat)

(1) FWD-only Ridge: 100%|██████████| 196/196 [05:41<00:00,  1.74s/step, t=202312 | train=628]



=== (1) FWD-only Ridge ===
R2OOS vs naive:
 xr_2     0.0076
xr_3     0.0942
xr_5     0.1852
xr_7     0.2312
xr_10    0.2722
dtype: float64
R2OOS vs condmean:
 xr_2     0.0053
xr_3     0.0683
xr_5     0.1304
xr_7     0.1778
xr_10    0.2289
dtype: float64
R2OOS vs CS OLS:
 xr_2     0.0808
xr_3     0.0557
xr_5     0.0259
xr_7     0.0292
xr_10    0.0214
dtype: float64


# DNN

In [None]:
# ---------- QUIET MODE (put these lines at very top) ----------
# Silences progress-bar and scikit-learn warnings before anything else is imported.
import os
import sys
import types
import warnings

# Intended to keep tqdm.auto from switching to notebook mode.
os.environ["TQDM_NOTEBOOK"] = "0"

# Suppress the tqdm IProgress warning: register a stand-in "tqdm.auto" module
# whose progress-bar entry points are the standard (console) tqdm ones.
try:
    import tqdm as _tqdm
    _auto = types.ModuleType("tqdm.auto")
    _auto.tqdm = _tqdm.tqdm
    # Expose trange as well so `from tqdm.auto import trange` still resolves
    # against the stand-in module.
    _auto.trange = _tqdm.trange
    sys.modules["tqdm.auto"] = _auto
except (ImportError, AttributeError):
    # Best effort only: if tqdm is missing or lacks these names, skip the shim.
    pass

from sklearn.exceptions import DataConversionWarning, ConvergenceWarning

warnings.filterwarnings("ignore", category=DataConversionWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", message=".*IProgress not found.*")
# Key item: suppress the LinearRegression feature-names warning.
warnings.filterwarnings("ignore", message=".*X does not have valid feature names.*", category=UserWarning)
# -------------------------------------------------------------

import os
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from rolling_framework import ExpandingRunner, make_strategy

# ---------------- CONFIG ----------------
# Input CSV; its 'Time' column is used as the index.
DATA_FILE = os.path.join("", "dataset.csv")

# Target columns and the column-name prefixes used to pick regressors.
Y_COLS = ["xr_2", "xr_3", "xr_5", "xr_7", "xr_10"]
SLOPE_PREFIX = "s_"
FWD_PREFIX = "fwd_"
MACRO_PREFIX = "F"

# Settings handed to ExpandingRunner / fit_walk in the run section.
PERIOD = ["197108", "202312"]
BURN_IN_END = "199001"
HORIZON = 12
SHOW_PROGRESS = True

# Residual model = MLP (as requested: layouts (16,) and (16, 8), very large L2).
RES_MLP_PARAMS = {
    "random_state": 0,
    "max_iter": 2000,
    "early_stopping": True,
    "learning_rate_init": 1e-3,
    "tol": 1e-5,
}

# CSARM calls set_params directly on the MLP internally, so the grid keys are
# written without any prefix.
RES_MLP_CV = dict(
    mode="tscv",
    n_splits=8,
    grid=dict(
        hidden_layer_sizes=[(16,), (16, 8)],
        alpha=[1e3, 1e5, 1e7],
    ),
)

# ------------- mini utils -------------
def read_df(path: str) -> pd.DataFrame:
    """Load the CSV at *path*, indexed by its 'Time' column as strings.

    Args:
        path: Location of the CSV file; it must contain a 'Time' column.

    Returns:
        The parsed frame whose index labels have been converted to ``str``.
    """
    table = pd.read_csv(path, index_col="Time")
    # String labels let dates be looked up and compared as text downstream.
    table.index = table.index.astype(str)
    return table

def cols_by_prefix(df: pd.DataFrame, prefix: str) -> pd.DataFrame:
    """Return the columns of *df* whose name starts with *prefix*.

    Args:
        df: Frame to select from.
        prefix: Leading text a column name must have to be kept.

    Returns:
        All rows of *df* restricted to the matching columns, in their
        original order; no columns when nothing matches.
    """
    keep = df.columns.str.startswith(prefix)
    return df.loc[:, keep]

def slope_map(df: pd.DataFrame) -> dict:
    """Map every target column in ``Y_COLS`` to its slope column in *df*.

    The slope column name is ``SLOPE_PREFIX`` followed by whatever comes after
    the first underscore of the target name (``xr_5`` -> ``s_5``).

    Args:
        df: Frame expected to contain the slope columns.

    Returns:
        Dict of target column name -> slope column name, in ``Y_COLS`` order.

    Raises:
        KeyError: If a required slope column is absent from *df*.
    """
    available = df.columns
    mapping = {}
    for target in Y_COLS:
        maturity = target.split("_", 1)[1]
        slope_col = f"{SLOPE_PREFIX}{maturity}"
        if slope_col in available:
            mapping[target] = slope_col
            continue
        raise KeyError(f"Missing slope column: {slope_col}")
    return mapping

def cs_baseline(runner: ExpandingRunner, df: pd.DataFrame, y: pd.DataFrame) -> pd.DataFrame:
    """Build per-maturity slope-regression (CS) benchmark forecasts.

    For every date ``t`` in ``runner.test_times`` and every target ``xr_j`` in
    ``Y_COLS``, a ``LinearRegression`` of ``xr_j`` on the single slope column
    ``s_j`` is fitted on all dates of ``runner.times`` strictly before ``t``
    and then evaluated at ``t``.

    Args:
        runner: Object exposing ``times`` and ``test_times``.
        df: Frame holding the slope columns, indexed by the date labels.
        y: Frame holding the target columns, indexed by the date labels.

    Returns:
        Forecasts indexed by ``runner.test_times`` with columns ``Y_COLS``;
        test dates that have no earlier date are left as NaN.
    """
    # NOTE(review): the training window is every s < t -- confirm the targets
    # dated s are already observable at t given the forecast horizon.

    def _forecast(history, when, target):
        # Fit xr_j ~ s_j on the history and predict the single row at `when`.
        slope_col = f"{SLOPE_PREFIX}{target.split('_', 1)[1]}"
        # Inputs stay DataFrames so the column name is kept for fit/predict.
        train_x = df.loc[history, [slope_col]].astype(float)
        train_y = y.loc[history, target].astype(float).values
        model = LinearRegression().fit(train_x, train_y)
        test_x = df.loc[[when], [slope_col]].astype(float)
        return float(model.predict(test_x)[0])

    forecasts = []
    for when in runner.test_times:
        history = [stamp for stamp in runner.times if stamp < when]
        if history:
            per_target = {target: _forecast(history, when, target) for target in Y_COLS}
            forecasts.append(pd.Series(per_target, name=when))
    return pd.DataFrame(forecasts).reindex(index=runner.test_times, columns=Y_COLS)

# ---------------- RUN -------------------
# Script entry point: fit the CSARM strategy with an MLP residual model
# walk-forward, build the CS OLS benchmark, print the R2OOS tables and export
# the results.
if __name__ == "__main__":
    df = read_df(DATA_FILE)
    y  = df[Y_COLS].copy()

    # X = [slope | residual_features=[fwd, macro]]
    s_map   = slope_map(df)
    slopes  = df[list(s_map.values())]
    fwd     = cols_by_prefix(df, FWD_PREFIX)
    macro   = cols_by_prefix(df, MACRO_PREFIX)
    res_X   = pd.concat([fwd, macro], axis=1)
    X       = pd.concat([slopes, res_X], axis=1)
    # Guard against a dataset in which neither prefix matches any column.
    assert res_X.shape[1] > 0, "No residual features (fwd_* or F*) found."

    csarm = make_strategy(
        "CSARM",
        target_cols=Y_COLS,
        slope_map=s_map,                     # base: uses only s_j for each maturity
        feature_cols=res_X.columns.tolist(), # residual: [fwd, macro]
        residual_kind="mlp",
        residual_params=RES_MLP_PARAMS,
        scale_res=True,
        res_cv=RES_MLP_CV,                   # <- alpha/hidden_layer_sizes without a prefix
    )

    # NOTE(review): fit_walk presumably runs the expanding-window fit/predict
    # loop -- its exact semantics live in rolling_framework.
    runner = ExpandingRunner(X=X, y=y, strategy=csarm,
                             period=PERIOD, burn_in_end=BURN_IN_END, horizon=HORIZON)
    runner.fit_walk(progress=SHOW_PROGRESS, desc="CSARM + Residual MLP [fwd, macro]")

    # Custom benchmark: per-maturity slope regressions over the runner's test dates.
    y_cs_hat = cs_baseline(runner, df, y)

    # Report R2OOS against three baselines, rounded to 4 decimals.
    print("\n=== CSARM + Residual MLP [fwd, macro] ===")
    print("R2OOS vs naive:\n",    runner.R2OOS(baseline="naive").round(4))
    print("R2OOS vs condmean:\n", runner.R2OOS(baseline="condmean").round(4))
    print("R2OOS vs CS OLS:\n",   runner.R2OOS(baseline="custom", benchmark=y_cs_hat).round(4))

    # NOTE(review): presumably writes the results to a MATLAB .mat file -- confirm in rolling_framework.
    runner.to_mat("arm_resmlp_fwd_macro.mat", baseline="custom", benchmark=y_cs_hat)