# Ridge

In [None]:
# ============================================================
# (1) Ridge on [FWD + MACRO(F*)]
# - X = [fwd, macro]  (assembled plainly in a single line)
# - Saves the CS OLS baseline (y_cs_hat)
# - The tqdm progress bar is handled inside ExpandingRunner
# ============================================================
import os
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from rolling_framework import ExpandingRunner, make_strategy

# -------------------- USER CONFIG ---------------------------
# Everything a user is expected to edit for this experiment lives here.
# An empty DATA_DIR makes DATA_FILE resolve to "dataset.csv" relative to the
# current working directory.
DATA_DIR     = ""
DATA_FILE    = os.path.join(DATA_DIR, "dataset.csv")  # index='Time'
# Target columns; the part after the first "_" is reused to look up the
# matching slope column (SLOPE_PREFIX + suffix).
Y_COLS       = ["xr_2","xr_3","xr_5","xr_7","xr_10"]
FWD_PREFIX   = "fwd_"
MACRO_PREFIX = "F"          # macro columns start with 'F'
SLOPE_PREFIX = "s_"

# Passed unchanged to ExpandingRunner as period=, burn_in_end= and horizon=.
# NOTE(review): presumably YYYYMM labels and a horizon in months; the exact
# semantics are defined by rolling_framework - confirm there.
PERIOD       = ["197108", "202312"]
BURN_IN_END  = "200609"
HORIZON      = 12

# -------------------- Helpers -------------------------------
def cols_by_prefix(df: pd.DataFrame, prefix: str) -> pd.DataFrame:
    """Return the sub-frame of ``df`` whose column names start with ``prefix``.

    The match is case-sensitive and the original column order is kept.
    When no column matches, a frame with the same index and zero columns
    is returned.
    """
    has_prefix = df.columns.str.startswith(prefix)
    selected = df.loc[:, has_prefix]
    return selected

def slope_map_from_targets(df: pd.DataFrame, ycols, slope_prefix: str):
    """Map each target column to its slope column in ``df``.

    The slope column name is ``slope_prefix`` followed by everything after
    the first ``"_"`` of the target name (e.g. ``"xr_10"`` with prefix
    ``"s_"`` gives ``"s_10"``).

    Args:
        df: Frame that must contain every derived slope column.
        ycols: Target column names, each containing at least one ``"_"``.
        slope_prefix: Prefix of the slope columns.

    Returns:
        A dict ``{target_column: slope_column}`` in the order of ``ycols``.

    Raises:
        ValueError: If a target name has no ``"_"`` separator, so no suffix
            can be derived from it.
        KeyError: If any derived slope column is absent from ``df``.
    """
    slope_cols = []
    for ycol in ycols:
        _, sep, suffix = ycol.partition("_")
        if not sep:
            # Without this guard the failure is an opaque IndexError that
            # does not say which target name is malformed.
            raise ValueError(
                f"Target column {ycol!r} has no '_' separator; "
                "cannot derive its slope column."
            )
        slope_cols.append(f"{slope_prefix}{suffix}")

    missing = [c for c in slope_cols if c not in df.columns]
    if missing:
        raise KeyError(f"Missing slope columns: {missing}")
    return dict(zip(ycols, slope_cols))

def build_cs_baseline(runner, df, y, slope_map):
    """Build out-of-sample forecasts from one univariate OLS per target.

    For every ``t`` in ``runner.test_times`` the training sample is all
    entries ``s`` of ``runner.times`` with ``s < t`` (compared with ``<`` on
    the labels as stored). For each ``(target, slope)`` pair in
    ``slope_map`` a ``LinearRegression`` of the target on its slope column
    is fitted on that sample and evaluated at the slope value observed at
    ``t``. Test times with an empty training sample are skipped.

    Args:
        runner: Object exposing ``times`` and ``test_times``.
        df: Frame holding the slope columns, indexed by the time labels.
        y: Frame holding the target columns, indexed by the time labels.
        slope_map: Dict ``{target_column: slope_column}``.

    Returns:
        A DataFrame indexed by ``runner.test_times`` with one column per
        key of ``slope_map``; skipped test times appear as NaN rows.
    """
    target_names = list(slope_map.keys())
    forecasts = []

    for t in runner.test_times:
        history = [s for s in runner.times if s < t]
        if len(history) == 0:
            continue

        preds = {}
        for ycol, scol in slope_map.items():
            x_train = df.loc[history, [scol]].astype(float)
            y_train = y.loc[history, ycol].astype(float).values
            model = LinearRegression()
            model.fit(x_train, y_train)

            # Single-row frame with the same column name used for fitting.
            x_test = pd.DataFrame([[df.loc[t, scol]]], columns=[scol], dtype=float)
            preds[ycol] = float(model.predict(x_test).ravel()[0])

        forecasts.append(pd.Series(preds, name=t))

    table = pd.DataFrame(forecasts)
    return table.reindex(index=runner.test_times, columns=target_names)

# -------------------- LOAD & PREP ---------------------------
# Read the dataset with 'Time' as index, then cast the index labels to str.
df = pd.read_csv(DATA_FILE, index_col="Time"); df.index = df.index.astype(str)
y = df[Y_COLS].copy()

# Straightforward construction: build each part, then combine them in one line.
fwd   = cols_by_prefix(df, FWD_PREFIX)
macro = cols_by_prefix(df, MACRO_PREFIX)
X = pd.concat([fwd, macro], axis=1)              # X = [fwd, macro]
# Fail early when neither prefix matched any column.
assert X.shape[1] > 0, "No features found (fwd_* or F*)."

# Slope mapping required for the CS baseline.
slope_map = slope_map_from_targets(df, Y_COLS, SLOPE_PREFIX)

# -------------------- Strategy & Runner ---------------------
# Ridge strategy from rolling_framework; alpha is chosen from the grid below.
# NOTE(review): "tscv" presumably means time-series cross-validation and
# scale=True presumably standardizes features - confirm in rolling_framework.
ridge = make_strategy(
    "Ridge",
    target_cols=Y_COLS,
    params={"random_state": 0},
    scale=True,
    cv={"mode": "tscv", "n_splits": 10,
        "grid": {"alpha": [1e-4, 1e-3, 1e-2, 1e-1, 1.0, 10.0]}},
)

# The runner receives the feature/target frames plus the user-config window
# settings; fit_walk runs the walk-forward fit with a progress bar.
runner = ExpandingRunner(X=X, y=y, strategy=ridge,
                         period=PERIOD, burn_in_end=BURN_IN_END, horizon=HORIZON)
runner.fit_walk(progress=True, desc="(1) Ridge on [fwd, macro]")

# -------------------- Metrics & .mat ------------------------
# Out-of-sample R2 against three baselines: the runner's built-in "naive"
# and "condmean", and a custom benchmark built here from per-target slope OLS.
r2_naive = runner.R2OOS(baseline="naive")
r2_cond  = runner.R2OOS(baseline="condmean")
cs_bench = build_cs_baseline(runner, df, y, slope_map)   # strict CS OLS
r2_cs    = runner.R2OOS(baseline="custom", benchmark=cs_bench)

print("\n=== (1) Ridge on [fwd, macro] ===")
print("R2OOS vs naive:\n",    r2_naive.round(4))
print("R2OOS vs condmean:\n", r2_cond.round(4))
print("R2OOS vs CS OLS:\n",   r2_cs.round(4))

# Assumes the engine's to_mat has been (minimally) patched to store Y_cs_hat
# when a benchmark is supplied.
runner.to_mat("ridge_fwd_macro.mat", baseline="custom", benchmark=cs_bench)

(1) FWD-only Ridge: 100%|██████████| 196/196 [05:41<00:00,  1.74s/step, t=202312 | train=628]



=== (1) FWD-only Ridge ===
R2OOS vs naive:
 xr_2     0.0076
xr_3     0.0942
xr_5     0.1852
xr_7     0.2312
xr_10    0.2722
dtype: float64
R2OOS vs condmean:
 xr_2     0.0053
xr_3     0.0683
xr_5     0.1304
xr_7     0.1778
xr_10    0.2289
dtype: float64
R2OOS vs CS OLS:
 xr_2     0.0808
xr_3     0.0557
xr_5     0.0259
xr_7     0.0292
xr_10    0.0214
dtype: float64


# DNN

In [None]:
# ============================================================
# (2) CSARM + Residual MLP on [FWD + MACRO(F*)]
# - Base: per-maturity CS OLS (xr_j ~ s_j)
# - Residual features: [fwd, macro]
# - Grid: hidden_layer_sizes ∈ {(16,), (16,8)}, alpha ∈ {1e3, 1e5, 1e7}
# - Only the MLP parameters that are changed often in this experiment file are set explicitly (the rest keep their defaults)
# ============================================================
import os
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from rolling_framework import ExpandingRunner, make_strategy

# -------------------- USER CONFIG ---------------------------
# Everything a user is expected to edit for this experiment lives here.
# An empty DATA_DIR makes DATA_FILE resolve to "dataset.csv" relative to the
# current working directory.
DATA_DIR     = ""
DATA_FILE    = os.path.join(DATA_DIR, "dataset.csv")
# Target columns; the part after the first "_" is reused to look up the
# matching slope column (SLOPE_PREFIX + suffix).
Y_COLS       = ["xr_2","xr_3","xr_5","xr_7","xr_10"]
SLOPE_PREFIX = "s_"
FWD_PREFIX   = "fwd_"
MACRO_PREFIX = "F"

# Passed unchanged to ExpandingRunner as period=, burn_in_end= and horizon=.
# NOTE(review): presumably YYYYMM labels and a horizon in months; the exact
# semantics are defined by rolling_framework - confirm there.
PERIOD       = ["197108", "202312"]
BURN_IN_END  = "200609"
HORIZON      = 12

# -------------------- Helpers -------------------------------
def cols_by_prefix(df: pd.DataFrame, prefix: str) -> pd.DataFrame:
    """Return the sub-frame of ``df`` whose column names start with ``prefix``.

    The match is case-sensitive and the original column order is kept.
    When no column matches, a frame with the same index and zero columns
    is returned.
    """
    has_prefix = df.columns.str.startswith(prefix)
    selected = df.loc[:, has_prefix]
    return selected

def slope_map_from_targets(df: pd.DataFrame, ycols, slope_prefix: str):
    """Map each target column to its slope column in ``df``.

    The slope column name is ``slope_prefix`` followed by everything after
    the first ``"_"`` of the target name (e.g. ``"xr_10"`` with prefix
    ``"s_"`` gives ``"s_10"``).

    Args:
        df: Frame that must contain every derived slope column.
        ycols: Target column names, each containing at least one ``"_"``.
        slope_prefix: Prefix of the slope columns.

    Returns:
        A dict ``{target_column: slope_column}`` in the order of ``ycols``.

    Raises:
        ValueError: If a target name has no ``"_"`` separator, so no suffix
            can be derived from it.
        KeyError: If any derived slope column is absent from ``df``.
    """
    slope_cols = []
    for ycol in ycols:
        _, sep, suffix = ycol.partition("_")
        if not sep:
            # Without this guard the failure is an opaque IndexError that
            # does not say which target name is malformed.
            raise ValueError(
                f"Target column {ycol!r} has no '_' separator; "
                "cannot derive its slope column."
            )
        slope_cols.append(f"{slope_prefix}{suffix}")

    missing = [c for c in slope_cols if c not in df.columns]
    if missing:
        raise KeyError(f"Missing slope columns: {missing}")
    return dict(zip(ycols, slope_cols))

def build_cs_baseline(runner, df, y, slope_map):
    """Build out-of-sample forecasts from one univariate OLS per target.

    For every ``t`` in ``runner.test_times`` the training sample is all
    entries ``s`` of ``runner.times`` with ``s < t`` (compared with ``<`` on
    the labels as stored). For each ``(target, slope)`` pair in
    ``slope_map`` a ``LinearRegression`` of the target on its slope column
    is fitted on that sample and evaluated at the slope value observed at
    ``t``. Test times with an empty training sample are skipped.

    Args:
        runner: Object exposing ``times`` and ``test_times``.
        df: Frame holding the slope columns, indexed by the time labels.
        y: Frame holding the target columns, indexed by the time labels.
        slope_map: Dict ``{target_column: slope_column}``.

    Returns:
        A DataFrame indexed by ``runner.test_times`` with one column per
        key of ``slope_map``; skipped test times appear as NaN rows.
    """
    target_names = list(slope_map.keys())
    forecasts = []

    for t in runner.test_times:
        history = [s for s in runner.times if s < t]
        if len(history) == 0:
            continue

        preds = {}
        for ycol, scol in slope_map.items():
            x_train = df.loc[history, [scol]].astype(float)
            y_train = y.loc[history, ycol].astype(float).values
            model = LinearRegression()
            model.fit(x_train, y_train)

            # Single-row frame with the same column name used for fitting.
            x_test = pd.DataFrame([[df.loc[t, scol]]], columns=[scol], dtype=float)
            preds[ycol] = float(model.predict(x_test).ravel()[0])

        forecasts.append(pd.Series(preds, name=t))

    table = pd.DataFrame(forecasts)
    return table.reindex(index=runner.test_times, columns=target_names)

# -------------------- LOAD & PREP ---------------------------
# Read the dataset with 'Time' as index, then cast the index labels to str.
df = pd.read_csv(DATA_FILE, index_col="Time"); df.index = df.index.astype(str)
y = df[Y_COLS].copy()

# Straightforward construction: build each part, then combine them in one line.
slope_map  = slope_map_from_targets(df, Y_COLS, SLOPE_PREFIX)
slope_cols = list(slope_map.values())
slope = df[slope_cols]
fwd   = cols_by_prefix(df, FWD_PREFIX)
macro = cols_by_prefix(df, MACRO_PREFIX)

# Residual inputs = [fwd, macro]; final X = [slope, fwd, macro].
residual_features = pd.concat([fwd, macro], axis=1)
X = pd.concat([slope, residual_features], axis=1)

# Fail early when neither prefix matched any column.
assert residual_features.shape[1] > 0, "No residual features (fwd_* or F*) found."

# -------------------- Strategy: CSARM (Residual = MLP) ------
# Only the frequently changed parameters are set here (the remaining
# MLPRegressor defaults are used).
mlp_params = dict(
    random_state=0,
    max_iter=2000,
    early_stopping=True,
    learning_rate_init=1e-3,
    tol=1e-5,
)

# NOTE(review): how CSARM combines the slope-based base model with the
# residual model is defined in rolling_framework - confirm there.
csarm_mlp = make_strategy(
    "CSARM",
    target_cols=Y_COLS,
    slope_map=slope_map,                           # base: per-maturity s_j only
    feature_cols=residual_features.columns.tolist(),  # residual inputs: [fwd, macro]
    residual_kind="mlp",
    residual_params=mlp_params,                    # set explicitly in this experiment file
    scale_res=True,
    res_cv={
        "mode": "tscv",
        "n_splits": 8,
        "grid": {
            "hidden_layer_sizes": [(16,), (16, 8)],  # required architectures
            "alpha": [1e3, 1e5, 1e7],                # very large L2 penalty
        },
    },
)

# The runner receives X = [slope, fwd, macro] plus the user-config window
# settings; fit_walk runs the walk-forward fit with a progress bar.
runner = ExpandingRunner(X=X, y=y, strategy=csarm_mlp,
                         period=PERIOD, burn_in_end=BURN_IN_END, horizon=HORIZON)
runner.fit_walk(progress=True, desc="(2) CSARM + Residual MLP on [fwd, macro]")

# -------------------- Metrics & .mat ------------------------
# Out-of-sample R2 against three baselines: the runner's built-in "naive"
# and "condmean", and a custom benchmark built here from per-target slope OLS.
r2_naive = runner.R2OOS(baseline="naive")
r2_cond  = runner.R2OOS(baseline="condmean")
cs_bench = build_cs_baseline(runner, df, y, slope_map)  # strict CS OLS per maturity
r2_cs    = runner.R2OOS(baseline="custom", benchmark=cs_bench)

print("\n=== (2) CSARM + Residual MLP on [fwd, macro] ===")
print("R2OOS vs naive:\n",    r2_naive.round(4))
print("R2OOS vs condmean:\n", r2_cond.round(4))
print("R2OOS vs CS OLS:\n",   r2_cs.round(4))

runner.to_mat("arm_resmlp_fwd_macro.mat", baseline="custom", benchmark=cs_bench)  # saved together with y_cs_hat