# Slope Directly Connected MLP

In [None]:
# ================= QUIET HEADER =================
# Reduces progress-bar and scikit-learn warning noise; runs before the main imports below.
import sys, types, warnings
try:
    # Register a stand-in "tqdm.auto" module whose `tqdm` attribute is the plain
    # `tqdm.tqdm` class, so a later `from tqdm.auto import tqdm` resolves to it.
    # NOTE(review): presumably this avoids the notebook-widget progress bar
    # (see the "IProgress not found" filter below) -- confirm.
    import tqdm as _tqdm
    _auto = types.ModuleType("tqdm.auto"); _auto.tqdm = _tqdm.tqdm
    sys.modules["tqdm.auto"] = _auto
except Exception:
    # Best effort: if tqdm is unavailable or the shim cannot be built, carry on without it.
    pass
from sklearn.exceptions import DataConversionWarning, ConvergenceWarning
# Ignore scikit-learn's DataConversionWarning and ConvergenceWarning, and any
# warning whose message matches "IProgress not found".
warnings.filterwarnings("ignore", category=DataConversionWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", message=".*IProgress not found.*")
# =================================================

# ================= USER CONFIG ===================
DATA_FILE     = "dataset.csv"                 # CSV read by read_df; its 'Time' column becomes the index
Y_COLS        = ["xr_2","xr_3","xr_5","xr_7","xr_10"]  # target columns; the suffix after the first "_" selects the slope column
SLOPE_PREFIX  = "s_"                          # prefix of slope columns (e.g. "s_2" pairs with "xr_2")
FWD_PREFIX    = "fwd_"                        # prefix of forward-rate feature columns
MACRO_PREFIX  = "F"                           # prefix of macro feature columns (case-sensitive match)

PERIOD        = ["197108", "202312"]          # passed to ExpandingRunner as `period`
BURN_IN_END   = "199001"                      # passed to ExpandingRunner as `burn_in_end`
HORIZON       = 12                            # passed to ExpandingRunner as `horizon`
SHOW_PROGRESS = True                          # passed to fit_walk as `progress`

# MLP (network branch) defaults: only the settings that are changed often are listed.
# These are keyword arguments for MLPRegressor; CV grid values override them per candidate.
MLP_PARAMS = dict(
    random_state=0,
    max_iter=2000,
    early_stopping=True,
    learning_rate_init=1e-3,
    tol=1e-5,
)

# CV grid: network hyperparameters and (optionally) slope_scale are searched jointly.
# NOTE(review): "mode" is not read by SlopeSkipMLPStrategy in this file; only
# "grid", "n_splits" and "slope_scale" are used.
# NOTE(review): alpha is MLPRegressor's L2 penalty; these values are very large -- confirm intended.
MLP_CV = {
    "mode": "tscv",
    "n_splits": 8,
    "grid": {
        "hidden_layer_sizes": [(16,), (16, 8)],
        "alpha": [1e3, 1e5, 1e7],
    },
    # If the key below is present, slope_scale is searched together with the grid
    # during the per-step CV (otherwise the fixed value is used).
    # "slope_scale": [1.0, 0.9, 0.8],   # delete or comment out this line if not wanted
}

# Slope-branch smoothing control
FREEZE_SLOPE = True     # True: slope OLS coefficients are never blended with the previous step's
EMA_RHO      = 0.0      # weight on the previous step's coefficients; 0 behaves like FREEZE_SLOPE=True, 0<rho<1 updates slowly
FIXED_SLOPE_SCALE = 1.0 # used when slope_scale is not searched by CV (scalar or {ycol: float})

# Save options
SAVE_MAT   = True
MAT_PATH_1 = "slopeskip_mlp_fwd.mat"
MAT_PATH_2 = "slopeskip_mlp_fwd_macro.mat"
# =================================================

import numpy as np
import pandas as pd
from typing import Dict, List, Optional

from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import ParameterGrid

from scipy.io import savemat

from rolling_framework import ExpandingRunner
from rolling_framework.strategies import Strategy


# ------------------- helpers -------------------
def read_df(path: str) -> pd.DataFrame:
    """Load the CSV at *path* with its ``Time`` column as a string-typed index."""
    frame = pd.read_csv(path, index_col="Time")
    # Index labels are converted to str after parsing.
    frame.index = frame.index.astype(str)
    return frame

def cols_by_prefix(df: pd.DataFrame, prefix: str) -> pd.DataFrame:
    """Return the columns of *df* whose names start with *prefix* (case-sensitive)."""
    keep = df.columns.str.startswith(prefix)
    return df.loc[:, keep]

def make_features(df: pd.DataFrame, *, use_slope: bool, use_macro: bool, use_fwd: bool) -> pd.DataFrame:
    """Assemble the feature frame from the enabled column groups.

    Groups are concatenated column-wise in the order slope, macro, forward;
    each group is selected by its module-level prefix.  If a column name
    appears more than once, only its first occurrence is kept.

    Raises:
        ValueError: if all three flags are False.
    """
    selected = [
        cols_by_prefix(df, prefix)
        for enabled, prefix in (
            (use_slope, SLOPE_PREFIX),
            (use_macro, MACRO_PREFIX),
            (use_fwd, FWD_PREFIX),
        )
        if enabled
    ]
    if not selected:
        raise ValueError("No features selected.")
    combined = pd.concat(selected, axis=1)
    is_repeat = combined.columns.duplicated(keep="first")
    return combined.loc[:, ~is_repeat]

def slope_map_from_targets(df: pd.DataFrame, ycols: List[str], prefix: str) -> Dict[str, str]:
    """Pair every target column with the slope column sharing its suffix.

    The suffix is whatever follows the first underscore of the target name,
    so with ``prefix="s_"`` the target ``"xr_2"`` maps to ``"s_2"``.

    Raises:
        KeyError: if any derived slope column is absent from *df*.
    """
    mapping: Dict[str, str] = {}
    for target in ycols:
        suffix = target.split("_", 1)[1]
        mapping[target] = f"{prefix}{suffix}"

    absent = [col for col in mapping.values() if col not in df.columns]
    if absent:
        raise KeyError(f"Missing slope columns: {absent}")
    return mapping

def cs_baseline(times, all_times, df, y, slope_map: Dict[str,str]) -> pd.DataFrame:
    """Expanding-window benchmark: univariate OLS of each target on its slope column.

    For every ``t`` in *times*, each target in *slope_map* is regressed (with
    intercept) on its slope column using all rows whose label in *all_times*
    compares strictly less than ``t``; the fitted line is then evaluated at the
    slope value observed at ``t``.

    Args:
        times: labels to produce forecasts for.
        all_times: candidate training labels; those ``< t`` form the training set.
        df: frame holding the slope columns, indexed by the same labels.
        y: frame holding the target columns, indexed by the same labels.
        slope_map: ``{target column: slope column}``.

    Returns:
        Frame indexed by *times* with one column per target (in *slope_map*
        order).  Rows for which no earlier label exists are NaN.

    NOTE(review): training uses every label ``< t`` with no horizon gap --
    confirm this matches the training window used by the runner.
    """
    targets = list(slope_map)
    rows = []
    for t in times:
        train_labels = [s for s in all_times if s < t]
        if not train_labels:
            continue  # nothing to fit on; the row is filled with NaN by the reindex below
        row = {}
        for ycol, scol in slope_map.items():
            X_tr = df.loc[train_labels, [scol]].astype(float)
            y_tr = y.loc[train_labels, ycol].astype(float).values
            reg = LinearRegression().fit(X_tr, y_tr)
            # Same column name as in training so the estimator sees matching feature names.
            x_te = pd.DataFrame([[df.loc[t, scol]]], columns=[scol], dtype=float)
            # predict() returns a length-1 array; take the element explicitly because
            # float() on an array with ndim > 0 is deprecated in recent NumPy.
            row[ycol] = float(reg.predict(x_te).ravel()[0])
        rows.append(pd.Series(row, name=t))
    return pd.DataFrame(rows).reindex(index=times, columns=targets)

def save_mat_with_components(path: str, runner: ExpandingRunner, y_cs_hat: pd.DataFrame):
    """Write the runner's results and the benchmark forecasts to a MATLAB ``.mat`` file.

    Saved variables: ``Y_true``, ``Y_pred`` (from ``runner.collect_frames()``),
    ``Y_cs_hat`` (the benchmark), ``Y_net_hat`` (``Y_pred - Y_cs_hat``),
    ``dates`` / ``maturities`` (index and columns of ``Y_true``), ``horizon``
    and ``burn_in_end`` (read from the runner).

    Args:
        path: destination file passed to ``scipy.io.savemat``.
        runner: object providing ``collect_frames()``, ``horizon`` and ``burn_in_end``.
        y_cs_hat: benchmark forecasts; aligned to ``Y_pred`` before saving.
    """
    Y_true, Y_pred = runner.collect_frames()
    # Align the benchmark to the prediction frame once, so that Y_cs_hat and
    # Y_net_hat share the same row/column order (labels missing from the
    # benchmark become NaN instead of shifting rows).
    cs_aligned = y_cs_hat.reindex_like(Y_pred)
    # Total forecast minus the benchmark forecast.
    y_net_hat = Y_pred - cs_aligned
    savemat(path, {
        "Y_true": Y_true.to_numpy(float),
        "Y_pred": Y_pred.to_numpy(float),
        "Y_cs_hat": cs_aligned.to_numpy(float),
        "Y_net_hat": y_net_hat.to_numpy(float),
        "dates":  np.array(Y_true.index.tolist(), dtype=object),
        "maturities": np.array(Y_true.columns.tolist(), dtype=object),
        "horizon": np.array([runner.horizon]),
        "burn_in_end": np.array([runner.burn_in_end], dtype=object),
    })


# ----------- Slope-skip + MLP custom Strategy -----------
class SlopeSkipMLPStrategy(Strategy):
    """
    Slope skip-connection plus MLP forecaster.

    For each target j the forecast is

        y_hat_j = (a_j + scale_j * b_j * slope_j) + MLP(feature_cols)_j

    - (a_j, b_j): univariate OLS of target j on its slope column, refit on the
      training window of every ``fit_predict`` call.  When ``freeze_slope`` is
      False and ``ema_rho`` > 0 they are blended with the previous call's
      coefficients as ``(1 - ema_rho) * new + ema_rho * previous``.
    - The MLP is trained on the residual ``y_j - (a_j + scale_j * b_j * slope_j)``.
    - Per-step CV: when ``mlp_cv`` has a non-empty ``"grid"``, each call searches
      that grid and, if ``mlp_cv["slope_scale"]`` is present, those scale
      candidates as well, using time-ordered folds.
    """
    def __init__(
        self,
        target_cols: List[str],
        slope_map: Dict[str, str],
        feature_cols: List[str],
        mlp_params: Dict,
        mlp_cv: Optional[Dict] = None,
        freeze_slope: bool = True,
        ema_rho: float = 0.0,
        slope_scale: float | Dict[str, float] = 1.0,
        scale_features: bool = True,
    ):
        """
        Args:
            target_cols: target column names, forwarded to the base class.
            slope_map: ``{target column: slope column}``; must cover every target.
            feature_cols: columns fed to the MLP; empty means no network branch.
            mlp_params: base keyword arguments for ``MLPRegressor``.
            mlp_cv: optional dict with ``"grid"``, ``"n_splits"`` (default 5) and
                optionally ``"slope_scale"`` candidates.
            freeze_slope: if True, coefficients are never blended across calls.
            ema_rho: weight given to the previous call's coefficients.
            slope_scale: multiplier on ``b_j`` (scalar, or per-target dict with
                missing targets defaulting to 1.0) used when CV does not pick one.
            scale_features: if True, a ``StandardScaler`` precedes the MLP.

        Raises:
            KeyError: if a target has no entry in *slope_map*.
        """
        super().__init__(target_cols)
        unmapped = [c for c in target_cols if c not in slope_map]
        if unmapped:
            raise KeyError(f"slope_map has no entry for targets: {unmapped}")
        self.slope_map    = dict(slope_map)
        self.feature_cols = list(feature_cols)
        self.mlp_params   = dict(mlp_params)
        self.mlp_cv       = mlp_cv
        self.freeze_slope = bool(freeze_slope)
        self.ema_rho      = float(ema_rho)
        self.slope_scale  = slope_scale
        self.scale_features = bool(scale_features)

        # Coefficients returned by the previous _ols_ab call (inputs to the EMA blend).
        self._prev_a: Dict[str,float] = {}
        self._prev_b: Dict[str,float] = {}
        # Scale used by the most recent fit_predict call.
        self._best_scale_ = slope_scale

    # --- utils ---
    def _scale_for(self, ycol: str, maybe_scale) -> float:
        """Resolve the slope multiplier for *ycol* from a scalar or a per-target dict."""
        if isinstance(maybe_scale, dict):
            return float(maybe_scale.get(ycol, 1.0))
        return float(maybe_scale)

    def _ols_ab(self, Xs: pd.DataFrame, ytr: pd.DataFrame) -> Dict[str,tuple]:
        """Fit intercept/slope per target and return ``{target: (a, b)}``.

        The returned (possibly blended) values are stored as the "previous"
        coefficients for the next call.
        """
        out={}
        # Iterate targets (not slope_map) so results always follow target order.
        for ycol in self.target_cols:
            scol = self.slope_map[ycol]
            reg=LinearRegression().fit(Xs[[scol]].to_numpy(float), ytr[ycol].to_numpy(float))
            a=float(reg.intercept_); b=float(reg.coef_.ravel()[0])
            if (self.ema_rho>0) and (ycol in self._prev_b) and (not self.freeze_slope):
                a=(1-self.ema_rho)*a + self.ema_rho*self._prev_a[ycol]
                b=(1-self.ema_rho)*b + self.ema_rho*self._prev_b[ycol]
            out[ycol]=(a,b)
        self._prev_a={k:v[0] for k,v in out.items()}
        self._prev_b={k:v[1] for k,v in out.items()}
        return out

    def _make_mlp_pipe(self, params: Dict) -> Pipeline:
        """Build the (optional scaler +) MLP pipeline; *params* override ``mlp_params``."""
        est=MLPRegressor(**{**self.mlp_params, **params})
        steps=([("scaler", StandardScaler())] if self.scale_features else []) + [("model", est)]
        return Pipeline(steps)

    def _tscv(self, n:int, n_splits:int) -> List[tuple]:
        """Return ``(train_idx, valid_idx)`` positional folds over ``n`` ordered rows.

        Each fold trains on rows ``[0, tr_end)`` and validates on the
        ``test_size`` rows that follow; later folds have longer training sets.
        Folds whose validation range is empty are dropped.
        """
        test_size=max(1, n//(n_splits+1))
        splits=[]
        for k in range(n_splits):
            tr_end = n - (n_splits-k)*test_size
            tr_end = max(1, tr_end)  # always keep at least one training row
            va_s, va_e = tr_end, min(n, tr_end+test_size)
            if va_e-va_s>0: splits.append((np.arange(0,tr_end), np.arange(va_s,va_e)))
        return splits

    def _residual_targets(self, y_tr: pd.DataFrame, Xs_tr: pd.DataFrame,
                          ab: Dict[str,tuple], scale) -> pd.DataFrame:
        """Return ``y - (a + scale*b*slope)`` per target, columns in target order."""
        cols=[]
        for ycol in self.target_cols:
            a,b = ab[ycol]; k = self._scale_for(ycol, scale)
            slope = Xs_tr[self.slope_map[ycol]].to_numpy(float)
            cols.append(y_tr[ycol].to_numpy(float) - (a + k*b*slope))
        return pd.DataFrame(np.column_stack(cols), index=Xs_tr.index, columns=self.target_cols)

    def _select_by_cv(self, Xf_tr: pd.DataFrame, Xs_tr: pd.DataFrame,
                      y_tr: pd.DataFrame, ab: Dict[str,tuple]) -> tuple:
        """Pick ``(mlp params, slope scale)`` with the lowest mean validation MSE."""
        n_splits = int(self.mlp_cv.get("n_splits", 5))
        splits   = self._tscv(len(Xf_tr), n_splits)
        grid     = list(ParameterGrid(self.mlp_cv["grid"]))
        raw      = self.mlp_cv.get("slope_scale", [self.slope_scale])
        # A single scalar or per-target dict counts as one candidate.
        scales   = [raw] if (isinstance(raw, dict) or np.isscalar(raw)) else list(raw)

        best_mse, best_params, best_scale = np.inf, None, None
        for sc in scales:
            # Residual targets depend on the candidate scale.
            Yt = self._residual_targets(y_tr, Xs_tr, ab, sc)
            for params in grid:
                fold=[]
                for tr_idx, va_idx in splits:
                    pipe = self._make_mlp_pipe(params)
                    pipe.fit(Xf_tr.iloc[tr_idx], Yt.iloc[tr_idx])
                    truth = Yt.iloc[va_idx].to_numpy(float)
                    # With one target predict() is 1-D; reshape so the difference
                    # is element-wise rather than broadcast to (m, m).
                    pred = np.asarray(pipe.predict(Xf_tr.iloc[va_idx]), dtype=float).reshape(truth.shape)
                    fold.append(float(np.mean((truth - pred)**2)))
                m = float(np.mean(fold)) if fold else np.inf
                if m < best_mse:
                    best_mse, best_params, best_scale = m, params, sc

        if best_params is None:
            # No candidate produced a comparable score (no folds, or NaN scores):
            # fall back to the base MLP settings and the first scale candidate.
            best_params = {}
            best_scale  = scales[0] if scales else self.slope_scale
        return best_params, best_scale

    # --- core ---
    def fit_predict(self, X_tr: pd.DataFrame, y_tr: pd.DataFrame, x_te: pd.Series) -> pd.Series:
        """Fit both branches on the training window and forecast the test row.

        Args:
            X_tr: training features; must contain the slope and feature columns.
            y_tr: training targets; must contain ``target_cols``.
            x_te: feature values at the forecast date.

        Returns:
            Series of forecasts indexed by ``target_cols``.
        """
        # De-duplicate so a slope column shared by two targets is selected once.
        slope_cols = list(dict.fromkeys(self.slope_map[c] for c in self.target_cols))
        Xs_tr = X_tr[slope_cols]
        ab    = self._ols_ab(Xs_tr, y_tr[self.target_cols])

        params: Dict = {}
        scale = self.slope_scale
        if self.feature_cols:
            Xf_tr = X_tr[self.feature_cols]
            # Per-step CV only when a grid is configured; otherwise fixed settings.
            if self.mlp_cv and self.mlp_cv.get("grid"):
                params, scale = self._select_by_cv(Xf_tr, Xs_tr, y_tr, ab)
            # Refit on the full training window with the chosen settings.
            mlp = self._make_mlp_pipe(params)
            mlp.fit(Xf_tr, self._residual_targets(y_tr, Xs_tr, ab, scale))
            x_row = x_te[self.feature_cols].to_frame().T
            y_net_te = np.asarray(mlp.predict(x_row), dtype=float).reshape(-1)
        else:
            # No network branch: the forecast is the slope term alone.
            y_net_te = np.zeros(len(self.target_cols), dtype=float)
        self._best_scale_ = scale

        # Slope-branch output at the test date.
        y_slope=[]
        for ycol in self.target_cols:
            a,b = ab[ycol]; k = self._scale_for(ycol, self._best_scale_)
            y_slope.append(a + k*b*float(x_te[self.slope_map[ycol]]))
        y_slope = np.array(y_slope, dtype=float)

        y_hat = y_slope + y_net_te
        return pd.Series(y_hat, index=self.target_cols)


# ===================== RUN (two setups) =====================
if __name__ == "__main__":
    # Runs the same expanding-window experiment twice: once on slope + forward
    # features, once on slope + forward + macro features.
    df = read_df(DATA_FILE)
    y  = df[Y_COLS].copy()

    # Feature sets (slope columns are included so the strategy can read them).
    X_fwd       = make_features(df, use_slope=True, use_macro=False, use_fwd=True)
    X_fwd_macro = make_features(df, use_slope=True, use_macro=True,  use_fwd=True)

    slope_map = slope_map_from_targets(df, Y_COLS, SLOPE_PREFIX)

    # Settings common to both strategies.  If MLP_CV contains "slope_scale",
    # the CV candidates take precedence over the fixed slope_scale.
    shared_strategy_kwargs = dict(
        target_cols=Y_COLS,
        slope_map=slope_map,
        mlp_params=MLP_PARAMS,
        mlp_cv=MLP_CV,
        freeze_slope=FREEZE_SLOPE,
        ema_rho=EMA_RHO,
        slope_scale=FIXED_SLOPE_SCALE,
        scale_features=True,
    )
    shared_runner_kwargs = dict(
        y=y, period=PERIOD, burn_in_end=BURN_IN_END, horizon=HORIZON,
    )

    # ---- (1) Slope-skip + MLP on [fwd] ----
    # The network branch sees every non-slope column.
    net_cols_1 = [c for c in X_fwd.columns if not c.startswith(SLOPE_PREFIX)]
    strat1 = SlopeSkipMLPStrategy(feature_cols=net_cols_1, **shared_strategy_kwargs)
    runner1 = ExpandingRunner(X=X_fwd, strategy=strat1, **shared_runner_kwargs)
    runner1.fit_walk(progress=SHOW_PROGRESS, desc="Slope-skip + MLP [fwd]")

    y_cs_hat_1 = cs_baseline(runner1.test_times, runner1.times, df, y, slope_map)
    print("\n=== Slope-skip + MLP [fwd] ===")
    for header, r2_kwargs in (
        ("R2OOS vs naive:\n",    dict(baseline="naive")),
        ("R2OOS vs condmean:\n", dict(baseline="condmean")),
        ("R2OOS vs CS OLS:\n",   dict(baseline="custom", benchmark=y_cs_hat_1)),
    ):
        print(header, runner1.R2OOS(**r2_kwargs).round(4))

    if SAVE_MAT:
        save_mat_with_components(MAT_PATH_1, runner1, y_cs_hat_1)

    # ---- (2) Slope-skip + MLP on [fwd + macro] ----
    net_cols_2 = [c for c in X_fwd_macro.columns if not c.startswith(SLOPE_PREFIX)]
    strat2 = SlopeSkipMLPStrategy(feature_cols=net_cols_2, **shared_strategy_kwargs)
    runner2 = ExpandingRunner(X=X_fwd_macro, strategy=strat2, **shared_runner_kwargs)
    runner2.fit_walk(progress=SHOW_PROGRESS, desc="Slope-skip + MLP [fwd+macro]")

    y_cs_hat_2 = cs_baseline(runner2.test_times, runner2.times, df, y, slope_map)
    print("\n=== Slope-skip + MLP [fwd+macro] ===")
    for header, r2_kwargs in (
        ("R2OOS vs naive:\n",    dict(baseline="naive")),
        ("R2OOS vs condmean:\n", dict(baseline="condmean")),
        ("R2OOS vs CS OLS:\n",   dict(baseline="custom", benchmark=y_cs_hat_2)),
    ):
        print(header, runner2.R2OOS(**r2_kwargs).round(4))

    if SAVE_MAT:
        save_mat_with_components(MAT_PATH_2, runner2, y_cs_hat_2)