In [None]:
#!/usr/bin/env python3
"""
run_compare_dnn_min.py ─ DNN vs Static-Dual(DNN_dual) (한 번 실행 → CSV 저장)

• DATA_ROOT에 있는 CSV를 읽어 A/B predictor 세트로 학습
• 모델: DNN, DNN_dual(고정 slope + 나머지 MLP)
• 지표: 만기별 R²_OOS, MSE → 한 CSV로 저장

사용:
  $ python run_compare_dnn_min.py                 # 기본 파일명 저장
  $ python run_compare_dnn_min.py --out result.csv
"""

import os, sys, warnings, argparse
import pandas as pd
warnings.filterwarnings("ignore")

from rolling_framework import Machine

# ─────────────────────────────────────────────────── user config ──
# Folder holding the input CSVs (exrets_19712023.csv and the other files).
DATA_ROOT = "./data/19712023"
# Folder the result CSV is written to.
OUTPUT_ROOT = "./output"
# File name used inside OUTPUT_ROOT when --out is not given.
OUT_DEFAULT = "results_dnn_vs_dual.csv"

# Sample windows, written as YYYYMM strings.
BURN_IN_START, BURN_IN_END = "197108", "198009"
PERIOD_START, PERIOD_END = "199009", "202312"
# Forecast horizon: months ahead.
OFFSET = 12

# Maturities to evaluate (only those present in the file are used).
MATURITIES = [f"xr_{n}" for n in (2, 3, 5, 7, 10)]

# Hyperparameter grids (kept simple: one candidate per setting).
param_grid_dnn = {
    "dnn__module__hidden": [(3,)],
    "dnn__module__dropout": [0.1],
    "dnn__optimizer__lr": [1e-3],
    "dnn__optimizer__weight_decay": [1e-4],
}
param_grid_dual = {
    "dnn__module__hidden2": [(3,)],
    "dnn__module__drop2": [0.1],
    "dnn__optimizer__lr": [1e-3],
    "dnn__optimizer__weight_decay": [1e-4],
}

# ───────────────────────────────────────────────────────── helpers ──
def _load_data():
    """Read the five fixed-name CSVs under DATA_ROOT.

    Every file is read with its "Time" column as the index. The first
    frame (excess returns) is cut down to the MATURITIES columns it
    actually contains, keeping the order given in MATURITIES.

    Returns:
        Tuple ``(y, fw, ma, ls, cp)`` of DataFrames, in the order the
        files are listed below.

    Exits the process through ``sys.exit`` when a file is missing or when
    none of MATURITIES is a column of the excess-return file.
    """
    file_names = (
        "exrets_19712023.csv",
        "fwds_19712023.csv",
        "MacroFactors_19712023.csv",
        "lsc_19712023.csv",
        "cp_19712023.csv",
    )
    try:
        frames = [
            pd.read_csv(f"{DATA_ROOT}/{file_name}", index_col="Time")
            for file_name in file_names
        ]
    except FileNotFoundError as err:
        sys.exit(f"[ERROR] CSV not found → {err.filename}\n  check DATA_ROOT path.")
    y, fw, ma, ls, cp = frames

    available = [col for col in MATURITIES if col in y.columns]
    if len(available) == 0:
        sys.exit(f"[ERROR] None of {MATURITIES} found in exrets columns.")
    return y[available], fw, ma, ls, cp

def _build_predictors(fw, ma, ls, cp):
    """Assemble predictor sets A and B, each in a base and a slope-first form.

    Set A is the columns of ``cp`` followed by those of ``fw``; set B is
    the columns of ``ma`` followed by those of ``fw``. The second form of
    each set puts the "slope" column of ``ls`` in front and then drops
    any repeated column name, keeping the first occurrence.

    Returns:
        Dict with keys "A" and "B", each mapping to ``(base, with_slope)``,
        plus "slope_col" holding the slope column name.
    """
    slope_name = "slope"
    bases = {
        "A": pd.concat([cp, fw], axis=1),   # CP+FWDS
        "B": pd.concat([ma, fw], axis=1),   # MACRO+FWDS
    }
    slope_frame = ls[[slope_name]]

    def _with_slope_first(features):
        combined = pd.concat([slope_frame, features], axis=1)
        first_occurrence = ~combined.columns.duplicated()
        return combined.loc[:, first_occurrence]

    result = {
        set_name: (base, _with_slope_first(base))
        for set_name, base in bases.items()
    }
    result["slope_col"] = slope_name
    return result

def _oos_true_pred(machine, cols):
    """Rebuild aligned out-of-sample truth and prediction frames.

    For each date in ``machine.test_dates`` that is a key of
    ``machine.rec.oos_pred``, the row labelled with that date is taken
    from the stored frame, restricted to ``cols``, and re-indexed under
    the index name "Time". The rows are stacked and sorted by date; the
    true values are then looked up in ``machine.y`` at the same dates.

    Returns:
        Tuple ``(y_true, y_pred)`` sharing the same index and columns.

    Raises:
        RuntimeError: if no test date has a recorded prediction.
    """
    recorded_dates = [
        date for date in machine.test_dates if date in machine.rec.oos_pred
    ]
    if not recorded_dates:
        raise RuntimeError("No OOS predictions recorded.")

    rows = []
    for date in recorded_dates:
        row = machine.rec.oos_pred[date].loc[date, cols]
        # A single matching row comes back as a Series; make it a one-row frame.
        if isinstance(row, pd.Series):
            row = row.to_frame().T
        row.index = pd.Index([date], name="Time")
        rows.append(row)

    y_pred = pd.concat(rows, axis=0).sort_index()
    y_true = machine.y.loc[y_pred.index, cols]
    return y_true, y_pred

def _mse_by_col(y_true: pd.DataFrame, y_pred: pd.DataFrame) -> pd.Series:
    """Return the mean squared difference of each column, averaged over rows."""
    residuals = y_true.sub(y_pred)
    return residuals.pow(2).mean(axis=0)

def run_one(X, y, model_type, option=None, grid=None):
    """Train one model through ``Machine`` and return its R2 and MSE.

    Args:
        X: predictor frame handed to ``Machine``.
        y: target frame handed to ``Machine``.
        model_type: model identifier handed to ``Machine``.
        option: forwarded as ``option``.
        grid: forwarded as ``params_grid``.

    Returns:
        Tuple ``(r2, mse)``. ``r2`` is whatever ``Machine.R2OOS()`` returns
        (per the original note, a Series by maturity); ``mse`` is computed
        here over the labels in ``r2.index``.
    """
    sample_settings = {
        "burn_in_start": BURN_IN_START,
        "burn_in_end": BURN_IN_END,
        "period": [PERIOD_START, PERIOD_END],
        "forecast_horizon": OFFSET,
    }
    machine = Machine(
        X, y, model_type,
        option=option,
        params_grid=grid,
        **sample_settings,
    )
    machine.training()

    r2_by_maturity = machine.R2OOS()
    maturities = r2_by_maturity.index.tolist()
    y_true, y_pred = _oos_true_pred(machine, maturities)
    return r2_by_maturity, _mse_by_col(y_true, y_pred)

# ─────────────────────────────────────────────────────────────── main ──
def main():
    """Train DNN and DNN_DUAL on predictor sets A and B and save one CSV.

    Command line:
        --out PATH   where to write the CSV; when omitted the file goes to
                     OUTPUT_ROOT/OUT_DEFAULT.

    The CSV has one row per (pred_set, maturity, model) with the columns
    R2_OOS and MSE, sorted by those three keys. The directory that will
    hold the CSV is created if it does not exist.
    """
    # Pass the text as ``description``: given positionally it is taken as
    # ``prog`` and shows up as the program name in the usage line.
    ap = argparse.ArgumentParser(description="DNN vs DNN_dual (single run → CSV)")
    ap.add_argument("--out", default=None, help="CSV 저장 경로(기본값: output/results_dnn_vs_dual.csv)")
    # parse_known_args ignores the extra --f=... argument Jupyter passes in.
    args, _ = ap.parse_known_args()

    out_path = args.out or os.path.join(OUTPUT_ROOT, OUT_DEFAULT)
    # Create the directory the CSV is actually written to, and do it before
    # training so an unusable path fails early instead of after all the work.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    y, fw, ma, ls, cp = _load_data()
    pk = _build_predictors(fw, ma, ls, cp)
    slope_col = pk["slope_col"]

    rows = []
    for set_name in ["A", "B"]:
        X_base, X_dual = pk[set_name]

        # Plain DNN on the base predictors.
        r2_dnn, mse_dnn = run_one(
            X_base, y,
            model_type="DNN",
            option=None,
            grid=param_grid_dnn,
        )

        # DNN_DUAL: slope column passed on its own, all other columns as grp2.
        grp2_cols = [c for c in X_dual.columns if c != slope_col]
        r2_sdl, mse_sdl = run_one(
            X_dual, y,
            model_type="DNN_DUAL",
            option={"slope": slope_col, "grp2": grp2_cols},
            grid=param_grid_dual,
        )

        results = (("DNN", r2_dnn, mse_dnn), ("DNN_DUAL", r2_sdl, mse_sdl))
        for mty in r2_dnn.index:
            for model_name, r2, mse in results:
                rows.append({"pred_set": set_name, "maturity": mty,
                             "model": model_name,
                             "R2_OOS": float(r2[mty]), "MSE": float(mse[mty])})

    pd.DataFrame(rows).sort_values(["pred_set", "maturity", "model"]).to_csv(out_path, index=False)
    print(f"\n★ Saved → {out_path}")

# Run the comparison only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

▶ OLS-SL_nonDNN


OLS rolling:  93%|█████████▎| 485/520 [00:08<00:00, 59.08it/s]