In [None]:
# ============================================
# Experiment Harness: complexity / L2 / LR-fixed
# ============================================
import os, sys, re, warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

from rolling_framework import Machine  # 프로젝트 핵심 API

# -------------------- File paths --------------------
DATA_DIR = "data/"
# Input CSVs; every one is resolved relative to DATA_DIR.
Y_FILE, SLOPE_FILE, YL_FILE, MACRO_FILE = (
    os.path.join(DATA_DIR, fname)
    for fname in ("exrets.csv", "slope.csv", "yl_all.csv", "MacroFactors.csv")
)
# Result CSVs are written here, so make sure the directory exists up front.
OUT_DIR = "./output"
os.makedirs(OUT_DIR, exist_ok=True)

# -------------------- Periods / targets --------------------
# Date bounds are kept as strings (presumably YYYYMM - confirm against the
# "Time" column of the input files).
BURN_START = "197108"
BURN_END = "199001"
PERIOD_START = "197108"
PERIOD_END = "202312"
# Forecast horizon handed to Machine (unit not visible here - TODO confirm).
HORIZON = 12
# Target columns to keep from the exrets file.
MATURITIES = [f"xr_{n}" for n in (2, 3, 5, 7, 10)]

# -------------------- 유틸 --------------------
def _load_csv(path, name):
    """Read the CSV at ``path`` into a DataFrame indexed by its ``Time`` column.

    ``name`` is only used to label the error message. When the file does not
    exist, the process is terminated through ``sys.exit`` with that message
    instead of propagating ``FileNotFoundError``.
    """
    try:
        frame = pd.read_csv(path, index_col="Time")
    except FileNotFoundError as err:
        sys.exit(f"[ERROR] missing {name} → {err.filename}")
    return frame

def _align_time(*dfs):
    """Restrict every frame to the index labels shared by all of them.

    Returns a list with one frame per input, in input order, each limited to
    the common index and sorted by index. With no inputs the result is an
    empty list.
    """
    if not dfs:
        return []
    common = dfs[0].index
    for frame in dfs[1:]:
        common = common.intersection(frame.index)
    return [frame.loc[common].sort_index() for frame in dfs]

def _set_global_seed(seed: int):
    """Seed the random number generators this script can reach.

    Seeds Python's ``random`` module, NumPy's global RNG and, when torch can
    be imported, torch's CPU generator plus every CUDA device. A missing
    torch installation is tolerated; any other torch failure is allowed to
    surface, because a silently unseeded run would defeat the purpose of
    repeating the experiment per seed.

    Args:
        seed: Integer seed applied to every generator.
    """
    import random  # local import: keeps this helper self-contained

    random.seed(seed)
    np.random.seed(seed)
    # NOTE: PYTHONHASHSEED is read at interpreter start-up, so this does not
    # change hashing in the current process; it only reaches child processes
    # that inherit the environment.
    os.environ["PYTHONHASHSEED"] = str(seed)

    try:
        import torch
    except ImportError:
        return  # torch is optional here; nothing further to seed
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

def _extract_metric_MSE(mach) -> float:
    """Return the out-of-sample mean squared error of ``mach`` as one float.

    Preferred path: ``mach.MSEOOS()``. A result exposing ``.mean()`` is
    averaged; anything else is cast to ``float`` directly.

    Fallback: look for a prediction DataFrame and a ground-truth DataFrame on
    the ``rec`` / ``recorder`` / ``core`` / ``results`` attributes of ``mach``
    and compute the MSE over all cells where both values are present.

    Args:
        mach: Trained object to inspect (a ``Machine`` in this script).

    Returns:
        The scalar MSE, or NaN when no cell has both a prediction and a
        realised value.

    Raises:
        RuntimeError: If the fallback cannot find predictions or ground truth.
        KeyError: If the ground truth lacks labels present in the predictions.
    """
    # Preferred path: Machine.MSEOOS()
    if hasattr(mach, "MSEOOS"):
        try:
            mse = mach.MSEOOS()
            return float(mse.mean() if hasattr(mse, "mean") else mse)
        except Exception:
            # Deliberate best-effort: any failure here falls through to the
            # manual computation below.
            pass

    def _first_frame(keys):
        # First DataFrame found, honouring container order then key order.
        # The search stops at the first hit so a later container can no
        # longer overwrite an earlier, higher-priority match.
        for attr in ("rec", "recorder", "core", "results"):
            holder = getattr(mach, attr, None)
            if holder is None:
                continue
            for key in keys:
                candidate = getattr(holder, key, None)
                if isinstance(candidate, pd.DataFrame):
                    return candidate
        return None

    # Fallback: locate y_true / predictions and compute the metric directly.
    yhat = _first_frame(
        ["oos_pred", "oos_preds", "pred_oos", "pred_test", "pred", "yhat_oos"]
    )
    if yhat is None:
        raise RuntimeError("OOS predictions not found in Machine recorder/core.")
    ytrue = _first_frame(["oos_true", "y_oos", "y_test", "y_true_oos"])
    if ytrue is None:
        raise RuntimeError("OOS ground truth not found; expose it in recorder/core.")

    ytrue = ytrue.loc[yhat.index, yhat.columns]
    sq_err = ((ytrue - yhat) ** 2).to_numpy(dtype=float)
    # Average only over cells where both sides exist, so a single missing
    # prediction does not turn the whole metric into NaN (this matches the
    # NaN-skipping sums used by the R2 fallback).
    present = ~np.isnan(sq_err)
    if not present.any():
        return float("nan")
    return float(sq_err[present].mean())

def _extract_metric_R2(mach) -> float:
    """Return the out-of-sample R^2 of ``mach`` as one float.

    Preferred path: ``mach.R2OOS()``. A result exposing ``.mean()`` is
    averaged; anything else is cast to ``float`` directly.

    Fallback: look for a prediction DataFrame and a ground-truth DataFrame on
    the ``rec`` / ``recorder`` / ``core`` / ``results`` attributes of ``mach``
    and compute ``1 - SS_res / SS_tot`` pooled over all columns. The baseline
    in ``SS_tot`` is each column's mean of the realised values over the same
    cells that enter ``SS_res``.

    Args:
        mach: Trained object to inspect (a ``Machine`` in this script).

    Returns:
        The scalar R^2. NaN when predictions or ground truth cannot be found,
        or when the realised values have no variation (``SS_tot`` is zero).

    Raises:
        KeyError: If the ground truth lacks labels present in the predictions.
    """
    if hasattr(mach, "R2OOS"):
        try:
            r2 = mach.R2OOS()
            return float(r2.mean() if hasattr(r2, "mean") else r2)
        except Exception:
            # Deliberate best-effort: any failure here falls through to the
            # manual computation below.
            pass

    def _first_frame(keys):
        # First DataFrame found, honouring container order then key order.
        # The search stops at the first hit so a later container can no
        # longer overwrite an earlier, higher-priority match.
        for attr in ("rec", "recorder", "core", "results"):
            holder = getattr(mach, attr, None)
            if holder is None:
                continue
            for key in keys:
                candidate = getattr(holder, key, None)
                if isinstance(candidate, pd.DataFrame):
                    return candidate
        return None

    # Fallback: compute the metric directly.
    yhat = _first_frame(
        ["oos_pred", "oos_preds", "pred_oos", "pred_test", "pred", "yhat_oos"]
    )
    ytrue = _first_frame(["oos_true", "y_oos", "y_test", "y_true_oos"])
    if (yhat is None) or (ytrue is None):
        return np.nan

    ytrue = ytrue.loc[yhat.index, yhat.columns]
    # Use one common set of cells for both sums. Previously SS_res skipped
    # cells with a missing prediction while SS_tot still counted them, which
    # biased R^2 upwards.
    both = ytrue.notna() & yhat.notna()
    yt = ytrue.where(both)
    yp = yhat.where(both)
    ss_res = ((yt - yp) ** 2).sum().sum()
    ss_tot = ((yt - yt.mean()) ** 2).sum().sum()
    if not ss_tot > 0:
        # Constant (or empty) target: R^2 is undefined rather than +/-inf.
        return np.nan
    return float(1 - ss_res / ss_tot)

# -------------------- Load data --------------------
# Files are read in this order; a missing file terminates the script.
y, slope, yl, macro = (
    _load_csv(path, label)
    for path, label in (
        (Y_FILE, "exrets"),
        (SLOPE_FILE, "slope"),
        (YL_FILE, "yl_all"),
        (MACRO_FILE, "MacroFactors"),
    )
)

# Keep only the requested target columns that actually exist in `y`.
y_cols = [col for col in MATURITIES if col in y.columns]
if len(y_cols) == 0:
    sys.exit("[ERROR] MATURITIES not in exrets")
y = y[y_cols]
# Restrict all four frames to their shared index labels.
y, slope, yl, macro = _align_time(y, slope, yl, macro)

# Build the model inputs: slope columns followed by macro columns.
X_macro = pd.concat([slope, macro], axis=1)

print(
    "✓ Loaded:",
    {"y": y.shape, "slope": slope.shape, "yl": yl.shape, "macro": macro.shape},
)

# ====================================================
# Experiment axes
# ====================================================

# (1) Network complexity, from simple to complex.
#     label -> (hidden, dropout); `hidden` is presumably the tuple of
#     hidden-layer widths - confirm against the model module.
NETWORKS = dict(
    simple=((3,), 0.1),
    medium=((16,), 0.1),
    complex=((16, 8), 0.2),
)

# (2) L2 strength sweep (passed on as weight decay)
L2_LIST = [
    0.0,
    5e-5,
    1e-4,
    5e-4,
    1e-3,
]

# (3) Learning rates; each run uses one fixed value
LR_LIST = [
    1e-3,
    5e-4,
    2e-4,
]

# (4) Model types to run (trim to a single entry if desired)
#     "ARM":      base = OLS(slope), residual = MLP(macro)
#     "TorchDNN": plain DNN
MODEL_TYPES = ["ARM", "TorchDNN"]

# (5) Seed repetitions: 5 runs here; widen the range for 10 or 20
SEEDS = [*range(1, 6)]

# ====================================================
# 공통 옵션 템플릿
# ====================================================
def make_option(model_type, hidden, dropout, lr, wd):
    """Build the option dict for one run of the given model type.

    Args:
        model_type: ``"ARM"`` or ``"TorchDNN"``.
        hidden: Hidden-layer specification, stored under the model's
            hidden key unchanged.
        dropout: Dropout value, stored unchanged.
        lr: Learning rate, stored unchanged.
        wd: Weight decay (the L2 strength), stored unchanged.

    Returns:
        A new dict. The ARM variant additionally reads the column names of
        the module-level ``slope``, ``y`` and ``macro`` frames. No ``"seed"``
        entry is set here; the caller adds it afterwards.

    Raises:
        ValueError: If ``model_type`` is neither of the two known names.
    """
    if model_type == "TorchDNN":
        return dict(
            scaler="standard",
            hidden=hidden,
            dropout=dropout,
            lr=lr,
            wd=wd,
            epochs=200,
            patience=20,
        )
    if model_type != "ARM":
        raise ValueError("Unknown model_type")

    arm_option = {
        "base_on": True,
        "base_cols": [*slope.columns],
        "target_cols": [*y.columns],
        "residual_kind": "mlp",
        "feature_cols": [*macro.columns],
        "standardize_res": True,
    }
    # Hyper-parameters of the residual MLP.
    arm_option.update(
        mlp_hidden=hidden,
        mlp_dropout=dropout,
        mlp_lr=lr,
        mlp_wd=wd,
        mlp_epochs=200,
        mlp_patience=20,
    )
    return arm_option

def make_grid(model_type, hidden, dropout, lr, wd):
    """Build a parameter grid in which every entry has exactly one value.

    Each hyper-parameter is wrapped in a one-element list so that a grid
    search over the result can only pick that fixed value. Both model types
    share the same parameter names and differ only in the key prefix.

    Args:
        model_type: ``"ARM"`` or ``"TorchDNN"``.
        hidden, dropout, lr, wd: Values pinned in the grid; ``wd`` is placed
            under the optimizer's ``weight_decay`` key.

    Returns:
        Dict mapping prefixed parameter names to one-element lists.

    Raises:
        ValueError: If ``model_type`` is neither of the two known names.
    """
    if model_type == "ARM":
        prefix = "arm__residual_model__"
    elif model_type == "TorchDNN":
        prefix = "dnn__"
    else:
        raise ValueError("Unknown model_type")

    pinned = (
        ("module__hidden", hidden),
        ("module__dropout", dropout),
        ("optimizer__lr", lr),
        ("optimizer__weight_decay", wd),
        ("batch_size", 32),
        ("max_epochs", 200),
        # NOTE(review): the original comment said None selects the internal
        # default ValidSplit. If the estimator is a skorch net, None instead
        # disables the train/validation split - confirm the intent.
        ("train_split", None),
        ("patience", 20),
    )
    return {prefix + param: [value] for param, value in pinned}

# ====================================================
# Run loop
# ====================================================
records = []  # one row per run

# Both model types are fed the same slope + macro input frame, so it is bound
# once here instead of being re-concatenated on every run.
X_used = X_macro

# Flat enumeration of the full grid. The order matches the nesting
# model -> network -> L2 -> learning rate -> seed, so for a given
# (model, network, L2, learning rate) only the seed changes between
# consecutive runs.
run_configs = [
    (model_type, net_label, hidden, dropout, wd, lr, seed)
    for model_type in MODEL_TYPES
    for net_label, (hidden, dropout) in NETWORKS.items()
    for wd in L2_LIST
    for lr in LR_LIST
    for seed in SEEDS
]

for model_type, net_label, hidden, dropout, wd, lr, seed in run_configs:
    _set_global_seed(seed)
    opt = make_option(model_type, hidden, dropout, lr, wd)
    opt["seed"] = seed
    grid = make_grid(model_type, hidden, dropout, lr, wd)

    # The name handed to Machine is the same string as the experiment label.
    model_name = model_type

    m = Machine(
        X_used, y, model_name,
        option=opt, params_grid=grid,
        burn_in_start=BURN_START, burn_in_end=BURN_END,
        period=[PERIOD_START, PERIOD_END], forecast_horizon=HORIZON
    )
    print(f"[RUN] {model_type:8s} | net={net_label:7s} | wd={wd:.1e} | lr={lr:.1e} | seed={seed}")
    m.training()

    mse = _extract_metric_MSE(m)
    r2 = _extract_metric_R2(m)
    records.append({
        "model": model_type,
        "net": net_label,
        "hidden": str(hidden),  # stringified so the tuple can serve as a group key
        "dropout": dropout,
        "lr": lr,
        "wd(L2)": wd,
        "seed": seed,
        "OOS_MSE": mse,
        "OOS_R2": r2,
    })

# ====================================================
# Aggregate & save results
# ====================================================
res = pd.DataFrame.from_records(records)

# Mean / standard deviation across seeds for every configuration
group_cols = ["model", "net", "hidden", "dropout", "lr", "wd(L2)"]
summary = res.groupby(group_cols).agg(
    OOS_MSE_mean=("OOS_MSE", "mean"),
    OOS_MSE_std=("OOS_MSE", "std"),
    OOS_R2_mean=("OOS_R2", "mean"),
    OOS_R2_std=("OOS_R2", "std"),
    n=("seed", "count"),
)
summary = summary.reset_index()
summary = summary.sort_values(["model", "net", "wd(L2)", "lr"])

# Each pivot averages the per-configuration means over the remaining axes.
metric_cols = ["OOS_MSE_mean", "OOS_R2_mean"]

# Pivot for comparing network complexity (optional)
pivot_net = summary.groupby(["model", "net"])[metric_cols].mean().reset_index()

# Pivot for comparing L2 strength (optional)
pivot_l2 = summary.groupby(["model", "wd(L2)"])[metric_cols].mean().reset_index()

# Pivot for comparing learning rates (optional)
pivot_lr = summary.groupby(["model", "lr"])[metric_cols].mean().reset_index()

# Output locations
res_path = os.path.join(OUT_DIR, "exp_runs_detail.csv")
summary_path = os.path.join(OUT_DIR, "exp_runs_summary.csv")
pivot_net_path = os.path.join(OUT_DIR, "exp_pivot_network.csv")
pivot_l2_path = os.path.join(OUT_DIR, "exp_pivot_l2.csv")
pivot_lr_path = os.path.join(OUT_DIR, "exp_pivot_lr.csv")

# Write every table first, then report the paths.
for frame, path in (
    (res, res_path),
    (summary, summary_path),
    (pivot_net, pivot_net_path),
    (pivot_l2, pivot_l2_path),
    (pivot_lr, pivot_lr_path),
):
    frame.to_csv(path, index=False)

print("\n✓ Saved:")
for label, path in (
    (" - detailed runs   :", res_path),
    (" - summary         :", summary_path),
    (" - pivot (network) :", pivot_net_path),
    (" - pivot (L2)      :", pivot_l2_path),
    (" - pivot (LR)      :", pivot_lr_path),
):
    print(label, path)

# ============================================
# Interpretation guide (brief)
# 1) pivot_network: average performance across simple/medium/complex
# 2) pivot_l2: average performance per L2 (= weight_decay) strength
# 3) pivot_lr: average performance per fixed learning-rate value
# ============================================

▶ OLS-SL_nonDNN


OLS rolling:  93%|█████████▎| 485/520 [00:08<00:00, 59.08it/s]