In [None]:
# experiment_arm_runs.py
import os, json, copy, time
import numpy as np
import pandas as pd

from rolling_framework import Machine  # 기존과 동일

# ==== Shared paths and run settings (kept the same as in the earlier experiments) ====
DATA_DIR = "data/"
Y_FILE, SLOPE_FILE, YL_FILE, MACRO_FILE = (
    os.path.join(DATA_DIR, csv_name)
    for csv_name in ("exrets.csv", "slope.csv", "yl_all.csv", "MacroFactors.csv")
)

# Root directory that receives one sub-directory per run.
OUT_BASE = "./output_arm_runs"
os.makedirs(OUT_BASE, exist_ok=True)

# Burn-in window and full sample period (presumably YYYYMM strings — confirm
# against the Time column of the CSV files).
BURN_START = "197108"
BURN_END = "199001"
PERIOD_START = "197108"
PERIOD_END = "202312"

# Forecast horizon handed to Machine as `forecast_horizon`.
HORIZON = 12

# Target columns to keep from the excess-return file.
MATURITIES = [
    "xr_2",
    "xr_3",
    "xr_5",
    "xr_7",
    "xr_10",
]

def _load_csv(path, name):
    """Read the CSV at *path* into a DataFrame indexed by its ``Time`` column.

    Args:
        path: Location of the CSV file.
        name: Human-readable dataset label, used only in the error message.

    Returns:
        The loaded DataFrame with ``Time`` as its index.

    Raises:
        SystemExit: If the file does not exist.
    """
    try:
        frame = pd.read_csv(path, index_col="Time")
    except FileNotFoundError as missing:
        # Abort the whole script with a short message instead of a traceback.
        message = f"[ERROR] missing {name} → {missing.filename}"
        raise SystemExit(message)
    return frame

def _align_time(*dfs):
    """Restrict every frame to the index labels shared by all of them.

    Args:
        *dfs: Any number of DataFrames.

    Returns:
        A list with one frame per input, in input order, each limited to the
        common index labels and sorted by index. Empty when no frames are given.
    """
    if not dfs:
        return []

    # Start from the first index and narrow it with each remaining frame.
    shared = dfs[0].index
    for frame in dfs[1:]:
        shared = shared.intersection(frame.index)

    aligned = []
    for frame in dfs:
        aligned.append(frame.loc[shared].sort_index())
    return aligned

def _direct_pairs(slope_cols, y_cols):
    """Pair each slope column with the target column carrying the same number.

    A column's key is the first run of digits in its name, compared as a
    string (``"xr_10"`` -> ``"10"``). Columns without any digits have no key
    and are never paired; previously they all shared the key ``None`` and
    were matched with each other by accident.

    Args:
        slope_cols: Iterable of slope column names.
        y_cols: Iterable of target column names.

    Returns:
        A list of ``(slope_col, y_col)`` tuples in the order of *slope_cols*.
        When several target columns share a key, the last one wins.
    """
    import re

    first_number = re.compile(r"(\d+)")

    def key_of(col):
        found = first_number.search(col)
        return found.group(1) if found else None

    y_by_key = {}
    for y_col in y_cols:
        key = key_of(y_col)
        if key is not None:
            y_by_key[key] = y_col

    pairs = []
    for slope_col in slope_cols:
        key = key_of(slope_col)
        if key is not None and key in y_by_key:
            pairs.append((slope_col, y_by_key[key]))
    return pairs

# ===== Data loading =====
# Each file is read with its "Time" column as the index; a missing file
# aborts the script via SystemExit (see _load_csv).
y     = _load_csv(Y_FILE,   "exrets")
slope = _load_csv(SLOPE_FILE, "slope")
yl    = _load_csv(YL_FILE,   "yl_all")
macro = _load_csv(MACRO_FILE,"MacroFactors")

# Keep only the target columns listed in MATURITIES (in that order) and stop
# early when none of them is present in the excess-return file.
y_cols = [c for c in MATURITIES if c in y.columns]
if not y_cols: raise SystemExit("[ERROR] MATURITIES not in exrets")
y = y[y_cols]
# Restrict all four frames to their shared Time index. `yl` is not referenced
# again in this section, but it still narrows the shared index here.
y, slope, yl, macro = _align_time(y, slope, yl, macro)

# Experiment 1: base = slope (OLS), residual = macro (MLP)
# The feature matrix holds the slope columns followed by the macro columns.
X_macro = pd.concat([slope, macro], axis=1)

# Base option dict handed to Machine; every run works on a deep copy of it
# with its own "seed".
opt_base = dict(
    base_on=True,
    base_cols=list(slope.columns),
    target_cols=list(y.columns),
    residual_kind="mlp",
    feature_cols=list(macro.columns),
    standardize_res=True,
    # Default training settings (overridden by the grid).
    mlp_hidden=(16, 8),
    mlp_dropout=0.2,
    mlp_lr=1e-3,
    mlp_wd=1e-4,
    mlp_epochs=200,
    mlp_patience=20,
    seed=0,  # overwritten for each run
)

# Hyper-parameter grid passed to Machine as `params_grid`; each key maps to
# the list of candidate values for that parameter.
grid = dict(
    arm__residual_model__module__hidden=[(16, 8)],
    arm__residual_model__module__dropout=[0.2, 0.4],
    arm__residual_model__optimizer__lr=[1e-3, 5e-4],
    arm__residual_model__optimizer__weight_decay=[1e-4],
)

# ===== Core utility: run one experiment, then save its summary =====
def _to_jsonable(value):
    """Recursively convert numpy/pandas values into objects json.dump accepts."""
    if value is None or isinstance(value, (bool, int, float, str)):
        return value
    if isinstance(value, dict):
        return {str(key): _to_jsonable(item) for key, item in value.items()}
    if isinstance(value, tuple):
        return tuple(_to_jsonable(item) for item in value)
    if isinstance(value, (list, set, frozenset)):
        return [_to_jsonable(item) for item in value]
    if isinstance(value, (pd.Series, pd.DataFrame)):
        return _to_jsonable(value.to_dict())
    if isinstance(value, np.ndarray):
        return _to_jsonable(value.tolist())
    if isinstance(value, np.generic):
        return value.item()
    # Unknown objects (e.g. estimator instances inside best_params) are stored
    # as their repr so the summary of a finished run can still be written.
    return repr(value)


def _metric_to_jsonable(value):
    """Return a metric as a float when it is scalar-like, else as plain containers."""
    if hasattr(value, "__float__"):
        try:
            return float(value)
        except (TypeError, ValueError):
            # Multi-element arrays/Series define __float__ but cannot be
            # collapsed to one number; keep every element instead.
            pass
    return _to_jsonable(value)


def run_once_and_save(run_id: str, X: pd.DataFrame, y: pd.DataFrame,
                      option: dict, params_grid: dict,
                      burn_in=(BURN_START, BURN_END),
                      period=(PERIOD_START, PERIOD_END),
                      horizon=HORIZON,
                      out_base=OUT_BASE):
    """Train one "ARM" Machine and write ``<out_base>/<run_id>/summary.json``.

    Args:
        run_id: Name of the run; also the name of its output sub-directory.
        X: Feature frame passed to Machine.
        y: Target frame passed to Machine.
        option: Option dict passed to Machine as ``option``.
        params_grid: Grid passed to Machine as ``params_grid``.
        burn_in: ``(start, end)`` passed as ``burn_in_start`` / ``burn_in_end``.
        period: ``(start, end)`` passed as the two-element ``period`` list.
        horizon: Passed as ``forecast_horizon``.
        out_base: Root directory under which the run directory is created.

    Returns:
        The summary dict that was written: ``run_id``, ``elapsed_sec``,
        ``R2OOS``, ``MSEOOS`` and ``best_params``. Scalar metrics are floats;
        non-scalar metrics (for example one value per target column) are
        converted to plain lists/dicts instead of raising.
    """
    run_dir = os.path.join(out_base, run_id)
    os.makedirs(run_dir, exist_ok=True)

    m = Machine(
        X, y, "ARM",
        option=option, params_grid=params_grid,
        burn_in_start=burn_in[0], burn_in_end=burn_in[1],
        period=[period[0], period[1]],
        forecast_horizon=horizon
    )
    t0 = time.time()
    m.training()
    elapsed = time.time() - t0

    # Use the out-of-sample metrics that Machine provides directly.
    r2oos = m.R2OOS()
    mseoos = m.MSEOOS()

    # Best-effort extraction of best_params (present when a grid search was
    # used). Where the attribute lives depends on the strategy implementation,
    # so look on the Machine first and then on its `strategy` attribute.
    try:
        best_params = (
            getattr(m, "best_params_", None)
            or getattr(getattr(m, "strategy", None), "best_params_", None)
        )
    except Exception as exc:
        # Never lose a finished run over this optional field, but say why.
        print(f"[WARN] could not read best_params_ for {run_id}: {exc!r}")
        best_params = None

    summary = {
        "run_id": run_id,
        "elapsed_sec": round(elapsed, 2),
        "R2OOS": _metric_to_jsonable(r2oos),
        "MSEOOS": _metric_to_jsonable(mseoos),
        "best_params": _to_jsonable(best_params),
    }
    # Save the summary.
    with open(os.path.join(run_dir, "summary.json"), "w", encoding="utf-8") as f:
        json.dump(summary, f, ensure_ascii=False, indent=2)

    # (Optional) Any predictions/metadata that Machine or its recorder saved on
    # their own are left untouched; even without them, this minimal summary
    # remains on disk for every run.
    return summary

# ===== Run 20 seeds and select the 10 best runs by R2OOS =====
def _ranking_score(value):
    """Return *value* as a float for ranking, or NaN when it is not scalar-like."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return float("nan")


def run_20_and_ensemble_top10(n_runs: int = 20, top_k: int = 10,
                              seed_base: int = 1000) -> None:
    """Run the macro-MLP ARM experiment over several seeds and rank the runs.

    Run ``k`` (1-based) uses seed ``seed_base + k`` and the run id
    ``ARM_macroMLP_seed<seed>``. All summaries are written, sorted by R2OOS
    in descending order, to ``all_runs_summary.csv`` under ``OUT_BASE``, and
    the first ``top_k`` rows to ``top<top_k>_summary.csv``.

    NOTE: despite the function name, no ensemble is built here; the function
    only selects the best runs. Combining their predictions is left to the
    caller.

    Args:
        n_runs: Number of seeds to run (default 20).
        top_k: Number of best runs to keep (default 10).
        seed_base: Offset added to the run number to form the seed.

    Raises:
        ValueError: If ``n_runs`` or ``top_k`` is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError(f"n_runs must be >= 1, got {n_runs}")
    if top_k < 1:
        raise ValueError(f"top_k must be >= 1, got {top_k}")

    summaries = []
    for k in range(1, n_runs + 1):
        seed = seed_base + k  # one seed per run
        opt = copy.deepcopy(opt_base)  # never mutate the shared base options
        opt["seed"] = seed

        run_id = f"ARM_macroMLP_seed{seed}"
        print(f"\n▶ Run {k:02d}/{n_runs}  (run_id={run_id})")
        summ = run_once_and_save(run_id, X_macro, y, opt, grid)
        print("  - R2OOS:", summ["R2OOS"], "  MSEOOS:", summ["MSEOOS"])
        summaries.append(summ)

    # Rank by R2OOS, assuming larger is better. Ranking uses a float view of
    # the column so that a non-scalar R2OOS cannot abort the script after all
    # runs have finished; such runs are ranked last.
    df = pd.DataFrame(summaries)
    scores = df["R2OOS"].map(_ranking_score)
    unranked = int(scores.isna().sum())
    if unranked:
        print(f"[WARN] R2OOS is not a scalar number for {unranked} run(s); "
              "they are ranked last")
    df = (
        df.assign(_rank_score=scores)
        .sort_values("_rank_score", ascending=False)
        .drop(columns="_rank_score")
    )
    df.to_csv(os.path.join(OUT_BASE, "all_runs_summary.csv"), index=False)

    top = df.head(top_k)
    top.to_csv(os.path.join(OUT_BASE, f"top{top_k}_summary.csv"), index=False)

    print(f"\n=== Done. Top {top_k} by R2OOS ===")
    print(top[["run_id", "R2OOS", "MSEOOS"]])

# Script entry point: start the multi-seed experiment only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    run_20_and_ensemble_top10()

DNN_DUAL rolling:   4%|▍         | 20/520 [03:47<1:34:56, 11.39s/it]


