In [84]:
# ======== COMMON CONFIG (run once) ==========================================
import os, sys, re, warnings, argparse, pandas as pd
warnings.filterwarnings("ignore")

from rolling_framework import Machine           # <-- 프로젝트의 핵심 API

# ---- Utility functions -----------------------------------------------------
def _load_csv(path, name):
    try:  return pd.read_csv(path, index_col="Time")
    except FileNotFoundError as e:
        sys.exit(f"[ERROR] missing {name} → {e.filename}")

def _align_time(*dfs):
    idx=None
    for d in dfs: idx = d.index if idx is None else idx.intersection(d.index)
    return [d.loc[idx].sort_index() for d in dfs]

def _direct_pairs(slope_cols, y_cols):
    mk = lambda s: re.search(r"(\d+)", s).group(1) if re.search(r"(\d+)", s) else None
    y_map = {mk(c): c for c in y_cols}
    return [(sc, y_map[mk(sc)]) for sc in slope_cols if mk(sc) in y_map]

# ---- Edit This ------------------------------------------------------
# Input CSV locations, all resolved relative to DATA_DIR.
DATA_DIR      = "data/"
Y_FILE        = os.path.join(DATA_DIR, "exrets.csv")
SLOPE_FILE    = os.path.join(DATA_DIR, "slope.csv")
YL_FILE       = os.path.join(DATA_DIR, "yl_all.csv")
MACRO_FILE    = os.path.join(DATA_DIR, "MacroFactors.csv")
IV_FILE =    os.path.join(DATA_DIR, "imp_vol.csv")
LSC_FILE = os.path.join(DATA_DIR, "lsc.csv")
RV_FILE = os.path.join(DATA_DIR, "real_vol.csv")
FWD_FILE = os.path.join(DATA_DIR, "fwds.csv")
CP_FILE = os.path.join(DATA_DIR, "cp.csv")

# Output directory; created here if it does not exist yet.
OUT_DIR       = "./output";  os.makedirs(OUT_DIR, exist_ok=True)

# Sample windows and forecast horizon handed to Machine(...) in later cells
# (burn_in_start / burn_in_end / period / forecast_horizon).
# NOTE(review): the date strings look like YYYYMM month stamps — confirm
# against what Machine expects.
BURN_START, BURN_END   = "199009", "200609"
PERIOD_START, PERIOD_END = "199009", "202312"
HORIZON = 12                           # months ahead

# Target columns to keep from the exrets file.
MATURITIES = ["xr_2","xr_3","xr_5","xr_7","xr_10"]

# Load every input; a missing file aborts the run via _load_csv.
y     = _load_csv(Y_FILE,   "exrets")
slope = _load_csv(SLOPE_FILE, "slope") # This slope includes all maturities (i.e., 2-1, 3-1, ..., 10-1)
lsc = _load_csv(LSC_FILE, "lsc")
yl    = _load_csv(YL_FILE,   "yl_all")
macro = _load_csv(MACRO_FILE,"MacroFactors")
iv = _load_csv(IV_FILE, "imp_vol")
rv = _load_csv(RV_FILE, "real_vol")
fwd = _load_csv(FWD_FILE, "fwd")
cp = _load_csv(CP_FILE, "cp")

# --------------------------------------------------------------------------

# Filter the target columns: keep only the MATURITIES present in y, and stop
# if none of them are.
y_cols = [c for c in MATURITIES if c in y.columns]
if not y_cols: sys.exit("[ERROR] MATURITIES not in exrets")
y = y[y_cols]

# Align the time axis: reduce every frame to the index labels common to all
# of them (sorted). The unpacking order must match the argument order.
y, slope, yl, macro, iv, lsc, rv, fwd, cp = _align_time(y, slope, yl, macro, iv, lsc, rv, fwd, cp)

In [85]:
# ────────────────────────────
# Random-Forest
# ────────────────────────────
# Hyper-parameter grid for the random-forest model.
# max_features values are fractions of the feature count. The last entry is
# written as the float 1.0 (all features): scikit-learn interprets the integer
# 1 as "exactly one feature per split", which does not fit a 0.25 / 0.5 / 1
# fraction grid.
param_grid_rf = {
    "model__estimator__n_estimators":      [300],
    "model__estimator__max_depth":         [2, 8],
    "model__estimator__min_samples_split": [2, 4],
    "model__estimator__min_samples_leaf":  [1, 2, 4],
    "model__estimator__max_features":      [0.25, 0.5, 1.0],
}

# ────────────────────────────
# Extra-Trees
# ────────────────────────────
# Same grid as the random forest, including the float 1.0 for max_features.
param_grid_et = {
    "model__estimator__n_estimators":      [300],
    "model__estimator__max_depth":         [2, 8],
    "model__estimator__min_samples_split": [2, 4],
    "model__estimator__min_samples_leaf":  [1, 2, 4],
    "model__estimator__max_features":      [0.25, 0.5, 1.0],
}

# ────────────────────────────
# XGBoost  (assumes XGBRegressor is used)
# ────────────────────────────
# Hyper-parameter grid for the gradient-boosted model.
param_grid_xgb = dict(
    model__estimator__n_estimators=[300],
    model__estimator__max_depth=[2, 4],
    model__estimator__learning_rate=[0.01],
    model__estimator__subsample=[0.7, 0.5],
    model__estimator__reg_lambda=[0.1, 1.0],
)

# Penalty-strength grids for the linear models (keys target the "reg" step).
param_grid_lasso = dict(reg__alpha=[1, 10, 100])
param_grid_ridge = dict(reg__alpha=[1, 10, 100])
param_grid_elasticnet = dict(
    reg__alpha=[0.01, 0.1, 1, 10],
    reg__l1_ratio=[0.1, 0.3, 0.5],
)

## Vary the Predictor Variables

In [81]:
# numpy is imported here because this cell calls np.logspace, while the only
# other numpy import in the notebook sits in a later cell; without it the cell
# raises NameError on a fresh kernel run from top to bottom.
import numpy as np

slope_10 = lsc[['slope']] # 10yr slope
# Predictor matrix: the imp_vol columns followed by the macro-factor columns.
X1 = pd.concat([iv, macro], axis=1)

# Replace the coarse alpha grids with 30 log-spaced values from 1e-3 to 1e2.
param_grid_ridge = {'reg__alpha': np.logspace(-3,2,30)}
param_grid_lasso = {'reg__alpha': np.logspace(-3,2,30)}

In [82]:
def _run_penalized(option, grid):
    """Build a 'Penalized' Machine on X1/y, train it, print its R2OOS, return it.

    Parameters
    ----------
    option : str
        Value forwarded as ``option`` (e.g. "ridge" or "lasso").
    grid : dict
        Value forwarded as ``params_grid``.
    """
    machine = Machine(
        X1, y, 'Penalized', option=option, params_grid=grid,
        burn_in_start=BURN_START, burn_in_end=BURN_END,
        period=[PERIOD_START, PERIOD_END], forecast_horizon=HORIZON,
    )
    machine.training()
    print(machine.R2OOS())
    return machine


# Ridge first, then lasso; both stay available as m1 / m2.
m1 = _run_penalized("ridge", param_grid_ridge)

m2 = _run_penalized("lasso", param_grid_lasso)

Penalized rolling: 100%|██████████| 208/208 [00:18<00:00, 11.20it/s]


xr_2    -5.471404
xr_3    -5.698766
xr_5    -5.503903
xr_7    -4.891030
xr_10   -3.906169
dtype: float64


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

xr_2    -0.051416
xr_3    -0.072270
xr_5    -0.088658
xr_7    -0.131619
xr_10   -0.186948
dtype: float64


In [104]:
import numpy as np

# Small-alpha grids used by the IV/RV subset experiment below.
param_grid_lasso = dict(reg__alpha=[0.001, 0.01, 0.1])
param_grid_ridge = dict(reg__alpha=[0.001, 0.01, 0.1])

In [105]:
# Ridge and Lasso 

import pandas as pd
from itertools import combinations

def make_X1_sets(iv: pd.DataFrame, rv: pd.DataFrame, dropna: bool = True):
    """
    Build every candidate predictor frame under the 'same-position pairing' rule.

    Both inputs are first restricted to the index labels they share. The result
    then contains:

    - "IV_only": all IV columns.
    - "IV{S}+RV{S}" for each non-empty subset S of column positions common to
      both frames: the IV columns at those positions followed by the RV columns
      at the same positions. Positions are 1-based in the name only, e.g.
      positions (0, 2) give "IV13+RV13".

    Parameters
    ----------
    iv, rv : DataFrame
        Frames whose columns are paired by position.
    dropna : bool
        If True, rows containing any NaN are removed from each candidate.

    Returns
    -------
    dict[str, DataFrame]
        Candidate name -> predictor frame, "IV_only" first, then subsets in
        order of increasing size.
    """
    shared_index = iv.index.intersection(rv.index)
    iv_shared = iv.loc[shared_index]
    rv_shared = rv.loc[shared_index]

    def _finalize(frame):
        # Apply the optional NaN-row filter in one place.
        if dropna:
            return frame.dropna()
        return frame

    candidates = {"IV_only": _finalize(iv_shared.copy())}

    # Only positions present in both frames can be paired.
    n_pairs = min(iv_shared.shape[1], rv_shared.shape[1])
    for size in range(1, n_pairs + 1):
        for subset in combinations(range(n_pairs), size):
            picked = list(subset)
            tag = "".join(str(k + 1) for k in picked)
            paired = pd.concat(
                [iv_shared.iloc[:, picked], rv_shared.iloc[:, picked]],
                axis=1,
            )
            candidates[f"IV{tag}+RV{tag}"] = _finalize(paired)

    return candidates

# ─────────────────────────── USAGE EXAMPLE ───────────────────────────
# Assume iv, rv, y, param_grid_ridge, and scheduling constants are already defined.

X_sets = make_X1_sets(iv, rv, dropna=True)

# Fit one ridge model per candidate predictor set and collect its R2OOS.
# The loop uses its own variable names (X_candidate, machine) rather than
# X1 / m1, so it does not overwrite the notebook-level X1 and m1 that the
# earlier ridge/lasso cell defines and would read again if re-run.
results = {}
for name, X_candidate in X_sets.items():
    machine = Machine(
        X_candidate, y, 'Penalized', option="ridge", params_grid=param_grid_ridge,
        burn_in_start=BURN_START, burn_in_end=BURN_END,
        period=[PERIOD_START, PERIOD_END], forecast_horizon=HORIZON
    )
    machine.training()
    results[name] = machine.R2OOS()

# Quick view
for k, v in results.items():
    print(k, v)

Penalized rolling: 100%|██████████| 208/208 [00:03<00:00, 52.96it/s]
Penalized rolling: 100%|██████████| 208/208 [00:03<00:00, 58.55it/s]
Penalized rolling: 100%|██████████| 208/208 [00:03<00:00, 52.65it/s]
Penalized rolling: 100%|██████████| 208/208 [00:03<00:00, 57.80it/s]
Penalized rolling: 100%|██████████| 208/208 [00:03<00:00, 57.92it/s]
Penalized rolling: 100%|██████████| 208/208 [00:03<00:00, 57.94it/s]
Penalized rolling: 100%|██████████| 208/208 [00:03<00:00, 57.37it/s]
Penalized rolling: 100%|██████████| 208/208 [00:03<00:00, 58.14it/s]

IV_only xr_2    -0.182920
xr_3    -0.207746
xr_5    -0.159850
xr_7    -0.132421
xr_10   -0.048206
dtype: float64
IV1+RV1 xr_2    -0.063498
xr_3    -0.057790
xr_5    -0.049980
xr_7    -0.052461
xr_10   -0.046308
dtype: float64
IV2+RV2 xr_2    -0.082334
xr_3    -0.090623
xr_5    -0.088815
xr_7    -0.086812
xr_10   -0.064053
dtype: float64
IV3+RV3 xr_2    -0.072188
xr_3    -0.087968
xr_5    -0.092908
xr_7    -0.098872
xr_10   -0.101679
dtype: float64
IV12+RV12 xr_2    -0.090955
xr_3    -0.108651
xr_5    -0.114551
xr_7    -0.121486
xr_10   -0.091953
dtype: float64
IV13+RV13 xr_2    -0.073111
xr_3    -0.087411
xr_5    -0.064511
xr_7    -0.063916
xr_10   -0.046359
dtype: float64
IV23+RV23 xr_2    -0.208440
xr_3    -0.223669
xr_5    -0.154435
xr_7    -0.126113
xr_10   -0.042508
dtype: float64
IV123+RV123 xr_2    -0.219121
xr_3    -0.240308
xr_5    -0.176638
xr_7    -0.145102
xr_10   -0.050555
dtype: float64





### Implied-Volatility-Only Cases (imp_vol)

In [112]:
import numpy as np

# Alpha grids for the IV-only experiment: small penalties for lasso, larger
# ones for ridge.
param_grid_lasso = dict(reg__alpha=[0.001, 0.01, 0.1])
param_grid_ridge = dict(reg__alpha=[1, 10, 100])

In [113]:
import pandas as pd
from itertools import combinations

def make_X1_iv_sets(iv: pd.DataFrame, dropna: bool = True):
    """
    Build every IV-only candidate predictor frame.

    The result contains:

    - "IV_only": all columns of ``iv``.
    - "IV{S}" for each non-empty subset S of column positions: only the
      columns at those positions. Positions are 1-based in the name, e.g.
      positions (0, 2) give "IV13". The full subset therefore repeats the
      columns of "IV_only" under a second name.

    Parameters
    ----------
    iv : DataFrame
        Frame whose columns are the IV series, in the order given.
    dropna : bool, default True
        If True, rows containing any NaN are removed from each candidate.

    Returns
    -------
    dict[str, DataFrame]
        Candidate name → predictor frame, "IV_only" first, then subsets in
        order of increasing size.
    """
    if dropna:
        candidates = {"IV_only": iv.dropna()}
    else:
        candidates = {"IV_only": iv.copy()}

    n_cols = iv.shape[1]
    # Enumerate subsets up front: size 1 first, then 2, ..., up to all columns.
    subsets = [
        list(chosen)
        for size in range(1, n_cols + 1)
        for chosen in combinations(range(n_cols), size)
    ]

    for picked in subsets:
        tag = "".join(str(k + 1) for k in picked)
        selected = iv.iloc[:, picked]
        if dropna:
            selected = selected.dropna()
        candidates[f"IV{tag}"] = selected

    return candidates

# ──────────────── USAGE EXAMPLE ────────────────
# Assume iv, y, param_grid_ridge, and scheduling constants already defined.

X_sets = make_X1_iv_sets(iv, dropna=True)

# Fit one ridge model per IV-only candidate and collect its R2OOS.
# The loop uses its own variable names (X_candidate, machine) rather than
# X1 / m1, so it does not overwrite the notebook-level X1 and m1 that the
# earlier ridge/lasso cell defines and would read again if re-run.
# To run lasso instead, pass option="lasso", params_grid=param_grid_lasso.
results = {}
for name, X_candidate in X_sets.items():
    machine = Machine(
        X_candidate, y, 'Penalized', option="ridge", params_grid=param_grid_ridge,
        burn_in_start=BURN_START, burn_in_end=BURN_END,
        period=[PERIOD_START, PERIOD_END], forecast_horizon=HORIZON
    )
    machine.training()
    results[name] = machine.R2OOS()

# Quick view
for k, v in results.items():
    print(k, v)

Penalized rolling: 100%|██████████| 208/208 [00:03<00:00, 55.84it/s]
Penalized rolling: 100%|██████████| 208/208 [00:03<00:00, 57.71it/s]
Penalized rolling: 100%|██████████| 208/208 [00:04<00:00, 47.55it/s]
Penalized rolling: 100%|██████████| 208/208 [00:03<00:00, 55.39it/s]
Penalized rolling: 100%|██████████| 208/208 [00:03<00:00, 55.10it/s]
Penalized rolling: 100%|██████████| 208/208 [00:03<00:00, 55.55it/s]
Penalized rolling: 100%|██████████| 208/208 [00:03<00:00, 56.19it/s]
Penalized rolling: 100%|██████████| 208/208 [00:03<00:00, 57.43it/s]

IV_only xr_2     0.014564
xr_3     0.006193
xr_5    -0.004307
xr_7    -0.018805
xr_10   -0.013963
dtype: float64
IV1 xr_2    -0.056072
xr_3    -0.050655
xr_5    -0.046342
xr_7    -0.052343
xr_10   -0.044974
dtype: float64
IV2 xr_2    -0.079815
xr_3    -0.079267
xr_5    -0.074825
xr_7    -0.072511
xr_10   -0.053381
dtype: float64
IV3 xr_2    -0.062284
xr_3    -0.076055
xr_5    -0.079634
xr_7    -0.082322
xr_10   -0.080060
dtype: float64
IV12 xr_2    -0.080704
xr_3    -0.078180
xr_5    -0.074303
xr_7    -0.077239
xr_10   -0.061919
dtype: float64
IV13 xr_2    -0.027505
xr_3    -0.042772
xr_5    -0.044689
xr_7    -0.049478
xr_10   -0.041993
dtype: float64
IV23 xr_2     0.050104
xr_3     0.040791
xr_5     0.032784
xr_7     0.019066
xr_10    0.028831
dtype: float64
IV123 xr_2     0.014564
xr_3     0.006193
xr_5    -0.004307
xr_7    -0.018805
xr_10   -0.013963
dtype: float64



