In [55]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, root_mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
from sklearn.feature_selection import VarianceThreshold

In [56]:
df = pd.read_csv("kaggle/input/hull-tactical-market-prediction/train.csv")
EXCLUDED_COLS = {'date', 'date_id', 'forward_returns', 'risk_free_rate', 'market_forward_excess_returns', 'log_market_forward_excess_returns'}
ARITH_TARGET_COL = "market_forward_excess_returns"
TARGET_COL = "log_market_forward_excess_returns"
DATE_COL = "date_id"
SENTINAL_VALUE = -9999
START_DATE = "1990-01-01"
N_SAMPLES = len(df)

def add_date_col(df, start_date, periods):
    df = df.copy()
    dates = pd.date_range(start=start_date, periods=periods, freq="B")
    df["date"] = dates.values
    return df

def sort_data_by_date_id(df, date_col):
    return df.sort_values(date_col).reset_index(drop=True)

def create_target_log(df, target_col, arith_target_col):
    df = df.copy()
    df[target_col] = np.log1p(df[arith_target_col])
    return df

def create_category_groups(df, excluded_col):
    category_groups = {
        col[0]: []
        for col in df.columns
        if col[0].isalpha() and col not in excluded_col
    }
    for dfcol in df.columns:
        if dfcol in excluded_col:
            continue
        if dfcol and dfcol[0].isalpha() and dfcol[0] in category_groups:
            category = dfcol[0]
            category_groups[category].append(dfcol)

    return category_groups

def construct_momentum_cols(
        df, target_col, sentinal, category_groups,
        windows=[5, 10, 21, 63, 126, 252],
        lags=[1, 2, 3, 5, 10, 21, 63, 126]
        ):
    df_momentum = pd.DataFrame(index=df.index)

    target_shifted = df[target_col].shift(1)

    for window in windows:
        df_momentum[f'MOM_mean_{window}'] = target_shifted.rolling(window).mean()
        df_momentum[f'MOM_std_{window}'] = target_shifted.rolling(window).std()
        df_momentum[f'MOM_roc_{window}'] = target_shifted / target_shifted.shift(window) - 1
        df_momentum[f'MOM_cum_{window}'] = target_shifted.rolling(window).sum()

    df_momentum['MOM_ema_fast'] = target_shifted.ewm(span=10, adjust=False).mean()
    df_momentum['MOM_ema_slow'] = target_shifted.ewm(span=50, adjust=False).mean()
    df_momentum['MOM_ema_diff'] = df_momentum['MOM_ema_fast'] - df_momentum['MOM_ema_slow']
    df_momentum['MOM_ema_ratio'] = df_momentum['MOM_ema_fast'] / df_momentum['MOM_ema_slow'] - 1

    df_momentum['MOM_std_21_252_ratio'] = df_momentum['MOM_std_21'] / df_momentum['MOM_std_252']

    for lag in lags:
        df_momentum[f'MOM_lag_{lag}'] = df[target_col].shift(lag)

    df_momentum = df_momentum.fillna(sentinal)

    mom_cols = df_momentum.columns

    if 'MOM' not in category_groups:
        category_groups["MOM"] = []
    category_groups["MOM"].extend(mom_cols)

    df_out = pd.concat([df, df_momentum], axis=1)

    return df_out, category_groups

def construct_volatility_indicators(
            df, target_col, sentinal, category_groups
        ):
    df_volatility_indicators = pd.DataFrame(index=df.index)

    target_shifted = df[target_col].shift(1)

    df_volatility_indicators["VI_regime_highvol"] = (df["MOM_std_21_252_ratio"] > 1.2).astype("int8")

    z = target_shifted / (target_shifted.shift(1).rolling(21).std())
    df_volatility_indicators["VI_regime_shock"] = (z.abs() > 2.5).astype("int8")

    mkt = (1 + target_shifted.fillna(sentinal)).cumprod()
    sma200 = mkt.rolling(200).mean()
    df_volatility_indicators["VI_regime_bull"] = (mkt > sma200).astype("int8")

    df_volatility_indicators["VI_volofvol_21"] = df["MOM_std_21"].pct_change().fillna(0)

    for col in ["MOM_ema_diff", "MOM_roc_21", "MOM_mean_21"]:
        if col in df:
            df_volatility_indicators[f"VI_{col}_x_bull"] = df[col] * df_volatility_indicators["VI_regime_bull"]
            df_volatility_indicators[f"VI_{col}_x_highvol"] = df[col] * df_volatility_indicators["VI_regime_highvol"]

    df_volatility_indicators["VI_regime_highvol_x_ema"] = df_volatility_indicators["VI_regime_highvol"] * df.get("MOM_ema_diff", 0)

    vi_cols = df_volatility_indicators.columns

    if 'VI' not in category_groups:
        category_groups["VI"] = []
    category_groups["VI"].extend(vi_cols)

    df_out = pd.concat([df, df_volatility_indicators], axis=1)

    return df_out, category_groups

def impute_independent_variables(df, sentinal, excluded):
    df = df.copy()
    feature_cols = [col for col in df.columns if col not in excluded]
    flag_cols = []

    for col in feature_cols:
        if df[col].isnull().any():
            flag_col_name = f'{col}_is_missing'
            flag_cols.append(flag_col_name)
            df[flag_col_name] = df[col].isnull().astype(int)

    df[feature_cols] = df[feature_cols].fillna(sentinal)
    
    return df

def extend_features(df, sentinal, cols,
        windows=[5, 10, 21, 63, 126, 252],
        lags=[1, 2, 3, 5, 10, 21, 63, 126]
    ):
    df = df.copy()
    feat_frames = []
    new_cols = []

    for col in [
        c for c in cols 
        if not c.startswith(('MOM', 'VI'))]:
            col_raw = df[col].astype(float)
            col_shifted = col_raw.shift(1)

            category_extensions = {}
            
            for window in windows:
                roll = col_shifted.rolling(window, min_periods=5)
                category_extensions[f'{col}_mean_{window}'] = roll.mean()
                category_extensions[f'{col}_std_{window}'] = roll.std()
                category_extensions[f'{col}_roc_{window}'] = col_shifted / col_shifted.shift(window) - 1
                category_extensions[f'{col}_cum_{window}'] = roll.sum()

            for lag in lags:
                category_extensions[f'{col}_lag_{lag}'] = col_shifted.shift(lag-1)

            ema_fast = col_shifted.ewm(span=10, adjust=False).mean()
            ema_slow = col_shifted.ewm(span=50, adjust=False).mean()
            category_extensions[f'{col}_ema_fast'] = ema_fast
            category_extensions[f'{col}_ema_slow'] = ema_slow
            category_extensions[f'{col}_ema_diff'] = ema_fast - ema_slow
            category_extensions[f'{col}_ema_ratio'] = ema_fast / ema_slow - 1

            feat_df = pd.DataFrame(category_extensions, index=df.index)
            feat_frames.append(feat_df)
            new_cols.extend(feat_df.columns.tolist())

    feats = pd.concat(feat_frames, axis=1)

    feats = feats.fillna(sentinal)

    df_out = pd.concat([df, feats], axis=1)
            
    return df_out, new_cols

def extend_all_features(df, sentinal, category_groups):
    df_out = df.copy()
    for category in [
        c for c in category_groups
        if not c.startswith(("MOM", "VI"))]:
            print(category)
            df_out, new_cols = extend_features(df_out, sentinal=sentinal, cols=category_groups[category])
            print(f"Added {len(new_cols)} new {category} features")
    return df_out

def data_guards(df, sentinal, excluded):
    df = df.copy()
    excluded_cols = [c for c in excluded if c in df.columns]
    df_features = df.drop(columns=excluded_cols)

    X_all = df_features.select_dtypes(include=['number']).copy()
    X_all = X_all.replace([np.inf, -np.inf], np.nan).fillna(sentinal)

    const_cols = X_all.columns[X_all.nunique(dropna=False) <= 1]
    X_all.drop(columns=const_cols, inplace=True)

    variance_threshold = VarianceThreshold(threshold=1e-10)
    X_reduced = pd.DataFrame(variance_threshold.fit_transform(X_all), index=X_all.index)
    X_reduced.columns = X_all.columns[variance_threshold.get_support()]

    df_out = pd.concat([df[excluded_cols], X_reduced], axis=1)
    return df_out



print(df.shape)

df = add_date_col(df, start_date=START_DATE, periods=N_SAMPLES)
df = sort_data_by_date_id(df, date_col=DATE_COL)
df = create_target_log(df, target_col=TARGET_COL, arith_target_col=ARITH_TARGET_COL)
category_groups = create_category_groups(df, excluded_col=EXCLUDED_COLS)
df, category_groups = construct_momentum_cols(df, target_col=TARGET_COL, sentinal=SENTINAL_VALUE, category_groups=category_groups)
df, category_groups = construct_volatility_indicators(df, target_col=TARGET_COL, sentinal=SENTINAL_VALUE, category_groups=category_groups)
df = impute_independent_variables(df, sentinal=SENTINAL_VALUE, excluded=EXCLUDED_COLS)
df = extend_all_features(df, sentinal=SENTINAL_VALUE, category_groups=category_groups)
df = data_guards(df, sentinal=SENTINAL_VALUE, excluded=EXCLUDED_COLS)

print(df.shape)


(8990, 98)
D
Added 324 new D features
E
Added 720 new E features
I
Added 324 new I features
M
Added 648 new M features
P
Added 468 new P features
S
Added 432 new S features
V
Added 468 new V features
(8990, 3617)


In [57]:
MIN_INVESTMENT = 0.0
MAX_INVESTMENT = 2.0
TRADING_DAYS_PER_YR = 252

class ParticipantVisibleError(Exception):
    pass

def adjusted_sharpe_from_df(solution: pd.DataFrame, submission: pd.DataFrame, row_id_col: str) -> float:
    """
    solution: columns = [row_id_col, 'forward_returns', 'risk_free_rate']
    submission: columns = [row_id_col, 'prediction'] where prediction ∈ [0,2]
    """
    if not pd.api.types.is_numeric_dtype(submission['prediction']):
        raise ParticipantVisibleError('Predictions must be numeric')
    sol = solution.copy()
    sub = submission.copy()

    # align on row_id
    sol = sol.merge(sub[[row_id_col, 'prediction']], on=row_id_col, how='inner')
    sol.rename(columns={'prediction': 'position'}, inplace=True)

    if sol['position'].max() > MAX_INVESTMENT:
        raise ParticipantVisibleError(f'Position of {sol["position"].max()} exceeds maximum of {MAX_INVESTMENT}')
    if sol['position'].min() < MIN_INVESTMENT:
        raise ParticipantVisibleError(f'Position of {sol["position"].min()} below minimum of {MIN_INVESTMENT}')

    # Strategy daily returns: convex combo of RF and market forward return
    sol['strategy_returns'] = sol['risk_free_rate'] * (1 - sol['position']) + sol['position'] * sol['forward_returns']

    # Strategy Sharpe (excess mean / std) annualized
    strategy_excess = sol['strategy_returns'] - sol['risk_free_rate']
    # geometric to arithmetic mean per-day
    cum = (1 + strategy_excess).prod()
    mean_excess_daily = cum ** (1 / len(sol)) - 1
    std_daily = sol['strategy_returns'].std()
    if std_daily == 0:
        raise ParticipantVisibleError('Division by zero, strategy std is zero')

    sharpe = mean_excess_daily / std_daily * np.sqrt(TRADING_DAYS_PER_YR)
    strategy_vol_annual = float(std_daily * np.sqrt(TRADING_DAYS_PER_YR) * 100)

    # Market stats (forward_returns vs risk_free_rate)
    market_excess = sol['forward_returns'] - sol['risk_free_rate']
    market_cum = (1 + market_excess).prod()
    market_mean_excess_daily = market_cum ** (1 / len(sol)) - 1
    market_std_daily = sol['forward_returns'].std()
    if market_std_daily == 0:
        raise ParticipantVisibleError('Division by zero, market std is zero')
    market_vol_annual = float(market_std_daily * np.sqrt(TRADING_DAYS_PER_YR) * 100)

    # Penalties (vol & return gap)
    excess_vol = max(0, strategy_vol_annual / market_vol_annual - 1.2) if market_vol_annual > 0 else 0
    vol_penalty = 1 + excess_vol

    return_gap = max(0, (market_mean_excess_daily - mean_excess_daily) * 100 * TRADING_DAYS_PER_YR)
    return_penalty = 1 + (return_gap ** 2) / 100

    adjusted = sharpe / (vol_penalty * return_penalty)
    return float(min(adjusted, 1_000_000.0))


In [58]:
def signal_to_position(
    train_pred: np.ndarray,
    test_pred: np.ndarray,
    vol_factor_te: float,
    rmse_factor_te: float,
    regime_te: np.ndarray | None = None,
    bad_regime_scale: float = 0.5):
    """
    Standardize predictions on TRAIN, map to positions: pos = 1 + beta * z
    Then clip to [0, 2].
    beta controls aggressiveness; increase to take more risk.
    """
    
    beta_dynamic = 0.25 / (1 + 0.5*vol_factor_te + 0.5*rmse_factor_te)
    
    mu, sigma = np.mean(train_pred), np.std(train_pred)
    if sigma == 0 or not np.isfinite(sigma):
        pos = np.full_like(test_pred, 1.0, dtype=float)
    else:
        z = (test_pred - mu) / sigma
        pos = 1.0 + beta_dynamic * z

    if regime_te is not None:
        # regime_te should be 0/1 with 1 = "bad regime" (e.g., high vol)
        scaler = np.where(regime_te.astype(int) == 1, bad_regime_scale, 1.0)
        pos = pos * scaler
    
    return np.clip(pos, MIN_INVESTMENT, MAX_INVESTMENT)

In [61]:
N_SPLITS = 24
TEST_SIZE = 252
EARLY_STOPPING_ROUNDS = 100
gap = 1

ROW_ID_COL = "row_id"
df_eval = df.copy()
df_eval[ROW_ID_COL] = df_eval.index

y = df['log_market_forward_excess_returns']
X = df.drop(EXCLUDED_COLS, axis=1).copy()

tscv = TimeSeriesSplit(
    n_splits = N_SPLITS,
    test_size = TEST_SIZE
)

xgb_1_model = xgb.XGBRegressor(
    objective="reg:squarederror",
    tree_method="hist",
    random_state=42,
    n_estimators=5000,
    learning_rate=0.08,
    max_depth=5,
    min_child_weight=2,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_lambda=5,
    reg_alpha=1.0,
    gamma=0.0,
    eval_metric="rmse",
    early_stopping_rounds=EARLY_STOPPING_ROUNDS
)

xgb_r2_scores = []
xgb_mse_scores = []
xgb_rmse_scores = []
xgb_mae_scores = []
xgb_adj_sharpes = []
predictions = []
fold_metrics = []

print(f"Starting Walk-Forward Validation with {N_SPLITS} folds (Test size: {TEST_SIZE} days)")
print("\n" + ("-" * 50) + "\n")

for fold, (train_index, test_index) in enumerate(tscv.split(X)):

    if gap > 0:
        max_train_index = test_index[0] - gap
        train_index = train_index[train_index < max_train_index]

    
    X_train_log, X_test_log = X.iloc[train_index], X.iloc[test_index]
    y_train_log, y_test_log = y.iloc[train_index], y.iloc[test_index]

    # Early stopping eval set
    eval_tail = min(len(X_train_log)//5, 252)  # up to last ~252 days or 20% of train
    eval_tail = max(eval_tail, 50)         # ensure some minimum
    X_tr, y_tr = X_train_log.iloc[:-eval_tail], y_train_log.iloc[:-eval_tail]
    X_ev, y_ev = X_train_log.iloc[-eval_tail:], y_train_log.iloc[-eval_tail:]

    xgb_1_model.fit(
        X_tr, y_tr,
        eval_set=[(X_ev, y_ev)],
        verbose=False,
    )

    y_hat_train_log = xgb_1_model.predict(X_train_log)
    y_hat_test_log =  xgb_1_model.predict(X_test_log)
    
    fold_r2 = r2_score(y_test_log, y_hat_test_log)
    fold_mse = mean_squared_error(y_test_log, y_hat_test_log)
    fold_rmse = root_mean_squared_error(y_test_log, y_hat_test_log)
    fold_mae = mean_absolute_error(y_test_log, y_hat_test_log)
    
    xgb_r2_scores.append(fold_r2)
    xgb_mse_scores.append(fold_mse)
    xgb_rmse_scores.append(fold_rmse)
    xgb_mae_scores.append(fold_mae)

    vol21 = df_eval.loc[X_test_log.index, "MOM_std_21"].astype(float)
    vol_factor_te = (vol21 / df_eval["MOM_std_21"].median())
    
    train_resid = (y_train_log - y_hat_train_log)
    sq = pd.Series(train_resid**2, index=X_train_log.index)
    
    # last-126d RMSE at the fold boundary (use min_periods to avoid NaNs)
    rmse_tail = np.sqrt(sq.rolling(126, min_periods=50).mean().iloc[-1])
    # a baseline (median) RMSE over the train period
    rmse_base = float(np.sqrt(sq.rolling(252, min_periods=50).mean()).median())
    if not np.isfinite(rmse_base) or rmse_base == 0:
        rmse_base = max(rmse_tail, 1e-6)
    
    # same scalar applied to all rows in the TEST slice (no look-ahead)
    rmse_factor_te = np.full(len(X_test_log), rmse_tail / rmse_base)
    
    reg_te = df_eval.loc[X_test_log.index, "VI_regime_highvol"].to_numpy()
    pos_te = signal_to_position(
        y_hat_train_log,
        y_hat_test_log,
        vol_factor_te=vol_factor_te,
        rmse_factor_te=rmse_factor_te,
        regime_te=reg_te,
        bad_regime_scale=0.5)
    
    # pos_te = signal_to_position_percentile(y_hat_tr, y_hat_te, lo=5, hi=95)

    solution_fold = df_eval.loc[X_test_log.index, [ROW_ID_COL, "forward_returns", "risk_free_rate"]].copy()
    submission_fold = pd.DataFrame({ROW_ID_COL: df_eval.loc[X_test_log.index, ROW_ID_COL].values,"prediction": pos_te.astype(float)})

    # Competition metric
    fold_adjusted_sharpe = adjusted_sharpe_from_df(solution_fold, submission_fold, row_id_col=ROW_ID_COL)
    xgb_adj_sharpes.append(fold_adjusted_sharpe)

    predictions.append(pd.DataFrame({
        "date": df_eval.loc[X_test_log.index, "date"].values,
        "y_log": y_test_log.values,
        "y_arith": np.expm1(y_test_log.values),
        "y_pred_log": y_hat_test_log,
        "y_pred_arith": np.expm1(y_hat_test_log),
        "position": pos_te.astype(float),
        "fold": fold + 1,
    }))
    
    print(f"Fold {fold + 1}:")
    print(f"  Train Size: {len(X_train_log):,}")
    print(f"  Test Size:  {len(X_test_log):,}")
    print(f"  R-squared:  {fold_r2:.6f}")
    print(f"  MSE:        {fold_mse:.6f}")
    print(f"  RMSE:       {fold_rmse:.6f}")
    print(f"  MAE:        {fold_mae:.6f}")
    print(f"  Adjusted Sharpe: {fold_adjusted_sharpe}")
    print(f"  Best Iter:  {getattr(xgb_1_model, 'best_iteration', None)}")
    print("\n" + ("-" * 50) + "\n")

    fold_metrics.append({
        "fold": fold + 1,
        "train_start": df_eval.loc[train_index[0], "date"],
        "train_end":   df_eval.loc[train_index[-1], "date"],
        "test_start":  df_eval.loc[test_index[0], "date"],
        "test_end":    df_eval.loc[test_index[-1], "date"],
        "train_size": len(X_train_log),
        "test_size": len(X_test_log),
        "best_iteration": getattr(xgb_1_model, "best_iteration", None),
        "R-squared": float(fold_r2),
        "mse": float(fold_mse),
        "rmse": float(fold_rmse),
        "mae": float(fold_mae),
        "Adjusted Sharpe": float(fold_adjusted_sharpe)
    })


print("\n--- Final Walk-Forward Results ---")
print(f"Average R-squared: {np.mean(xgb_r2_scores):.6f} ± {np.std(xgb_r2_scores):.6f}")
print(f"Average MSE:       {np.mean(xgb_mse_scores):.6f} ± {np.std(xgb_mse_scores):.6f}")
print(f"Average RMSE:      {np.mean(xgb_rmse_scores):.6f} ± {np.std(xgb_rmse_scores):.6f}")
print(f"Average MAE:       {np.mean(xgb_mae_scores):.6f} ± {np.std(xgb_mae_scores):.6f}")
print(f"AdjSharpe:         {np.mean(xgb_adj_sharpes):.6f} ± {np.std(xgb_adj_sharpes):.6f}")
print(f"MaxSharpe:         {np.max(xgb_adj_sharpes):.6f}")

oof_df = pd.concat(predictions, ignore_index=True).sort_values("date").reset_index(drop=True)
fold_df = pd.DataFrame(fold_metrics)

Starting Walk-Forward Validation with 24 folds (Test size: 252 days)

--------------------------------------------------

Fold 1:
  Train Size: 2,941
  Test Size:  252
  R-squared:  -0.010177
  MSE:        0.000219
  RMSE:       0.014782
  MAE:        0.011464
  Adjusted Sharpe: -1.1673720928009763
  Best Iter:  59

--------------------------------------------------

Fold 2:
  Train Size: 3,193
  Test Size:  252
  R-squared:  -0.000474
  MSE:        0.000188
  RMSE:       0.013729
  MAE:        0.010792
  Adjusted Sharpe: 0.28671206388910697
  Best Iter:  11

--------------------------------------------------

Fold 3:
  Train Size: 3,445
  Test Size:  252
  R-squared:  -0.011433
  MSE:        0.000054
  RMSE:       0.007364
  MAE:        0.005812
  Adjusted Sharpe: 0.8307669762265186
  Best Iter:  276

--------------------------------------------------

Fold 4:
  Train Size: 3,697
  Test Size:  252
  R-squared:  0.005371
  MSE:        0.000041
  RMSE:       0.006368
  MAE:        0.005

KeyboardInterrupt: 

In [None]:
def plot_cumulative_returns(oof_df: pd.DataFrame, threshold=0.0):
    # simple long/flat strategy: go long if prediction > threshold
    pos = oof_df["position"].astype(float).clip(0, 2)
    ret = oof_df["y_arith"].astype(float)
    strat_ret = pos * ret
    cum_strat = (1 + strat_ret).cumprod()
    cum_mkt   = (1 + ret).cumprod()

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(oof_df["date"], cum_mkt, label="Cumulative Actual")
    ax.plot(oof_df["date"], cum_strat, label=f"Cumulative Strategy (position)")
    ax.set_title("Cumulative Returns (OOF)")
    ax.set_xlabel("Date")
    ax.set_ylabel("Growth of 1 unit")
    ax.legend()
    plt.tight_layout()
    plt.show()

def plot_predictions_over_time(oof_df, title="Actual vs Predicted (OOF, log)"):
    df = oof_df.sort_values("date")
    fig, ax = plt.subplots(figsize=(12,5))
    ax.plot(df["date"], df["y_log"],      label="Actual (log)")
    ax.plot(df["date"], df["y_pred_log"], label="Predicted (log)")
    ax.set_title(title); ax.set_xlabel("Date"); ax.set_ylabel("log(1+r)")
    ax.legend(); plt.tight_layout(); plt.show()

def plot_predictions_over_time_arith(oof_df, title="Actual vs Predicted (OOF, arith)"):
    df = oof_df.sort_values("date")
    fig, ax = plt.subplots(figsize=(12,5))
    ax.plot(df["date"], df["y_arith"],       label="Actual (arith)")
    ax.plot(df["date"], df["y_pred_arith"],  label="Predicted (arith)")
    ax.set_title(title); ax.set_xlabel("Date"); ax.set_ylabel("Return")
    ax.legend(); plt.tight_layout(); plt.show()

def plot_fold_metrics(fold_df: pd.DataFrame):
    # bar/line of per-fold RMSE over chronological test windows
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(fold_df["fold"], fold_df["rmse"], marker="o")
    ax.set_title("Per-fold RMSE (walk-forward)")
    ax.set_xlabel("Fold (chronological)")
    ax.set_ylabel("RMSE")
    plt.tight_layout()
    plt.show()

def plot_rolling_metrics(oof_df: pd.DataFrame, window=126):
    # rolling RMSE and rolling correlation to see stability
    df = oof_df.copy()
    df["squared_err"] = (df["y_arith"] - df["y_pred_arith"])**2
    df["roll_rmse"] = (df["squared_err"].rolling(window).mean())**0.5
    df["roll_corr"] = df["y_arith"].rolling(window).corr(df["y_pred_arith"])

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(df["date"], df["roll_rmse"])
    ax.set_title(f"Rolling RMSE (window={window})")
    ax.set_xlabel("Date")
    ax.set_ylabel("RMSE")
    plt.tight_layout()
    plt.show()

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(df["date"], df["roll_corr"])
    ax.set_title(f"Rolling Correlation (window={window})")
    ax.set_xlabel("Date")
    ax.set_ylabel("Correlation")
    plt.tight_layout()
    plt.show()


def plot_scatter_pred_vs_actual(oof_df: pd.DataFrame):
    X = oof_df["y_pred_arith"].values.reshape(-1, 1)
    y = oof_df["y_arith"].values

    lr = LinearRegression().fit(X, y)
    x_grid = np.linspace(X.min(), X.max(), 200).reshape(-1, 1)
    y_fit = lr.predict(x_grid)

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.scatter(oof_df["y_pred_arith"], oof_df["y_arith"], alpha=0.4)
    ax.plot(x_grid, y_fit, linewidth=2, label=f"Fit: y={lr.coef_[0]:.2f}*ŷ + {lr.intercept_:.4f}")
    ax.axline((0,0), slope=1, linestyle="--", label="45° line (perfect)")
    ax.set_xlabel("Predicted return")
    ax.set_ylabel("Actual return")
    ax.set_title("Predicted vs Actual (OOF)")
    ax.legend()
    plt.tight_layout()
    plt.show()

def plot_smoothed(oof_df, win=21):
    df = oof_df.sort_values("date").copy()
    plt.figure(figsize=(12,5))
    plt.plot(df["date"], df["y_log"].rolling(win).mean(),      label=f"Actual {win}d MA")
    plt.plot(df["date"], df["y_pred_log"].rolling(win).mean(), label=f"Pred {win}d MA")
    plt.title("Actual vs Predicted (smoothed)"); plt.xlabel("Date")
    plt.legend(); plt.tight_layout(); plt.show()

def plot_directional_accuracy(oof_df, window=252):
    df = oof_df.copy()
    df["correct"] = (np.sign(df["y_arith"]) == np.sign(df["y_pred_arith"])).astype(int)
    df["rolling_acc"] = df["correct"].rolling(window).mean()
    plt.figure(figsize=(12,4))
    plt.plot(df["date"], df["rolling_acc"])
    plt.title(f"Rolling Directional Accuracy ({window}-day window)")
    plt.axhline(0.5, color='red', linestyle='--')
    plt.ylabel("Accuracy"); plt.xlabel("Date")
    plt.show()

plot_predictions_over_time(oof_df)
plot_predictions_over_time_arith(oof_df)
plot_fold_metrics(fold_df)
plot_rolling_metrics(oof_df)
plot_scatter_pred_vs_actual(oof_df)
plot_cumulative_returns(oof_df)
plot_smoothed(oof_df)
plot_directional_accuracy(oof_df)
xgb.plot_importance(xgb_1_model, max_num_features=20)

for i, fold in enumerate(np.unique(oof_df["fold"])):
    fold_df = oof_df[oof_df["fold"] == fold]
    ic = np.corrcoef(fold_df["y_arith"], fold_df["y_pred_arith"])[0,1]
    print(f"Fold {i+1}: IC={ic:.4f}")

In [None]:
TRADING_DAYS = 252

def perf_metrics(oof_df, df_eval):
    # Align with true market forward return and rf (arithmetic, not excess)
    base = df_eval[["date", "forward_returns", "risk_free_rate"]].copy()
    z = oof_df[["date","position"]].merge(base, on="date", how="inner").dropna()

    pos = z["position"].astype(float).clip(0, 2)
    mkt = z["forward_returns"].astype(float)
    rf  = z["risk_free_rate"].astype(float) if "risk_free_rate" in z else 0.0

    # Strategy daily return (same formula you used for Adjusted Sharpe)
    strat = rf*(1 - pos) + pos*mkt
    excess = strat - rf

    # --- CAGR / total return ---
    n_years = (z["date"].iloc[-1] - z["date"].iloc[0]).days / 365.25
    total_return = (1.0 + strat).prod() - 1.0
    cagr = (1.0 + total_return)**(1.0 / n_years) - 1.0

    # --- Sharpe (for reference) ---
    mu_e = excess.mean()
    sd   = strat.std()
    sharpe = (mu_e / sd) * np.sqrt(TRADING_DAYS) if sd > 0 else np.nan

    # --- Sortino ---
    downside = excess[excess < 0]
    dd = downside.std()  # sample std; use ddof=0 if you prefer population
    sortino = (mu_e * TRADING_DAYS) / (dd * np.sqrt(TRADING_DAYS)) if dd > 0 else np.nan

    # --- Extras: drawdown & turnover (useful context) ---
    equity = (1.0 + strat).cumprod()
    roll_max = equity.cummax()
    drawdown = equity / roll_max - 1.0
    max_dd = drawdown.min()

    turnover = pos.diff().abs().fillna(0).sum() / len(pos)  # avg daily turnover in units

    return {
        "Total Return": total_return,
        "CAGR": cagr,
        "Sharpe": sharpe,
        "Sortino": sortino,
        "Max Drawdown": max_dd,
        "Avg Daily Turnover": turnover,
        "Obs": len(z)
    }

metrics = perf_metrics(oof_df, df_eval)
for k,v in metrics.items():
    print(f"{k:>18}: {v:.6f}" if isinstance(v, (int,float,np.floating)) else f"{k:>18}: {v}")
