In [1]:
#!/usr/bin/env python

import os
import numpy as np
import pandas as pd
from datetime import datetime

# ============================================================
# CONFIG
# ============================================================
TRADES_FILE = "./13-trading_output_regression_insp500_spyfilter_cap15/13-trades_regression_insp500_spyfilter_cap15.parquet"
EQUITY_FILE = "./13-trading_output_regression_insp500_spyfilter_cap15/13-equity_curve_regression_insp500_spyfilter_cap15.parquet"

SPY_FILE    = "./8-SPY_200DMA_market_regime/8-SPY_200DMA_regime.parquet"

OUTPUT_DIR = "./17-robustness_output"
os.makedirs(OUTPUT_DIR, exist_ok=True)

TRADING_DAYS_PER_YEAR = 252
MC_N_SIM = 5000
MC_BLOCK_SIZE = 20

np.random.seed(42)

# ============================================================
# LOAD & ALIGN DATA
# ============================================================

def load_data():
    print("=== Loading data ===")

    # ---------- Equity ----------
    eq = pd.read_parquet(EQUITY_FILE)
    eq["date"] = pd.to_datetime(eq["date"])
    eq = eq.sort_values("date")

    eq["strat_ret"] = eq["portfolio_value"].pct_change()
    eq = eq.dropna(subset=["strat_ret"])

    # ---------- SPY ----------
    spy = pd.read_parquet(SPY_FILE)

    # SPY file uses Date as index
    spy = spy.reset_index().rename(columns={"Date": "date"})
    spy["date"] = pd.to_datetime(spy["date"])
    spy = spy.sort_values("date")

    # sanity check
    required = {"spy_close", "market_regime"}
    missing = required - set(spy.columns)
    if missing:
        raise ValueError(f"SPY file missing required columns: {missing}")

    spy["spy_ret"] = spy["spy_close"].pct_change()
    spy = spy.dropna(subset=["spy_ret"])

    # ---------- Merge ----------
    df = pd.merge(
        eq[["date", "strat_ret"]],
        spy[["date", "spy_ret", "market_regime"]],
        on="date",
        how="inner"
    ).sort_values("date").reset_index(drop=True)

    print(
        f"Aligned sample size: {len(df):,} days "
        f"({df['date'].min().date()} → {df['date'].max().date()})"
    )

    print(
        f"Market regime split → "
        f"Bull: {(df['market_regime']==1).sum()}, "
        f"Bear: {(df['market_regime']==0).sum()}"
    )

    return df


# ============================================================
# METRICS
# ============================================================

def cagr_from_returns(returns):
    years = len(returns) / TRADING_DAYS_PER_YEAR
    if years <= 0:
        return np.nan
    return (1 + returns).prod() ** (1 / years) - 1

def sharpe_ratio(returns):
    if returns.std() == 0:
        return np.nan
    return returns.mean() / returns.std() * np.sqrt(TRADING_DAYS_PER_YEAR)

def sortino_ratio(returns):
    downside = returns[returns < 0]
    if downside.std() == 0:
        return np.nan
    return returns.mean() / downside.std() * np.sqrt(TRADING_DAYS_PER_YEAR)

def max_drawdown(returns):
    eq = (1 + returns).cumprod()
    peak = eq.cummax()
    return (eq / peak - 1).min()

def basic_stats(returns, label):
    return {
        "label": label,
        "cagr": cagr_from_returns(returns),
        "sharpe": sharpe_ratio(returns),
        "sortino": sortino_ratio(returns),
        "maxdd": max_drawdown(returns)
    }

# ============================================================
# 1) EXCESS RETURN MONTE CARLO
# ============================================================

def monte_carlo_excess(df):
    print("\n=== Excess-Return Monte Carlo ===")

    excess = df["strat_ret"] - df["spy_ret"]
    excess = excess.dropna().values
    n = len(excess)

    true_cagr = cagr_from_returns(pd.Series(excess))
    true_sharpe = sharpe_ratio(pd.Series(excess))

    mc_cagrs = []
    mc_sharpes = []

    for _ in range(MC_N_SIM):
        idx = []
        while len(idx) < n:
            start = np.random.randint(0, n - MC_BLOCK_SIZE + 1)
            idx.extend(range(start, start + MC_BLOCK_SIZE))
        sim = pd.Series(excess[idx[:n]])

        mc_cagrs.append(cagr_from_returns(sim))
        mc_sharpes.append(sharpe_ratio(sim))

    mc_cagrs = np.array(mc_cagrs)
    mc_sharpes = np.array(mc_sharpes)

    print(f"Excess CAGR percentile: {(mc_cagrs < true_cagr).mean()*100:.2f}%")
    print(f"Excess Sharpe percentile: {(mc_sharpes < true_sharpe).mean()*100:.2f}%")

    return mc_cagrs, mc_sharpes

# ============================================================
# 2) REGIME-AWARE MONTE CARLO
# ============================================================

def monte_carlo_regime(df):
    print("\n=== Regime-Aware Monte Carlo ===")

    excess = (df["strat_ret"] - df["spy_ret"]).values
    bull = df["market_regime"].values == 1
    bear = ~bull

    idx_bull = np.where(bull)[0]
    idx_bear = np.where(bear)[0]

    def boot(idx, size):
        out = []
        while len(out) < size:
            s = np.random.randint(0, len(idx) - MC_BLOCK_SIZE + 1)
            out.extend(idx[s:s + MC_BLOCK_SIZE])
        return np.array(out[:size])

    mc_cagrs = []
    mc_sharpes = []

    for _ in range(MC_N_SIM):
        sim = np.zeros_like(excess)

        sim[idx_bull] = excess[boot(idx_bull, len(idx_bull))]
        sim[idx_bear] = excess[boot(idx_bear, len(idx_bear))]

        sim = pd.Series(sim)
        mc_cagrs.append(cagr_from_returns(sim))
        mc_sharpes.append(sharpe_ratio(sim))

    print("Regime-aware MC complete")
    return np.array(mc_cagrs), np.array(mc_sharpes)

# ============================================================
# 3) ROLLING WINDOWS
# ============================================================

def rolling_robustness(df, windows=(3,5,10)):
    print("\n=== Rolling Robustness ===")
    df = df.copy()
    df["year"] = df["date"].dt.year

    rows = []

    for w in windows:
        for y in range(df["year"].min(), df["year"].max() - w + 2):
            sub = df[(df["year"] >= y) & (df["year"] < y + w)]
            if len(sub) < 252:
                continue

            rows.append({
                "window": w,
                "start_year": y,
                "strat_cagr": cagr_from_returns(sub["strat_ret"]),
                "strat_sharpe": sharpe_ratio(sub["strat_ret"]),
                "strat_maxdd": max_drawdown(sub["strat_ret"]),
                "spy_cagr": cagr_from_returns(sub["spy_ret"]),
                "spy_sharpe": sharpe_ratio(sub["spy_ret"]),
                "spy_maxdd": max_drawdown(sub["spy_ret"]),
            })

    out = pd.DataFrame(rows)
    ts = datetime.now().strftime("%Y%m%d-%H%M%S")
    out.to_csv(os.path.join(OUTPUT_DIR, f"rolling_robustness_{ts}.csv"), index=False)
    print("Saved rolling robustness CSV")

# ============================================================
# MAIN
# ============================================================

def main():
    df = load_data()

    strat = basic_stats(df["strat_ret"], "strategy")
    spy = basic_stats(df["spy_ret"], "spy")

    print("\n=== Full Sample Stats ===")
    print(strat)
    print(spy)

    monte_carlo_excess(df)
    monte_carlo_regime(df)
    rolling_robustness(df)

    print("\n=== ROBUSTNESS SUITE COMPLETE ===")

if __name__ == "__main__":
    main()


=== Loading data ===
Aligned sample size: 6,786 days (1999-01-05 → 2025-12-24)
Market regime split → Bull: 5053, Bear: 1733

=== Full Sample Stats ===
{'label': 'strategy', 'cagr': 0.16370052619140063, 'sharpe': 1.2230179378700787, 'sortino': 1.5214253930963135, 'maxdd': -0.20732681325482172}
{'label': 'spy', 'cagr': 0.08503227363271404, 'sharpe': 0.5185602637463045, 'sortino': 0.6644811588716772, 'maxdd': -0.5518946265778286}

=== Excess-Return Monte Carlo ===
Excess CAGR percentile: 48.86%
Excess Sharpe percentile: 48.36%

=== Regime-Aware Monte Carlo ===
Regime-aware MC complete

=== Rolling Robustness ===
Saved rolling robustness CSV

=== ROBUSTNESS SUITE COMPLETE ===
