In [1]:
#!/usr/bin/env python
import os
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt

# ============================================================
# CONFIG
# ============================================================

# Input: parquet file holding the strategy's equity curve.
STRAT_EQUITY_FILE = "./13-trading_output_regression_insp500_spyfilter_cap15/13-equity_curve_regression_insp500_spyfilter_cap15.parquet"

# Output: directory that receives the CSV and PNG files written below.
OUT_DIR = "./16-monte_carlo_output"

# Simulation settings.
N_SIMS = 10_000   # number of bootstrap paths per experiment
BLOCK_SIZE = 25   # block bootstrap length
SEED = 123        # seed for NumPy's global random state

os.makedirs(OUT_DIR, exist_ok=True)
np.random.seed(SEED)

# ============================================================
# METRIC FUNCTIONS
# ============================================================

def cagr_from_path(values, start_date, end_date):
    """Return the compound annual growth rate of an equity path.

    Args:
        values: array-like of portfolio values; only the first and last
            entries enter the calculation.
        start_date: date of the first value (anything ``pd.Timestamp`` accepts).
        end_date: date of the last value (anything ``pd.Timestamp`` accepts).

    Returns:
        ``(last / first) ** (1 / years) - 1`` with years measured as
        whole days / 365.25, or NaN when there are fewer than two values,
        the span is not positive, or the first value is not positive.
    """
    path = np.asarray(values, dtype=float)
    if len(path) < 2:
        return np.nan

    elapsed = pd.Timestamp(end_date) - pd.Timestamp(start_date)
    n_years = elapsed.days / 365.25
    if n_years <= 0 or path[0] <= 0:
        return np.nan

    growth = path[-1] / path[0]
    return growth ** (1 / n_years) - 1

def sharpe(returns, periods_per_year=252):
    """Return the annualized Sharpe ratio of a series of per-period returns.

    No risk-free rate is subtracted: the ratio is
    ``sqrt(periods_per_year) * mean(returns) / std(returns, ddof=1)``.

    Args:
        returns: array-like of per-period returns.
        periods_per_year: number of return periods per year used for
            annualization. Defaults to 252 (daily data).

    Returns:
        The annualized Sharpe ratio, or NaN when it is undefined: fewer than
        two observations, NaN dispersion, or (numerically) zero dispersion.
    """
    r = np.asarray(returns, dtype=float)
    # The sample standard deviation (ddof=1) needs at least two points.
    if r.size < 2:
        return np.nan

    std = np.std(r, ddof=1)
    # A constant series can yield a std of ~1e-17 instead of exactly 0 because
    # the mean is rounded; treat dispersion at round-off scale as zero so the
    # ratio does not blow up to a meaningless huge value.
    tol = 8 * np.finfo(float).eps * np.max(np.abs(r))
    if np.isnan(std) or std <= tol:
        return np.nan
    return np.sqrt(periods_per_year) * np.mean(r) / std

def max_drawdown(values):
    """Return the deepest peak-to-trough decline of a value path.

    The result is a fraction of the running peak and is therefore <= 0
    (e.g. -0.25 means the path fell 25% below a previous high).
    """
    path = np.asarray(values, dtype=float)
    running_high = np.maximum.accumulate(path)
    return float(np.min(path / running_high - 1))

def prob_maxdd_ge_threshold(values, threshold=0.5):
    """Tell whether a path's maximum drawdown is at least ``threshold`` deep.

    Despite the name, this returns a single True/False indicator for one
    path; averaging the indicator over many paths gives the probability.

    Args:
        values: array-like value path passed to ``max_drawdown``.
        threshold: drawdown depth as a positive fraction (0.5 == 50%).
    """
    worst = max_drawdown(values)
    return worst <= -threshold

# ============================================================
# BLOCK BOOTSTRAP
# ============================================================

def block_bootstrap(returns, n, block_size):
    """Resample a return series by stitching together random contiguous blocks.

    Block start positions are drawn from NumPy's global random state, one
    draw per block, and blocks never wrap around the end of the series.

    Args:
        returns: array-like of returns to resample from.
        n: length of the resampled series.
        block_size: number of consecutive observations per block.

    Returns:
        A float array of length ``n`` (empty if ``returns`` is empty or
        ``n <= 0``).

    Raises:
        ValueError: if ``block_size`` is not positive or exceeds the number
            of available returns.
    """
    series = np.asarray(returns, dtype=float)
    n_obs = len(series)
    if n_obs <= 0 or n <= 0:
        return np.array([], dtype=float)
    if block_size <= 0:
        raise ValueError("BLOCK_SIZE must be > 0")
    if block_size > n_obs:
        raise ValueError(f"BLOCK_SIZE ({block_size}) cannot exceed number of returns ({n_obs})")

    # Exclusive upper bound for a block start so every block fits entirely.
    start_bound = n_obs - block_size + 1

    pieces = []
    drawn = 0
    while drawn < n:
        start = np.random.randint(0, start_bound)
        piece = series[start:start + block_size]
        pieces.append(piece)
        drawn += len(piece)

    # The last block usually overshoots; trim to the requested length.
    return np.concatenate(pieces)[:n]

# ============================================================
# LOAD DATA
# ============================================================

print("=== Loading equity curve ===")

# Read the curve, coerce column types, discard unusable rows, put the rows in
# chronological order and keep only strictly positive portfolio values.
equity = pd.read_parquet(STRAT_EQUITY_FILE)
equity = (
    equity.assign(
        date=pd.to_datetime(equity["date"]),
        portfolio_value=pd.to_numeric(equity["portfolio_value"], errors="coerce"),
    )
    .dropna(subset=["date", "portfolio_value"])
    .sort_values("date")
    .loc[lambda frame: frame["portfolio_value"] > 0]
    .reset_index(drop=True)
)

if len(equity) < 3:
    raise ValueError("Not enough equity observations after cleaning (need at least 3 rows).")

# Both arrays have one entry per cleaned row (length N).
dates = equity["date"].values
values = equity["portfolio_value"].values

# Simple period-over-period returns; there is one fewer return than values.
rets = values[1:] / values[:-1] - 1.0
n_rets = len(rets)

initial_capital = float(values[0])
start_date, end_date = equity["date"].iloc[0], equity["date"].iloc[-1]

print(f"Loaded {n_rets} return observations.")

# ============================================================
# TRUE METRICS
# ============================================================

# Metrics of the realised (non-resampled) equity curve, used as the reference
# point for the simulated distributions further down.
true_equity = values
true_sharpe = sharpe(rets)
true_dd = max_drawdown(true_equity)
true_cagr = cagr_from_path(true_equity, start_date, end_date)

print("\n=== TRUE STRATEGY RESULTS ===")
print(
    "CAGR: " + str(true_cagr),
    "Sharpe: " + str(true_sharpe),
    "MaxDD: " + str(true_dd),
    sep="\n",
)

# ============================================================
# MONTE CARLO SIMULATION (UNCERTAINTY BOOTSTRAP)
# ============================================================

print("\n=== Running Monte Carlo simulations ===")

# One slot per simulated path for each summary metric.
sim_cagrs = np.zeros(N_SIMS, dtype=float)
sim_sharpes = np.zeros(N_SIMS, dtype=float)
sim_dds = np.zeros(N_SIMS, dtype=float)     # max drawdown as a fraction (<= 0)
sim_dd50 = np.zeros(N_SIMS, dtype=float)    # 1.0 if max drawdown reached 50%, else 0.0

# Largest dollar drawdown of each path and the equity levels that bound it.
sim_max_dd_dollars = np.zeros(N_SIMS, dtype=float)
sim_peak_equity = np.zeros(N_SIMS, dtype=float)
sim_trough_equity = np.zeros(N_SIMS, dtype=float)

# Positions of that peak/trough along the path. The "date" arrays hold the
# calendar date found at the same position in the original series; because
# the returns are resampled they are positional anchors, not the dates on
# which those returns actually occurred.
sim_peak_step = np.zeros(N_SIMS, dtype=int)
sim_trough_step = np.zeros(N_SIMS, dtype=int)
sim_peak_date_indexed = np.empty(N_SIMS, dtype="datetime64[ns]")
sim_trough_date_indexed = np.empty(N_SIMS, dtype="datetime64[ns]")

for i in range(N_SIMS):
    sim_rets = block_bootstrap(rets, n_rets, BLOCK_SIZE)

    # Prepending 1.0 gives a curve with n_rets + 1 points, i.e. the same
    # length as the original value/date arrays.
    sim_curve = np.cumprod(np.r_[1.0, 1.0 + sim_rets])      # normalized
    sim_curve_dollars = sim_curve * initial_capital         # dollars

    running_peak = np.maximum.accumulate(sim_curve_dollars)
    drawdowns = running_peak - sim_curve_dollars

    # Trough of the largest dollar drawdown, then the highest point at or
    # before it. np.argmax always returns a non-negative index, so no
    # fallback is needed for the slice below.
    dd_idx = int(np.argmax(drawdowns))
    peak_idx = int(np.argmax(sim_curve_dollars[:dd_idx + 1]))

    sim_peak_equity[i] = sim_curve_dollars[peak_idx]
    sim_trough_equity[i] = sim_curve_dollars[dd_idx]
    sim_max_dd_dollars[i] = drawdowns[dd_idx]

    sim_peak_step[i] = peak_idx
    sim_trough_step[i] = dd_idx
    sim_peak_date_indexed[i] = dates[peak_idx]
    sim_trough_date_indexed[i] = dates[dd_idx]

    # CAGR is measured over the calendar span of the original sample.
    sim_cagrs[i]   = cagr_from_path(sim_curve_dollars, start_date, end_date)
    sim_sharpes[i] = sharpe(sim_rets)
    sim_dds[i]     = max_drawdown(sim_curve)
    sim_dd50[i]    = prob_maxdd_ge_threshold(sim_curve, threshold=0.50)

# ============================================================
# SHARPE CONFIDENCE INTERVAL (ESTIMATION UNCERTAINTY)
# ============================================================

# Percentile interval of the bootstrapped Sharpe ratios, ignoring any NaNs.
sharpe_ci_95 = np.percentile(
    sim_sharpes[np.logical_not(np.isnan(sim_sharpes))],
    [2.5, 50, 97.5],
)

print("\n=== SHARPE ESTIMATION UNCERTAINTY (bootstrap CI) ===")
print("Sharpe 95% CI (2.5/50/97.5): " + ", ".join(f"{q:.3f}" for q in sharpe_ci_95))

# ============================================================
# SHARPE SIGNIFICANCE TEST (NULL: NO EDGE)
#   Build a null by forcing mean=0, then block bootstrap.
#   p-value = P(Sharpe_null >= Sharpe_observed)
# ============================================================

# Null 1: strip the estimated edge by de-meaning the returns, then resample.
rets_centered = rets - np.mean(rets)

sim_sharpes_null0 = np.fromiter(
    (sharpe(block_bootstrap(rets_centered, n_rets, BLOCK_SIZE)) for _ in range(N_SIMS)),
    dtype=float,
    count=N_SIMS,
)

# One-sided: share of null Sharpe ratios at least as large as the observed one.
# A value printed as 0.0000 means no null draw reached the observed Sharpe.
p_sharpe_gt_0 = np.mean(sim_sharpes_null0 >= true_sharpe)

print("\n=== SHARPE SIGNIFICANCE (block-bootstrap null) ===")
print(f"Observed Sharpe: {true_sharpe:.4f}")
print(f"p-value (H0: true Sharpe = 0, H1: Sharpe > 0): {p_sharpe_gt_0:.4f}")

# Null 2 (optional): re-centre the returns on the daily mean that would give
# an annualized Sharpe of `target_sharpe`. The observed volatility is used as
# a plug-in estimate, so this is an approximation.
target_sharpe = 1.0
obs_vol = np.std(rets, ddof=1)
mu_target = (target_sharpe / np.sqrt(252)) * obs_vol
rets_shifted_to_sh1 = rets_centered + mu_target

sim_sharpes_null1 = np.fromiter(
    (sharpe(block_bootstrap(rets_shifted_to_sh1, n_rets, BLOCK_SIZE)) for _ in range(N_SIMS)),
    dtype=float,
    count=N_SIMS,
)

p_sharpe_gt_1 = np.mean(sim_sharpes_null1 >= true_sharpe)

print(f"p-value (H0: true Sharpe = {target_sharpe:.1f}, H1: Sharpe > {target_sharpe:.1f}): {p_sharpe_gt_1:.4f}")

# ============================================================
# SUMMARY STATS
# ============================================================

# Location and tail statistics of the simulated metric distributions, plus a
# few tail probabilities expressed as the fraction of simulated paths.
stats = dict(
    CAGR_mean=np.mean(sim_cagrs),
    CAGR_median=np.median(sim_cagrs),
    CAGR_5pct=np.percentile(sim_cagrs, 5),
    CAGR_95pct=np.percentile(sim_cagrs, 95),
    Sharpe_mean=np.mean(sim_sharpes),
    Sharpe_5pct=np.percentile(sim_sharpes, 5),
    Sharpe_95pct=np.percentile(sim_sharpes, 95),
    DD_mean=np.mean(sim_dds),
    DD_5pct=np.percentile(sim_dds, 5),
    DD_95pct=np.percentile(sim_dds, 95),
    Prob_MaxDD_ge_50pct=np.mean(sim_dd50),
    Prob_CAGR_lt_0=np.mean(sim_cagrs < 0),
    Prob_Sharpe_lt_1=np.mean(sim_sharpes < 1),
    Prob_Sharpe_lt_0=np.mean(sim_sharpes < 0),
)

print("\n=== MONTE CARLO STATISTICS ===")
print("\n".join(f"{name}: {value:.4f}" for name, value in stats.items()))

# Where the realised metrics fall inside the simulated distributions.
print("\nPercentile of True CAGR:", np.mean(sim_cagrs <= true_cagr))
print("Percentile of True Sharpe:", np.mean(sim_sharpes <= true_sharpe))

print("\n=== DOLLAR DRAWDOWN ANALYSIS ===\n")
print(f"Median max $ drawdown: ${np.median(sim_max_dd_dollars):,.0f}")
print(f"95th pct max $ drawdown: ${np.percentile(sim_max_dd_dollars, 95):,.0f}")
print(f"Worst case max $ drawdown: ${np.max(sim_max_dd_dollars):,.0f}")

# Same drawdowns expressed relative to the starting capital.
dd_vs_start = sim_max_dd_dollars / initial_capital
print(f"\nMedian DD vs start: {np.median(dd_vs_start)*100:.1f}%")
print(f"95th pct DD vs start: {np.percentile(dd_vs_start,95)*100:.1f}%")

# Paths whose largest dollar drawdown started from a peak at least 60% above
# the initial capital and gave back at least 40% of that peak.
grew_60pct = sim_peak_equity >= 1.6 * initial_capital
lost_40pct = sim_max_dd_dollars >= 0.4 * sim_peak_equity
late_bad_dd = list(sim_max_dd_dollars[grew_60pct & lost_40pct])
print(f"\nProb(≥40% DD after +60% growth): {len(late_bad_dd)/N_SIMS:.3f}")

def _print_drawdown_path(header, k):
    """Print peak/trough details of the largest dollar drawdown of path ``k``."""
    peak_day = pd.Timestamp(sim_peak_date_indexed[k]).date()
    trough_day = pd.Timestamp(sim_trough_date_indexed[k]).date()
    print(header)
    print(f"Peak Equity:   ${sim_peak_equity[k]:,.0f}")
    print(f"Peak Step:     {sim_peak_step[k]}  (indexed date anchor: {peak_day})")
    print(f"Trough Equity: ${sim_trough_equity[k]:,.0f}")
    print(f"Trough Step:   {sim_trough_step[k]} (indexed date anchor: {trough_day})")
    print(f"Max $ Drawdown: ${sim_max_dd_dollars[k]:,.0f}")


# Path with the largest dollar drawdown, and the path sitting in the middle
# of the drawdown ranking.
worst_idx = int(np.argmax(sim_max_dd_dollars))
median_idx = int(np.argsort(sim_max_dd_dollars)[N_SIMS // 2])

_print_drawdown_path("\n=== WORST MONTE CARLO DRAWDOWN PATH ===\n", worst_idx)
_print_drawdown_path("\n=== MEDIAN MONTE CARLO DRAWDOWN PATH ===\n", median_idx)

# ============================================================
# SAVE CSV
# ============================================================

# Run identifier shared by the CSV and the plot file names.
timestamp = f"{datetime.now():%Y%m%d-%H%M%S}"

# One row per simulation; the two null columns come from the significance tests.
results = pd.DataFrame(
    dict(
        sim_cagr=sim_cagrs,
        sim_sharpe=sim_sharpes,
        sim_maxdd=sim_dds,
        prob_maxdd_ge_50pct=sim_dd50,
        sim_sharpe_null_mean0=sim_sharpes_null0,
        sim_sharpe_null_target1=sim_sharpes_null1,
    )
)

out_csv = os.path.join(OUT_DIR, "16-mc_results_" + timestamp + ".csv")
results.to_csv(out_csv, index=False)

print(f"\nSaved Monte Carlo results → {out_csv}")

# ============================================================
# PLOTS
# ============================================================

def save_hist(data, title, filename, true_value=None):
    """Write a 60-bin histogram of ``data`` to ``OUT_DIR/filename``.

    Args:
        data: values to histogram.
        title: plot title.
        filename: output file name, joined onto ``OUT_DIR``.
        true_value: optional reference value; when given it is drawn as a
            red vertical line labelled "Actual".
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.hist(data, bins=60, alpha=0.7)
    if true_value is not None:
        ax.axvline(true_value, color="red", linewidth=2, label="Actual")
        ax.legend()
    ax.set_title(title)
    fig.tight_layout()
    fig.savefig(os.path.join(OUT_DIR, filename))
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

# (data, title, file stem, reference value) for each histogram to write.
for _data, _title, _stem, _actual in (
    (sim_cagrs, "CAGR Distribution", "mc_cagr", true_cagr),
    (sim_sharpes, "Sharpe Distribution", "mc_sharpe", true_sharpe),
    (sim_dds, "Max Drawdown Distribution", "mc_dd", true_dd),
    (sim_sharpes_null0, "Sharpe Null (mean=0) Distribution", "mc_sharpe_null_mean0", true_sharpe),
):
    save_hist(_data, _title, f"{_stem}_{timestamp}.png", _actual)

print("\nAll plots saved.")
print("\n=== COMPLETE ===")





=== Loading equity curve ===
Loaded 6789 return observations.

=== TRUE STRATEGY RESULTS ===
CAGR: 0.15575292794223938
Sharpe: 1.221383001257762
MaxDD: -0.19626913543183888

=== Running Monte Carlo simulations ===

=== SHARPE ESTIMATION UNCERTAINTY (bootstrap CI) ===
Sharpe 95% CI (2.5/50/97.5): 0.841, 1.224, 1.610

=== SHARPE SIGNIFICANCE (block-bootstrap null) ===
Observed Sharpe: 1.2214
p-value (H0: true Sharpe = 0, H1: Sharpe > 0): 0.0000
p-value (H0: true Sharpe = 1.0, H1: Sharpe > 1.0): 0.1264

=== MONTE CARLO STATISTICS ===
CAGR_mean: 0.1561
CAGR_median: 0.1557
CAGR_5pct: 0.1110
CAGR_95pct: 0.2036
Sharpe_mean: 1.2227
Sharpe_5pct: 0.9005
Sharpe_95pct: 1.5432
DD_mean: -0.2239
DD_5pct: -0.3138
DD_95pct: -0.1587
Prob_MaxDD_ge_50pct: 0.0001
Prob_CAGR_lt_0: 0.0000
Prob_Sharpe_lt_1: 0.1280
Prob_Sharpe_lt_0: 0.0000

Percentile of True CAGR: 0.5009
Percentile of True Sharpe: 0.497

=== DOLLAR DRAWDOWN ANALYSIS ===

Median max $ drawdown: $2,102,469
95th pct max $ drawdown: $6,528,527
Wor

In [2]:
import numpy as np
import pandas as pd

# Periods per year; used below to annualize the daily regression alpha.
TRADING_DAYS = 252

def ols_alpha_beta(y, x):
    """Fit ``y = alpha + beta * x + eps`` and return ``(alpha, beta)``.

    Args:
        y: array-like dependent variable.
        x: array-like regressor, same length as ``y``.
    """
    regressor = np.asarray(x, float)
    response = np.asarray(y, float)
    # Design matrix: a column of ones for the intercept, then the regressor.
    design = np.column_stack([np.ones(len(regressor)), regressor])
    # Least-squares solution via NumPy's solver.
    coef, *_ = np.linalg.lstsq(design, response, rcond=None)
    return coef[0], coef[1]

def block_bootstrap_pairs(strat_ret, spy_ret, block_size, n_sims=10000, seed=123):
    """Bootstrap the OLS alpha and beta of strategy returns on SPY returns.

    Each simulation resamples the two series jointly: the same randomly
    chosen contiguous blocks of positions are taken from both, so every
    strategy return stays paired with its SPY return. Alpha and beta are
    then re-estimated with ``ols_alpha_beta`` on the resampled pairs.

    Args:
        strat_ret: array-like of strategy returns.
        spy_ret: array-like of SPY returns, same length as ``strat_ret``.
        block_size: number of consecutive observations per block (> 0 and
            no larger than the number of observations).
        n_sims: number of bootstrap replications.
        seed: seed for the local ``np.random.default_rng`` generator.

    Returns:
        Tuple ``(alphas, betas)`` of float arrays of length ``n_sims``.

    Raises:
        ValueError: if the inputs differ in length, or ``block_size`` is not
            positive or exceeds the number of observations.
    """
    rng = np.random.default_rng(seed)
    strat_ret = np.asarray(strat_ret, float)
    spy_ret = np.asarray(spy_ret, float)
    n = len(strat_ret)
    # Explicit checks instead of `assert`, which is skipped under `python -O`.
    if len(spy_ret) != n:
        raise ValueError("strat_ret and spy_ret must have the same length")
    # A non-positive block never adds any index, so the fill loop would
    # never terminate; reject it up front.
    if block_size <= 0:
        raise ValueError("block_size must be > 0")
    if block_size > n:
        raise ValueError("block_size > number of observations")

    alphas = np.empty(n_sims, float)
    betas = np.empty(n_sims, float)

    # Loop-invariant pieces of the index construction.
    n_blocks = -(-n // block_size)      # ceil(n / block_size)
    max_start = n - block_size + 1      # exclusive bound so blocks never overrun
    offsets = np.arange(block_size)

    for i in range(n_sims):
        # One scalar draw per block, in order, as before.
        starts = np.array([rng.integers(0, max_start) for _ in range(n_blocks)])
        # Expand each start into its block of positions and trim the overshoot.
        idx = (starts[:, None] + offsets).ravel()[:n]

        a, b = ols_alpha_beta(strat_ret[idx], spy_ret[idx])
        alphas[i] = a
        betas[i] = b

    return alphas, betas

# --- Load your data (edit paths) ---
EQUITY_FILE = "./13-trading_output_regression_insp500_spyfilter_cap15/13-equity_curve_regression_insp500_spyfilter_cap15.parquet"
SPY_FILE    = r"C:\TWS API\source\pythonclient\TradingIdeas\MomentumSystem\8-SPY_200DMA_market_regime\8-SPY_200DMA_regime.parquet"

# Strategy side: daily returns from the equity curve, in date order.
eq = pd.read_parquet(EQUITY_FILE).copy()
eq["date"] = pd.to_datetime(eq["date"])
eq = eq.sort_values("date")
eq["strat_ret"] = eq["portfolio_value"].pct_change()
eq = eq.dropna(subset=["strat_ret"])  # the first row has no prior value

# Benchmark side: if the file has no "date" column, the date is taken from
# the index (which resets to a column named "Date" or "index").
spy = pd.read_parquet(SPY_FILE).copy()
if "date" not in spy.columns:
    spy = spy.reset_index().rename(columns={"Date": "date", "index": "date"})
spy["date"] = pd.to_datetime(spy["date"])
spy = spy.sort_values("date")
spy["spy_ret"] = spy["spy_close"].pct_change()

# Align on common dates. Only rows missing one of the two return columns are
# dropped; a NaN in some unrelated equity column must not remove a valid pair.
df = eq.merge(spy[["date", "spy_ret"]], on="date", how="inner").dropna(
    subset=["strat_ret", "spy_ret"]
)
strat_ret = df["strat_ret"].to_numpy()
spy_ret   = df["spy_ret"].to_numpy()

# --- Observed alpha/beta ---
# Point estimates on the full aligned sample.
alpha_d, beta = ols_alpha_beta(strat_ret, spy_ret)
alpha_ann = alpha_d * TRADING_DAYS
print(f"Observed beta: {beta:.3f}")
print(f"Observed alpha (daily): {alpha_d:.6f}  | annualized: {alpha_ann:.4f}")

# Paired block bootstrap of the regression to gauge estimation uncertainty.
BLOCK_SIZE = 60
N_SIMS = 10000
alphas, betas = block_bootstrap_pairs(strat_ret, spy_ret, BLOCK_SIZE, N_SIMS)

alpha_ci = np.percentile(alphas, [2.5, 50, 97.5])
print("\n=== ALPHA UNCERTAINTY (paired block bootstrap) ===")
print("Alpha daily 95% CI: " + ", ".join(f"{q:.6f}" for q in alpha_ci))
print("Alpha annualized 95% CI: " + ", ".join(f"{q:.4f}" for q in alpha_ci * TRADING_DAYS))

# One-sided check for alpha > 0: share of bootstrap alphas at or below zero.
p_alpha_gt0 = np.mean(alphas <= 0)
print("\n=== ALPHA SIGNIFICANCE ===")
print(f"p-value (H0: alpha=0, H1: alpha>0): {p_alpha_gt0:.4f}")


Observed beta: 0.323
Observed alpha (daily): 0.000478  | annualized: 0.1206

=== ALPHA UNCERTAINTY (paired block bootstrap) ===
Alpha daily 95% CI: 0.000298, 0.000470, 0.000651
Alpha annualized 95% CI: 0.0750, 0.1185, 0.1641

=== ALPHA SIGNIFICANCE ===
p-value (H0: alpha=0, H1: alpha>0): 0.0000
