In [1]:
"""
Regime-Based Performance Analysis: Strategy vs SPY (v2)

Compares strategy performance against SPY during:
- Bull markets (SPY above 200-day MA, regime=1)
- Bear markets (SPY below 200-day MA, regime=0)

Key improvements over v1:
- Contiguous segment analysis (no artificial stitching)
- Correct excess return calculation: ann_return(strat - spy)
- Bootstrap on full series, then compute regime stats (preserves time structure)
- Regime lag option to avoid look-ahead bias
- Per-segment breakdown for bear/bull markets
"""

from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple

import numpy as np
import pandas as pd

TRADING_DAYS = 252

# =========================
# CONFIG
# =========================
EQUITY_FILE = "./13b-wfo/walkforward_top136_equity_curve.csv"
SPY_PARQUET = "./8-SPY_200DMA_market_regime/8-SPY_200DMA_regime.parquet"

N_BOOT = 5000
BLOCK_LEN = 21  # Primary block length for bootstrap
BLOCK_LENS = [5, 10, 21, 42, 63]  # For sensitivity check (simple method)
SEED = 7

# Set to True if your strategy uses the regime signal for trading decisions
# This will lag the regime by 1 day to avoid look-ahead bias
LAG_REGIME = False


# =========================
# DATA CLASSES
# =========================
@dataclass
class SegmentStats:
    """Statistics for a contiguous regime segment.

    One instance summarizes a single uninterrupted run of rows that share the
    same regime label. Return and drawdown fields are fractions
    (0.05 means 5%); they are multiplied by 100 only when printed.
    """
    start_date: pd.Timestamp  # date of the first row in the segment
    end_date: pd.Timestamp  # date of the last row in the segment
    n_days: int  # number of rows (observations) in the segment
    strat_total_return: float  # compounded strategy return over the segment
    spy_total_return: float  # compounded SPY return over the segment
    strat_ann_return: float  # annualized strategy return; NaN for short segments
    spy_ann_return: float  # annualized SPY return; NaN for short segments
    excess_total_return: float  # strat_total_return - spy_total_return
    strat_max_dd: float  # strategy max drawdown within the segment (<= 0)
    spy_max_dd: float  # SPY max drawdown within the segment (<= 0)


# =========================
# HELPERS
# =========================
def ann_sharpe(r: np.ndarray) -> float:
    """Return the annualized Sharpe ratio of daily returns (risk-free rate 0).

    NaN observations are discarded first. The result is NaN when fewer than
    two observations remain, and 0.0 when the sample standard deviation is
    zero or undefined.
    """
    returns = np.asarray(r, float)
    returns = returns[~np.isnan(returns)]
    if returns.size < 2:
        return np.nan

    sample_sd = np.std(returns, ddof=1)
    # "not > 0" is true both for a non-positive and for a NaN deviation.
    if not sample_sd > 0:
        return 0.0

    return np.sqrt(TRADING_DAYS) * np.mean(returns) / sample_sd


def ann_return(r: np.ndarray) -> float:
    """Return the geometric annualized return implied by daily returns.

    NaN observations are discarded first. The compounded return over the
    remaining observations is scaled to a TRADING_DAYS-day year. NaN is
    returned when fewer than two observations remain.
    """
    returns = np.asarray(r, float)
    valid = returns[~np.isnan(returns)]
    n_obs = valid.size
    if n_obs < 2:
        return np.nan

    years = n_obs / TRADING_DAYS
    if not years > 0:
        return np.nan

    cumulative = np.prod(1 + valid) - 1
    return (1 + cumulative) ** (1 / years) - 1


def total_return(r: np.ndarray) -> float:
    """Return the compounded (cumulative) return of a daily return series.

    NaN observations are ignored; an input with no valid observation
    yields NaN.
    """
    series = np.asarray(r, float)
    valid = series[~np.isnan(series)]
    if valid.size == 0:
        return np.nan
    growth = np.prod(1 + valid)
    return growth - 1


def ann_volatility(r: np.ndarray) -> float:
    """Return the annualized sample volatility of daily returns.

    NaN observations are ignored. Uses the sample standard deviation
    (ddof=1) scaled by sqrt(TRADING_DAYS); NaN when fewer than two
    observations remain.
    """
    series = np.asarray(r, float)
    valid = series[~np.isnan(series)]
    if valid.size < 2:
        return np.nan
    daily_sd = np.std(valid, ddof=1)
    return daily_sd * np.sqrt(TRADING_DAYS)


def max_drawdown(r: np.ndarray) -> float:
    """Return the maximum drawdown of the wealth curve implied by daily returns.

    The wealth curve starts at 1.0 (the capital held before the first return)
    and compounds each daily return. The drawdown at each point is the
    fractional distance below the running peak, so the result is <= 0
    (e.g. -0.25 for a 25% peak-to-trough loss).

    Args:
        r: Daily simple returns. NaN observations are ignored.

    Returns:
        The most negative drawdown, 0.0 if wealth never falls below a prior
        peak, or NaN when there is no valid observation.
    """
    returns = np.asarray(r, float)
    returns = returns[~np.isnan(returns)]
    if returns.size < 1:
        return np.nan

    # Anchor the curve at the starting capital. Without this leading 1.0 the
    # first observation is its own peak, so a loss on the very first day
    # (common at the start of a bear segment) would never be counted.
    wealth = np.concatenate(([1.0], np.cumprod(1 + returns)))
    running_peak = np.maximum.accumulate(wealth)
    drawdowns = (wealth - running_peak) / running_peak
    return drawdowns.min()


def win_rate(r: np.ndarray) -> float:
    """Return the fraction (0..1) of observations with a strictly positive return.

    NaN observations are ignored; zero returns count as non-wins. An input
    with no valid observation yields NaN.
    """
    series = np.asarray(r, float)
    valid = series[~np.isnan(series)]
    if valid.size == 0:
        return np.nan
    is_win = valid > 0
    return np.mean(is_win)


def find_contiguous_segments(df: pd.DataFrame, regime_col: str = "market_regime") -> List[Tuple[int, int, int]]:
    """
    Split the rows of ``df`` into maximal runs sharing the same regime label.

    Rows are taken in their existing order; a new segment starts wherever the
    label differs from the previous row's label.

    Returns a list of (start_idx, end_idx, regime_value) tuples, where both
    positional indices are inclusive and regime_value is cast to int. An
    empty frame yields an empty list.
    """
    if len(df) == 0:
        return []

    labels = df[regime_col].values

    # Positions whose label differs from the preceding row open a new run.
    run_starts = np.flatnonzero(labels[1:] != labels[:-1]) + 1
    first_rows = np.concatenate(([0], run_starts))
    last_rows = np.concatenate((run_starts - 1, [len(labels) - 1]))

    return [
        (int(first), int(last), int(labels[first]))
        for first, last in zip(first_rows, last_rows)
    ]


def compute_segment_stats(df: pd.DataFrame, start_idx: int, end_idx: int) -> SegmentStats:
    """Summarize one contiguous segment of ``df``.

    ``start_idx`` and ``end_idx`` are inclusive positional row indices. The
    frame must provide "date", "strat_ret" and "spy_ret" columns. Annualized
    returns are reported only when the segment spans more than 0.1 years of
    trading days; shorter segments get NaN for both annualized fields.
    """
    window = df.iloc[start_idx:end_idx + 1]
    n_days = len(window)

    strat_daily = window["strat_ret"].values
    spy_daily = window["spy_ret"].values
    strat_total = total_return(strat_daily)
    spy_total = total_return(spy_daily)

    # Default to "not annualizable"; overwrite only for long-enough segments,
    # since annualizing a handful of days produces meaningless magnitudes.
    strat_ann = np.nan
    spy_ann = np.nan
    years = n_days / TRADING_DAYS
    if years > 0.1:
        strat_ann = (1 + strat_total) ** (1 / years) - 1
        spy_ann = (1 + spy_total) ** (1 / years) - 1

    dates = window["date"]
    return SegmentStats(
        start_date=dates.iloc[0],
        end_date=dates.iloc[-1],
        n_days=n_days,
        strat_total_return=strat_total,
        spy_total_return=spy_total,
        strat_ann_return=strat_ann,
        spy_ann_return=spy_ann,
        excess_total_return=strat_total - spy_total,
        strat_max_dd=max_drawdown(strat_daily),
        spy_max_dd=max_drawdown(spy_daily),
    )


def block_bootstrap_idx(n: int, block_len: int, rng: np.random.Generator) -> np.ndarray:
    """
    Draw ``n`` resampling indices by gluing together random contiguous blocks.

    Each block has ``block_len`` consecutive indices and a uniformly drawn
    start such that the block fits inside ``[0, n)``; blocks are appended
    until at least ``n`` indices exist, then the result is cut to length
    ``n``. When ``block_len <= 1`` or ``block_len > n`` the function draws
    ``n`` independent uniform indices instead. ``n <= 0`` yields an empty
    array.
    """
    if n <= 0:
        return np.array([], dtype=int)

    blocks_applicable = 1 < block_len <= n
    if not blocks_applicable:
        return rng.integers(0, n, size=n, dtype=int)

    last_start = n - block_len
    n_blocks = -(-n // block_len)  # ceil(n / block_len)
    within_block = np.arange(block_len)

    # One scalar draw per block, in order, so the generator is consumed
    # exactly once per appended block.
    pieces = [
        within_block + int(rng.integers(0, last_start + 1))
        for _ in range(n_blocks)
    ]
    return np.concatenate(pieces)[:n].astype(int)


def bootstrap_ir_simple(df: pd.DataFrame, n_boot: int = 5000, block_len: int = 21, seed: int = 7) -> np.ndarray:
    """
    Bootstrap the information ratio directly from the excess-return series.

    The excess series is ``strat_ret - spy_ret`` with NaNs removed. Each of
    the ``n_boot`` replicates resamples that series with block_bootstrap_idx
    and records ann_sharpe of the resampled values. Returns an empty array
    when fewer than two excess returns are available.
    """
    excess = (df["strat_ret"] - df["spy_ret"]).to_numpy(dtype=float)
    excess = excess[~np.isnan(excess)]
    n_obs = len(excess)
    if n_obs < 2:
        return np.array([], dtype=float)

    rng = np.random.default_rng(seed)
    replicates = [
        ann_sharpe(excess[block_bootstrap_idx(n_obs, block_len, rng)])
        for _ in range(n_boot)
    ]
    return np.array(replicates, dtype=float)


def compute_regime_stats_from_returns(
    strat_ret: np.ndarray,
    spy_ret: np.ndarray,
    regime: np.ndarray
) -> Dict[str, Dict]:
    """
    Compute Sharpe / IR / annualized-return metrics per regime.

    The three arrays must be aligned element by element. Metrics are
    computed for "bull" (regime == 1), "bear" (regime == 0) and "overall"
    (all observations). A group with fewer than two observations gets NaN
    for every metric. The same routine serves the point estimates and each
    bootstrap replicate.
    """
    metric_names = (
        "strat_sharpe",
        "spy_sharpe",
        "ir",
        "excess_ann_return",
        "strat_ann_return",
        "spy_ann_return",
    )
    excess = strat_ret - spy_ret

    selectors = {
        "bull": regime == 1,
        "bear": regime == 0,
        "overall": np.ones(len(regime), dtype=bool),
    }

    stats_by_regime = {}
    for label, selector in selectors.items():
        if np.count_nonzero(selector) < 2:
            stats_by_regime[label] = dict.fromkeys(metric_names, np.nan)
            continue

        strat_part = strat_ret[selector]
        spy_part = spy_ret[selector]
        excess_part = excess[selector]

        stats_by_regime[label] = {
            "strat_sharpe": ann_sharpe(strat_part),
            "spy_sharpe": ann_sharpe(spy_part),
            "ir": ann_sharpe(excess_part),
            # Annualize the daily excess stream itself rather than
            # differencing two separately annualized returns.
            "excess_ann_return": ann_return(excess_part),
            "strat_ann_return": ann_return(strat_part),
            "spy_ann_return": ann_return(spy_part),
        }

    return stats_by_regime


def bootstrap_regime_stats(
    df: pd.DataFrame,
    n_boot: int = 5000,
    block_len: int = 21,
    seed: int = 7
) -> Dict[str, np.ndarray]:
    """
    Block-bootstrap the full aligned series and collect regime metrics.

    For each replicate one index vector is drawn over all rows and applied
    to the strategy returns, SPY returns and regime labels alike, so the
    three stay aligned. Regime metrics are then computed on the resampled
    arrays.

    Returns a dict keyed "<regime>_<metric>" (e.g. 'bull_ir', 'bear_ir',
    'overall_ir') whose values are arrays of length ``n_boot``.
    """
    tracked_metrics = ("ir", "strat_sharpe", "spy_sharpe", "excess_ann_return")
    regime_names = ("overall", "bull", "bear")

    strategy = df["strat_ret"].to_numpy(dtype=float)
    benchmark = df["spy_ret"].to_numpy(dtype=float)
    labels = df["market_regime"].to_numpy(dtype=int)
    n_obs = len(df)

    rng = np.random.default_rng(seed)

    # One pre-allocated output vector per (regime, metric) combination.
    draws = {}
    for name in regime_names:
        for metric in tracked_metrics:
            draws[f"{name}_{metric}"] = np.empty(n_boot, dtype=float)

    for b in range(n_boot):
        pick = block_bootstrap_idx(n_obs, block_len, rng)
        replicate = compute_regime_stats_from_returns(
            strategy[pick], benchmark[pick], labels[pick]
        )
        for name in regime_names:
            per_regime = replicate[name]
            for metric in tracked_metrics:
                draws[f"{name}_{metric}"][b] = per_regime[metric]

    return draws


def print_segment_summary(segments: List[SegmentStats], regime_name: str):
    """Print a per-segment table plus aggregate figures for one regime.

    Prints a short notice and returns when ``segments`` is empty. Otherwise
    prints one row per segment (dates, length, total returns, excess return,
    max drawdowns) followed by mean/median excess return, the number of
    segments in which the strategy beat SPY, and worst/median drawdowns.
    """
    if not segments:
        print(f"\n  No {regime_name} segments found.")
        return

    n_segments = len(segments)
    print(f"\n  Found {n_segments} contiguous {regime_name} segment(s):")

    # (title, width) per column; header and rule are derived from this list.
    columns = [
        ("#", 4), ("Start", 12), ("End", 12), ("Days", 6),
        ("Strat Tot", 12), ("SPY Tot", 12), ("Excess", 12),
        ("Strat DD", 10), ("SPY DD", 10),
    ]
    print("  " + " ".join(f"{title:<{width}}" for title, width in columns))
    print("  " + " ".join("-" * width for _, width in columns))

    for number, seg in enumerate(segments, 1):
        cells = [
            f"{number:<4}",
            f"{seg.start_date.strftime('%Y-%m-%d'):<12}",
            f"{seg.end_date.strftime('%Y-%m-%d'):<12}",
            f"{seg.n_days:<6}",
            f"{seg.strat_total_return*100:>10.2f}%",
            f"{seg.spy_total_return*100:>10.2f}%",
            f"{seg.excess_total_return*100:>10.2f}%",
            f"{seg.strat_max_dd*100:>8.2f}%",
            f"{seg.spy_max_dd*100:>8.2f}%",
        ]
        print("  " + " ".join(cells))

    # Aggregate view across all segments of this regime.
    print(f"\n  Summary across {regime_name} segments:")

    excess = [seg.excess_total_return for seg in segments]
    strat_drawdowns = [seg.strat_max_dd for seg in segments]
    spy_drawdowns = [seg.spy_max_dd for seg in segments]
    n_wins = sum(1 for value in excess if value > 0)

    print(f"    Excess return: mean={np.mean(excess)*100:.2f}%, median={np.median(excess)*100:.2f}%")
    print(f"    Strategy beat SPY in {n_wins}/{n_segments} segments ({n_wins/n_segments*100:.1f}%)")
    print(f"    Strategy max DD: worst={min(strat_drawdowns)*100:.2f}%, median={np.median(strat_drawdowns)*100:.2f}%")
    print(f"    SPY max DD: worst={min(spy_drawdowns)*100:.2f}%, median={np.median(spy_drawdowns)*100:.2f}%")


def print_conditional_stats(df: pd.DataFrame, regime_name: str, regime_value: int):
    """Print statistics computed over the days carrying one regime label.

    The selected days are generally not consecutive, so the figures describe
    average behavior conditional on the regime rather than a tradable
    return path.

    Args:
        df: Aligned frame with "strat_ret", "spy_ret" and "market_regime".
        regime_name: Label used in the printed headings.
        regime_value: Value of "market_regime" that selects the days.

    Prints a notice and returns when fewer than two days match.
    """
    subset = df[df["market_regime"] == regime_value]

    if len(subset) < 2:
        print(f"\n  Insufficient data for {regime_name} regime.")
        return

    strat_ret = subset["strat_ret"].to_numpy()
    spy_ret = subset["spy_ret"].to_numpy()
    excess_ret = strat_ret - spy_ret

    # Sample standard deviation (ddof=1), matching ann_sharpe and
    # ann_volatility; numpy's default ddof=0 would make the volatility row
    # inconsistent with the Sharpe figures printed below it.
    strat_sd = strat_ret.std(ddof=1)
    spy_sd = spy_ret.std(ddof=1)

    # Compute each metric once and reuse it for the "Diff" column.
    strat_win = win_rate(strat_ret)
    spy_win = win_rate(spy_ret)
    strat_sharpe = ann_sharpe(strat_ret)
    spy_sharpe = ann_sharpe(spy_ret)

    print(f"\n  Conditional Statistics ({regime_name} days, n={len(subset):,}):")
    print("  NOTE: These are 'conditional on regime' stats, not tradable regime returns.")
    print("  ")
    print(f"  {'Metric':<30} {'Strategy':>15} {'SPY':>15} {'Diff':>15}")
    print(f"  {'-'*30} {'-'*15} {'-'*15} {'-'*15}")

    # Daily stats (more meaningful for conditional analysis)
    print(f"  {'Mean daily return':<30} {strat_ret.mean()*100:>14.4f}% {spy_ret.mean()*100:>14.4f}% {excess_ret.mean()*100:>14.4f}%")
    print(f"  {'Daily volatility':<30} {strat_sd*100:>14.4f}% {spy_sd*100:>14.4f}% {(strat_sd-spy_sd)*100:>14.4f}%")
    print(f"  {'Win rate':<30} {strat_win*100:>14.2f}% {spy_win*100:>14.2f}% {(strat_win-spy_win)*100:>14.2f}%")
    print("  ")

    # Annualized figures; read with care because the days are stitched
    # together from separate periods.
    print(f"  {'Ann. Sharpe (conditional)':<30} {strat_sharpe:>15.3f} {spy_sharpe:>15.3f} {strat_sharpe-spy_sharpe:>15.3f}")
    print(f"  {'IR (Sharpe of excess)':<30} {ann_sharpe(excess_ret):>15.3f}")
    print(f"  {'Ann. excess return':<30} {ann_return(excess_ret)*100:>14.2f}%")


def print_bootstrap_results(
    boot_results: Dict[str, np.ndarray],
    regime: str,
    label: str,
    block_len: Optional[int] = None,
):
    """Print the bootstrap IR summary for one regime.

    Args:
        boot_results: Mapping that contains the key "<regime>_ir" with the
            bootstrap distribution of the information ratio.
        regime: Key prefix selecting the distribution (e.g. "bull").
        label: Human-readable name used in the printed heading.
        block_len: Block length to report in the heading. Defaults to the
            module-level BLOCK_LEN; pass the value actually used to build
            ``boot_results`` when it differs.

    NaN replicates are dropped. With fewer than 100 valid replicates only a
    notice is printed. Otherwise prints the median, the 2.5/97.5 percentile
    interval, the share of replicates <= 0 as a one-sided p-value, its
    two-sided counterpart, and whether the interval excludes zero.
    """
    if block_len is None:
        block_len = BLOCK_LEN

    ir_dist = boot_results[f"{regime}_ir"]
    ir_dist = ir_dist[~np.isnan(ir_dist)]

    if len(ir_dist) < 100:
        print(f"\n  Bootstrap results for {label}: insufficient valid samples.")
        return

    ci = np.percentile(ir_dist, [2.5, 97.5])
    point_ir = np.median(ir_dist)

    p_one_sided = np.mean(ir_dist <= 0.0)
    p_two_sided = 2 * min(p_one_sided, 1 - p_one_sided)

    ci_excludes_zero = (ci[0] > 0) or (ci[1] < 0)
    sig_marker = "Yes **" if ci_excludes_zero else "No"

    print(f"\n  Bootstrap Results for {label} (n_boot={len(ir_dist)}, block_len={block_len}):")
    print(f"    IR point estimate (median): {point_ir:.3f}")
    print(f"    95% CI: [{ci[0]:.3f}, {ci[1]:.3f}]")
    print(f"    One-sided p-value (H0: IR ≤ 0): {p_one_sided:.4f}")
    print(f"    Two-sided p-value (H0: IR = 0): {p_two_sided:.4f}")
    print(f"    Significant at α=0.05 (two-sided): {sig_marker}")


# =========================
# MAIN ANALYSIS
# =========================
if __name__ == "__main__":
    # Script flow: load the strategy equity curve and the SPY regime file,
    # align them on common dates, then print five report sections (overall
    # stats, contiguous segments, conditional stats, bootstrap tests, and a
    # summary of key findings).

    # --- Load equity curve ---
    # Handle both CSV and parquet formats
    if EQUITY_FILE.endswith(".csv"):
        eq = pd.read_csv(EQUITY_FILE, parse_dates=["date"]).copy()
    else:
        eq = pd.read_parquet(EQUITY_FILE).copy()
        # Handle index vs column for date
        if "date" not in eq.columns and eq.index.name == "date":
            eq = eq.reset_index()
        elif "date" not in eq.columns and "Date" in eq.columns:
            eq = eq.rename(columns={"Date": "date"})

    eq["date"] = pd.to_datetime(eq["date"])
    eq = eq.sort_values("date").drop_duplicates("date")

    # Handle different column names for equity value ("equity" wins if both exist)
    equity_col = next((c for c in ("equity", "portfolio_value") if c in eq.columns), None)
    if equity_col is None:
        raise ValueError(f"Could not find equity column. Available: {eq.columns.tolist()}")
    eq["strat_ret"] = eq[equity_col].pct_change().fillna(0.0)

    # --- Load SPY file with market regime ---
    spy = pd.read_parquet(SPY_PARQUET).copy()
    # Promote the index to a column only when no date column exists yet.
    # An unconditional reset_index() + rename would create a second "date"
    # column (or raise) when the file already stores the date as a column.
    if "date" not in spy.columns:
        if "Date" not in spy.columns:
            spy = spy.reset_index()
        spy = spy.rename(columns={"Date": "date", "index": "date"})
    if "date" not in spy.columns:
        raise ValueError(f"Could not find date column in SPY file. Available: {spy.columns.tolist()}")
    spy["date"] = pd.to_datetime(spy["date"])
    # De-duplicate dates like the equity curve so the merge stays one-to-one.
    spy = spy.sort_values("date").drop_duplicates("date")
    spy["spy_ret"] = spy["spy_close"].pct_change().fillna(0.0)

    # --- Optional: Lag regime to avoid look-ahead bias ---
    if LAG_REGIME:
        # Keep unknown lagged values as NaN so they are dropped during
        # alignment instead of being silently labelled as bear days (0).
        spy["market_regime"] = spy["market_regime"].shift(1)
        print("NOTE: Regime lagged by 1 day to avoid look-ahead bias.\n")

    # --- Align on common dates ---
    # Drop rows only when a column this analysis uses is missing; NaNs in
    # unrelated equity-file columns must not remove otherwise valid days.
    required_cols = [equity_col, "strat_ret", "spy_ret", "market_regime"]
    df = eq.merge(spy[["date", "spy_ret", "market_regime"]], on="date", how="inner")
    df = df.dropna(subset=required_cols)
    df = df.sort_values("date").reset_index(drop=True)
    df["market_regime"] = df["market_regime"].astype(int)

    if len(df) < 2:
        raise RuntimeError("Not enough aligned data points between equity curve and SPY.")

    # NOTE(review): from here on adjacent rows are treated as consecutive
    # trading days (year counts use len(df) / TRADING_DAYS, and segments are
    # "contiguous" by row position). Confirm the aligned dates have no large
    # gaps; otherwise annualized figures and segment boundaries are distorted.

    # --- Compute excess returns (daily strategy return minus SPY return) ---
    df["excess_ret"] = df["strat_ret"] - df["spy_ret"]

    # =========================
    # HEADER
    # =========================
    print("=" * 80)
    print(" REGIME-BASED PERFORMANCE ANALYSIS: STRATEGY vs SPY (v2)")
    print("=" * 80)
    print(f"\nData range: {df['date'].min().strftime('%Y-%m-%d')} to {df['date'].max().strftime('%Y-%m-%d')}")
    print(f"Total aligned days: {len(df):,} ({len(df)/TRADING_DAYS:.2f} years)")

    n_bull = (df["market_regime"] == 1).sum()
    n_bear = (df["market_regime"] == 0).sum()
    print(f"Bull market days (SPY > 200 DMA): {n_bull:,} ({n_bull/len(df)*100:.1f}%)")
    print(f"Bear market days (SPY < 200 DMA): {n_bear:,} ({n_bear/len(df)*100:.1f}%)")

    if LAG_REGIME:
        print("\n⚠️  REGIME LAGGED BY 1 DAY (look-ahead bias prevention enabled)")

    # =========================
    # OVERALL STATISTICS (Full Series)
    # =========================
    print("\n" + "=" * 80)
    print(" SECTION 1: OVERALL STATISTICS (Full Contiguous Series)")
    print("=" * 80)

    strat_ret = df["strat_ret"].to_numpy()
    spy_ret = df["spy_ret"].to_numpy()
    excess_ret = df["excess_ret"].to_numpy()

    # Compute each metric once; the "Diff" column reuses the same values.
    strat_sharpe = ann_sharpe(strat_ret)
    spy_sharpe = ann_sharpe(spy_ret)
    strat_mdd = max_drawdown(strat_ret)
    spy_mdd = max_drawdown(spy_ret)
    strat_win = win_rate(strat_ret)
    spy_win = win_rate(spy_ret)
    excess_ann = ann_return(excess_ret)
    overall_ir = ann_sharpe(excess_ret)

    print(f"\n  {'Metric':<30} {'Strategy':>15} {'SPY':>15} {'Diff':>15}")
    print(f"  {'-'*30} {'-'*15} {'-'*15} {'-'*15}")
    print(f"  {'Ann. Return':<30} {ann_return(strat_ret)*100:>14.2f}% {ann_return(spy_ret)*100:>14.2f}% {excess_ann*100:>14.2f}%")
    print(f"  {'Ann. Volatility':<30} {ann_volatility(strat_ret)*100:>14.2f}% {ann_volatility(spy_ret)*100:>14.2f}% {ann_volatility(excess_ret)*100:>14.2f}%")
    print(f"  {'Sharpe Ratio':<30} {strat_sharpe:>15.3f} {spy_sharpe:>15.3f} {strat_sharpe-spy_sharpe:>15.3f}")
    print(f"  {'Max Drawdown':<30} {strat_mdd*100:>14.2f}% {spy_mdd*100:>14.2f}% {(strat_mdd-spy_mdd)*100:>14.2f}%")
    print(f"  {'Win Rate':<30} {strat_win*100:>14.2f}% {spy_win*100:>14.2f}% {(strat_win-spy_win)*100:>14.2f}%")
    print("  ")
    print(f"  {'Information Ratio':<30} {overall_ir:>15.3f}")
    print(f"  {'Ann. Excess Return (correct)':<30} {excess_ann*100:>14.2f}%")

    # =========================
    # CONTIGUOUS SEGMENT ANALYSIS
    # =========================
    print("\n" + "=" * 80)
    print(" SECTION 2: CONTIGUOUS SEGMENT ANALYSIS")
    print("=" * 80)
    print("\n  This analyzes each continuous bull/bear period separately,")
    print("  avoiding the pitfalls of stitching non-contiguous days together.")

    segments = find_contiguous_segments(df)

    bull_segments = []
    bear_segments = []

    # Any regime value other than 1 is grouped with the bear segments.
    for start_idx, end_idx, regime_val in segments:
        stats = compute_segment_stats(df, start_idx, end_idx)
        if regime_val == 1:
            bull_segments.append(stats)
        else:
            bear_segments.append(stats)

    print("\n" + "-" * 80)
    print(" BEAR MARKET SEGMENTS (SPY < 200 DMA)")
    print("-" * 80)
    print_segment_summary(bear_segments, "bear")

    print("\n" + "-" * 80)
    print(" BULL MARKET SEGMENTS (SPY > 200 DMA)")
    print("-" * 80)
    # Only show summary for bull (too many segments usually)
    if bull_segments:
        print(f"\n  Found {len(bull_segments)} contiguous bull segment(s).")
        excess_returns = [s.excess_total_return for s in bull_segments]
        strat_dds = [s.strat_max_dd for s in bull_segments]
        bull_wins = sum(1 for e in excess_returns if e > 0)
        print("\n  Summary across bull segments:")
        print(f"    Excess return: mean={np.mean(excess_returns)*100:.2f}%, median={np.median(excess_returns)*100:.2f}%")
        print(f"    Strategy beat SPY in {bull_wins}/{len(bull_segments)} segments ({bull_wins/len(bull_segments)*100:.1f}%)")
        print(f"    Strategy max DD: worst={min(strat_dds)*100:.2f}%, median={np.median(strat_dds)*100:.2f}%")

    # =========================
    # CONDITIONAL STATISTICS (for reference)
    # =========================
    print("\n" + "=" * 80)
    print(" SECTION 3: CONDITIONAL STATISTICS (Days Labeled as Bull/Bear)")
    print("=" * 80)
    print("\n  These stats describe behavior conditional on regime, NOT tradable returns.")

    print_conditional_stats(df, "Bull", 1)
    print_conditional_stats(df, "Bear", 0)

    # =========================
    # BOOTSTRAP ANALYSIS - SIMPLE METHOD (Original Script)
    # =========================
    print("\n" + "=" * 80)
    print(" SECTION 4A: SIMPLE BOOTSTRAP (Original Method - For Comparison)")
    print("=" * 80)
    print("\n  This uses the SIMPLE method from the original script:")
    print("  - Bootstrap the excess returns directly")
    print("  - Compute Sharpe(excess) on each bootstrap sample")
    print("  - Does NOT preserve regime structure")

    # Point estimates (same as original script)
    ir_point = overall_ir
    sd_point = strat_sharpe - spy_sharpe

    print(f"\n  IR point estimate (Sharpe of excess): {ir_point:.3f}")
    print(f"  Sharpe(strat) - Sharpe(SPY): {sd_point:.3f}")

    print("\n  Block bootstrap sensitivity (IR > 0 one-sided test):")
    print(f"  {'Block':<8} {'95% CI':<24} {'p(IR<=0)':<12} {'p (2-sided)':<12} {'Sig?':<8}")
    print(f"  {'-'*8} {'-'*24} {'-'*12} {'-'*12} {'-'*8}")

    # Keep each distribution so Section 4C can reuse the BLOCK_LEN run
    # instead of repeating N_BOOT resamples with the same seed.
    simple_dists = {}
    for bl in BLOCK_LENS:
        dist_ir = bootstrap_ir_simple(df, n_boot=N_BOOT, block_len=bl, seed=SEED)
        if dist_ir.size == 0:
            print(f"  {bl:>6}: not enough data to bootstrap.")
            continue
        simple_dists[bl] = dist_ir

        ci = np.percentile(dist_ir, [2.5, 97.5])
        p_one_sided = np.mean(dist_ir <= 0.0)  # H0: IR <= 0
        p_two_sided = 2 * min(p_one_sided, 1 - p_one_sided)
        ci_excludes_zero = (ci[0] > 0) or (ci[1] < 0)
        sig_marker = "Yes **" if ci_excludes_zero else "No"

        print(
            f"  {bl:>6}   [{ci[0]: .3f}, {ci[1]: .3f}]       "
            f"{p_one_sided:.4f}       {p_two_sided:.4f}       {sig_marker}"
        )

    # =========================
    # BOOTSTRAP ANALYSIS - REGIME METHOD (Correct Method)
    # =========================
    print("\n" + "=" * 80)
    print(" SECTION 4B: REGIME-AWARE BOOTSTRAP (v2 Method)")
    print("=" * 80)
    print("\n  Bootstrap performed on FULL contiguous series, then regime stats computed.")
    print("  This preserves the time-series dependence structure correctly.")

    print("\n  Running bootstrap (this may take a moment)...")
    boot_results = bootstrap_regime_stats(df, n_boot=N_BOOT, block_len=BLOCK_LEN, seed=SEED)

    print_bootstrap_results(boot_results, "overall", "Overall")
    print_bootstrap_results(boot_results, "bull", "Bull Market")
    print_bootstrap_results(boot_results, "bear", "Bear Market")

    # =========================
    # COMPARISON OF METHODS
    # =========================
    print("\n" + "=" * 80)
    print(" SECTION 4C: METHOD COMPARISON - WHY THE DISCREPANCY?")
    print("=" * 80)

    # Get the simple method CI at block_len=BLOCK_LEN for direct comparison,
    # reusing the Section 4A run when BLOCK_LEN was part of the sweep.
    dist_simple = simple_dists.get(BLOCK_LEN)
    if dist_simple is None:
        dist_simple = bootstrap_ir_simple(df, n_boot=N_BOOT, block_len=BLOCK_LEN, seed=SEED)
    ci_simple = np.percentile(dist_simple, [2.5, 97.5])
    p_simple_one = np.mean(dist_simple <= 0.0)
    p_simple_two = 2 * min(p_simple_one, 1 - p_simple_one)

    # Get the regime method overall CI
    overall_dist = boot_results["overall_ir"][~np.isnan(boot_results["overall_ir"])]
    ci_regime = np.percentile(overall_dist, [2.5, 97.5])
    p_regime_one = np.mean(overall_dist <= 0.0)
    p_regime_two = 2 * min(p_regime_one, 1 - p_regime_one)

    # NOTE(review): the "Simple" row shows the sample IR while the
    # "Regime-aware" row shows the bootstrap median, so the IR column is not
    # like-for-like. Both bootstraps seed their generator identically and
    # draw indices over the same number of rows, so their overall-IR
    # distributions are expected to coincide; verify before relying on the
    # explanation text printed below.
    print(f"\n  Comparison at block_len={BLOCK_LEN}:")
    print(f"  {'Method':<25} {'IR':<10} {'95% CI':<24} {'p (1-side)':<12} {'p (2-side)':<12}")
    print(f"  {'-'*25} {'-'*10} {'-'*24} {'-'*12} {'-'*12}")
    print(f"  {'Simple (excess only)':<25} {ir_point:<10.3f} [{ci_simple[0]:.3f}, {ci_simple[1]:.3f}]       {p_simple_one:.4f}       {p_simple_two:.4f}")
    print(f"  {'Regime-aware (overall)':<25} {np.median(overall_dist):<10.3f} [{ci_regime[0]:.3f}, {ci_regime[1]:.3f}]       {p_regime_one:.4f}       {p_regime_two:.4f}")

    print("""
  EXPLANATION OF DISCREPANCY:
  
  Both methods bootstrap the SAME underlying data, but:
  
  1. SIMPLE METHOD: 
     - Resamples excess returns (strat - spy) directly
     - Computes Sharpe of the resampled excess returns
     - Each bootstrap sample has the SAME number of observations
     
  2. REGIME-AWARE METHOD:
     - Resamples the full series (strat, spy, AND regime labels together)
     - Then computes regime-specific stats on each bootstrap sample
     - The "overall" IR should theoretically match, but...
     - Regime proportions can vary between bootstrap samples
     
  The discrepancy likely comes from:
  - Random seed handling differences between the two bootstrap loops
  - The regime method resamples regime labels too, which can slightly 
    change the effective weighting
  - Numerical precision in how returns are combined
  
  For OVERALL IR, the SIMPLE method is actually more direct and appropriate.
  The REGIME method is designed for comparing bull vs bear performance.
""")

    # =========================
    # KEY FINDINGS SUMMARY
    # =========================
    print("\n" + "=" * 80)
    print(" SECTION 5: KEY FINDINGS SUMMARY")
    print("=" * 80)

    # Same minimum number of valid replicates that print_bootstrap_results
    # requires before reporting an interval.
    min_valid_boot = 100

    print(f"\n  {'Regime':<15} {'IR':<10} {'95% CI':<25} {'Significant?':<15}")
    print(f"  {'-'*15} {'-'*10} {'-'*25} {'-'*15}")

    summary_specs = (
        ("Overall", "overall", np.ones(len(df), dtype=bool)),
        ("Bull", "bull", (df["market_regime"] == 1).to_numpy()),
        ("Bear", "bear", (df["market_regime"] == 0).to_numpy()),
    )
    for row_label, key, day_mask in summary_specs:
        regime_ir = ann_sharpe(excess_ret[day_mask])
        regime_dist = boot_results[f"{key}_ir"]
        regime_dist = regime_dist[~np.isnan(regime_dist)]

        if regime_dist.size < min_valid_boot:
            # A regime that is (almost) absent leaves too few valid
            # replicates; report that instead of a percentile of nothing.
            ci_text = "[n/a]"
            sig_text = "N/A"
        else:
            ci_lo, ci_hi = np.percentile(regime_dist, [2.5, 97.5])
            ci_text = f"[{ci_lo:.3f}, {ci_hi:.3f}]"
            sig_text = "YES" if (ci_lo > 0 or ci_hi < 0) else "NO"

        print(f"  {row_label:<15} {regime_ir:<10.3f} {ci_text}{'':>10} {sig_text:<15}")

    # Segment-based insights
    if bear_segments:
        bear_wins = sum(1 for s in bear_segments if s.excess_total_return > 0)
        bear_total = len(bear_segments)
        print(f"\n  Bear market segment win rate: {bear_wins}/{bear_total} ({bear_wins/bear_total*100:.1f}%)")

        # The two minima are taken independently, so they may come from
        # different segments.
        worst_bear_strat = min(s.strat_total_return for s in bear_segments)
        worst_bear_spy = min(s.spy_total_return for s in bear_segments)
        print(f"  Worst bear segment: Strategy {worst_bear_strat*100:.1f}% vs SPY {worst_bear_spy*100:.1f}%")

    print("\n" + "-" * 80)
    print(" INTERPRETATION NOTES:")
    print("-" * 80)
    print("""
  1. OVERALL IR: Tests whether strategy has risk-adjusted alpha vs SPY across
     the full time period. This is the most reliable test.
  
  2. REGIME IRs: These test conditional performance, but interpretation requires
     care - the bootstrap resamples the full series, so regime proportions may
     vary across samples.
  
  3. SEGMENT ANALYSIS: Shows actual performance in each contiguous bear/bull
     market. This is the most intuitive way to see "what happened in 2008" etc.
  
  4. If you trade based on the regime signal, enable LAG_REGIME=True to avoid
     look-ahead bias in this analysis.
""")

    print("\nAnalysis complete.")

 REGIME-BASED PERFORMANCE ANALYSIS: STRATEGY vs SPY (v2)

Data range: 2001-01-02 to 2022-12-30
Total aligned days: 4,024 (15.97 years)
Bull market days (SPY > 200 DMA): 3,014 (74.9%)
Bear market days (SPY < 200 DMA): 1,010 (25.1%)

 SECTION 1: OVERALL STATISTICS (Full Contiguous Series)

  Metric                                Strategy             SPY            Diff
  ------------------------------ --------------- --------------- ---------------
  Ann. Return                             17.89%          10.74%           4.84%
  Ann. Volatility                         15.80%          18.13%          15.12%
  Sharpe Ratio                             1.121           0.654           0.468
  Max Drawdown                           -28.21%         -33.72%           5.51%
  Win Rate                                48.31%          54.62%          -6.31%
  
  Information Ratio                        0.388
  Ann. Excess Return (correct)             4.84%

 SECTION 2: CONTIGUOUS SEGMENT ANALYSIS

 