In [5]:
"""
Regime-Based Performance Analysis: Strategy vs SPY (v2)

Compares strategy performance against SPY during:
- Bull markets (SPY above 200-day MA, regime=1)
- Bear markets (SPY below 200-day MA, regime=0)

Key improvements over v1:
- Contiguous segment analysis (no artificial stitching)
- Correct excess return calculation: ann_return(strat - spy)
- Bootstrap on full series, then compute regime stats (preserves time structure)
- Regime lag option to avoid look-ahead bias
- Per-segment breakdown for bear/bull markets
"""

import pandas as pd
import numpy as np
from typing import Dict, List, Tuple
from dataclasses import dataclass

# Number of trading days per year; used to annualize daily statistics.
TRADING_DAYS = 252

# =========================
# CONFIG
# =========================
# Input parquet files (paths relative to the working directory).
# The main script reads "portfolio_value" from EQUITY_FILE and
# "spy_close" / "market_regime" from SPY_PARQUET.
EQUITY_FILE = "./13-trading_output_regression_insp500_spyfilter_cap15/13-equity_curve_regression_insp500_spyfilter_cap15.parquet"
SPY_PARQUET = "./8-SPY_200DMA_market_regime/8-SPY_200DMA_regime.parquet"

N_BOOT = 5000  # Number of bootstrap resamples
BLOCK_LEN = 21  # Primary block length for bootstrap
SEED = 7  # Seed passed to the bootstrap random number generator

# Set to True if your strategy uses the regime signal for trading decisions
# This will lag the regime by 1 day to avoid look-ahead bias
LAG_REGIME = True


# =========================
# DATA CLASSES
# =========================
@dataclass
class SegmentStats:
    """Statistics for a contiguous regime segment.

    Instances are produced by ``compute_segment_stats`` from a slice of
    consecutive rows. All return and drawdown fields are fractions
    (0.05 means 5%); the printing helpers multiply by 100 for display.
    """
    start_date: pd.Timestamp  # "date" value of the first row in the segment
    end_date: pd.Timestamp  # "date" value of the last row in the segment
    n_days: int  # number of rows (trading days) in the segment
    strat_total_return: float  # compounded strategy return over the segment
    spy_total_return: float  # compounded SPY return over the segment
    strat_ann_return: float  # annualized strategy return; NaN for segments of 0.1 years or less
    spy_ann_return: float  # annualized SPY return; NaN for segments of 0.1 years or less
    excess_total_return: float  # strat_total_return - spy_total_return
    strat_max_dd: float  # strategy maximum drawdown within the segment (<= 0)
    spy_max_dd: float  # SPY maximum drawdown within the segment (<= 0)


# =========================
# HELPERS
# =========================
def ann_sharpe(r: np.ndarray) -> float:
    """Return the annualized Sharpe ratio of daily returns (risk-free rate = 0).

    NaN observations are discarded first. With fewer than two usable
    observations the result is NaN; when the sample standard deviation is
    zero or undefined the result is 0.0.
    """
    clean = np.asarray(r, float)
    clean = clean[~np.isnan(clean)]

    if clean.size < 2:
        return np.nan

    sigma = clean.std(ddof=1)
    if np.isnan(sigma) or sigma <= 0:
        return 0.0

    mu = clean.mean()
    return np.sqrt(TRADING_DAYS) * mu / sigma


def ann_return(r: np.ndarray) -> float:
    """Return the geometric annualized return implied by daily returns.

    NaN observations are discarded. The compounded growth over the sample
    is raised to ``1 / years`` where ``years = n_obs / TRADING_DAYS``.
    Returns NaN when fewer than two observations remain.
    """
    clean = np.asarray(r, float)
    clean = clean[~np.isnan(clean)]
    if clean.size < 2:
        return np.nan

    n_years = clean.size / TRADING_DAYS
    if n_years <= 0:
        return np.nan

    compounded = np.prod(1 + clean) - 1
    return (1 + compounded) ** (1 / n_years) - 1


def total_return(r: np.ndarray) -> float:
    """Return the compounded (cumulative) return of a daily return series.

    NaN observations are ignored; an empty series yields NaN.
    """
    clean = np.asarray(r, float)
    clean = clean[~np.isnan(clean)]
    if clean.size == 0:
        return np.nan
    growth = np.prod(1 + clean)
    return growth - 1


def ann_volatility(r: np.ndarray) -> float:
    """Return the annualized sample volatility (ddof=1) of daily returns.

    NaN observations are ignored; fewer than two observations yields NaN.
    """
    clean = np.asarray(r, float)
    clean = clean[~np.isnan(clean)]
    if clean.size < 2:
        return np.nan
    daily_sd = clean.std(ddof=1)
    return daily_sd * np.sqrt(TRADING_DAYS)


def max_drawdown(r: np.ndarray) -> float:
    """Return the maximum drawdown of the equity curve implied by daily returns.

    The equity curve starts at 1.0 *before* the first return is applied, so
    a loss on the very first day counts as a drawdown from the starting
    capital.

    Args:
        r: Daily simple returns. NaN entries are ignored.

    Returns:
        The most negative peak-to-trough decline as a fraction (e.g. -0.25
        for a 25% drawdown), 0.0 if equity never falls below its running
        peak, or NaN for an empty series.
    """
    r = np.asarray(r, float)
    r = r[~np.isnan(r)]
    if r.size < 1:
        return np.nan
    cum = np.cumprod(1 + r)
    # Clamp the high-water mark at the initial equity of 1.0; without this
    # the peak would start at the first day's equity and hide an opening loss.
    running_max = np.maximum(np.maximum.accumulate(cum), 1.0)
    dd = (cum - running_max) / running_max
    return dd.min()


def win_rate(r: np.ndarray) -> float:
    """Return the fraction (0..1) of observations with a strictly positive return.

    NaN observations are ignored; an empty series yields NaN.
    """
    clean = np.asarray(r, float)
    clean = clean[~np.isnan(clean)]
    if clean.size == 0:
        return np.nan
    is_up_day = clean > 0
    return is_up_day.mean()


def find_contiguous_segments(df: pd.DataFrame, regime_col: str = "market_regime") -> List[Tuple[int, int, int]]:
    """
    Split the rows of ``df`` into maximal runs sharing one regime value.

    Each run is reported as ``(first_pos, last_pos, regime_value)`` where the
    positions are inclusive, zero-based row positions and the regime value is
    cast to ``int``. An empty frame yields an empty list.
    """
    if len(df) == 0:
        return []

    labels = df[regime_col].values
    n_rows = len(labels)

    runs = []
    run_start = 0
    for pos in range(1, n_rows):
        if labels[pos] != labels[run_start]:
            # The run that began at run_start ended on the previous row.
            runs.append((run_start, pos - 1, int(labels[run_start])))
            run_start = pos

    # Close the final run, which always extends to the last row.
    runs.append((run_start, n_rows - 1, int(labels[run_start])))
    return runs


def compute_segment_stats(df: pd.DataFrame, start_idx: int, end_idx: int) -> SegmentStats:
    """Summarize strategy and SPY performance over rows ``start_idx..end_idx`` (inclusive).

    Reads the "date", "strat_ret" and "spy_ret" columns. Annualized returns
    are only reported when the slice spans more than 0.1 years of trading
    days; otherwise they are NaN.
    """
    window = df.iloc[start_idx:end_idx + 1]
    strat = window["strat_ret"].values
    bench = window["spy_ret"].values

    length = len(window)
    span_years = length / TRADING_DAYS

    strat_cum = total_return(strat)
    bench_cum = total_return(bench)

    # Very short windows produce meaningless annualized figures, so leave
    # them as NaN unless the window is longer than 0.1 years.
    strat_cagr = np.nan
    bench_cagr = np.nan
    if span_years > 0.1:
        strat_cagr = (1 + strat_cum) ** (1 / span_years) - 1
        bench_cagr = (1 + bench_cum) ** (1 / span_years) - 1

    dates = window["date"]
    return SegmentStats(
        start_date=dates.iloc[0],
        end_date=dates.iloc[-1],
        n_days=length,
        strat_total_return=strat_cum,
        spy_total_return=bench_cum,
        strat_ann_return=strat_cagr,
        spy_ann_return=bench_cagr,
        excess_total_return=strat_cum - bench_cum,
        strat_max_dd=max_drawdown(strat),
        spy_max_dd=max_drawdown(bench),
    )


def block_bootstrap_idx(n: int, block_len: int, rng: np.random.Generator) -> np.ndarray:
    """
    Draw ``n`` resampling indices by gluing together random contiguous blocks.

    Block start positions are drawn uniformly from ``0 .. n - block_len`` so
    every block lies fully inside the series; the concatenation is cut to
    exactly ``n`` entries. When ``block_len <= 1`` or ``block_len > n`` the
    function degrades to a plain IID bootstrap. ``n <= 0`` gives an empty array.
    """
    if n <= 0:
        return np.array([], dtype=int)

    if not (1 < block_len <= n):
        return rng.integers(0, n, size=n, dtype=int)

    last_start = n - block_len
    # ceil(n / block_len) blocks are needed to cover n positions.
    n_blocks = -(-n // block_len)

    pieces = []
    for _ in range(n_blocks):
        start = int(rng.integers(0, last_start + 1))
        pieces.append(np.arange(start, start + block_len))

    return np.concatenate(pieces)[:n].astype(int)


def compute_regime_stats_from_returns(
    strat_ret: np.ndarray,
    spy_ret: np.ndarray,
    regime: np.ndarray
) -> Dict[str, Dict]:
    """
    Compute Sharpe, IR and annualized-return figures per regime bucket.

    The three buckets are "bull" (regime == 1), "bear" (regime == 0) and
    "overall" (every observation). A bucket with fewer than two observations
    gets NaN for every metric. The same routine serves the point estimates
    and each bootstrap replicate.
    """
    active_ret = strat_ret - spy_ret

    buckets = (
        ("bull", regime == 1),
        ("bear", regime == 0),
        ("overall", np.ones(len(regime), dtype=bool)),
    )
    metric_names = (
        "strat_sharpe",
        "spy_sharpe",
        "ir",
        "excess_ann_return",
        "strat_ann_return",
        "spy_ann_return",
    )

    out = {}
    for label, selector in buckets:
        if selector.sum() < 2:
            out[label] = dict.fromkeys(metric_names, np.nan)
            continue

        strat_sel = strat_ret[selector]
        spy_sel = spy_ret[selector]
        active_sel = active_ret[selector]

        out[label] = {
            "strat_sharpe": ann_sharpe(strat_sel),
            "spy_sharpe": ann_sharpe(spy_sel),
            "ir": ann_sharpe(active_sel),
            # Annualize the daily excess-return stream itself rather than
            # differencing two annualized returns.
            "excess_ann_return": ann_return(active_sel),
            "strat_ann_return": ann_return(strat_sel),
            "spy_ann_return": ann_return(spy_sel),
        }

    return out


def bootstrap_regime_stats(
    df: pd.DataFrame,
    n_boot: int = 5000,
    block_len: int = 21,
    seed: int = 7
) -> Dict[str, np.ndarray]:
    """
    Block-bootstrap the whole aligned series and recompute regime stats per draw.

    Each replicate resamples strategy returns, SPY returns and regime labels
    with one shared index vector so the three stay aligned, then evaluates
    the per-regime statistics on the resampled data.

    Returns a dict keyed ``"<regime>_<metric>"`` (e.g. 'bull_ir',
    'overall_strat_sharpe') whose values are arrays of length ``n_boot``.
    """
    rng = np.random.default_rng(seed)

    strat = df["strat_ret"].to_numpy(dtype=float)
    bench = df["spy_ret"].to_numpy(dtype=float)
    labels = df["market_regime"].to_numpy(dtype=int)
    n_obs = len(df)

    scopes = ("overall", "bull", "bear")
    tracked = ("ir", "strat_sharpe", "spy_sharpe", "excess_ann_return")

    # One pre-allocated output vector per (regime, metric) pair.
    draws = {
        f"{scope}_{metric}": np.empty(n_boot, dtype=float)
        for scope in scopes
        for metric in tracked
    }

    for b in range(n_boot):
        pick = block_bootstrap_idx(n_obs, block_len, rng)
        replicate = compute_regime_stats_from_returns(strat[pick], bench[pick], labels[pick])

        for scope in scopes:
            scope_stats = replicate[scope]
            for metric in tracked:
                draws[f"{scope}_{metric}"][b] = scope_stats[metric]

    return draws


def print_segment_summary(segments: List[SegmentStats], regime_name: str):
    """Print a per-segment table and aggregate summary for one regime.

    Emits one row per segment (dates, length, total returns, excess return,
    max drawdowns) followed by mean/median excess return, the count of
    segments where the strategy beat SPY, and worst/median drawdowns.
    Prints a short notice and returns when ``segments`` is empty.
    """
    if not segments:
        print(f"\n  No {regime_name} segments found.")
        return

    # (title, width) for each table column, in display order.
    columns = (
        ("#", 4),
        ("Start", 12),
        ("End", 12),
        ("Days", 6),
        ("Strat Tot", 12),
        ("SPY Tot", 12),
        ("Excess", 12),
        ("Strat DD", 10),
        ("SPY DD", 10),
    )

    print(f"\n  Found {len(segments)} contiguous {regime_name} segment(s):")
    print("  " + " ".join(title.ljust(width) for title, width in columns))
    print("  " + " ".join("-" * width for _, width in columns))

    for row_no, seg in enumerate(segments, 1):
        cells = [
            f"{row_no:<4}",
            f"{seg.start_date.strftime('%Y-%m-%d'):<12}",
            f"{seg.end_date.strftime('%Y-%m-%d'):<12}",
            f"{seg.n_days:<6}",
            f"{seg.strat_total_return*100:>10.2f}%",
            f"{seg.spy_total_return*100:>10.2f}%",
            f"{seg.excess_total_return*100:>10.2f}%",
            f"{seg.strat_max_dd*100:>8.2f}%",
            f"{seg.spy_max_dd*100:>8.2f}%",
        ]
        print("  " + " ".join(cells))

    # Aggregate view across all segments of this regime.
    print(f"\n  Summary across {regime_name} segments:")

    excess = [seg.excess_total_return for seg in segments]
    strat_dd = [seg.strat_max_dd for seg in segments]
    spy_dd = [seg.spy_max_dd for seg in segments]
    n_segments = len(segments)
    n_wins = sum(1 for value in excess if value > 0)

    print(f"    Excess return: mean={np.mean(excess)*100:.2f}%, median={np.median(excess)*100:.2f}%")
    print(f"    Strategy beat SPY in {n_wins}/{n_segments} segments ({n_wins/n_segments*100:.1f}%)")
    print(f"    Strategy max DD: worst={min(strat_dd)*100:.2f}%, median={np.median(strat_dd)*100:.2f}%")
    print(f"    SPY max DD: worst={min(spy_dd)*100:.2f}%, median={np.median(spy_dd)*100:.2f}%")


def print_conditional_stats(df: pd.DataFrame, regime_name: str, regime_value: int):
    """Print day-level statistics restricted to rows with the given regime value.

    Reports mean daily return, daily volatility (NumPy's default population
    standard deviation), win rate, conditional Sharpe ratios, the
    information ratio and the annualized excess return. Prints a notice and
    returns when fewer than two rows match.
    """
    rows = df[df["market_regime"] == regime_value]
    n_rows = len(rows)

    if n_rows < 2:
        print(f"\n  Insufficient data for {regime_name} regime.")
        return

    strat = rows["strat_ret"].to_numpy()
    bench = rows["spy_ret"].to_numpy()
    active = strat - bench

    def pct_row(label, strat_val, bench_val, diff_val, decimals):
        # One table row of three percentages with the requested precision.
        return (
            f"  {label:<30} {strat_val*100:>14.{decimals}f}% "
            f"{bench_val*100:>14.{decimals}f}% {diff_val*100:>14.{decimals}f}%"
        )

    print(f"\n  Conditional Statistics ({regime_name} days, n={n_rows:,}):")
    print("  NOTE: These are 'conditional on regime' stats, not tradable regime returns.")
    print("  ")
    print(f"  {'Metric':<30} {'Strategy':>15} {'SPY':>15} {'Diff':>15}")
    print(f"  {'-'*30} {'-'*15} {'-'*15} {'-'*15}")

    # Daily figures are the most meaningful for a conditional (non-contiguous) sample.
    strat_sd, bench_sd = strat.std(), bench.std()
    strat_wr, bench_wr = win_rate(strat), win_rate(bench)
    print(pct_row("Mean daily return", strat.mean(), bench.mean(), active.mean(), 4))
    print(pct_row("Daily volatility", strat_sd, bench_sd, strat_sd - bench_sd, 4))
    print(pct_row("Win rate", strat_wr, bench_wr, strat_wr - bench_wr, 2))
    print("  ")

    # Annualized figures: interpret with care, the days are not contiguous.
    strat_sr, bench_sr = ann_sharpe(strat), ann_sharpe(bench)
    print(f"  {'Ann. Sharpe (conditional)':<30} {strat_sr:>15.3f} {bench_sr:>15.3f} {strat_sr-bench_sr:>15.3f}")
    print(f"  {'IR (Sharpe of excess)':<30} {ann_sharpe(active):>15.3f}")
    print(f"  {'Ann. excess return':<30} {ann_return(active)*100:>14.2f}%")


def print_bootstrap_results(boot_results: Dict[str, np.ndarray], regime: str, label: str):
    """Print the bootstrap IR summary for one regime.

    Reads ``boot_results["<regime>_ir"]``, drops NaN replicates and reports
    the median, the 2.5/97.5 percentile interval, one- and two-sided
    bootstrap p-values and whether the interval excludes zero. Prints a
    notice and returns when fewer than 100 valid replicates remain.
    """
    samples = boot_results[f"{regime}_ir"]
    samples = samples[~np.isnan(samples)]
    n_valid = len(samples)

    if n_valid < 100:
        print(f"\n  Bootstrap results for {label}: insufficient valid samples.")
        return

    lower, upper = np.percentile(samples, [2.5, 97.5])
    median_ir = np.median(samples)

    # Share of replicates with a non-positive IR, doubled on the smaller tail
    # for the two-sided version.
    p_one_sided = np.mean(samples <= 0.0)
    p_two_sided = 2 * min(p_one_sided, 1 - p_one_sided)

    if lower > 0 or upper < 0:
        sig_marker = "Yes **"
    else:
        sig_marker = "No"

    print(f"\n  Bootstrap Results for {label} (n_boot={n_valid}, block_len={BLOCK_LEN}):")
    print(f"    IR point estimate (median): {median_ir:.3f}")
    print(f"    95% CI: [{lower:.3f}, {upper:.3f}]")
    print(f"    One-sided p-value (H0: IR ≤ 0): {p_one_sided:.4f}")
    print(f"    Two-sided p-value (H0: IR = 0): {p_two_sided:.4f}")
    print(f"    Significant at α=0.05 (two-sided): {sig_marker}")


# =========================
# MAIN ANALYSIS
# =========================
if __name__ == "__main__":
    # Script entry point: load the strategy equity curve and the SPY regime
    # file, align them by date, then print five report sections (overall
    # stats, contiguous segments, conditional stats, bootstrap significance,
    # key findings).

    # --- Load equity curve ---
    eq = pd.read_parquet(EQUITY_FILE).copy()
    
    # Handle index vs column for date
    if "date" not in eq.columns and eq.index.name == "date":
        eq = eq.reset_index()
    elif "date" not in eq.columns and "Date" in eq.columns:
        eq = eq.rename(columns={"Date": "date"})
    
    eq["date"] = pd.to_datetime(eq["date"])
    eq = eq.sort_values("date").drop_duplicates("date")
    # First row has no prior value, so its return is set to 0.
    eq["strat_ret"] = eq["portfolio_value"].pct_change().fillna(0.0)

    # --- Load SPY file with market regime ---
    spy = pd.read_parquet(SPY_PARQUET).copy()
    # Only promote the index to a column when no date column exists yet.
    # Resetting unconditionally would add an "index" column that is then
    # renamed to "date", producing two "date" columns when the file already
    # stores the date as a regular column.
    if "date" not in spy.columns:
        if "Date" not in spy.columns:
            spy = spy.reset_index()
        spy = spy.rename(columns={"Date": "date", "index": "date"})
    spy["date"] = pd.to_datetime(spy["date"])
    # De-duplicate dates (as done for the equity curve) so the merge below
    # cannot multiply rows.
    spy = spy.sort_values("date").drop_duplicates("date")
    spy["spy_ret"] = spy["spy_close"].pct_change().fillna(0.0)
    
    # --- Optional: Lag regime to avoid look-ahead bias ---
    if LAG_REGIME:
        # The first row has no previous-day regime; it is filled with 0 (bear).
        spy["market_regime"] = spy["market_regime"].shift(1).fillna(0).astype(int)
        print("NOTE: Regime lagged by 1 day to avoid look-ahead bias.\n")

    # --- Align on common dates ---
    # dropna() has no subset, so a NaN in any column drops the whole row.
    df = eq.merge(spy[["date", "spy_ret", "market_regime"]], on="date", how="inner").dropna()
    df = df.sort_values("date").reset_index(drop=True)

    if len(df) < 2:
        raise RuntimeError("Not enough aligned data points between equity curve and SPY.")

    # --- Compute excess returns (CORRECT way) ---
    df["excess_ret"] = df["strat_ret"] - df["spy_ret"]

    # =========================
    # HEADER
    # =========================
    print("=" * 80)
    print(" REGIME-BASED PERFORMANCE ANALYSIS: STRATEGY vs SPY (v2)")
    print("=" * 80)
    print(f"\nData range: {df['date'].min().strftime('%Y-%m-%d')} to {df['date'].max().strftime('%Y-%m-%d')}")
    print(f"Total aligned days: {len(df):,} ({len(df)/TRADING_DAYS:.2f} years)")
    
    n_bull = (df["market_regime"] == 1).sum()
    n_bear = (df["market_regime"] == 0).sum()
    print(f"Bull market days (SPY > 200 DMA): {n_bull:,} ({n_bull/len(df)*100:.1f}%)")
    print(f"Bear market days (SPY < 200 DMA): {n_bear:,} ({n_bear/len(df)*100:.1f}%)")
    
    if LAG_REGIME:
        print("\n⚠️  REGIME LAGGED BY 1 DAY (look-ahead bias prevention enabled)")

    # =========================
    # OVERALL STATISTICS (Full Series)
    # =========================
    print("\n" + "=" * 80)
    print(" SECTION 1: OVERALL STATISTICS (Full Contiguous Series)")
    print("=" * 80)
    
    # These three arrays are reused by the later sections of the script.
    strat_ret = df["strat_ret"].to_numpy()
    spy_ret = df["spy_ret"].to_numpy()
    excess_ret = df["excess_ret"].to_numpy()
    
    print(f"\n  {'Metric':<30} {'Strategy':>15} {'SPY':>15} {'Diff':>15}")
    print(f"  {'-'*30} {'-'*15} {'-'*15} {'-'*15}")
    # For return and volatility the "Diff" column is the statistic of the
    # excess-return stream, not the difference of the two columns.
    print(f"  {'Ann. Return':<30} {ann_return(strat_ret)*100:>14.2f}% {ann_return(spy_ret)*100:>14.2f}% {ann_return(excess_ret)*100:>14.2f}%")
    print(f"  {'Ann. Volatility':<30} {ann_volatility(strat_ret)*100:>14.2f}% {ann_volatility(spy_ret)*100:>14.2f}% {ann_volatility(excess_ret)*100:>14.2f}%")
    print(f"  {'Sharpe Ratio':<30} {ann_sharpe(strat_ret):>15.3f} {ann_sharpe(spy_ret):>15.3f} {ann_sharpe(strat_ret)-ann_sharpe(spy_ret):>15.3f}")
    print(f"  {'Max Drawdown':<30} {max_drawdown(strat_ret)*100:>14.2f}% {max_drawdown(spy_ret)*100:>14.2f}% {(max_drawdown(strat_ret)-max_drawdown(spy_ret))*100:>14.2f}%")
    print(f"  {'Win Rate':<30} {win_rate(strat_ret)*100:>14.2f}% {win_rate(spy_ret)*100:>14.2f}% {(win_rate(strat_ret)-win_rate(spy_ret))*100:>14.2f}%")
    print(f"  ")
    print(f"  {'Information Ratio':<30} {ann_sharpe(excess_ret):>15.3f}")
    print(f"  {'Ann. Excess Return (correct)':<30} {ann_return(excess_ret)*100:>14.2f}%")

    # =========================
    # CONTIGUOUS SEGMENT ANALYSIS
    # =========================
    print("\n" + "=" * 80)
    print(" SECTION 2: CONTIGUOUS SEGMENT ANALYSIS")
    print("=" * 80)
    print("\n  This analyzes each continuous bull/bear period separately,")
    print("  avoiding the pitfalls of stitching non-contiguous days together.")
    
    segments = find_contiguous_segments(df)
    
    bull_segments = []
    bear_segments = []
    
    # Any regime value other than 1 is treated as bear here.
    for start_idx, end_idx, regime_val in segments:
        stats = compute_segment_stats(df, start_idx, end_idx)
        if regime_val == 1:
            bull_segments.append(stats)
        else:
            bear_segments.append(stats)
    
    print("\n" + "-" * 80)
    print(" BEAR MARKET SEGMENTS (SPY < 200 DMA)")
    print("-" * 80)
    print_segment_summary(bear_segments, "bear")
    
    print("\n" + "-" * 80)
    print(" BULL MARKET SEGMENTS (SPY > 200 DMA)")
    print("-" * 80)
    # Only show summary for bull (too many segments usually)
    if bull_segments:
        print(f"\n  Found {len(bull_segments)} contiguous bull segment(s).")
        excess_returns = [s.excess_total_return for s in bull_segments]
        strat_dds = [s.strat_max_dd for s in bull_segments]
        print(f"\n  Summary across bull segments:")
        print(f"    Excess return: mean={np.mean(excess_returns)*100:.2f}%, median={np.median(excess_returns)*100:.2f}%")
        print(f"    Strategy beat SPY in {sum(1 for e in excess_returns if e > 0)}/{len(bull_segments)} segments ({sum(1 for e in excess_returns if e > 0)/len(bull_segments)*100:.1f}%)")
        print(f"    Strategy max DD: worst={min(strat_dds)*100:.2f}%, median={np.median(strat_dds)*100:.2f}%")

    # =========================
    # CONDITIONAL STATISTICS (for reference)
    # =========================
    print("\n" + "=" * 80)
    print(" SECTION 3: CONDITIONAL STATISTICS (Days Labeled as Bull/Bear)")
    print("=" * 80)
    print("\n  These stats describe behavior conditional on regime, NOT tradable returns.")
    
    print_conditional_stats(df, "Bull", 1)
    print_conditional_stats(df, "Bear", 0)

    # =========================
    # BOOTSTRAP ANALYSIS (Correct Method)
    # =========================
    print("\n" + "=" * 80)
    print(" SECTION 4: BOOTSTRAP STATISTICAL SIGNIFICANCE")
    print("=" * 80)
    print("\n  Bootstrap performed on FULL contiguous series, then regime stats computed.")
    print("  This preserves the time-series dependence structure correctly.")
    
    print("\n  Running bootstrap (this may take a moment)...")
    boot_results = bootstrap_regime_stats(df, n_boot=N_BOOT, block_len=BLOCK_LEN, seed=SEED)
    
    print_bootstrap_results(boot_results, "overall", "Overall")
    print_bootstrap_results(boot_results, "bull", "Bull Market")
    print_bootstrap_results(boot_results, "bear", "Bear Market")

    # =========================
    # KEY FINDINGS SUMMARY
    # =========================
    print("\n" + "=" * 80)
    print(" SECTION 5: KEY FINDINGS SUMMARY")
    print("=" * 80)
    
    # For each regime: point-estimate IR from the actual data, 95% percentile
    # interval from the bootstrap replicates, and a flag for whether that
    # interval excludes zero.

    # Overall
    overall_ir = ann_sharpe(excess_ret)
    overall_ci = np.percentile(boot_results["overall_ir"][~np.isnan(boot_results["overall_ir"])], [2.5, 97.5])
    overall_sig = "YES" if (overall_ci[0] > 0 or overall_ci[1] < 0) else "NO"
    
    # Bull
    bull_mask = df["market_regime"] == 1
    bull_ir = ann_sharpe(excess_ret[bull_mask])
    bull_ci = np.percentile(boot_results["bull_ir"][~np.isnan(boot_results["bull_ir"])], [2.5, 97.5])
    bull_sig = "YES" if (bull_ci[0] > 0 or bull_ci[1] < 0) else "NO"
    
    # Bear
    bear_mask = df["market_regime"] == 0
    bear_ir = ann_sharpe(excess_ret[bear_mask])
    bear_ci = np.percentile(boot_results["bear_ir"][~np.isnan(boot_results["bear_ir"])], [2.5, 97.5])
    bear_sig = "YES" if (bear_ci[0] > 0 or bear_ci[1] < 0) else "NO"
    
    print(f"\n  {'Regime':<15} {'IR':<10} {'95% CI':<25} {'Significant?':<15}")
    print(f"  {'-'*15} {'-'*10} {'-'*25} {'-'*15}")
    print(f"  {'Overall':<15} {overall_ir:<10.3f} [{overall_ci[0]:.3f}, {overall_ci[1]:.3f}]{'':>10} {overall_sig:<15}")
    print(f"  {'Bull':<15} {bull_ir:<10.3f} [{bull_ci[0]:.3f}, {bull_ci[1]:.3f}]{'':>10} {bull_sig:<15}")
    print(f"  {'Bear':<15} {bear_ir:<10.3f} [{bear_ci[0]:.3f}, {bear_ci[1]:.3f}]{'':>10} {bear_sig:<15}")
    
    # Segment-based insights
    if bear_segments:
        bear_wins = sum(1 for s in bear_segments if s.excess_total_return > 0)
        bear_total = len(bear_segments)
        print(f"\n  Bear market segment win rate: {bear_wins}/{bear_total} ({bear_wins/bear_total*100:.1f}%)")
        
        # The two minima are taken independently and may come from
        # different segments.
        worst_bear_strat = min(s.strat_total_return for s in bear_segments)
        worst_bear_spy = min(s.spy_total_return for s in bear_segments)
        print(f"  Worst bear segment: Strategy {worst_bear_strat*100:.1f}% vs SPY {worst_bear_spy*100:.1f}%")
    
    print("\n" + "-" * 80)
    print(" INTERPRETATION NOTES:")
    print("-" * 80)
    print("""
  1. OVERALL IR: Tests whether strategy has risk-adjusted alpha vs SPY across
     the full time period. This is the most reliable test.
  
  2. REGIME IRs: These test conditional performance, but interpretation requires
     care - the bootstrap resamples the full series, so regime proportions may
     vary across samples.
  
  3. SEGMENT ANALYSIS: Shows actual performance in each contiguous bear/bull
     market. This is the most intuitive way to see "what happened in 2008" etc.
  
  4. If you trade based on the regime signal, enable LAG_REGIME=True to avoid
     look-ahead bias in this analysis.
""")
    
    print("\nAnalysis complete.")
    
# =========================
# SECTION 6: AUTOCORRELATION ANALYSIS
# =========================
# NOTE(review): this section runs at module level, outside the
# `if __name__ == "__main__":` guard above, yet it reads strat_ret, spy_ret
# and excess_ret, which are only assigned inside that guard. It therefore
# only works when the file is executed as a script/notebook cell.
print("\n" + "=" * 80)
print(" SECTION 6: AUTOCORRELATION ANALYSIS")
print("=" * 80)
print("\n  Testing whether returns exhibit serial dependence (momentum or mean-reversion).")

# scipy_stats is used by runs_test() further down for the normal distribution.
from scipy import stats as scipy_stats

# --- 6.1 Autocorrelation at different lags ---
max_lags = 10
# Sample autocorrelation at lags 1..max_lags for each return series.
strat_autocorrs = [pd.Series(strat_ret).autocorr(lag=i) for i in range(1, max_lags + 1)]
spy_autocorrs = [pd.Series(spy_ret).autocorr(lag=i) for i in range(1, max_lags + 1)]
excess_autocorrs = [pd.Series(excess_ret).autocorr(lag=i) for i in range(1, max_lags + 1)]

# Significance threshold (approximate 95% CI for white noise)
sig_threshold = 2 / np.sqrt(len(strat_ret))

print(f"\n  Autocorrelation by Lag (significance threshold: ±{sig_threshold:.4f})")
print(f"  {'-'*70}")
print(f"  {'Lag':<6} {'Strategy':>12} {'SPY':>12} {'Excess':>12} {'Significant?':>18}")
print(f"  {'-'*70}")

# One row per lag; the last column lists which series exceed the threshold.
for i, (s_ac, p_ac, e_ac) in enumerate(zip(strat_autocorrs, spy_autocorrs, excess_autocorrs), 1):
    sig_flags = []
    if abs(s_ac) > sig_threshold:
        sig_flags.append("Strat")
    if abs(p_ac) > sig_threshold:
        sig_flags.append("SPY")
    if abs(e_ac) > sig_threshold:
        sig_flags.append("Excess")
    sig_str = ", ".join(sig_flags) if sig_flags else "-"
    print(f"  {i:<6} {s_ac:>+12.4f} {p_ac:>+12.4f} {e_ac:>+12.4f} {sig_str:>18}")

# --- 6.2 Ljung-Box Test ---
print(f"\n  Ljung-Box Test (H0: no autocorrelation up to lag k)")
print(f"  {'-'*70}")

# statsmodels is optional: the test is skipped with a message if it is missing.
try:
    from statsmodels.stats.diagnostic import acorr_ljungbox
    
    lb_lags = [5, 10, 20]
    
    print(f"  {'Series':<15} {'Lag':<8} {'LB Stat':<12} {'p-value':<12} {'Significant?':<15}")
    print(f"  {'-'*70}")
    
    for series_name, series_data in [("Strategy", strat_ret), ("SPY", spy_ret), ("Excess", excess_ret)]:
        # return_df=True gives a DataFrame indexed by lag with
        # "lb_stat" and "lb_pvalue" columns, read below.
        lb_results = acorr_ljungbox(series_data, lags=lb_lags, return_df=True)
        for lag in lb_lags:
            lb_stat = lb_results.loc[lag, "lb_stat"]
            lb_pval = lb_results.loc[lag, "lb_pvalue"]
            sig = "YES **" if lb_pval < 0.05 else "No"
            print(f"  {series_name:<15} {lag:<8} {lb_stat:<12.2f} {lb_pval:<12.4f} {sig:<15}")
        print(f"  {'-'*70}")
        
except ImportError:
    print("  [statsmodels not available - skipping Ljung-Box test]")

# --- 6.3 Durbin-Watson Statistic ---
print(f"\n  Durbin-Watson Statistic")
print(f"  {'-'*70}")
print(f"  Interpretation: ~2.0 = no autocorrelation, <2.0 = positive, >2.0 = negative")
print(f"  ")

# statsmodels is optional here as well.
try:
    from statsmodels.stats.stattools import durbin_watson
    
    # The statistic is computed directly on the raw return series.
    dw_strat = durbin_watson(strat_ret)
    dw_spy = durbin_watson(spy_ret)
    dw_excess = durbin_watson(excess_ret)
    
    def interpret_dw(dw):
        """Map a Durbin-Watson value to a label using cut-offs at 1.5 and 2.5."""
        if dw < 1.5:
            return "Positive autocorr"
        elif dw > 2.5:
            return "Negative autocorr"
        else:
            return "No strong autocorr"
    
    print(f"  {'Series':<15} {'DW Stat':<12} {'Interpretation':<25}")
    print(f"  {'-'*55}")
    print(f"  {'Strategy':<15} {dw_strat:<12.4f} {interpret_dw(dw_strat):<25}")
    print(f"  {'SPY':<15} {dw_spy:<12.4f} {interpret_dw(dw_spy):<25}")
    print(f"  {'Excess':<15} {dw_excess:<12.4f} {interpret_dw(dw_excess):<25}")
    
except ImportError:
    print("  [statsmodels not available - skipping Durbin-Watson test]")

# --- 6.4 Runs Test (Non-parametric) ---
print(f"\n  Runs Test (H0: sequence of +/- returns is random)")
print(f"  {'-'*70}")

def runs_test(series):
    """Wald-Wolfowitz runs test on the signs of a return series.

    Zero and NaN observations are discarded; the remaining +/- signs are
    tested for randomness using the normal approximation to the number of
    runs.

    Args:
        series: Array-like of returns (list, NumPy array or pandas Series;
            a Series is read positionally, its index is ignored).

    Returns:
        Tuple ``(runs, expected_runs, z_stat, p_value)``. All four are NaN
        when fewer than 10 non-zero observations remain; ``z_stat`` and
        ``p_value`` are NaN when the variance under the null is not
        positive (for example when every observation has the same sign).
    """
    # Coerce to a plain float array so indexing is positional for any input
    # type, and drop NaNs so they are not counted in n without a sign.
    values = np.asarray(series, dtype=float)
    signs = np.sign(values[~np.isnan(values)])
    signs = signs[signs != 0]  # Remove zeros

    if len(signs) < 10:
        return np.nan, np.nan, np.nan, np.nan

    n_pos = int((signs > 0).sum())
    n_neg = int((signs < 0).sum())
    n = len(signs)

    # A new run starts wherever the sign differs from the previous one.
    runs = 1 + int(np.count_nonzero(signs[1:] != signs[:-1]))

    # Expected runs and std under null (use floats to avoid integer overflow)
    n_pos_f = float(n_pos)
    n_neg_f = float(n_neg)
    n_f = float(n)

    expected_runs = (2.0 * n_pos_f * n_neg_f) / n_f + 1.0

    numerator = 2.0 * n_pos_f * n_neg_f * (2.0 * n_pos_f * n_neg_f - n_f)
    denominator = (n_f ** 2) * (n_f - 1.0)

    if denominator <= 0 or numerator < 0:
        return runs, expected_runs, np.nan, np.nan

    var_runs = numerator / denominator

    if var_runs <= 0:
        return runs, expected_runs, np.nan, np.nan

    std_runs = np.sqrt(var_runs)
    z_stat = (float(runs) - expected_runs) / std_runs
    # The survival function avoids the cancellation in 1 - cdf for large |z|.
    p_value = 2.0 * scipy_stats.norm.sf(abs(z_stat))

    return runs, expected_runs, z_stat, p_value

# Runs-test table: one row per series. NOTE(review): like the rest of
# Section 6 this runs at module level and relies on strat_ret, spy_ret and
# excess_ret having been assigned inside the `__main__` guard.
print(f"  {'Series':<15} {'Actual Runs':<12} {'Expected':<12} {'Z-stat':<12} {'p-value':<12} {'Random?':<15}")
print(f"  {'-'*80}")

for series_name, series_data in [("Strategy", strat_ret), ("SPY", spy_ret), ("Excess", excess_ret)]:
    runs, expected, z, p = runs_test(series_data)
    # p is NaN when runs_test could not compute a test statistic.
    if np.isnan(p):
        random_str = "N/A"
    elif p < 0.05:
        random_str = "NO **"
    else:
        random_str = "Yes"
    
    print(f"  {series_name:<15} {runs:<12.0f} {expected:<12.1f} {z:<12.2f} {p:<12.4f} {random_str:<15}")

# --- 6.5 Autocorrelation Summary ---
print(f"\n  {'-'*70}")
print(f"  AUTOCORRELATION SUMMARY")
print(f"  {'-'*70}")

# Lag-1 autocorrelations computed in 6.1 (first element of each list).
lag1_strat = strat_autocorrs[0]
lag1_excess = excess_autocorrs[0]

# Strategy interpretation
if abs(lag1_strat) < sig_threshold:
    strat_interp = "Strategy returns show NO significant lag-1 autocorrelation."
elif lag1_strat > 0:
    strat_interp = f"Strategy returns show POSITIVE lag-1 autocorrelation ({lag1_strat:+.4f}) - momentum/trending."
else:
    strat_interp = f"Strategy returns show NEGATIVE lag-1 autocorrelation ({lag1_strat:+.4f}) - mean reversion."

# Excess interpretation  
if abs(lag1_excess) < sig_threshold:
    excess_interp = "Excess returns show NO significant lag-1 autocorrelation."
elif lag1_excess > 0:
    excess_interp = f"Excess returns show POSITIVE lag-1 autocorrelation ({lag1_excess:+.4f}) - alpha persists."
else:
    excess_interp = f"Excess returns show NEGATIVE lag-1 autocorrelation ({lag1_excess:+.4f}) - alpha mean-reverts."

print(f"\n  • {strat_interp}")
print(f"  • {excess_interp}")

# Trading implications
# Branches on the sign of the strategy's lag-1 autocorrelation relative to
# the same white-noise threshold used in 6.1.
print(f"\n  Trading Implications:")
if lag1_strat > sig_threshold:
    print(f"    - Positive autocorrelation suggests winners tend to follow winners.")
    print(f"    - Consider holding winning positions longer or adding to winners.")
elif lag1_strat < -sig_threshold:
    print(f"    - Negative autocorrelation suggests mean reversion behavior.")
    print(f"    - Consider taking profits more quickly after strong moves.")
else:
    print(f"    - No significant autocorrelation - each day appears independent.")
    print(f"    - Focus on position sizing rather than timing adjustments.")

NOTE: Regime lagged by 1 day to avoid look-ahead bias.

 REGIME-BASED PERFORMANCE ANALYSIS: STRATEGY vs SPY (v2)

Data range: 1999-01-04 to 2025-12-30
Total aligned days: 6,790 (26.94 years)
Bull market days (SPY > 200 DMA): 5,057 (74.5%)
Bear market days (SPY < 200 DMA): 1,733 (25.5%)

⚠️  REGIME LAGGED BY 1 DAY (look-ahead bias prevention enabled)

 SECTION 1: OVERALL STATISTICS (Full Contiguous Series)

  Metric                                Strategy             SPY            Diff
  ------------------------------ --------------- --------------- ---------------
  Ann. Return                             16.39%           8.47%           4.67%
  Ann. Volatility                         12.96%          19.35%          17.06%
  Sharpe Ratio                             1.236           0.517           0.719
  Max Drawdown                           -19.72%         -55.19%          35.47%
  Win Rate                                47.97%          54.37%          -6.41%
  
  Information Ratio 

In [6]:
"""
Regime Confirmation Analysis: Testing Different Confirmation Periods

This script tests how the strategy performs under different regime confirmation
thresholds. Instead of flipping regime on each day SPY crosses the 200 DMA,
we require N consecutive days above/below before confirming a regime change.

This reduces whipsaw signals and creates cleaner bull/bear classifications.
"""

import pandas as pd
import numpy as np
from typing import Dict, List, Tuple
from dataclasses import dataclass

# Number of trading days per year; used to annualize daily statistics.
TRADING_DAYS = 252

# =========================
# CONFIG
# =========================
# Input parquet files (paths relative to the working directory).
EQUITY_FILE = "./13-trading_output_regression_insp500_spyfilter_cap15/13-equity_curve_regression_insp500_spyfilter_cap15.parquet"
SPY_PARQUET = "./8-SPY_200DMA_market_regime/8-SPY_200DMA_regime.parquet"

# Confirmation periods to test
# (consecutive days required before a regime change is accepted)
CONFIRMATION_DAYS = [1, 3, 5, 10, 15, 21]

N_BOOT = 2000  # Reduced for speed since we're testing multiple thresholds
BLOCK_LEN = 21  # Block length for the bootstrap
SEED = 7  # Random seed for the bootstrap


# =========================
# HELPERS
# =========================
def ann_sharpe(r: np.ndarray) -> float:
    """Return the annualized Sharpe ratio of daily returns.

    NaN observations are discarded. Fewer than two usable observations
    gives NaN; a zero or undefined sample standard deviation gives 0.0.
    """
    observations = np.asarray(r, float)
    observations = observations[~np.isnan(observations)]

    if observations.size < 2:
        return np.nan

    spread = observations.std(ddof=1)
    if np.isnan(spread) or spread <= 0:
        return 0.0

    average = observations.mean()
    return np.sqrt(TRADING_DAYS) * average / spread


def ann_return(r: np.ndarray) -> float:
    """Return the geometric annualized return implied by daily returns.

    NaN observations are discarded. The compounded growth is raised to
    ``1 / years`` with ``years = n_obs / TRADING_DAYS``. Returns NaN when
    fewer than two observations remain.
    """
    observations = np.asarray(r, float)
    observations = observations[~np.isnan(observations)]
    if observations.size < 2:
        return np.nan

    horizon_years = observations.size / TRADING_DAYS
    if horizon_years <= 0:
        return np.nan

    cumulative = np.prod(1 + observations) - 1
    return (1 + cumulative) ** (1 / horizon_years) - 1


def total_return(r: np.ndarray) -> float:
    """Return the compounded cumulative return of a daily return series.

    NaN entries are ignored; an input with no valid observations yields NaN.
    """
    clean = np.asarray(r, float)
    clean = clean[~np.isnan(clean)]
    if clean.size >= 1:
        return np.prod(1 + clean) - 1
    return np.nan


def max_drawdown(r: np.ndarray) -> float:
    """Return the maximum drawdown implied by a daily return series.

    The wealth curve starts at 1.0 before the first return, and that starting
    value counts as a peak. A loss on the very first day is therefore
    reported as a drawdown, which matters when this is applied to a slice of
    a longer series (e.g. a single regime segment).

    Parameters
    ----------
    r : array-like of daily simple returns; NaN entries are dropped.

    Returns
    -------
    The most negative peak-to-trough decline as a fraction (<= 0), or NaN
    when there are no valid observations.
    """
    r = np.asarray(r, float)
    r = r[~np.isnan(r)]
    if r.size < 1:
        return np.nan
    cum = np.cumprod(1 + r)
    # Floor the running peak at the initial wealth of 1.0 so that a decline
    # from the starting value is measured, not silently ignored.
    running_max = np.maximum(np.maximum.accumulate(cum), 1.0)
    dd = (cum - running_max) / running_max
    return dd.min()


def create_confirmed_regime(raw_regime: np.ndarray, confirm_days: int) -> np.ndarray:
    """
    Create a confirmed regime signal that requires N consecutive days
    above/below the 200 DMA before flipping the regime.

    The confirmed regime starts at the first raw value. A change is accepted
    on the N-th consecutive day on which the raw signal disagrees with the
    currently confirmed regime; days before that keep the old regime (nothing
    is back-filled). Any day on which the raw signal agrees with the confirmed
    regime resets the count.

    Parameters:
    -----------
    raw_regime : array of 0/1 (0 = below 200 DMA, 1 = above 200 DMA)
    confirm_days : number of consecutive days required to confirm regime change

    Returns:
    --------
    confirmed_regime : array of 0/1 with smoothed regime signal. For
        confirm_days <= 1 this is a copy of the input (original dtype);
        otherwise an integer array. Empty input yields an empty array.
    """
    raw = np.asarray(raw_regime)

    if confirm_days <= 1:
        # No smoothing requested: hand back an independent copy.
        return raw.copy()

    n = len(raw)
    confirmed = np.zeros(n, dtype=int)
    if n == 0:
        # Nothing to seed the initial regime from.
        return confirmed

    # Start with the initial regime (use first value)
    current_regime = raw[0]
    # Consecutive days on which the raw signal disagreed with current_regime.
    opposing_days = 0

    for i, observed in enumerate(raw):
        if observed == current_regime:
            # Raw signal agrees with the confirmed regime: any pending
            # flip is cancelled.
            opposing_days = 0
        else:
            opposing_days += 1
            if opposing_days >= confirm_days:
                # Enough consecutive disagreement: confirm the change today.
                current_regime = observed
                opposing_days = 0

        confirmed[i] = current_regime

    return confirmed


def find_contiguous_segments(regime: np.ndarray) -> List[Tuple[int, int, int]]:
    """Split a regime series into runs of identical consecutive values.

    Returns a list of ``(start_index, end_index, regime_value)`` tuples in
    order, with inclusive indices and the value cast to int. An empty input
    yields an empty list.
    """
    labels = np.asarray(regime)
    if labels.size == 0:
        return []

    # Positions where the label differs from the previous one open a new run.
    run_starts = np.flatnonzero(labels[1:] != labels[:-1]) + 1
    first = np.concatenate(([0], run_starts))
    last = np.concatenate((run_starts - 1, [labels.size - 1]))

    return [(int(lo), int(hi), int(labels[lo])) for lo, hi in zip(first, last)]


def compute_regime_metrics(
    df: pd.DataFrame, 
    regime_col: str = "regime"
) -> Dict:
    """Summarize strategy-vs-SPY performance for one regime labelling.

    Reads the ``strat_ret`` and ``spy_ret`` columns of ``df`` together with
    the 0/1 labels in ``regime_col`` (1 = bull, 0 = bear) and returns a dict
    with day and segment counts, the overall information ratio and annualized
    excess return, the same statistics plus Sharpe ratios conditional on each
    regime, and per-segment win rates / mean excess total returns.
    """
    strategy = df["strat_ret"].to_numpy()
    benchmark = df["spy_ret"].to_numpy()
    active = strategy - benchmark
    labels = df[regime_col].to_numpy()

    in_bull = labels == 1
    in_bear = labels == 0

    runs = find_contiguous_segments(labels)

    def _segment_summary(target):
        # Spans of the requested regime, the excess total return earned in
        # each span, and the number of spans where the strategy beat SPY.
        spans = [(lo, hi) for lo, hi, label in runs if label == target]
        gaps = [
            total_return(strategy[lo:hi + 1]) - total_return(benchmark[lo:hi + 1])
            for lo, hi in spans
        ]
        wins = sum(1 for gap in gaps if gap > 0)
        return spans, gaps, wins

    def _conditional(stat, series, mask):
        # Statistic on the masked days; NaN when fewer than two days qualify.
        return stat(series[mask]) if mask.sum() > 1 else np.nan

    bear_spans, bear_gaps, bear_wins = _segment_summary(0)
    bull_spans, bull_gaps, bull_wins = _segment_summary(1)

    return {
        # Counts
        "n_bull_days": in_bull.sum(),
        "n_bear_days": in_bear.sum(),
        "n_bull_segments": len(bull_spans),
        "n_bear_segments": len(bear_spans),

        # Overall
        "overall_ir": ann_sharpe(active),
        "overall_excess_return": ann_return(active),

        # Bull conditional
        "bull_ir": _conditional(ann_sharpe, active, in_bull),
        "bull_excess_return": _conditional(ann_return, active, in_bull),
        "bull_strat_sharpe": _conditional(ann_sharpe, strategy, in_bull),
        "bull_spy_sharpe": _conditional(ann_sharpe, benchmark, in_bull),

        # Bear conditional
        "bear_ir": _conditional(ann_sharpe, active, in_bear),
        "bear_excess_return": _conditional(ann_return, active, in_bear),
        "bear_strat_sharpe": _conditional(ann_sharpe, strategy, in_bear),
        "bear_spy_sharpe": _conditional(ann_sharpe, benchmark, in_bear),

        # Segment-level
        "bear_segment_win_rate": bear_wins / len(bear_spans) if bear_spans else np.nan,
        "bull_segment_win_rate": bull_wins / len(bull_spans) if bull_spans else np.nan,
        "bear_segment_excess_mean": np.mean(bear_gaps) if bear_gaps else np.nan,
        "bull_segment_excess_mean": np.mean(bull_gaps) if bull_gaps else np.nan,
    }


def block_bootstrap_idx(n: int, block_len: int, rng: np.random.Generator) -> np.ndarray:
    """Draw ``n`` resampling indices using fixed-length blocks.

    Block start positions are drawn uniformly from ``[0, n - block_len]`` and
    each block contributes ``block_len`` consecutive indices; the result is
    truncated to exactly ``n`` entries. When ``block_len`` is <= 1 or larger
    than ``n`` the indices are drawn independently instead. A non-positive
    ``n`` yields an empty integer array.
    """
    if n <= 0:
        return np.array([], dtype=int)
    if block_len <= 1 or block_len > n:
        return rng.integers(0, n, size=n, dtype=int)

    last_start = n - block_len
    n_blocks = -(-n // block_len)  # ceil(n / block_len)
    # One scalar draw per block, in order, so the RNG stream is consumed
    # exactly as a block-by-block loop would consume it.
    starts = np.array(
        [int(rng.integers(0, last_start + 1)) for _ in range(n_blocks)],
        dtype=int,
    )
    offsets = np.arange(block_len, dtype=int)
    return (starts[:, None] + offsets).ravel()[:n]


def bootstrap_ir(
    strat_ret: np.ndarray,
    spy_ret: np.ndarray,
    regime: np.ndarray,
    n_boot: int,
    block_len: int,
    seed: int
) -> Dict[str, np.ndarray]:
    """Block-bootstrap the information ratio overall and within each regime.

    Each replication resamples strategy returns, SPY returns and regime
    labels with the same block indices, then computes the annualized Sharpe
    ratio of the excess return on all resampled days, on bull days
    (label 1) and on bear days (label 0). A regime with fewer than two
    resampled days gets NaN for that replication.

    Returns a dict with keys ``overall_ir``, ``bull_ir`` and ``bear_ir``,
    each an array of length ``n_boot``.
    """
    rng = np.random.default_rng(seed)
    n_obs = len(strat_ret)

    overall = np.empty(n_boot, dtype=float)
    bull = np.empty(n_boot, dtype=float)
    bear = np.empty(n_boot, dtype=float)

    for draw in range(n_boot):
        sample = block_bootstrap_idx(n_obs, block_len, rng)
        active = strat_ret[sample] - spy_ret[sample]
        labels = regime[sample]

        overall[draw] = ann_sharpe(active)

        for target, sink in ((1, bull), (0, bear)):
            subset = active[labels == target]
            sink[draw] = ann_sharpe(subset) if subset.size > 1 else np.nan

    return {
        "overall_ir": overall,
        "bull_ir": bull,
        "bear_ir": bear,
    }


# =========================
# MAIN ANALYSIS
# =========================
if __name__ == "__main__":
    # Script entry point: load the strategy equity curve and SPY regime data,
    # evaluate each confirmation period, bootstrap the key thresholds and
    # print a per-segment breakdown for the 5-day confirmed bear regime.
    # Names are kept at top level (no main()) so a notebook session can still
    # inspect df / all_results afterwards.

    # --- Load data ---
    eq = pd.read_parquet(EQUITY_FILE).copy()
    if "date" not in eq.columns and eq.index.name == "date":
        eq = eq.reset_index()
    elif "date" not in eq.columns and "Date" in eq.columns:
        eq = eq.rename(columns={"Date": "date"})
    eq["date"] = pd.to_datetime(eq["date"])
    eq = eq.sort_values("date").drop_duplicates("date")
    # First row has no prior value; treat its return as zero.
    eq["strat_ret"] = eq["portfolio_value"].pct_change().fillna(0.0)

    spy = pd.read_parquet(SPY_PARQUET).copy()
    if "date" in spy.columns or "Date" in spy.columns:
        # Date is already a column: resetting the index here would add an
        # "index" column that the rename below turns into a second "date".
        spy = spy.rename(columns={"Date": "date"})
    else:
        # Date lives in the index (named "Date"/"date", or unnamed -> "index").
        spy = spy.reset_index().rename(columns={"Date": "date", "index": "date"})
    spy["date"] = pd.to_datetime(spy["date"])
    spy = spy.sort_values("date")
    spy["spy_ret"] = spy["spy_close"].pct_change().fillna(0.0)

    # Align
    df = eq.merge(spy[["date", "spy_ret", "market_regime"]], on="date", how="inner").dropna()
    df = df.sort_values("date").reset_index(drop=True)

    raw_regime = df["market_regime"].to_numpy()
    strat_ret = df["strat_ret"].to_numpy()
    spy_ret = df["spy_ret"].to_numpy()

    print("=" * 90)
    print(" REGIME CONFIRMATION ANALYSIS: TESTING DIFFERENT CONFIRMATION PERIODS")
    print("=" * 90)
    print(f"\nData range: {df['date'].min().strftime('%Y-%m-%d')} to {df['date'].max().strftime('%Y-%m-%d')}")
    print(f"Total days: {len(df):,}")
    print(f"\nTesting confirmation periods: {CONFIRMATION_DAYS}")
    print("\nA confirmation period of N means the SPY must stay above/below the 200 DMA")
    print("for N consecutive days before the regime officially changes.")

    # =========================
    # SUMMARY TABLE
    # =========================
    print("\n" + "=" * 90)
    print(" SUMMARY: REGIME METRICS BY CONFIRMATION PERIOD")
    print("=" * 90)

    header = (
        f"{'Confirm':<8} "
        f"{'Bull':<6} {'Bear':<6} "
        f"{'Bull':<6} {'Bear':<6} "
        f"{'Overall':<8} "
        f"{'Bull':<8} {'Bear':<8} "
        f"{'Bear Seg':<10} {'Bull Seg':<10}"
    )
    subheader = (
        f"{'Days':<8} "
        f"{'Days':<6} {'Days':<6} "
        f"{'Segs':<6} {'Segs':<6} "
        f"{'IR':<8} "
        f"{'IR':<8} {'IR':<8} "
        f"{'Win %':<10} {'Win %':<10}"
    )

    print(f"\n{header}")
    print(f"{subheader}")
    print("-" * 90)

    all_results = {}

    for confirm_days in CONFIRMATION_DAYS:
        # Create confirmed regime
        confirmed_regime = create_confirmed_regime(raw_regime, confirm_days)
        df[f"regime_{confirm_days}d"] = confirmed_regime

        # Compute metrics
        metrics = compute_regime_metrics(df, f"regime_{confirm_days}d")
        all_results[confirm_days] = metrics

        # Build the percentage text first and pad afterwards; padding the
        # number and then appending "%" leaves a gap ("82.8     %").
        bear_win_pct = f"{metrics['bear_segment_win_rate'] * 100:.1f}%"
        bull_win_pct = f"{metrics['bull_segment_win_rate'] * 100:.1f}%"

        print(
            f"{confirm_days:<8} "
            f"{metrics['n_bull_days']:<6} {metrics['n_bear_days']:<6} "
            f"{metrics['n_bull_segments']:<6} {metrics['n_bear_segments']:<6} "
            f"{metrics['overall_ir']:<8.3f} "
            f"{metrics['bull_ir']:<8.3f} {metrics['bear_ir']:<8.3f} "
            f"{bear_win_pct:<10} {bull_win_pct:<10}"
        )

    # =========================
    # DETAILED COMPARISON
    # =========================
    print("\n" + "=" * 90)
    print(" DETAILED COMPARISON: 1-DAY vs 5-DAY CONFIRMATION")
    print("=" * 90)

    # Requires 1 and 5 to be present in CONFIRMATION_DAYS.
    for confirm_days in [1, 5]:
        metrics = all_results[confirm_days]
        print(f"\n--- {confirm_days}-Day Confirmation ---")
        print(f"  Regime segments: {metrics['n_bull_segments']} bull, {metrics['n_bear_segments']} bear")
        print(f"  Days in bear: {metrics['n_bear_days']:,} ({metrics['n_bear_days']/len(df)*100:.1f}%)")
        print(f"  ")
        print(f"  Overall IR: {metrics['overall_ir']:.3f}")
        print(f"  Bull IR:    {metrics['bull_ir']:.3f}")
        print(f"  Bear IR:    {metrics['bear_ir']:.3f}")
        print(f"  ")
        print(f"  Bear segment win rate: {metrics['bear_segment_win_rate']*100:.1f}%")
        print(f"  Bull segment win rate: {metrics['bull_segment_win_rate']*100:.1f}%")
        print(f"  Bear segment avg excess: {metrics['bear_segment_excess_mean']*100:.2f}%")
        print(f"  Bull segment avg excess: {metrics['bull_segment_excess_mean']*100:.2f}%")

    # =========================
    # BOOTSTRAP FOR KEY THRESHOLDS
    # =========================
    print("\n" + "=" * 90)
    print(" BOOTSTRAP SIGNIFICANCE TESTS (1-Day vs 5-Day vs 10-Day)")
    print("=" * 90)

    key_thresholds = [1, 5, 10]

    for confirm_days in key_thresholds:
        print(f"\n--- {confirm_days}-Day Confirmation ---")

        confirmed_regime = create_confirmed_regime(raw_regime, confirm_days)

        boot_results = bootstrap_ir(
            strat_ret,
            spy_ret,
            confirmed_regime,
            N_BOOT,
            BLOCK_LEN,
            SEED
        )

        for regime_name, key in [("Overall", "overall_ir"), ("Bull", "bull_ir"), ("Bear", "bear_ir")]:
            dist = boot_results[key]
            dist = dist[~np.isnan(dist)]

            if len(dist) < 100:
                print(f"  {regime_name}: insufficient data")
                continue

            ci = np.percentile(dist, [2.5, 97.5])
            # Two-sided p-value: twice the smaller tail mass on either side of zero.
            p_two_sided = 2 * min(np.mean(dist <= 0), np.mean(dist >= 0))
            # Flag intervals that exclude zero.
            sig = "**" if (ci[0] > 0 or ci[1] < 0) else ""

            print(f"  {regime_name:<8} IR: {np.median(dist):.3f}  95% CI: [{ci[0]:.3f}, {ci[1]:.3f}]  p={p_two_sided:.4f} {sig}")

    # =========================
    # BEAR MARKET SEGMENT DETAILS (5-Day Confirmation)
    # =========================
    print("\n" + "=" * 90)
    print(" BEAR MARKET SEGMENTS WITH 5-DAY CONFIRMATION")
    print("=" * 90)

    confirmed_5d = create_confirmed_regime(raw_regime, 5)
    df["regime_5d"] = confirmed_5d

    segments = find_contiguous_segments(confirmed_5d)
    bear_segments = [(s, e) for s, e, r in segments if r == 0]

    # Baseline for comparison: bear segments in the unconfirmed signal, which
    # is what a 1-day confirmation uses. Computed rather than hard-coded so
    # the text stays correct when the data changes.
    n_bear_raw = sum(1 for _, _, label in find_contiguous_segments(raw_regime) if label == 0)

    print(f"\n  Found {len(bear_segments)} bear market segments (vs {n_bear_raw} with 1-day confirmation)")
    print(f"\n  {'#':<4} {'Start':<12} {'End':<12} {'Days':<6} {'Strat Tot':<12} {'SPY Tot':<12} {'Excess':<12}")
    print(f"  {'-'*4} {'-'*12} {'-'*12} {'-'*6} {'-'*12} {'-'*12} {'-'*12}")

    # Single pass: print each segment and accumulate the summary statistics.
    bear_excess = []
    bear_wins = 0
    for i, (start, end) in enumerate(bear_segments, 1):
        n_days = end - start + 1
        strat_tot = total_return(strat_ret[start:end+1])
        spy_tot = total_return(spy_ret[start:end+1])
        excess = strat_tot - spy_tot
        bear_excess.append(excess)
        if excess > 0:
            bear_wins += 1

        start_date = df.iloc[start]["date"].strftime("%Y-%m-%d")
        end_date = df.iloc[end]["date"].strftime("%Y-%m-%d")

        print(
            f"  {i:<4} {start_date:<12} {end_date:<12} {n_days:<6} "
            f"{strat_tot*100:>10.2f}% {spy_tot*100:>10.2f}% {excess*100:>10.2f}%"
        )

    # Summary
    print(f"\n  Summary:")
    if bear_segments:
        print(f"    Strategy beat SPY in {bear_wins}/{len(bear_segments)} bear segments ({bear_wins/len(bear_segments)*100:.1f}%)")
        print(f"    Mean excess return: {np.mean(bear_excess)*100:.2f}%")
        print(f"    Median excess return: {np.median(bear_excess)*100:.2f}%")
    else:
        # Avoid dividing by zero / averaging an empty list.
        print("    No bear segments found with 5-day confirmation.")

    # =========================
    # FINAL RECOMMENDATIONS
    # =========================
    print("\n" + "=" * 90)
    print(" RECOMMENDATIONS")
    print("=" * 90)
    print(f"""
  Based on the analysis:
  
  1. NOISE REDUCTION: A 5-day confirmation reduces bear segments from {n_bear_raw} to {len(bear_segments)},
     filtering out the 1-3 day noise around the 200 DMA crossovers.
  
  2. STATISTICAL POWER: The IR estimates become more stable with fewer, longer
     segments. However, significance levels may change.
  
  3. TRADE-OFF: Longer confirmation periods:
     - PRO: Fewer whipsaw signals, cleaner regime classification
     - CON: Delayed regime detection (you're 5+ days late to recognize the shift)
     - CON: Fewer bear days to analyze, potentially less statistical power
  
  4. PRACTICAL USE: If you're using this regime signal for trading decisions,
     a 5-10 day confirmation is reasonable. If it's just for performance
     attribution, 1-day is fine (with the caveat about choppy periods).
""")

    print("\nAnalysis complete.")
    


 REGIME CONFIRMATION ANALYSIS: TESTING DIFFERENT CONFIRMATION PERIODS

Data range: 1999-01-04 to 2025-12-30
Total days: 6,790

Testing confirmation periods: [1, 3, 5, 10, 15, 21]

A confirmation period of N means the SPY must stay above/below the 200 DMA
for N consecutive days before the regime officially changes.

 SUMMARY: REGIME METRICS BY CONFIRMATION PERIOD

Confirm  Bull   Bear   Bull   Bear   Overall  Bull     Bear     Bear Seg   Bull Seg  
Days     Days   Days   Segs   Segs   IR       IR       IR       Win %      Win %     
------------------------------------------------------------------------------------------
1        5057   1733   94     93     0.353    0.115    0.718    82.8     % 27.7     %
3        5070   1720   37     36     0.353    0.723    -0.022   27.8     % 78.4     %
5        5065   1725   24     23     0.353    0.720    -0.019   21.7     % 87.5     %
10       5133   1657   15     14     0.353    0.692    -0.024   35.7     % 93.3     %
15       5214   1576   14  