# 🔬 CARIA-SR Extended Validation (1980+, Multi-Asset)

**Extended Testing:**
- US Equities (1980-present)
- Global Indices (Europe, Japan, EM)
- Crypto (BTC, ETH)

**Phases:** 8-14 + Max Drawdown Analysis

In [None]:
# @title 1. Setup
!pip install -q yfinance pandas numpy scipy scikit-learn statsmodels seaborn matplotlib pyarrow requests

from google.colab import drive
drive.mount('/content/drive')

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
import requests
import warnings
from datetime import datetime
import statsmodels.formula.api as smf
from sklearn.covariance import LedoitWolf

# Notebook-wide runtime settings: silence warnings, seed NumPy's global RNG,
# and select the seaborn grid theme used by every figure.
warnings.filterwarnings('ignore')
np.random.seed(42)
sns.set_style('whitegrid')

# Output root on the mounted Drive; figures and tables get separate folders.
WORK_DIR = '/content/drive/MyDrive/CARIA_Extended'
for _subdir in ('figures', 'tables'):
    os.makedirs(f'{WORK_DIR}/{_subdir}', exist_ok=True)

# Extended date range: fixed start, end resolved to the day the cell is run.
START_DATE = "1980-01-01"
END_DATE = datetime.now().strftime("%Y-%m-%d")

print(f"‚úÖ Output: {WORK_DIR}")
print(f"üìÖ Date Range: {START_DATE} to {END_DATE}")

In [None]:
# @title 2. Download Multi-Asset Data

# Asset universes. Per asset class:
#   main    - ticker stored as the 'main' price series
#   vol     - ticker stored as the volatility series
#   safe    - ticker stored as the 'safe' series
#   sectors - tickers downloaded together for cross-sectional analysis
#   start   - first date requested for every download of that class
ASSETS = {
    'US_Equity': {
        'main': '^GSPC',  # S&P 500
        'vol': '^VIX',
        'safe': 'TLT',
        'sectors': ['XLF', 'XLK', 'XLE', 'XLV', 'XLI', 'XLY', 'XLP', 'XLB', 'XLU', 'XLRE', 'XLC'],
        'start': '1980-01-01'
    },
    'Global_Indices': {
        'main': 'SPY',
        'vol': '^VIX',
        'safe': 'TLT',
        'sectors': ['EWJ', 'EWG', 'EWU', 'EWC', 'EWA', 'EWZ', 'EWY', 'EWT', 'EWH', 'EWS', 'EEM', 'VGK'],
        'start': '2000-01-01'
    },
    'Crypto': {
        'main': 'BTC-USD',
        'vol': '^VIX',  # Use VIX as proxy (no crypto vol index)
        'safe': 'TLT',
        'sectors': ['BTC-USD', 'ETH-USD', 'BNB-USD', 'SOL-USD', 'XRP-USD', 'ADA-USD', 'DOGE-USD', 'DOT-USD'],
        'start': '2017-01-01'
    }
}


def _download_close(ticker, start, end, name):
    """Download one ticker and return its 'Close' column as a Series.

    Args:
        ticker: Symbol passed to ``yf.download``.
        start: First date requested.
        end: End date passed to ``yf.download``.
        name: Name given to the returned Series.

    Returns:
        The close series renamed to ``name``. If the download or the column
        extraction fails, the error is printed and an empty float Series is
        returned so the remaining downloads can still run.
    """
    try:
        close = yf.download(ticker, start=start, end=end, progress=False)['Close']
        # ['Close'] can be a Series or a one-column DataFrame; normalise it.
        if isinstance(close, pd.DataFrame):
            close = close.iloc[:, 0]
        return close.rename(name)
    except Exception as exc:
        # Best effort, but visible: a bare ``except`` would also swallow
        # KeyboardInterrupt and hide why a series ended up empty.
        print(f"   Warning: could not download {ticker}: {exc}")
        return pd.Series(dtype=float)


all_data = {}

for asset_class, config in ASSETS.items():
    print(f"\nüì• Downloading {asset_class}...")
    start = config['start']

    main = _download_close(config['main'], start, END_DATE, 'price')
    vol = _download_close(config['vol'], start, END_DATE, 'volatility')
    safe = _download_close(config['safe'], start, END_DATE, 'safe')

    # Download sectors/constituents for cross-sectional analysis. A failure
    # here leaves an empty frame instead of aborting the other asset classes.
    try:
        sectors = yf.download(config['sectors'], start=start, end=END_DATE, progress=False)['Close']
        if isinstance(sectors, pd.Series):
            sectors = sectors.to_frame()
    except Exception as exc:
        print(f"   Warning: could not download constituents for {asset_class}: {exc}")
        sectors = pd.DataFrame()

    all_data[asset_class] = {
        'main': main,
        'vol': vol,
        'safe': safe,
        'sectors': sectors
    }

    print(f"   Main: {len(main)} days, Sectors: {sectors.shape[1]} assets")

print("\n‚úÖ All data downloaded")

In [None]:
# @title 3. Core Functions

def cov_to_corr(S):
    """Turn a covariance matrix into a symmetrised correlation matrix.

    Standard deviations are taken from the diagonal of ``S``; exact zeros are
    replaced by 1e-10 before dividing. The result is averaged with its
    transpose and passed through ``np.nan_to_num``.
    """
    std = np.sqrt(np.diag(S))
    safe_std = np.where(std == 0, 1e-10, std)
    corr = S / np.outer(safe_std, safe_std)
    symmetric = (corr + corr.T) / 2
    return np.nan_to_num(symmetric)

def eig_metrics(C, k_frac=0.2):
    """Return ``(absorption_ratio, entropy)`` from the eigenvalues of ``C``.

    Eigenvalues are sorted in descending order and floored at 1e-10.

    Args:
        C: Symmetric matrix handed to ``np.linalg.eigvalsh``.
        k_frac: Fraction of eigenvalues (rounded up, at least one) counted in
            the absorption ratio.

    Returns:
        A pair of floats: the share of the eigenvalue sum held by the top
        ``k`` eigenvalues, and the entropy of the normalised eigenvalues
        divided by ``log(n)``. The entropy is 0.5 when there is only one
        eigenvalue.
    """
    eigvals = np.maximum(np.sort(np.linalg.eigvalsh(C))[::-1], 1e-10)
    n = len(eigvals)
    total = np.sum(eigvals)
    top_k = max(1, int(np.ceil(k_frac * n)))
    absorption = np.sum(eigvals[:top_k]) / total
    if n > 1:
        share = eigvals / total
        entropy = -np.sum(share * np.log(share + 1e-10)) / np.log(n)
    else:
        entropy = 0.5
    return float(absorption), float(entropy)

def calculate_structural_metrics(sectors_df, window=252, step=5, min_assets=5):
    """Calculate rolling absorption ratio and entropy from cross-sectional prices.

    Log returns are computed per column, and columns with less than 80%
    non-missing returns are dropped. Every ``step`` rows a correlation matrix
    is estimated from the preceding ``window`` rows (Ledoit-Wolf covariance,
    falling back to ``np.corrcoef`` if that fit raises) and summarised with
    ``eig_metrics``.

    Args:
        sectors_df: Price frame, one column per asset.
        window: Number of trailing rows used for each estimate.
        step: Spacing, in rows, between consecutive estimates.
        min_assets: Minimum number of usable columns, both overall and inside
            each window.

    Returns:
        A frame indexed like the returns with columns ``absorption_ratio``
        and ``entropy``, forward-filled between estimates and back-filled
        before the first one. An empty ``DataFrame`` is returned when too few
        assets have coverage or when no window could be estimated at all.
    """
    returns = np.log(sectors_df).diff()
    good = returns.notna().mean() >= 0.8
    returns = returns.loc[:, good]

    if returns.shape[1] < min_assets:
        print(f"   Warning: Only {returns.shape[1]} assets with good coverage")
        return pd.DataFrame()

    struct = pd.DataFrame(index=returns.index, columns=['absorption_ratio', 'entropy'], dtype=float)
    lw = LedoitWolf()

    for t in range(window, len(returns), step):
        # Trailing window that excludes row t itself.
        W = returns.iloc[t-window:t]
        W = W.loc[:, W.notna().mean() >= 0.8]
        if W.shape[1] < min_assets:
            continue
        # Fill the remaining gaps with each column's in-window mean.
        W = W.apply(lambda s: s.fillna(s.mean()))
        X = W.values - np.nanmean(W.values, axis=0)
        try:
            C = cov_to_corr(lw.fit(X).covariance_)
        except Exception:
            # Fall back to the sample correlation if the shrinkage fit fails;
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            C = np.corrcoef(X, rowvar=False)
            C = np.nan_to_num((C + C.T) / 2)
        ar, ent = eig_metrics(C)
        struct.iloc[t] = [ar, ent]

    if not struct['absorption_ratio'].notna().any():
        # Nothing was estimated (history shorter than the window, or every
        # window skipped). Signal failure the same way as the coverage check
        # above so callers testing ``.empty`` see it.
        print("   Warning: no window could be estimated")
        return pd.DataFrame()

    # NOTE(review): bfill stamps the first estimate onto the warm-up rows that
    # precede it; confirm downstream rolling statistics should see that
    # constant padding rather than NaN.
    return struct.ffill().bfill()

def add_signals(df, window_z=252, window_memory=60):
    """Attach the absorption-ratio z-score and its rolling peak to ``df``.

    Writes two columns into ``df`` itself and returns the same object:
    ``absorp_z`` is ``absorption_ratio`` standardised by its rolling mean and
    standard deviation over ``window_z`` rows, and ``caria_peak`` is the
    rolling maximum of ``absorp_z`` over ``window_memory`` rows.
    """
    ratio = df['absorption_ratio']
    trailing = ratio.rolling(window=window_z)
    zscore = (ratio - trailing.mean()) / trailing.std()
    df['absorp_z'] = zscore
    df['caria_peak'] = zscore.rolling(window=window_memory).max()
    return df

def get_max_drawdown(equity):
    """Return the largest decline of ``equity`` from its running maximum.

    The value is the minimum of ``equity / running_max - 1`` as a float, so it
    is zero for a curve that never falls and negative otherwise.
    """
    running_high = equity.cummax()
    drawdown = equity.div(running_high).sub(1.0)
    return float(drawdown.min())

def run_minsky_hedge(df, entry_peak=1.0, exit_peak=0.5, vix_entry=20):
    """Run the Minsky Hedge backtest with persistence logic.

    A two-state machine walks the rows in order. While out of the hedge it
    enters when volatility is below ``vix_entry`` and ``caria_peak`` is above
    ``entry_peak``. While hedged it stays hedged until ``caria_peak`` drops
    below ``exit_peak``. Rows with a missing ``caria_peak`` are recorded as
    not hedged without changing the carried state.

    Args:
        df: Frame with ``price``, ``safe`` and ``caria_peak`` columns and
            optionally ``volatility`` (a constant 15 is used if absent).
            The input is not modified.
        entry_peak: ``caria_peak`` level above which the hedge may start.
        exit_peak: ``caria_peak`` level below which the hedge ends.
        vix_entry: Volatility ceiling for starting the hedge.

    Returns:
        A copy of ``df`` with added columns ``daily_ret`` and ``safe_ret``
        (log returns), ``in_hedge`` (signal state per row), ``strategy_ret``,
        ``cum_bnh`` and ``cum_strat`` (equity curves starting at 1).
    """
    df = df.copy()
    df['daily_ret'] = np.log(df['price']).diff()
    df['safe_ret'] = np.log(df['safe']).diff()

    peaks = df['caria_peak'].to_numpy(dtype=float)
    if 'volatility' in df.columns:
        vix_values = df['volatility'].to_numpy(dtype=float)
    else:
        vix_values = np.full(len(df), 15.0)

    # State machine
    in_hedge = False
    states = []
    for peak, vix in zip(peaks, vix_values):
        if np.isnan(peak):
            states.append(False)
            continue
        if not in_hedge:
            if vix < vix_entry and peak > entry_peak:
                in_hedge = True
        elif peak < exit_peak:
            in_hedge = False
        states.append(in_hedge)

    df['in_hedge'] = states

    # The state for row i is decided from row i's own values, so it can only
    # be acted on from the next row. Applying it to row i's return would let
    # the strategy dodge a move it had already observed (look-ahead).
    position = pd.Series(states, index=df.index, dtype=bool).shift(1, fill_value=False)
    df['strategy_ret'] = np.where(position, df['safe_ret'], df['daily_ret'])

    # The returns are log returns, so they compound by exp(cumsum) rather than
    # (1 + r).cumprod(); this makes cum_bnh equal price / first price.
    df['cum_bnh'] = np.exp(df['daily_ret'].fillna(0).cumsum())
    df['cum_strat'] = np.exp(df['strategy_ret'].fillna(0).cumsum())

    return df

# Confirmation message printed once the cell's function definitions have run.
print("‚úÖ Functions defined")

In [None]:
# @title 4. Run Analysis for All Asset Classes

# One entry per asset class that makes it through the whole pipeline:
# structural metrics -> signals -> tail quantile regression -> hedge backtest.
results = {}

for asset_class, data in all_data.items():
    print(f"\n{'='*60}")
    print(f"üìä Analyzing: {asset_class}")
    print(f"{'='*60}")

    sectors = data['sectors']
    if sectors.empty or sectors.shape[1] < 3:
        print(f"   ‚ö†Ô∏è Insufficient data for {asset_class}")
        continue

    # Calculate structural metrics; the window shrinks for short histories.
    print(f"   Calculating structural metrics ({sectors.shape[1]} assets)...")
    struct = calculate_structural_metrics(sectors, window=min(252, len(sectors)//3), min_assets=3)

    if struct.empty:
        print(f"   ‚ö†Ô∏è Could not calculate metrics for {asset_class}")
        continue

    # Merge with market data. Prefer dates present in all three series; if
    # that leaves fewer than 500 rows, drop the volatility requirement.
    idx = struct.index.intersection(data['main'].index).intersection(data['vol'].index)
    if len(idx) < 500:
        idx = struct.index.intersection(data['main'].index)

    df = struct.loc[idx].copy()
    df['price'] = data['main'].reindex(idx)
    df['volatility'] = data['vol'].reindex(idx).ffill()
    df['safe'] = data['safe'].reindex(idx).ffill()

    # Add signals and the 22-row forward return used as the regression target.
    df = add_signals(df)
    df['future_ret_22'] = df['price'].pct_change(22).shift(-22)
    # Rows missing any column (including 'safe' and 'volatility') are removed.
    df = df.dropna()

    print(f"   Dataset: {len(df)} observations")
    # An empty frame has no min/max date; calling .date() on the result would
    # raise before the size check below could skip this asset class.
    if not df.empty:
        print(f"   Period: {df.index.min().date()} to {df.index.max().date()}")

    if len(df) < 500:
        print(f"   ‚ö†Ô∏è Insufficient observations for {asset_class}")
        continue

    # Run Quantile Regression
    print("   Running Quantile Regression...")

    # Use volatility-based filtering if VIX available: keep rows below the
    # 60th percentile of volatility.
    if 'volatility' in df.columns and df['volatility'].notna().sum() > 100:
        low_vol = df[df['volatility'] < df['volatility'].quantile(0.6)].copy()
    else:
        low_vol = df.copy()

    try:
        # 5% quantile fits with and without the peak-memory term.
        mod_base = smf.quantreg('future_ret_22 ~ absorp_z', low_vol)
        res_base = mod_base.fit(q=0.05)

        mod_peak = smf.quantreg('future_ret_22 ~ absorp_z + caria_peak', low_vol)
        res_peak = mod_peak.fit(q=0.05)

        # Relative gain in pseudo R-squared, in percent; 0 when the base fit
        # has no positive pseudo R-squared to compare against.
        imp = ((res_peak.prsquared - res_base.prsquared) / res_base.prsquared) * 100 if res_base.prsquared > 0 else 0

        print(f"   Base R¬≤: {res_base.prsquared:.5f}")
        print(f"   Peak R¬≤: {res_peak.prsquared:.5f}")
        print(f"   üî• Improvement: {imp:.1f}%")
    except Exception as e:
        print(f"   ‚ö†Ô∏è QR Error: {e}")
        imp = 0

    # Run Minsky Hedge
    print("   Running Minsky Hedge...")
    df = run_minsky_hedge(df)

    # Annualise assuming 252 rows per year.
    years = len(df) / 252
    dd_bnh = get_max_drawdown(df['cum_bnh'])
    dd_strat = get_max_drawdown(df['cum_strat'])
    cagr_bnh = (df['cum_bnh'].iloc[-1])**(1/years) - 1 if years > 0 else 0
    cagr_strat = (df['cum_strat'].iloc[-1])**(1/years) - 1 if years > 0 else 0
    hedge_time = df['in_hedge'].mean()

    print(f"   Benchmark: DD={dd_bnh:.1%}, CAGR={cagr_bnh:.1%}")
    print(f"   Minsky:    DD={dd_strat:.1%}, CAGR={cagr_strat:.1%}")
    print(f"   Time in Hedge: {hedge_time*100:.1f}%")

    results[asset_class] = {
        'df': df,
        'qr_improvement': imp,
        'dd_bnh': dd_bnh,
        'dd_strat': dd_strat,
        'cagr_bnh': cagr_bnh,
        'cagr_strat': cagr_strat,
        'hedge_time': hedge_time
    }

print("\n‚úÖ Analysis complete")

In [None]:
# @title 5. Summary Table

# One pre-formatted row per analysed asset class. Both drawdowns are stored
# as negative fractions, and 'DD Reduction' is their plain difference.
summary_rows = [
    {
        'Asset Class': asset_class,
        'QR Improvement (%)': f"{res['qr_improvement']:.1f}",
        'Benchmark DD': f"{res['dd_bnh']:.1%}",
        'Minsky DD': f"{res['dd_strat']:.1%}",
        'DD Reduction': f"{res['dd_bnh'] - res['dd_strat']:.1%}",
        'Benchmark CAGR': f"{res['cagr_bnh']:.1%}",
        'Minsky CAGR': f"{res['cagr_strat']:.1%}",
        'Time in Hedge': f"{res['hedge_time']*100:.1f}%"
    }
    for asset_class, res in results.items()
]
summary_df = pd.DataFrame(summary_rows)

summary_rule = "=" * 80
print("\n" + summary_rule)
print("MULTI-ASSET CARIA-SR VALIDATION SUMMARY")
print(summary_rule)
print(summary_df.to_string(index=False))

# Persist the table next to the other outputs.
summary_path = f'{WORK_DIR}/tables/Multi_Asset_Summary.csv'
summary_df.to_csv(summary_path, index=False)
print(f"\n‚úÖ Saved to {summary_path}")

In [None]:
# @title 6. Equity Curves (All Asset Classes)

# One log-scale panel per analysed asset class: buy-and-hold vs. strategy
# equity, with thin red markers at the first hedge entries.
n_assets = len(results)
if n_assets > 0:
    # squeeze=False always returns a 2-D array of axes, so a single panel
    # needs no special-casing.
    fig, axes = plt.subplots(n_assets, 1, figsize=(14, 5*n_assets), squeeze=False)

    for ax, (asset_class, res) in zip(axes[:, 0], results.items()):
        df = res['df']
        ax.plot(df.index, df['cum_bnh'], label=f"Buy&Hold (DD:{res['dd_bnh']:.1%})", color='gray', alpha=0.6)
        ax.plot(df.index, df['cum_strat'], label=f"Minsky (DD:{res['dd_strat']:.1%})", color='blue', linewidth=2)
        ax.set_yscale('log')
        ax.set_title(f'{asset_class}: QR Improvement = {res["qr_improvement"]:.1f}%', fontsize=14, fontweight='bold')
        ax.legend(loc='upper left')
        ax.grid(True, alpha=0.3)

        # Highlight hedge entries: rows where in_hedge flips from False to
        # True. fill_value keeps the shifted series boolean instead of
        # passing through an object column with a NaN in the first row.
        hedged = df['in_hedge'].astype(bool)
        was_hedged = hedged.shift(1, fill_value=False)
        hedge_starts = df.index[hedged & ~was_hedged]
        for start in hedge_starts[:20]:  # Limit to 20 for clarity
            ax.axvline(x=start, color='red', alpha=0.2, linewidth=0.5)

    plt.tight_layout()
    plt.savefig(f'{WORK_DIR}/figures/Multi_Asset_Equity.png', dpi=300)
    plt.show()
else:
    print("No results to plot")

In [None]:
# @title 7. Crisis-Specific Drawdown Analysis

# Named stress windows as (first date, last date), both inclusive when used
# as a label slice on the date index.
CRISES = {
    'Black Monday 1987': ('1987-08-01', '1987-12-31'),
    'Dot-Com Crash': ('2000-03-01', '2002-10-31'),
    'GFC 2008': ('2007-10-01', '2009-03-31'),
    'Euro Crisis 2011': ('2011-07-01', '2011-10-31'),
    'COVID 2020': ('2020-02-01', '2020-04-30'),
    'Rate Hikes 2022': ('2022-01-01', '2022-10-31')
}

crisis_results = []

for asset_class, res in results.items():
    df = res['df']

    for crisis_name, (start, end) in CRISES.items():
        try:
            period = df.loc[start:end]
            # Skip windows the asset's history barely covers.
            if len(period) < 20:
                continue

            # Rebase both curves to 1 at the start of the window.
            dd_bnh = get_max_drawdown(period['cum_bnh'] / period['cum_bnh'].iloc[0])
            dd_strat = get_max_drawdown(period['cum_strat'] / period['cum_strat'].iloc[0])
            hedge_pct = period['in_hedge'].mean() * 100
        except Exception as exc:
            # Still best effort per window, but say which one was dropped
            # instead of silently omitting it from the table.
            print(f"   Warning: skipped {crisis_name} for {asset_class}: {exc}")
            continue

        crisis_results.append({
            'Asset': asset_class,
            'Crisis': crisis_name,
            'BnH DD': f"{dd_bnh:.1%}",
            'Minsky DD': f"{dd_strat:.1%}",
            'Protection': f"{dd_bnh - dd_strat:.1%}",
            'Hedge %': f"{hedge_pct:.0f}%"
        })

if crisis_results:
    crisis_df = pd.DataFrame(crisis_results)
    print("\n" + "="*80)
    print("CRISIS-SPECIFIC DRAWDOWN ANALYSIS")
    print("="*80)
    print(crisis_df.to_string(index=False))
    crisis_df.to_csv(f'{WORK_DIR}/tables/Crisis_Drawdowns.csv', index=False)

In [None]:
# @title 8. Bootstrap Confidence Intervals (All Assets)

# Row-resampling bootstrap of the relative pseudo R-squared gain from adding
# caria_peak to the 5% quantile regression.
n_boot = 500  # Reduced for speed
bootstrap_results = []

for asset_class, res in results.items():
    print(f"\nBootstrapping {asset_class}...")
    # NOTE(review): this resamples every row of the stored frame, while the
    # point estimate in the analysis cell was fitted on a low-volatility
    # subset - confirm the two are meant to use different samples.
    df = res['df']

    improvements = []
    n_failed = 0
    for _ in range(n_boot):
        sample = df.sample(n=len(df), replace=True)
        try:
            r_base = smf.quantreg('future_ret_22 ~ absorp_z', sample).fit(q=0.05).prsquared
            r_peak = smf.quantreg('future_ret_22 ~ absorp_z + caria_peak', sample).fit(q=0.05).prsquared
        except Exception:
            # A failed fit drops this replicate; count it so the loss shows.
            n_failed += 1
            continue
        # Replicates without a positive base pseudo R-squared have no
        # meaningful ratio and are left out.
        if r_base > 0:
            improvements.append((r_peak - r_base) / r_base)

    if n_failed:
        print(f"   Warning: {n_failed}/{n_boot} bootstrap fits failed and were skipped")

    if improvements:
        bootstrap_results.append({
            'Asset': asset_class,
            'Mean Improvement': f"{np.mean(improvements):.1%}",
            '95% CI Lower': f"{np.percentile(improvements, 2.5):.1%}",
            '95% CI Upper': f"{np.percentile(improvements, 97.5):.1%}",
            'P(Imp > 0)': f"{np.mean(np.array(improvements) > 0):.1%}"
        })

if bootstrap_results:
    boot_df = pd.DataFrame(bootstrap_results)
    print("\n" + "="*60)
    print("BOOTSTRAP RESULTS")
    print("="*60)
    print(boot_df.to_string(index=False))
    boot_df.to_csv(f'{WORK_DIR}/tables/Bootstrap_Results.csv', index=False)

In [None]:
# @title 9. Final Summary

# Closing recap: how many asset classes produced results, plus each one's
# headline quantile-regression gain and drawdown difference.
final_rule = "=" * 70
print("\n" + final_rule)
print("üî¨ CARIA-SR EXTENDED VALIDATION COMPLETE")
print(final_rule)

print(f"\nüìä Asset Classes Tested: {len(results)}")
for asset_class in results:
    res = results[asset_class]
    print(f"\n   {asset_class}:")
    print(f"      QR Improvement: {res['qr_improvement']:.1f}%")
    print(f"      DD Reduction: {res['dd_bnh'] - res['dd_strat']:.1%}")

print(f"\nüìÅ Files saved to: {WORK_DIR}")
print("\n‚úÖ DONE!")