# 🔬 CARIA-SR Hysteresis Validation (1990-2025)

**Validated notebook with all bugs fixed**

- Data: 1990-01-01 to present
- Universe: S&P 500 constituents
- Key metrics: Absorption Ratio + Entropy + Peak Memory

In [None]:
# @title 1. Setup
!pip install -q yfinance pandas numpy scipy scikit-learn statsmodels seaborn matplotlib pyarrow requests

from google.colab import drive
drive.mount('/content/drive')

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
import requests
import warnings
from datetime import datetime
import statsmodels.formula.api as smf
from sklearn.covariance import LedoitWolf

warnings.filterwarnings('ignore')
np.random.seed(42)
sns.set_style('whitegrid')

# Configuration
WORK_DIR = '/content/drive/MyDrive/CARIA_1990'
os.makedirs(f'{WORK_DIR}/figures', exist_ok=True)
os.makedirs(f'{WORK_DIR}/tables', exist_ok=True)

FMP_API_KEY = "79fY9wvC9qtCJHcn6Yelf4ilE9TkRMoq"
START_DATE = "1990-01-01"  # VIX available from 1990
END_DATE = datetime.now().strftime("%Y-%m-%d")

print(f"‚úÖ Period: {START_DATE} to {END_DATE}")
print(f"‚úÖ Output: {WORK_DIR}")

In [None]:
# @title 2. Download S&P 500 Constituent Prices

# Get current S&P 500 tickers
# NOTE(review): this is the constituent list returned today, applied to history
# back to START_DATE - confirm that survivorship bias is acceptable here.
url = f"https://financialmodelingprep.com/api/v3/sp500_constituent?apikey={FMP_API_KEY}"
resp = requests.get(url, timeout=30)  # never hang the notebook on a stalled connection
payload = resp.json() if resp.status_code == 200 else []
sp500_tickers = (
    [row['symbol'] for row in payload if isinstance(row, dict) and 'symbol' in row]
    if isinstance(payload, list) else []
)
if not sp500_tickers:
    # Fail here with a clear message instead of later inside pd.concat([]).
    # The URL is deliberately not echoed because it contains the API key.
    raise RuntimeError(f"Could not fetch S&P 500 constituents (HTTP {resp.status_code})")
print(f"Downloading {len(sp500_tickers)} stocks...")

# Download in batches
all_prices = []
for i in range(0, len(sp500_tickers), 50):
    batch = sp500_tickers[i:i+50]
    try:
        data = yf.download(batch, start=START_DATE, end=END_DATE, progress=False, auto_adjust=True)['Close']
        all_prices.append(data)
        print(f"   Batch {i//50 + 1}/{(len(sp500_tickers)-1)//50 + 1}")
    except Exception as e:
        print(f"   Error batch {i//50 + 1}: {e}")

if not all_prices:
    raise RuntimeError("Every price batch failed to download; nothing to save")

# Drop tickers for which no price at all was returned.
prices = pd.concat(all_prices, axis=1).dropna(axis=1, how='all')
prices.to_csv(f'{WORK_DIR}/sp500_prices.csv')

# Market data (VIX, SPY, TLT, 10Y Treasury)
print("Downloading market data...")
# auto_adjust is passed explicitly so SPY/TLT are adjusted the same way as the
# constituent prices above, regardless of the installed yfinance default.
market = yf.download(['^VIX', 'SPY', 'TLT', '^TNX'], start=START_DATE, end=END_DATE,
                     progress=False, auto_adjust=True)

# Keep only dates on which all four series have a value.
market_df = pd.DataFrame({
    'volatility': market['Close']['^VIX'],
    'price': market['Close']['SPY'],
    'tlt': market['Close']['TLT'],
    'treasury_10y': market['Close']['^TNX']
}).dropna()
market_df.index.name = 'Date'
market_df.to_csv(f'{WORK_DIR}/market_validation_data.csv')

print(f"\n‚úÖ Prices: {prices.shape[1]} stocks, {len(prices)} days")
print(f"‚úÖ Market: {len(market_df)} days")
print(f"‚úÖ Period: {prices.index.min().date()} to {prices.index.max().date()}")

In [None]:
# @title 3. Calculate Structural Metrics (AR + Entropy) ⏳ ~15 min

def cov_to_corr(S):
    """Rescale a covariance matrix into a symmetric correlation matrix.

    Each entry is divided by the product of the two corresponding standard
    deviations (square roots of the diagonal). A zero standard deviation is
    replaced by 1e-10 to avoid dividing by zero, the result is averaged with
    its transpose to enforce symmetry, and remaining NaN/inf values are
    replaced via ``np.nan_to_num``.
    """
    std = np.sqrt(np.diag(S))
    std = np.where(std == 0, 1e-10, std)
    scale = std[:, None] * std[None, :]
    corr = S / scale
    symmetric = (corr + corr.T) / 2
    return np.nan_to_num(symmetric)

def eig_metrics(C, k_frac=0.2):
    """Return (absorption ratio, normalised spectral entropy) of matrix C.

    The eigenvalues of C are sorted in descending order and floored at 1e-10.
    The absorption ratio is the share of the eigenvalue sum carried by the
    largest ``ceil(k_frac * n)`` eigenvalues (at least one). The entropy is the
    Shannon entropy of the normalised eigenvalues divided by ``log(n)``; for a
    single eigenvalue it is fixed at 0.5.
    """
    eigvals = np.sort(np.linalg.eigvalsh(C))[::-1]
    eigvals = np.maximum(eigvals, 1e-10)  # floor tiny/negative eigenvalues
    n = len(eigvals)
    total = np.sum(eigvals)

    top_k = max(1, int(np.ceil(k_frac * n)))
    absorption = np.sum(eigvals[:top_k]) / total

    if n > 1:
        weights = eigvals / total
        entropy = -np.sum(weights * np.log(weights + 1e-10)) / np.log(n)
    else:
        entropy = 0.5
    return float(absorption), float(entropy)

# Calculate returns
# Daily log returns; keep only tickers that have a return on at least 90% of dates.
returns = np.log(prices).diff()
good_coverage = returns.notna().mean() >= 0.9
returns = returns.loc[:, good_coverage]
print(f"Using {returns.shape[1]} stocks with >90% coverage")

# Rolling structural metrics
window = 252  # rows per estimation window
step = 5      # recompute every 5th row; rows in between are forward-filled below
lw = LedoitWolf()

struct = pd.DataFrame(index=returns.index, columns=['absorption_ratio', 'entropy'], dtype=float)

total_steps = (len(returns) - window) // step
print(f"\nCalculating AR + Entropy ({total_steps} steps)...")

for idx, t in enumerate(range(window, len(returns), step)):
    # The window covers rows [t-window, t), so the value stored at row t uses
    # only data strictly before row t.
    W = returns.iloc[t-window:t]
    W = W.loc[:, W.notna().mean() >= 0.9]
    if W.shape[1] < 100:
        continue
    # Fill remaining gaps with each column's window mean, then demean.
    W = W.apply(lambda s: s.fillna(s.mean()))
    X = W.values - np.nanmean(W.values, axis=0)
    try:
        C = cov_to_corr(lw.fit(X).covariance_)
    except Exception as exc:
        # `except Exception` (not a bare except) so that interrupting this long
        # loop with Ctrl-C / "Stop" is not swallowed; fall back to the sample
        # correlation and make the fallback visible.
        print(f"   Ledoit-Wolf failed at step {idx + 1} ({exc}); using sample correlation")
        C = np.corrcoef(X, rowvar=False)
        C = np.nan_to_num((C + C.T) / 2)
    ar, ent = eig_metrics(C)
    struct.iloc[t] = [ar, ent]
    if (idx + 1) % 100 == 0:
        print(f"   {idx + 1}/{total_steps} ({(idx+1)/total_steps*100:.0f}%)")

# NOTE(review): bfill copies the first computed value backwards into the rows
# before the first window, i.e. those rows carry information from their future.
# Confirm that downstream cells discard that warm-up period.
struct = struct.ffill().bfill()
struct.index.name = 'date'
struct.to_csv(f'{WORK_DIR}/caria_structural_metrics.csv')

print(f"\n‚úÖ Structural metrics saved")
print(f"   AR mean: {struct['absorption_ratio'].mean():.4f}")
print(f"   Entropy mean: {struct['entropy'].mean():.4f}")

In [None]:
# @title 4. Merge Data and Calculate Signals

# Reload both inputs from disk so this cell can be re-run on its own.
struct_df = pd.read_csv(f'{WORK_DIR}/caria_structural_metrics.csv', index_col='date', parse_dates=True)
market_df = pd.read_csv(f'{WORK_DIR}/market_validation_data.csv', index_col='Date', parse_dates=True)

# Keep only dates present in both tables, in chronological order.
df = struct_df.join(market_df, how='inner').sort_index()

# Rolling z-score of the absorption ratio.
window_z = 252
_ar_rolling = df['absorption_ratio'].rolling(window=window_z)
rolling_mean = _ar_rolling.mean()
rolling_std = _ar_rolling.std()
df['absorp_z'] = (df['absorption_ratio'] - rolling_mean) / rolling_std

# Peak memory: rolling maximum of the z-score over the last 60 rows.
window_memory = 60
df['caria_peak'] = df['absorp_z'].rolling(window=window_memory).max()

# 22-row-ahead price change, aligned to the row it is measured from.
df['ret_future'] = df['price'].pct_change(22).shift(-22)

# Drop rows where any rolling or forward-looking column is still undefined.
df = df.dropna()
_first_day = df.index.min().date()
_last_day = df.index.max().date()
print(f"\n‚úÖ Dataset: {len(df)} observations")
print(f"   Period: {_first_day} to {_last_day}")

In [None]:
# @title Phase 8: Quantile Regression (Regime & Memory Test)
# Compares two 5%-quantile regressions of the forward return on the VIX < 20
# subsample: VIX alone versus VIX plus the peak-memory signal.

# Restrict the sample to rows whose VIX level is below 20.
low_vol_df = df[df['volatility'] < 20].copy()
print(f"Testing on 'Calm Markets' (VIX < 20). N={len(low_vol_df)}")

# Model A: VIX Only
mod_vix = smf.quantreg('ret_future ~ volatility', low_vol_df)
res_vix = mod_vix.fit(q=0.05)

# Model B: VIX + Peak Memory
mod_struct = smf.quantreg('ret_future ~ volatility + caria_peak', low_vol_df)
res_struct = mod_struct.fit(q=0.05)

print(res_struct.summary())

print(f"\nBase Model (VIX Only) Pseudo R¬≤:      {res_vix.prsquared:.5f}")
print(f"Structural Model (+Peak) Pseudo R¬≤:   {res_struct.prsquared:.5f}")
# Relative gain in pseudo R-squared, in percent.
# NOTE(review): `imp` is a generic global name; any later cell assigning the
# same name overwrites it. Read res_vix / res_struct if this value is needed
# afterwards.
imp = ((res_struct.prsquared - res_vix.prsquared)/res_vix.prsquared)*100
print(f"üî• Improvement in Low-Vol Regime:     {imp:.1f}%")

# Visualization
# Plot VIX (left axis) against the raw z-score and its rolling peak (right
# axis) for 2019-01-01 to 2020-06-01.
subset = df.loc['2019-01-01':'2020-06-01']
fig, ax1 = plt.subplots(figsize=(12, 6))
ax1.plot(subset.index, subset['volatility'], color='gray', linestyle='--', label='VIX')
ax1.set_ylabel('VIX', color='gray')
ax2 = ax1.twinx()  # second y-axis sharing the same dates
ax2.plot(subset.index, subset['absorp_z'], color='salmon', alpha=0.5, label='Original Signal')
ax2.plot(subset.index, subset['caria_peak'], color='darkred', linewidth=3, label='Peak Memory')
ax2.set_ylabel('Structure (Z-Score)', color='darkred')
plt.title('Peak Memory vs Original Signal (COVID Period)')
# Figure-level legend so that lines from both axes are listed together.
fig.legend(loc='upper left', bbox_to_anchor=(0.1, 0.9))
plt.tight_layout()
plt.savefig(f'{WORK_DIR}/figures/Figure_PeakMemory.png', dpi=300)
plt.show()

In [None]:
# @title Phase 9: Robustness Heatmap (FIXED)
# Sensitivity of the pseudo R-squared gain to the peak-memory window length and
# to the VIX cap that defines the "calm" subsample.

windows = [20, 40, 60, 90, 120]
vix_caps = [15, 18, 20, 22, 25]
# Cells that are skipped (too few rows) or whose fit fails stay at 0.
results_matrix = np.zeros((len(windows), len(vix_caps)))

print("Running Sensitivity Grid...")

# Pre-compute peak signals for all windows
for w in windows:
    df[f'peak_{w}'] = df['absorp_z'].rolling(window=w).max()

for i, w in enumerate(windows):
    peak_col = f'peak_{w}'
    for j, v in enumerate(vix_caps):
        # Use the forward return already stored in df['ret_future'], which was
        # computed on the full contiguous series. Recomputing
        # pct_change(22).shift(-22) after filtering on VIX would measure the
        # change across 22 *surviving* rows, which can span far more than 22
        # trading days.
        subset = df[df['volatility'] < v].dropna()

        if len(subset) > 500:
            try:
                mod_base = smf.quantreg('ret_future ~ volatility', subset).fit(q=0.05)
                mod_struct = smf.quantreg(f'ret_future ~ volatility + {peak_col}', subset).fit(q=0.05)
                # Relative gain in percent, written directly into the grid.
                results_matrix[i, j] = (
                    (mod_struct.prsquared - mod_base.prsquared) / mod_base.prsquared
                ) * 100
            except Exception as exc:
                # Report the failure instead of hiding it; the cell keeps its 0.
                print(f"   window={w}, VIX<{v}: fit failed ({exc})")
                results_matrix[i, j] = 0

# Save table
sensitivity_df = pd.DataFrame(results_matrix, index=windows, columns=vix_caps)
sensitivity_df.to_csv(f'{WORK_DIR}/tables/Table_Sensitivity.csv')

# Visualization
plt.figure(figsize=(10, 8))
sns.heatmap(results_matrix, annot=True, fmt=".1f", cmap="RdYlGn",
            xticklabels=vix_caps, yticklabels=windows)
plt.title("Robustness: Improvement in Tail Risk Prediction (%)")
plt.xlabel("VIX Threshold")
plt.ylabel("Memory Window (Days)")
plt.tight_layout()
plt.savefig(f'{WORK_DIR}/figures/Figure_RobustnessHeatmap.png', dpi=300)
plt.show()

In [None]:
# @title Structural Alpha Landscape (AR × Entropy)
# For every pair of (absorption-ratio, entropy) percentile thresholds, build a
# binary signal and measure how much it adds to a VIX-only 5%-quantile
# regression on the VIX < 25 subsample.

pcts = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95]
results_grid = np.zeros((len(pcts), len(pcts)))

subset = df[df['volatility'] < 25].copy().dropna()
mod_base_alpha = smf.quantreg('ret_future ~ volatility', subset)
res_base_alpha = mod_base_alpha.fit(q=0.05)
r2_base = res_base_alpha.prsquared

print("Generating Heatmap...")
for i, s_pct in enumerate(pcts):
    s_thresh = subset['absorption_ratio'].quantile(s_pct)
    high_sync = subset['absorption_ratio'] > s_thresh  # invariant over the inner loop
    for j, e_pct in enumerate(pcts):
        e_thresh = subset['entropy'].quantile(e_pct)
        signal = (high_sync & (subset['entropy'] < e_thresh)).astype(int)
        # Require more than 10 active observations before fitting.
        if signal.sum() > 10:
            try:
                res = smf.quantreg('ret_future ~ volatility + signal',
                                   subset.assign(signal=signal)).fit(q=0.05)
                results_grid[i, j] = ((res.prsquared - r2_base) / r2_base) * 100
            except Exception as exc:
                print(f"   AR>{s_pct:.2f}, entropy<{e_pct:.2f}: fit failed ({exc})")
                results_grid[i, j] = 0

plt.figure(figsize=(10, 8))
x_lbls = [f"Bot {int(p*100)}%" for p in pcts]
# `absorption_ratio > quantile(p)` selects the top (1 - p) share of
# observations, so the row for p = 0.95 is the top 5%, not the top 95%.
y_lbls = [f"Top {round((1 - p) * 100)}%" for p in pcts]
sns.heatmap(results_grid, annot=True, fmt=".0f", cmap="RdBu", center=0, vmin=-20, vmax=40,
            xticklabels=x_lbls, yticklabels=y_lbls)
plt.title('Structural Alpha Landscape (Blue = Signal Works)', fontsize=14, fontweight='bold')
plt.xlabel('Entropy (Low Diversity)', fontsize=12)
plt.ylabel('Synchronization (High Rigidity)', fontsize=12)
plt.gca().invert_yaxis()
plt.tight_layout()
plt.savefig(f'{WORK_DIR}/figures/Figure_StructuralAlpha.png', dpi=300)
plt.show()

In [None]:
# @title Phase 14: Risk Metrics + Bootstrap (FIXED)

def calc_risk_metrics(series, rf=0.04):
    """Compute annualised performance statistics from daily returns.

    Args:
        series: daily simple returns (NumPy array or pandas Series).
        rf: annual risk-free rate; converted to a daily rate assuming 252
            periods per year.

    Returns:
        Tuple ``(ann_ret, ann_vol, sharpe, sortino)``. ``ann_ret`` is the mean
        daily return times 252 and ``ann_vol`` the daily standard deviation
        times sqrt(252). Sharpe and Sortino are computed on excess returns and
        annualised with sqrt(252); each is 0 when its denominator is not
        positive (including when there are no negative excess returns).
    """
    rf_daily = (1 + rf) ** (1 / 252) - 1
    excess_ret = series - rf_daily
    ann_ret = np.mean(series) * 252
    ann_vol = np.std(series) * np.sqrt(252)

    excess_std = np.std(excess_ret)
    sharpe = np.mean(excess_ret) / excess_std * np.sqrt(252) if excess_std > 0 else 0

    # Downside deviation is kept on a DAILY scale. Annualising it here and then
    # multiplying the ratio by sqrt(252) again would cancel out and leave the
    # Sortino ratio un-annualised, inconsistent with the Sharpe ratio above.
    downside = excess_ret[excess_ret < 0]
    downside_std = np.std(downside) if len(downside) > 0 else 0.0
    sortino = np.mean(excess_ret) / downside_std * np.sqrt(252) if downside_std > 0 else 0
    return ann_ret, ann_vol, sharpe, sortino

# Backtest setup
# Work on a copy so the strategy columns do not leak into `df`.
backtest_df = df.copy()
backtest_df['daily_ret'] = backtest_df['price'].pct_change()
# 10Y yield is quoted in percent per year; convert to a per-day carry.
backtest_df['treasury_daily_ret'] = (backtest_df['treasury_10y'] / 100) / 252

# Regime flag: the previous row's peak signal above 1.5 marks the day as unsafe.
THRESHOLD = 1.5
backtest_df['unsafe_state'] = (backtest_df['caria_peak'].shift(1) > THRESHOLD)

# Strategy return streams
_unsafe = backtest_df['unsafe_state']
_equity = backtest_df['daily_ret']
_carry = backtest_df['treasury_daily_ret']

backtest_df['strat_ret'] = np.where(_unsafe, 0, _equity)       # flat when unsafe
backtest_df['smart_ret'] = np.where(_unsafe, _carry, _equity)  # treasury carry when unsafe
leverage = 1.5
# Levered equity exposure net of a 5%/yr financing cost on the borrowed part.
_levered_equity = _equity * leverage - (0.05/252 * (leverage-1))
backtest_df['lev_ret'] = np.where(_unsafe, _carry, _levered_equity)

# Performance statistics for the benchmark and the two hedged variants.
metrics_bench = calc_risk_metrics(backtest_df['daily_ret'].dropna())
metrics_smart = calc_risk_metrics(backtest_df['smart_ret'].dropna())
metrics_lev = calc_risk_metrics(backtest_df['lev_ret'].dropna())


def _format_metrics(m):
    """Render (return, vol, sharpe, sortino) as the four table strings."""
    return [f"{m[0]:.1%}", f"{m[1]:.1%}", f"{m[2]:.2f}", f"{m[3]:.2f}"]


risk_table = pd.DataFrame({
    'Metric': ['Ann. Return', 'Ann. Volatility', 'Sharpe Ratio', 'Sortino Ratio'],
    'S&P 500': _format_metrics(metrics_bench),
    'Minsky Hedge': _format_metrics(metrics_smart),
    'Minsky 1.5x': _format_metrics(metrics_lev),
})

print("\n--- TABLE: ECONOMIC PERFORMANCE ---")
print(risk_table.to_string(index=False))
risk_table.to_csv(f'{WORK_DIR}/tables/Table_RiskMetrics.csv', index=False)

# Bootstrap (FIXED)
# Resample the VIX < 20 rows with replacement and re-estimate the relative
# pseudo R-squared gain of adding the peak-memory signal.
# NOTE(review): rows are resampled independently although ret_future is built
# from overlapping 22-row windows; a block bootstrap may be more appropriate -
# confirm before quoting the interval.
print("\nRunning Bootstrap (1000 iterations)...")
n_boot = 1000
improvements = []

boot_subset = df[df['volatility'] < 20].copy().dropna()

for _ in range(n_boot):
    sample = boot_subset.sample(n=len(boot_subset), replace=True)
    try:
        mod_base = smf.quantreg('ret_future ~ volatility', sample).fit(q=0.05)
        mod_struct = smf.quantreg('ret_future ~ volatility + caria_peak', sample).fit(q=0.05)
    except Exception:
        # Skip resamples on which the solver fails (Ctrl-C is not swallowed).
        continue
    # Only resamples with a positive base fit give a meaningful ratio. The
    # value goes straight into the list so the notebook-wide `imp` variable is
    # not overwritten with a fraction.
    if mod_base.prsquared > 0:
        improvements.append((mod_struct.prsquared - mod_base.prsquared) / mod_base.prsquared)

if improvements:
    mean_imp = np.mean(improvements)
    ci_lower = np.percentile(improvements, 2.5)
    ci_upper = np.percentile(improvements, 97.5)
    p_positive = np.mean(np.array(improvements) > 0)
else:
    # No usable resample: report NaN rather than failing inside NumPy.
    mean_imp = ci_lower = ci_upper = p_positive = float('nan')

print(f"\n--- BOOTSTRAP RESULTS ---")
print(f"Mean Improvement: {mean_imp:.1%}")
print(f"95% CI: [{ci_lower:.1%}, {ci_upper:.1%}]")
print(f"P(Improvement > 0): {p_positive:.1%}")

In [None]:
# @title Walk-Forward Cross-Validation

TRAIN_YEARS = 5
TEST_YEARS = 1
PURGE_DAYS = 60


def _pinball_loss(y, y_hat, q):
    """Return the summed quantile (check) loss of predictions y_hat at level q."""
    u = np.asarray(y, dtype=float) - np.asarray(y_hat, dtype=float)
    return float(np.sum(np.where(u >= 0, q * u, (q - 1) * u)))


def walk_forward_cv(df, train_years=5, test_years=1, purge_days=60):
    """Walk-forward, out-of-sample comparison of VIX-only vs VIX+peak models.

    For each fold, both 5%-quantile regressions are fit on the low-VIX (< 20)
    rows of the training window and scored on the low-VIX rows of the test
    window, which starts ``purge_days`` rows after the training window ends.
    The score is an out-of-sample pseudo R-squared: one minus the model's
    pinball loss divided by the pinball loss of the training window's
    unconditional 5% quantile. It can be negative.

    Args:
        df: frame with columns ``volatility``, ``ret_future`` and
            ``caria_peak``, in chronological row order.
        train_years, test_years: window lengths in years of 252 rows.
        purge_days: rows skipped between the train and test windows.

    Returns:
        DataFrame with columns ``fold``, ``r2_vix``, ``r2_peak`` and
        ``improvement`` (relative gain when ``r2_vix > 0.001``, else the
        absolute difference). The columns are present even when no fold
        completes.
    """
    columns = ['fold', 'r2_vix', 'r2_peak', 'improvement']
    q = 0.05
    results = []
    train_days = train_years * 252
    test_days = test_years * 252
    # Fold f fits when f*test_days + train + purge + test <= len(df), so the
    # count is floor(spare / test_days) + 1; never negative.
    spare = len(df) - train_days - purge_days - test_days
    n_folds = spare // test_days + 1 if spare >= 0 else 0
    print(f"Running {n_folds} walk-forward folds...")

    for fold in range(n_folds):
        train_start = fold * test_days
        train_end = train_start + train_days
        test_start = train_end + purge_days
        test_end = test_start + test_days
        if test_end > len(df):
            break
        train = df.iloc[train_start:train_end]
        test = df.iloc[test_start:test_end]
        train_lowvol = train[train['volatility'] < 20]
        test_lowvol = test[test['volatility'] < 20]
        # Need enough calm-market rows both to fit and to score.
        if len(test_lowvol) < 50 or len(train_lowvol) < 50:
            continue
        try:
            # Fit on TRAIN only; the test window is used purely for scoring.
            fit_vix = smf.quantreg('ret_future ~ volatility', train_lowvol).fit(q=q)
            fit_peak = smf.quantreg('ret_future ~ volatility + caria_peak', train_lowvol).fit(q=q)

            y_test = test_lowvol['ret_future']
            baseline = _pinball_loss(y_test, train_lowvol['ret_future'].quantile(q), q)
            if baseline <= 0:
                continue
            r2_vix = 1 - _pinball_loss(y_test, fit_vix.predict(test_lowvol), q) / baseline
            r2_peak = 1 - _pinball_loss(y_test, fit_peak.predict(test_lowvol), q) / baseline
        except Exception as exc:
            print(f"   Fold {fold} skipped: {exc}")
            continue
        improvement = (r2_peak - r2_vix) / r2_vix if r2_vix > 0.001 else r2_peak - r2_vix
        results.append({'fold': fold, 'r2_vix': r2_vix, 'r2_peak': r2_peak, 'improvement': improvement})
    return pd.DataFrame(results, columns=columns)

# Run the walk-forward evaluation and report the per-fold comparison.
cv_results = walk_forward_cv(df, TRAIN_YEARS, TEST_YEARS, PURGE_DAYS)

print(f"\n{'='*60}")
print("WALK-FORWARD CROSS-VALIDATION RESULTS")
print(f"{'='*60}")
print(f"\nFolds completed: {len(cv_results)}")
if len(cv_results) > 0:
    # Folds in which the structural model scored higher than VIX alone.
    peak_wins = cv_results['r2_peak'] > cv_results['r2_vix']
    print(f"\nMean R¬≤ (VIX only):      {cv_results['r2_vix'].mean():.5f}")
    print(f"Mean R¬≤ (VIX + Peak):    {cv_results['r2_peak'].mean():.5f}")
    print(f"Mean Improvement:        {cv_results['improvement'].mean():.1%}")
    print(f"\nFolds where Peak > VIX:  {peak_wins.sum()}/{len(cv_results)}")
    print(f"Win Rate:                {peak_wins.mean():.1%}")
else:
    # Same guard the final summary uses: an empty result has nothing to average.
    print("\nNo folds completed - not enough data for the chosen train/purge/test sizes.")
cv_results.to_csv(f'{WORK_DIR}/tables/WalkForward_CV.csv', index=False)

In [None]:
# @title Permutation Test

def permutation_test(df, n_permutations=500):
    """Permutation test for the pseudo R-squared gain of the peak signal.

    On the VIX < 20 rows, the gain of adding ``caria_peak`` to a VIX-only
    5%-quantile regression is compared with the gains obtained after randomly
    shuffling ``caria_peak`` (using the global NumPy random state).

    Args:
        df: frame with columns ``volatility``, ``ret_future``, ``caria_peak``.
        n_permutations: number of shuffles to attempt.

    Returns:
        Dict with keys ``real_improvement``, ``perm_mean``, ``perm_95th``,
        ``p_value`` and ``significant`` (``p_value < 0.05``). If no shuffle
        could be fitted, the permutation statistics are NaN and
        ``significant`` is False.
    """
    test_df = df[df['volatility'] < 20].copy()
    r2_vix_real = smf.quantreg('ret_future ~ volatility', test_df).fit(q=0.05).prsquared
    r2_peak_real = smf.quantreg('ret_future ~ volatility + caria_peak', test_df).fit(q=0.05).prsquared
    real_improvement = r2_peak_real - r2_vix_real

    # NOTE(review): shuffling rows independently ignores any serial dependence
    # in caria_peak / ret_future; confirm that this null is the intended one.
    peak_values = test_df['caria_peak'].values
    perm_improvements = []
    print(f"Running {n_permutations} permutations...")
    for i in range(n_permutations):
        test_df_perm = test_df.assign(caria_peak=np.random.permutation(peak_values))
        try:
            r2_perm = smf.quantreg('ret_future ~ volatility + caria_peak', test_df_perm).fit(q=0.05).prsquared
            perm_improvements.append(r2_perm - r2_vix_real)
        except Exception as exc:
            print(f"   permutation {i+1} skipped: {exc}")
        if (i+1) % 100 == 0:
            print(f"   {i+1}/{n_permutations}")

    perm_improvements = np.array(perm_improvements)
    if perm_improvements.size == 0:
        nan = float('nan')
        return {'real_improvement': real_improvement, 'perm_mean': nan,
                'perm_95th': nan, 'p_value': nan, 'significant': False}

    # Add-one correction: the observed statistic counts as one draw from the
    # null, so the p-value can never be exactly zero.
    exceed = int(np.sum(perm_improvements >= real_improvement))
    p_value = (1 + exceed) / (1 + perm_improvements.size)
    return {'real_improvement': real_improvement, 'perm_mean': perm_improvements.mean(),
            'perm_95th': np.percentile(perm_improvements, 95), 'p_value': p_value, 'significant': p_value < 0.05}

# Run the permutation test with its default number of permutations and print
# the fields of the returned dictionary.
perm_results = permutation_test(df)

print(f"\n{'='*60}")
print("PERMUTATION TEST RESULTS")
print(f"{'='*60}")
print(f"\nReal Improvement (R¬≤):    {perm_results['real_improvement']:.5f}")
print(f"Random Mean:              {perm_results['perm_mean']:.5f}")
print(f"Random 95th Percentile:   {perm_results['perm_95th']:.5f}")
print(f"\nP-value:                  {perm_results['p_value']:.4f}")
print(f"Significant (p < 0.05):   {'‚úÖ YES' if perm_results['significant'] else '‚ùå NO'}")

In [None]:
# @title Figure 4: Economic Significance (Final Plot)
# Plots cumulative wealth of the benchmark and the two hedged strategies on a
# log scale, shading the dates flagged as unsafe. Requires `backtest_df` from
# the backtest cell.

plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['font.family'] = 'serif'

# Cumulative returns
# Missing daily returns are treated as 0 so the compounded series has no gaps.
backtest_df['cum_bnh'] = (1 + backtest_df['daily_ret'].fillna(0)).cumprod()
backtest_df['cum_smart'] = (1 + backtest_df['smart_ret'].fillna(0)).cumprod()
backtest_df['cum_lev'] = (1 + backtest_df['lev_ret'].fillna(0)).cumprod()

plt.figure(figsize=(12, 7))
plt.plot(backtest_df.index, backtest_df['cum_bnh'], label='S&P 500 (Benchmark)', color='gray', alpha=0.4, linewidth=1.5)
plt.plot(backtest_df.index, backtest_df['cum_smart'], label='Minsky Hedge (Unlevered)', color='#08519c', linewidth=2.5)
plt.plot(backtest_df.index, backtest_df['cum_lev'], label='Minsky Levered (1.5x)', color='darkgreen', linewidth=2.0)
# Shade a vertical band (spanning the levered curve's range, with 20% headroom)
# wherever unsafe_state is True.
plt.fill_between(backtest_df.index, backtest_df['cum_lev'].min(), backtest_df['cum_lev'].max()*1.2,
                 where=backtest_df['unsafe_state'], color='#deebf7', alpha=0.4, label='Structural Risk Regime')
plt.yscale('log')
plt.title('Figure 4: Economic Significance (1990‚Äì2025)\nDecoupling from Crashes while Compounding Growth', fontsize=14, fontweight='bold')
plt.ylabel('Portfolio Wealth (Log Scale)', fontsize=12)
plt.xlabel('Year', fontsize=12)
plt.legend(loc='upper left', frameon=True, framealpha=0.9)
plt.grid(True, which='both', alpha=0.2)
plt.tight_layout()
plt.savefig(f'{WORK_DIR}/figures/Figure4_Economic.png', dpi=300)
plt.show()

In [None]:
# @title Final Summary

def get_max_drawdown(cumulative):
    """Return the deepest peak-to-trough decline of a cumulative wealth series.

    The result is the minimum of ``value / running maximum - 1``, i.e. a
    non-positive fraction (0 when the series never falls below a prior peak).
    """
    running_peak = cumulative.cummax()
    drawdown = cumulative / running_peak - 1
    return drawdown.min()

# Maximum drawdowns of the three cumulative wealth curves (non-positive values).
dd_bnh = get_max_drawdown(backtest_df['cum_bnh'])
dd_smart = get_max_drawdown(backtest_df['cum_smart'])
dd_lev = get_max_drawdown(backtest_df['cum_lev'])

# Recompute the headline improvement from the Phase 8 fit objects. The global
# `imp` cannot be trusted here: other cells assign the same name (grid searches
# in percent, the bootstrap as a fraction), so it would hold whichever value
# was written last.
qr_improvement = ((res_struct.prsquared - res_vix.prsquared) / res_vix.prsquared) * 100

print("\n" + "="*70)
print("üî¨ CARIA-SR VALIDATION SUMMARY (1990-2025)")
print("="*70)
print(f"\nüìä DATA: {len(df)} observations, {df.index.min().date()} to {df.index.max().date()}")
print(f"\nüî¨ QUANTILE REGRESSION (VIX < 20):")
print(f"   Improvement: {qr_improvement:.1f}%")
if len(cv_results) > 0:
    print(f"\nüìà WALK-FORWARD CV: {(cv_results['r2_peak'] > cv_results['r2_vix']).mean():.0%} Win Rate")
print(f"\nüìä PERMUTATION TEST: p = {perm_results['p_value']:.4f}")
print(f"\nüí∞ MINSKY HEDGE:")
print(f"   S&P 500 Max DD: {dd_bnh:.1%}")
print(f"   Minsky Max DD:  {dd_smart:.1%}")
# Drawdowns are negative, so compare magnitudes: a positive number means the
# hedge's worst loss was shallower than the benchmark's.
print(f"   Reduction:      {abs(dd_bnh) - abs(dd_smart):.1%}")
print(f"\nüìÅ Files saved to: {WORK_DIR}")
print("\n‚úÖ DONE!")