# CARIA: Publication Master (The Definitive Validation)

## Abstract
This notebook generates the final empirical evidence for the CARIA model as a **Seminal Contribution** to Financial Physics.

We rigorously benchmark **Smart CARIA (Synchronization + Trend)** against:
- **Buy & Hold** (Baseline)
- **Volatility Targeting** (VIX Proxy) - The standard "Risk Parity" approach.
- **Naive Synchronization** (Great Caria v3) - Showing why "Structure" alone isn't enough.

## The Physics Engine: Structural Momentum ($p = m \times v$)
We implement the **Vector Physics** model defined in the Technical Report:
1.  **Velocity ($v$)**: Market Volatility (Speed of price change).
2.  **Mass ($m$)**: Social Synchronization ($r$).
    - Calculated by decomposing **Raw Returns** (Vector) into time scales and measuring Phase Alignment.
    - *Critical Change*: We use Raw Returns, not Abs(Returns), to preserve the directional/cyclical nature of the "Clocks".
3.  **Momentum ($p$)**: $p = r \times v$ (The weight of the crash).
4.  **Regime Filter**: Distinguishing Bubble ($r$ High, Trend Up) vs Crash ($r$ High, Trend Down).

In [None]:
!pip install yfinance PyWavelets scikit-learn numpy pandas scipy matplotlib seaborn -q
import pandas as pd
import numpy as np
import yfinance as yf
from scipy import stats, signal
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import matthews_corrcoef, f1_score, roc_auc_score, precision_score, recall_score, confusion_matrix
from sklearn.feature_selection import mutual_info_regression

# === I. DATA LOADER ===
def fetch_data(ticker, start='2005-01-01'):
    """Download the closing-price history for one ticker as a Series.

    Args:
        ticker: Symbol passed straight to ``yf.download``.
        start: First date requested, as a ``YYYY-MM-DD`` string.

    Returns:
        A ``pd.Series`` of closing prices with missing values dropped.
        An empty Series is returned when the download yields no rows, so
        callers can test ``.empty`` before using it.
    """
    df = yf.download(ticker, start=start, progress=False)
    if df is None or df.empty:
        return pd.Series(dtype=float, name=ticker)

    if isinstance(df.columns, pd.MultiIndex):
        # Multi-level layout: pick the 'Close' field from the first level.
        df = df.xs('Close', axis=1, level=0)
    elif 'Close' in df.columns:
        # Flat layout: select 'Close' by name; taking the first column
        # positionally would return whichever field happens to come first.
        df = df['Close']

    if isinstance(df, pd.DataFrame):
        # Still 2-D (e.g. one column per ticker): keep the first column.
        df = df.iloc[:, 0]
    return df.dropna()

# Download every series in one ordered pass: equity index, volatility index,
# long-bond ETF and gold futures.
print("Fetching Market Data...")
sp500, vix, bond, gold = [
    fetch_data(symbol) for symbol in ('^GSPC', '^VIX', 'TLT', 'GC=F')
]
print("Data Ready.")

In [None]:
# === II. VECTOR PHYSICS ENGINE ===
# Rolling-mean windows (in rows of the return series) that define each band.
# NOTE(review): 'weight' is not read by calculate_vector_physics - confirm
# whether another part of the notebook consumes it.
SCALES = {
    'fast':   {'window': 5,   'weight': 0.15},
    'medium': {'window': 20,  'weight': 0.35},
    'slow':   {'window': 60,  'weight': 0.25},
    'macro':  {'window': 200, 'weight': 0.25},
}


def _phase_entropy(row_phases, bins=8):
    """Return the Shannon entropy (nats) of one row of phases binned on [-pi, pi]."""
    counts, _ = np.histogram(row_phases, bins=bins, range=(-np.pi, np.pi))
    # Normalise counts to probabilities; bin *densities* would not sum to 1 and
    # would make the result negative whenever all phases share a bin.
    probs = counts[counts > 0] / counts.sum()
    return -np.sum(probs * np.log(probs))


def calculate_vector_physics(price_series):
    """Compute synchronization, volatility, momentum and entropy from prices.

    Steps:
      1. Raw (signed) percentage returns are taken from ``price_series``.
      2. For each entry of ``SCALES`` (sorted by window) a rolling mean of the
         returns is computed; each band is that mean minus the mean of the
         next-longer window, and the longest window is kept as-is.
      3. The phase of every band is the angle of its Hilbert analytic signal.
      4. ``Sync`` is the Kuramoto order parameter ``|mean(exp(i*phase))|``
         taken across the bands at each date.
      5. ``Volatility`` is the 20-row rolling standard deviation of returns
         scaled by ``sqrt(252)``.
      6. ``Momentum`` is ``Sync * Volatility``.
      7. ``Entropy`` is the Shannon entropy of the band phases over 8 bins.

    Args:
        price_series: ``pd.Series`` of prices.

    Returns:
        ``pd.DataFrame`` with columns ``Price``, ``Returns``, ``Sync``,
        ``Volatility``, ``Momentum`` and ``Entropy``, restricted to the rows
        where every column is available.

    Raises:
        ValueError: If the series is too short to fill the longest window.
    """
    # 1. Input vector: raw returns, so direction (phase) is preserved.
    returns = price_series.pct_change().dropna()

    # 2. Band decomposition: difference of adjacent rolling means.
    sorted_scales = sorted(SCALES.items(), key=lambda x: x[1]['window'])
    names = [scale_name for scale_name, _ in sorted_scales]
    # Each rolling mean is needed by two neighbouring bands; compute it once.
    smooth = {
        scale_name: returns.rolling(config['window']).mean()
        for scale_name, config in sorted_scales
    }
    bands = {}
    for i, scale_name in enumerate(names):
        if i < len(names) - 1:
            bands[scale_name] = smooth[scale_name] - smooth[names[i + 1]]
        else:
            bands[scale_name] = smooth[scale_name]

    bands_df = pd.DataFrame(bands).dropna()
    if bands_df.empty:
        longest = sorted_scales[-1][1]['window']
        raise ValueError(
            f"price_series is too short: need more than {longest} prices "
            "to fill the longest rolling window"
        )

    # 3. Phase extraction via the analytic signal of each band.
    # NOTE(review): signal.hilbert is applied to the whole series at once, so
    # the phase at a given date may depend on later observations - confirm
    # this is acceptable before treating downstream signals as tradable.
    phases_df = pd.DataFrame(
        {col: np.angle(signal.hilbert(bands_df[col].values)) for col in bands_df.columns},
        index=bands_df.index,
    )
    phase_values = phases_df.to_numpy()

    # 4. Synchronization (mass term): r = |mean(e^{i*phi})| across bands.
    kuramoto_r = np.abs(np.exp(1j * phase_values).mean(axis=1))
    SYNC = pd.Series(kuramoto_r, index=phases_df.index, name='Sync')

    # 5. Volatility (velocity term): 20-row std of returns, scaled by sqrt(252).
    VOL = returns.rolling(20).std() * np.sqrt(252)
    VOL = VOL.reindex(SYNC.index).fillna(0)

    # 6. Structural momentum: p = m * v.
    MOMENTUM = SYNC * VOL

    # 7. Entropy of the phase distribution across bands at each date.
    entropy = pd.Series(
        [_phase_entropy(row) for row in phase_values],
        index=phases_df.index,
        name='Entropy',
    )

    return pd.DataFrame({
        'Price': price_series,
        'Returns': returns,
        'Sync': SYNC,
        'Volatility': VOL,
        'Momentum': MOMENTUM,  # The core physics metric
        'Entropy': entropy,
    }).dropna()

In [None]:
# === III. VALIDATION & BENCHMARKS ===

def run_vector_validation(name, df):
    """Attach the four benchmark position signals to a physics DataFrame.

    Each signal is 1 (stay invested) or 0 (exit) and is lagged by one row, so
    the position held on a row is decided from the previous row's data.

    Args:
        name: Asset label; accepted for the caller's convenience, not used.
        df: DataFrame with at least ``Price``, ``Volatility`` and ``Momentum``
            columns. It is copied, never modified.

    Returns:
        A copy of ``df`` with ``Signal_Vol``, ``Signal_Naive``,
        ``Signal_Smart`` and ``Signal_BH`` appended, rows with NaN dropped.
    """
    frame = df.copy()
    lookback, pct = 252, 0.8

    def _stay_unless(exit_mask):
        # 1 where we remain invested, 0 where the exit condition fires.
        return (~exit_mask).astype(int)

    # Volatility-only benchmark: exit when vol is above its rolling 80th pct.
    vol_cut = frame['Volatility'].rolling(lookback).quantile(pct)
    frame['Signal_Vol'] = _stay_unless(frame['Volatility'] > vol_cut)

    # Naive CARIA: exit whenever Momentum (Sync * Vol) is above its own
    # rolling 80th percentile, regardless of price direction.
    mom_cut = frame['Momentum'].rolling(lookback).quantile(pct)
    heavy = frame['Momentum'] > mom_cut
    frame['Signal_Naive'] = _stay_unless(heavy)

    # Smart CARIA: exit only when Momentum is heavy AND the 20-row price
    # change is negative; heavy with a rising price keeps the position.
    falling = frame['Price'].pct_change(20) < 0
    frame['Signal_Smart'] = _stay_unless(heavy & falling)

    # Buy & hold is always invested.
    frame['Signal_BH'] = 1

    # One-row lag; the first row has no prior signal, so default to invested.
    for signal_col in ('Signal_BH', 'Signal_Vol', 'Signal_Naive', 'Signal_Smart'):
        lagged = frame[signal_col].shift(1)
        frame[signal_col] = lagged.fillna(1)

    return frame.dropna()

def calc_stats_final(model, returns, signals, price_data):
    """Summarise one strategy's performance and crash-classification skill.

    Args:
        model: Label stored under ``'Model'`` in the result.
        returns: Per-row strategy returns (``pd.Series``).
        signals: Position signal per row; a value of 0 is read as a crash
            prediction.
        price_data: Price series used to build the realised crash label.

    Returns:
        Dict with ``Model``, annualised ``Sharpe`` (NaN when the return
        volatility is zero or undefined), total ``Return`` as a percentage
        string, ``MaxDD`` (most negative drawdown), and ``AUC`` / ``MCC`` /
        ``F1`` of the exit signal against the label "price falls more than
        5% over the following 20 rows".
    """
    cum = (1 + returns).cumprod()
    ret_total = cum.iloc[-1] - 1

    vol = returns.std()
    # A flat return stream has no defined Sharpe ratio; report NaN, not inf.
    sharpe = returns.mean() / vol * np.sqrt(252) if vol > 0 else np.nan
    dd = (cum / cum.cummax()) - 1

    # Classification target: forward 20-row price change.
    fwd_change = price_data.pct_change(20).shift(-20)
    # Mask on the forward change itself. Comparing NaN with a threshold yields
    # False, so casting first would silently label the final 20 rows (whose
    # outcome is unknown) as "no crash".
    valid = fwd_change.notna() & signals.notna()
    yt = (fwd_change[valid] < -0.05).astype(int)
    yp = (signals[valid] == 0).astype(int)

    return {
        'Model': model,
        'Sharpe': sharpe,
        'Return': f"{ret_total*100:.0f}%",
        'MaxDD': dd.min(),
        # AUC is undefined with a single class present; fall back to chance.
        'AUC': roc_auc_score(yt, yp) if len(np.unique(yt)) > 1 else 0.5,
        'MCC': matthews_corrcoef(yt, yp),
        'F1': f1_score(yt, yp)
    }

# === RUN ENGINE ===
# The TLT series is loaded into `bond` by the data-loader cell. Bitcoin is not
# loaded there, so it is fetched here.
assets = {
    'S&P 500': sp500,
    'Bitcoin': fetch_data('BTC-USD'),
    'TLT': bond,
    'Gold': gold,
}

# Display name of each strategy -> signal column produced by run_vector_validation.
sig_map = {'Buy & Hold': 'Signal_BH', 'Vol Only': 'Signal_Vol',
           'Naive CARIA': 'Signal_Naive', 'Smart CARIA': 'Signal_Smart'}

for name, asset_data in assets.items():
    # Skip assets whose download returned nothing.
    if asset_data.empty:
        continue
    print(f"\n=== {name} (Vector Physics) ===")
    df_phys = calculate_vector_physics(asset_data)
    res = run_vector_validation(name, df_phys)

    # Strategy return = asset return while the (already lagged) signal is 1.
    stats_list = [
        calc_stats_final(strat, res['Returns'] * res[col], res[col], res['Price'])
        for strat, col in sig_map.items()
    ]
    print(pd.DataFrame(stats_list).set_index('Model'))

    # Mutual information between today's Sync and the return 20 rows ahead.
    # Rows without a known future return are dropped rather than padded with
    # 0, and the estimator is seeded so reruns print the same number.
    future_ret = res['Returns'].shift(-20)
    known = future_ret.notna()
    mi = mutual_info_regression(
        res.loc[known, ['Sync']].values,
        future_ret[known].values,
        random_state=0,
    )
    print(f"Mutual Info (Sync->Returns): {mi[0]:.4f}")