# GREAT CARIA - Meta-Fragility Composite + Complexity Validation

## Mejoras finales:
1. **Meta-Fragility Composite**: Señales más tempranas de cada módulo
2. **Surrogate Testing**: Null hypothesis testing
3. **Bifurcation Markers**: Detección de puntos críticos
4. **Bootstrap Confidence**: Intervalos de confianza

In [None]:
!pip install PyWavelets networkx -q

import pandas as pd
import numpy as np
from scipy import stats, signal
from scipy.ndimage import gaussian_filter1d
import pywt
import networkx as nx
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings('ignore')

# Mount Google Drive (Colab runtime) so the parquet file below is reachable.
from google.colab import drive
drive.mount('/content/drive')

# Load the market data frame from the mounted drive.
MARKET_PATH = '/content/drive/MyDrive/CARIA/data/raw/yahoo_market.parquet'
df = pd.read_parquet(MARKET_PATH)
# Country codes; later cells look up a '<code>_index' column in df for each code.
COUNTRIES = ['USA', 'CHN', 'JPN', 'DEU', 'GBR', 'FRA', 'BRA', 'MEX', 'KOR', 'AUS', 'IND', 'ZAF']
print(f'Data: {df.shape}')

In [None]:
# === BASE COMPUTATIONS (from moonshot) ===
# Keep only the '<code>_index' columns that actually exist in the data.
idx_cols = [
    column
    for column in (f'{code}_index' for code in COUNTRIES)
    if column in df.columns
]
# Simple returns; rows with a missing value in any market are dropped.
ret = df[idx_cols].pct_change().dropna()
# Strip the '_index' suffix so columns are plain country codes.
ret = ret.rename(columns=lambda column: column.replace('_index', ''))

# Crisis Factor
def compute_cf(r, w=20):
    """Rolling crisis factor: mean pairwise correlation times mean volatility.

    For every row position ``i >= w`` the ``w`` rows immediately before ``i``
    (row ``i`` itself excluded) are used. The value stored at ``r.index[i]``
    is ``mean off-diagonal correlation * mean column std * 100``.

    Args:
        r: DataFrame of returns, one column per market.
        w: Number of trailing rows in each window.

    Returns:
        Series indexed by ``r.index[w:]``.
    """
    values = []
    for end in range(w, len(r)):
        chunk = r.iloc[end - w:end]
        corr = chunk.corr().values
        k = len(corr)
        # Subtracting k removes the unit diagonal; k*(k-1) is the number of
        # off-diagonal entries.
        mean_corr = (corr.sum() - k) / (k * (k - 1))
        values.append(mean_corr * chunk.std().mean() * 100)
    return pd.Series(values, index=r.index[w:])

# Crisis-factor series for the full return panel, using compute_cf's default window.
CF = compute_cf(ret)

# Kuramoto sync
def extract_phase(series):
    """Return the instantaneous phase of a detrended series.

    The slow component (Gaussian filter, sigma=60) is subtracted, the Hilbert
    transform gives the analytic signal, and its angle is returned.

    Args:
        series: pandas Series of numeric values.

    Returns:
        NumPy array of phases in radians, same length as ``series``.
    """
    slow_trend = gaussian_filter1d(series.values, sigma=60)
    residual = series - slow_trend
    return np.angle(signal.hilbert(residual))

# One phase column per market; NaNs are replaced with 0 before phase extraction.
phases = pd.DataFrame({c: extract_phase(ret[c].fillna(0)) for c in ret.columns}, index=ret.index)

def kuramoto_order(phases, window=60):
    """Kuramoto order parameter |mean(exp(i*phase))| across columns, per row.

    Note that ``window`` only determines how many leading rows are skipped;
    each value is computed from the phases of a single row.

    Args:
        phases: DataFrame of phases in radians, one column per oscillator.
        window: Number of initial rows to skip.

    Returns:
        Series indexed by ``phases.index[window:]``.
    """
    order = [
        np.abs(np.exp(1j * phases.iloc[row].values).mean())
        for row in range(window, len(phases))
    ]
    return pd.Series(order, index=phases.index[window:])

# Cross-market phase synchronisation, using kuramoto_order's default window.
sync_order = kuramoto_order(phases)

# EWS
def compute_ews(series, window=120):
    """Rolling early-warning statistics of a series.

    Args:
        series: pandas Series to analyse.
        window: Rolling window length in rows.

    Returns:
        DataFrame with columns ``ac1`` (lag-1 autocorrelation), ``variance``
        and ``skewness``, aligned to ``series.index``.
    """
    roll = series.rolling(window)
    # raw=False hands each window over as a Series so .autocorr is available.
    lag1 = roll.apply(lambda chunk: chunk.autocorr(lag=1), raw=False)
    return pd.DataFrame({
        'ac1': lag1,
        'variance': roll.var(),
        'skewness': roll.skew(),
    })

# Early-warning statistics (lag-1 autocorrelation, variance, skewness) of the crisis factor.
ews = compute_ews(CF)
print(f'Signals computed')

In [None]:

# === Additional Complexity Metrics (Curvature & TDA) ===
# For every date with a full trailing window, build a graph whose nodes are
# markets and whose edges join pairs with correlation above `threshold`, then
# record its average clustering coefficient and its number of independent
# cycles.
print('=== Additional Complexity Metrics ===')
import networkx as nx

# Rolling window parameters
window = 60       # rows of returns per correlation graph
threshold = 0.7   # correlation above which two markets are linked
curvature_series = pd.Series(index=ret.index, dtype=float)
tda_h1 = pd.Series(index=ret.index, dtype=float)
tda_loops = pd.Series(index=ret.index, dtype=float)

for date in ret.index:
    window_data = ret.loc[:date].tail(window)
    if len(window_data) < window:
        # Not enough history yet; leave NaN for this date.
        continue
    # correlation matrix (undefined correlations are treated as 0)
    corr = window_data.corr().fillna(0)
    # adjacency matrix for positive correlations above threshold
    adjacency = (corr.values > threshold).astype(int)
    # The diagonal of a correlation matrix is 1.0, which exceeds the
    # threshold and would give every node a self-loop. Self-loops are counted
    # by number_of_edges() and would inflate the cycle count below, so they
    # are removed here.
    np.fill_diagonal(adjacency, 0)
    G = nx.from_numpy_array(adjacency)
    # compute curvature proxy as average clustering coefficient
    try:
        curvature_series.loc[date] = nx.average_clustering(G)
    except Exception:
        # Best effort: a degenerate graph yields NaN rather than aborting.
        curvature_series.loc[date] = np.nan
    # independent cycles of the graph: edges - nodes + components
    num_nodes = G.number_of_nodes()
    num_edges = G.number_of_edges()
    num_components = nx.number_connected_components(G)
    loops = num_edges - num_nodes + num_components
    tda_loops.loc[date] = loops
    # cycles per node; max(..., 1) avoids division by zero on an empty graph
    tda_h1.loc[date] = loops / max(num_nodes, 1)

# Combine into DataFrame
tda_df = pd.DataFrame({'h1_lifetime': tda_h1, 'num_loops': tda_loops})


---
# PART 1: META-FRAGILITY COMPOSITE

Combina las señales MÁS TEMPRANAS de cada módulo

In [None]:
# === 1A: Identify earliest signals per module ===
print('=== Identifying Earliest Warning Signals ===')

# Define crisis events
# Reference dates (label -> timestamp) used by later cells to measure lead
# times and pre-crisis behaviour.
CRISES = {
    'Lehman': pd.Timestamp('2008-09-15'),
    'COVID': pd.Timestamp('2020-03-11')
}

def find_lead_time(signal, crisis_date, threshold_pct=0.9):
    """Days between the first threshold crossing and the crisis date.

    The threshold is the ``threshold_pct`` quantile of the whole ``signal``.
    Only the last 120 observations strictly before ``crisis_date`` are
    searched.

    Args:
        signal: Series with a datetime index.
        crisis_date: Timestamp of the crisis.
        threshold_pct: Quantile of ``signal`` used as the threshold.

    Returns:
        Calendar days from the first observation above the threshold to
        ``crisis_date``, or 0 when nothing in the search window crosses it.
    """
    level = signal.quantile(threshold_pct)
    lookback = signal[signal.index < crisis_date].iloc[-120:]
    above = lookback[lookback > level]
    if len(above) == 0:
        return 0
    return (crisis_date - above.index[0]).days

# Compute lead times for each signal
# Each entry is oriented so that larger values are treated as more fragile
# (curvature is flipped, skewness is taken in absolute value).
signals_to_test = {
    'CF': CF,
    'Sync': sync_order,
    'ACF1': ews['ac1'],
    'Variance': ews['variance'],
    'Skewness': ews['skewness'].abs(),
    'Curvature': 1 - curvature_series,
    'H1': tda_df['h1_lifetime'],
    'Loops': tda_df['num_loops']
}
lead_times = {}
for crisis_name, crisis_date in CRISES.items():
    lead_times[crisis_name] = {}
    for sig_name, sig in signals_to_test.items():
        try:
            lead = find_lead_time(sig.dropna(), crisis_date)
        except Exception as exc:
            # Best effort: one failing signal must not abort the table, but
            # the failure is reported instead of being silently mapped to 0.
            print(f'  [warn] lead time for {sig_name}/{crisis_name} failed ({exc!r}); using 0')
            lead = 0
        lead_times[crisis_name][sig_name] = lead

print('\nLead times (days before crisis):')
# Rows are signals, columns are crises.
lead_df = pd.DataFrame(lead_times)
print(lead_df)

In [None]:
# === 1B: Create Meta-Fragility with optimal weights ===
print('\n=== Meta-Fragility Composite ===')

# Weight signals by their average lead time
avg_leads = lead_df.mean(axis=1)
total_lead = avg_leads.sum()
if total_lead > 0:
    weights = avg_leads / total_lead
else:
    # No signal led any crisis: dividing by zero would turn every weight
    # (and the whole composite) into NaN, so fall back to equal weights.
    weights = pd.Series(1.0 / len(avg_leads), index=avg_leads.index)
print('Signal weights based on lead time:')
print(weights)

# Align all signals: keep only dates where every component has a value.
common_idx = CF.dropna().index
for s in [sync_order, ews['ac1'], ews['variance'], ews['skewness'].abs(), 1 - curvature_series, tda_df['h1_lifetime'], tda_df['num_loops']]:
    common_idx = common_idx.intersection(s.dropna().index)

# Normalization helper
def normalize(s):
    """Min-max scale ``s`` to roughly [0, 1].

    A small constant (1e-8) is added to the range so a constant series maps
    to zeros instead of dividing by zero.
    """
    lowest = s.min()
    span = s.max() - lowest + 1e-8
    return (s - lowest) / span

# Compute normalized components (invert curvature to reflect fragility)
# Every component is restricted to the shared dates and min-max scaled.
cf_norm      = normalize(CF.loc[common_idx])
sync_norm    = normalize(sync_order.loc[common_idx])
acf_norm     = normalize(ews['ac1'].loc[common_idx])
var_norm     = normalize(ews['variance'].loc[common_idx])
skew_norm    = normalize(ews['skewness'].abs().loc[common_idx])
curv_norm    = 1 - normalize(curvature_series.loc[common_idx])
h1_norm      = normalize(tda_df['h1_lifetime'].loc[common_idx])
loops_norm   = normalize(tda_df['num_loops'].loc[common_idx])

# Weighted meta-fragility composite
# The keys match the signal names used when the lead-time weights were built.
meta_fragility = (
    weights['CF']       * cf_norm +
    weights['Sync']     * sync_norm +
    weights['ACF1']     * acf_norm +
    weights['Variance'] * var_norm +
    weights['Skewness'] * skew_norm +
    weights['Curvature']* curv_norm +
    weights['H1']       * h1_norm +
    weights['Loops']    * loops_norm
)

# The leading newline is written as an escape; a raw line break inside a
# single-quoted string literal is a syntax error.
print(f'\nMeta-Fragility range: {meta_fragility.min():.3f} - {meta_fragility.max():.3f}')

In [None]:
# === 1C: Compare lead times ===

# Old IFI (equal weights)
# Baseline index: the five original signals, each min-max scaled on the
# shared dates and weighted 0.2.
ifi_components = [
    CF,
    sync_order,
    ews['ac1'],
    ews['variance'],
    ews['skewness'].abs(),
]
ifi_equal = sum(0.2 * normalize(component.loc[common_idx]) for component in ifi_components)

# Compare lead times
print('Lead time comparison:')
for crisis_name, crisis_date in CRISES.items():
    lead_old = find_lead_time(ifi_equal.dropna(), crisis_date)
    lead_new = find_lead_time(meta_fragility.dropna(), crisis_date)
    gain = lead_new - lead_old
    print(f'{crisis_name}: IFI={lead_old}d, Meta={lead_new}d, improvement={gain}d')

---
# PART 2: SURROGATE TESTING (Null Hypothesis)

In [None]:
# === 2A: Phase randomization surrogate ===
print('=== Surrogate Testing ===')

def phase_surrogate(series):
    """Return a phase-randomized copy of ``series`` with the same amplitude spectrum.

    Every Fourier coefficient is rotated by an independent uniform random
    angle drawn from NumPy's global random state. The DC term (and the
    Nyquist term for even lengths) is left unrotated so it stays real.

    Args:
        series: 1-D array-like of real values.

    Returns:
        NumPy array with the same length as ``series``.
    """
    n = len(series)
    spectrum = np.fft.rfft(series)
    angles = np.random.uniform(0, 2*np.pi, len(spectrum))
    rotation = np.exp(1j * angles)
    # Keep DC and Nyquist real
    rotation[0] = 1
    if n % 2 == 0:
        rotation[-1] = 1
    return np.fft.irfft(spectrum * rotation, n)

# Generate surrogates for each country
# Null model: every market's return series is phase-randomized independently,
# then the same phase-extraction and order-parameter pipeline is applied.
N_SURROGATES = 100
surrogate_syncs = []  # one maximum order-parameter value per surrogate panel

print(f'Generating {N_SURROGATES} surrogates...')
for i in tqdm(range(N_SURROGATES)):
    # Randomize phases for each country
    surrogate_ret = pd.DataFrame({
        c: phase_surrogate(ret[c].fillna(0).values)
        for c in ret.columns
    }, index=ret.index)
    
    # Compute sync for surrogate
    surr_phases = pd.DataFrame({
        c: extract_phase(surrogate_ret[c])
        for c in surrogate_ret.columns
    })
    surr_sync = kuramoto_order(surr_phases)
    surrogate_syncs.append(surr_sync.max())  # Max sync under null

# Compare
# The observed maximum is compared with the surrogate maxima through a
# z-score; the p-value is the one-sided upper tail of a standard normal.
real_max_sync = sync_order.max()
surr_mean = np.mean(surrogate_syncs)
surr_std = np.std(surrogate_syncs)
z_score = (real_max_sync - surr_mean) / (surr_std + 1e-8)  # 1e-8 avoids division by zero
p_value = 1 - stats.norm.cdf(z_score)

print(f'\nSynchronization null test:')
print(f'  Real max sync: {real_max_sync:.3f}')
print(f'  Surrogate mean: {surr_mean:.3f} ¬± {surr_std:.3f}')
print(f'  Z-score: {z_score:.2f}')
print(f'  p-value: {p_value:.4f}')
print(f'  ‚úì SIGNIFICANT (p<0.05)' if p_value < 0.05 else '  ‚úó NOT SIGNIFICANT')

In [None]:
# === 2B: Shuffle surrogate for ACF1 ===
print('\n=== ACF1 Null Test ===')

# Under null: CF is random walk, ACF1 should be ~1 anyway
# Better test: does ACF1 increase BEFORE crises more than random?

def pre_crisis_increase(signal, crisis_date, pre_days=60):
    """Mean of ``signal`` just before a date minus its mean in the preceding baseline.

    The recent window is the open interval ``(crisis_date - pre_days,
    crisis_date)``; the baseline is the open interval
    ``(crisis_date - 180 days, crisis_date - pre_days)``.

    Args:
        signal: Series with a datetime index.
        crisis_date: Reference timestamp.
        pre_days: Length of the recent window in calendar days.

    Returns:
        Recent mean minus baseline mean (NaN when either window is empty).
    """
    near_start = crisis_date - pd.Timedelta(days=pre_days)
    far_start = crisis_date - pd.Timedelta(days=180)
    recent = signal.loc[(signal.index < crisis_date) & (signal.index > near_start)]
    baseline = signal.loc[(signal.index < near_start) & (signal.index > far_start)]
    return recent.mean() - baseline.mean()

# Real increases
# NOTE: results of this test live in acf1_* names. Reusing the generic names
# z_score / p_value / surr_mean / surr_std here would overwrite the
# synchronization surrogate results that the summary and export cells report.
acf1_clean = ews['ac1'].dropna()
real_increases = []
for crisis_date in CRISES.values():
    try:
        inc = pre_crisis_increase(acf1_clean, crisis_date)
    except Exception as exc:
        # Best effort: skip a crisis that cannot be evaluated, but say so.
        print(f'  [warn] skipping crisis {crisis_date}: {exc!r}')
        continue
    if np.isnan(inc):
        # Crisis window falls outside the available data; a NaN would
        # poison the average below.
        print(f'  [warn] skipping crisis {crisis_date}: no data in window')
        continue
    real_increases.append(inc)

real_avg_increase = np.mean(real_increases) if real_increases else float('nan')

# Surrogate: random dates
# Dates are drawn away from both ends so the 180-day look-back has data.
surr_increases = []
valid_dates = acf1_clean.index[180:-60]
for _ in range(500):
    # np.random.choice yields a NumPy datetime; wrap it so Timedelta
    # arithmetic behaves exactly as for the real crisis timestamps.
    random_date = pd.Timestamp(np.random.choice(valid_dates))
    inc = pre_crisis_increase(acf1_clean, random_date)
    surr_increases.append(inc)

acf1_surr_mean = np.mean(surr_increases)
acf1_surr_std = np.std(surr_increases)
acf1_z_score = (real_avg_increase - acf1_surr_mean) / (acf1_surr_std + 1e-8)
# One-sided upper-tail p-value under a normal approximation.
acf1_p_value = 1 - stats.norm.cdf(acf1_z_score)

print(f'Pre-crisis ACF1 increase test:')
print(f'  Real increase: {real_avg_increase:.4f}')
print(f'  Random dates: {acf1_surr_mean:.4f} ¬± {acf1_surr_std:.4f}')
print(f'  Z-score: {acf1_z_score:.2f}')
print(f'  p-value: {acf1_p_value:.4f}')
print(f'  ‚úì SIGNIFICANT' if acf1_p_value < 0.05 else '  ‚úó NOT SIGNIFICANT')

---
# PART 3: BIFURCATION MARKERS

In [None]:
# === 3A: Potential analysis (bifurcation detection) ===
print('=== Bifurcation Detection ===')

def estimate_potential(series, window=120, n_bins=50):
    """Summarise the empirical potential of ``series`` in overlapping windows.

    Windows of ``window`` rows advance by ``window // 2`` rows. For each one
    the potential is ``-log(histogram density)``; the function records the
    number of local minima of that potential and a bimodality coefficient
    ``(skew**2 + 1) / (excess_kurtosis + 3)``.

    Args:
        series: pandas Series of numeric values.
        window: Rows per window.
        n_bins: Number of histogram bins.

    Returns:
        DataFrame with columns ``n_states`` and ``bimodality``, indexed by the
        label of the row just after each window.
    """
    from scipy.signal import find_peaks

    rows = []
    stamps = []
    for end in range(window, len(series), window//2):
        sample = series.iloc[end - window:end].values

        # Histogram -> probability density; 1e-10 keeps log() finite for
        # empty bins.
        density, _ = np.histogram(sample, bins=n_bins, density=True)
        potential = -np.log(density + 1e-10)

        # Peaks of -potential are local minima of the potential.
        wells, _ = find_peaks(-potential)

        g1 = stats.skew(sample)
        g2 = stats.kurtosis(sample)
        rows.append({
            'n_states': len(wells),
            'bimodality': (g1**2 + 1) / (g2 + 3),
        })
        stamps.append(series.index[end])

    return pd.DataFrame(rows, index=stamps)

# Potential statistics of the crisis factor, one row per half-overlapping window.
potential_analysis = estimate_potential(CF)
print(f'Potential analysis: {len(potential_analysis)} windows')

In [None]:
# === 3B: Flickering detection ===
print('\n=== Flickering Detection ===')

def detect_flickering(series, window=60, threshold_std=1.5):
    """Z-score of the rolling count of direction reversals in ``series``.

    A reversal is a step whose first difference has the opposite sign to the
    previous one. Reversals are counted over ``window`` rows and standardised
    with the mean and standard deviation of the whole count series.

    Args:
        series: pandas Series of numeric values.
        window: Rows per rolling count.
        threshold_std: Accepted but not used by the computation.

    Returns:
        Series of z-scores aligned to ``series.index``.
    """
    step = series.diff()
    # The product of consecutive steps is negative exactly when the sign flips.
    reversal = (step * step.shift(1)) < 0
    reversal_count = reversal.rolling(window).sum()

    centre = reversal_count.mean()
    spread = reversal_count.std()
    return (reversal_count - centre) / (spread + 1e-8)

# Flickering z-score of the crisis factor, using detect_flickering's defaults.
flickering = detect_flickering(CF)

# Check pre-crisis flickering
# Average z-score over the 60 calendar days before each crisis date.
for crisis_name, crisis_date in CRISES.items():
    try:
        pre_flicker = flickering.loc[(flickering.index < crisis_date) & 
                                     (flickering.index > crisis_date - pd.Timedelta(days=60))].mean()
        print(f'{crisis_name}: pre-crisis flickering z-score = {pre_flicker:.2f}')
    except Exception as exc:
        # Best effort: keep going for the other crises, but report the
        # failure instead of hiding it.
        print(f'{crisis_name}: pre-crisis flickering unavailable ({exc!r})')

In [None]:
# === 3C: Combined bifurcation indicator ===

# Combine signals that indicate approaching bifurcation
# Dates where both the flickering z-score and the lag-1 autocorrelation exist.
common = flickering.dropna().index.intersection(ews['ac1'].dropna().index)

# potential_analysis only has one row per half-window step, so it must NOT be
# intersected into `common` (that would collapse the score to those sparse
# dates). Instead each date takes the most recent bimodality value via
# forward fill; dates before the first potential window become NaN and are
# dropped below.
bimodality_daily = potential_analysis['bimodality'].reindex(common, method='ffill')

bifurcation_indicator = pd.DataFrame({
    'flickering': flickering.loc[common],
    'ac1': ews['ac1'].loc[common],
    'variance': ews['variance'].loc[common],
    'bimodality': bimodality_daily
}).dropna()

# Normalize and combine
# Each column is min-max scaled so the four indicators contribute equally.
for col in bifurcation_indicator.columns:
    bifurcation_indicator[col] = normalize(bifurcation_indicator[col])

bifurcation_score = bifurcation_indicator.mean(axis=1)

print(f'Bifurcation score range: {bifurcation_score.min():.3f} - {bifurcation_score.max():.3f}')

---
# PART 4: BOOTSTRAP CONFIDENCE INTERVALS

In [None]:
# === 4A: Bootstrap prediction accuracy ===
# Fit a one-feature logistic regression (meta-fragility -> forward drawdown)
# on bootstrap resamples of the first 70% of dates and score each fit on a
# fixed later test segment.
print('=== Bootstrap Validation ===')

# Target: market drawdown
sp500_ret = df['USA_index'].pct_change()
# Sum of the next 20 returns (rows t+1 .. t+20) placed at row t.
sp500_20d = sp500_ret.rolling(20).sum().shift(-20)
# A comparison against NaN is False, so without the mask the rows whose
# forward window is unknown (e.g. the last 20) would be labelled "no crisis"
# and survive dropna(). Keep them as NaN so they are excluded.
crisis_target = (sp500_20d < -0.05).astype(float).where(sp500_20d.notna())

# Align
common = meta_fragility.dropna().index.intersection(crisis_target.dropna().index)
X = meta_fragility.loc[common].values.reshape(-1, 1)
y = crisis_target.loc[common].values.astype(int)

# Bootstrap
N_BOOTSTRAP = 100
PURGE = 30  # rows skipped between train and test (longer than the 20-row target horizon)
bootstrap_accs = []
bootstrap_aucs = []

n = len(X)
train_size = int(n * 0.7)

# Fixed test set (after purge); it does not change between resamples.
X_test = X[train_size + PURGE:]
y_test = y[train_size + PURGE:]
# AUC is undefined when the test labels contain a single class.
test_has_both_classes = len(np.unique(y_test)) == 2

for _ in tqdm(range(N_BOOTSTRAP)):
    # Bootstrap sample from training data
    boot_idx = np.random.choice(train_size, train_size, replace=True)
    X_train = X[boot_idx]
    y_train = y[boot_idx]

    # A resample with one class cannot be fitted; an empty test set cannot
    # be scored.
    if len(np.unique(y_train)) < 2 or len(X_test) == 0:
        continue

    lr = LogisticRegression(max_iter=1000).fit(X_train, y_train)
    y_pred = lr.predict(X_test)
    y_prob = lr.predict_proba(X_test)[:, 1]

    bootstrap_accs.append(accuracy_score(y_test, y_pred))
    if test_has_both_classes:
        bootstrap_aucs.append(roc_auc_score(y_test, y_prob))

# Confidence intervals
# Percentile intervals; [0, 0] is the placeholder when nothing was scored.
acc_ci = np.percentile(bootstrap_accs, [2.5, 97.5]) if bootstrap_accs else [0, 0]
auc_ci = np.percentile(bootstrap_aucs, [2.5, 97.5]) if bootstrap_aucs else [0, 0]

print(f'\nAccuracy: {np.mean(bootstrap_accs):.1%} (95% CI: {acc_ci[0]:.1%} - {acc_ci[1]:.1%})')
print(f'AUC: {np.mean(bootstrap_aucs):.3f} (95% CI: {auc_ci[0]:.3f} - {auc_ci[1]:.3f})')

---
# PART 5: FINAL VALIDATION SUMMARY

In [None]:
# === Plot everything ===
# Four stacked panels on a shared time axis: composite, bifurcation score,
# flickering z-score and the USA index on a log scale.

fig, axes = plt.subplots(4, 1, figsize=(14, 16), sharex=True)
ax_meta, ax_bif, ax_flick, ax_price = axes

# Meta-Fragility
ax_meta.fill_between(meta_fragility.index, meta_fragility.values, alpha=0.3, color='red')
ax_meta.plot(meta_fragility.index, meta_fragility.values, 'r-', linewidth=1)
ax_meta.set_ylabel('Meta-Fragility')
ax_meta.set_title('Meta-Fragility Composite (weighted by lead time)')

# Bifurcation score
ax_bif.fill_between(bifurcation_score.index, bifurcation_score.values, alpha=0.3, color='purple')
ax_bif.plot(bifurcation_score.index, bifurcation_score.values, 'purple', linewidth=1)
ax_bif.set_ylabel('Bifurcation Score')
ax_bif.set_title('Bifurcation Indicator (flickering + AC1 + variance + bimodality)')

# Flickering
ax_flick.plot(flickering.index, flickering.values, 'orange', alpha=0.7)
ax_flick.axhline(2, color='red', linestyle='--', label='High flickering')
ax_flick.set_ylabel('Flickering (z-score)')
ax_flick.set_title('Flickering Detector')
ax_flick.legend()

# S&P 500
sp500 = df['USA_index'].loc[meta_fragility.index].dropna()
ax_price.plot(sp500.index, sp500.values, 'b-', alpha=0.7)
ax_price.set_ylabel('S&P 500')
ax_price.set_yscale('log')

# Mark crises
for date in CRISES.values():
    for ax in axes:
        ax.axvline(date, color='red', alpha=0.5, linestyle='--')

plt.tight_layout()
plt.savefig('/content/drive/MyDrive/CARIA/models/meta_fragility_validation.png', dpi=150)
plt.show()

In [None]:
# === FINAL SUMMARY ===
# Text report assembled from values computed in earlier cells; nothing is
# recomputed here except the pre-crisis averages of the bifurcation score.
print('\n' + '='*70)
print('GREAT CARIA - META-FRAGILITY COMPOSITE + COMPLEXITY VALIDATION')
print('='*70)

# One line per signal: its composite weight and its mean lead time.
print('\nüìä SIGNAL WEIGHTS (by lead time):')
for sig, w in weights.items():
    lead = avg_leads[sig]
    print(f'  {sig:12s}: weight={w:.2f} (avg lead={lead:.0f}d)')

# Reports the current values of the module-level z_score and p_value.
print('\nüî¨ SURROGATE TESTS:')
print(f'  Synchronization: z={z_score:.2f}, p<0.05 = {p_value < 0.05}')

print('\nüìà BOOTSTRAP CONFIDENCE:')
print(f'  Accuracy: {np.mean(bootstrap_accs):.1%} [{acc_ci[0]:.1%} - {acc_ci[1]:.1%}]')
print(f'  AUC: {np.mean(bootstrap_aucs):.3f} [{auc_ci[0]:.3f} - {auc_ci[1]:.3f}]')

# Mean bifurcation score over the 60 calendar days before each crisis date.
print('\nüåã BIFURCATION DETECTION:')
print(f'  Bifurcation score pre-Lehman: {bifurcation_score.loc[(bifurcation_score.index < CRISES["Lehman"]) & (bifurcation_score.index > CRISES["Lehman"] - pd.Timedelta(days=60))].mean():.3f}')
print(f'  Bifurcation score pre-COVID: {bifurcation_score.loc[(bifurcation_score.index < CRISES["COVID"]) & (bifurcation_score.index > CRISES["COVID"] - pd.Timedelta(days=60))].mean():.3f}')

print('\n' + '='*70)
print('STATUS: Ready for frontend integration')
print('='*70)

In [None]:
# === EXPORT FOR FRONTEND ===
# Writes the latest readings, thresholds, weights, validation metrics and the
# last 252 rows of history to a JSON file on the mounted drive.
import json


def _json_number(value, default=None):
    """Return ``value`` as a float, or ``default`` when it is NaN or infinite.

    json.dump would otherwise emit the bare tokens NaN/Infinity, which are not
    valid JSON and are rejected by strict parsers.
    """
    number = float(value)
    return number if np.isfinite(number) else default


export = {
    'version': 'Great Caria Meta-Fragility v1.0',
    'generated': pd.Timestamp.now().isoformat(),
    'current': {
        'meta_fragility': _json_number(meta_fragility.iloc[-1]),
        'bifurcation_score': _json_number(bifurcation_score.iloc[-1]),
        'flickering': _json_number(flickering.iloc[-1]),
        'acf1': _json_number(ews['ac1'].iloc[-1]),
        'sync_order': _json_number(sync_order.iloc[-1])
    },
    'thresholds': {
        'warning': _json_number(meta_fragility.quantile(0.8)),
        'critical': _json_number(meta_fragility.quantile(0.95))
    },
    'weights': {name: _json_number(value) for name, value in weights.items()},
    'validation': {
        'accuracy_mean': _json_number(np.mean(bootstrap_accs)),
        'accuracy_ci_low': _json_number(acc_ci[0]),
        'accuracy_ci_high': _json_number(acc_ci[1]),
        'auc_mean': _json_number(np.mean(bootstrap_aucs)),
        'surrogate_p_value': _json_number(p_value)
    },
    # 'bf' is 0 on dates for which no bifurcation score exists.
    'history': [
        {
            'date': d.isoformat(),
            'mf': _json_number(meta_fragility.loc[d]),
            'bf': _json_number(bifurcation_score.loc[d]) if d in bifurcation_score.index else 0
        }
        for d in meta_fragility.index[-252:]
    ]
}

with open('/content/drive/MyDrive/CARIA/models/meta_fragility_export.json', 'w') as f:
    # allow_nan=False makes any non-finite value that slipped through fail
    # loudly instead of producing an unparseable file.
    json.dump(export, f, indent=2, allow_nan=False)

print('\n‚úì Exported: meta_fragility_export.json')
print('\nDownload this file and place in: Caria_repo/caria/models/')