# GREAT CARIA MOONSHOT - Extended Economic Space

## Components:
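1. **Alternative Data**: market-based proxies (VIX, DXY, oil volatility, gold/oil ratio, EM FX stress, return dispersion) standing in for EPU, GPR, GDELT sentiment, Google Trends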
2. **Multi-scale Analysis**: Wavelets, cross-correlation by frequency
3. **Kuramoto Synchronization**: Phase coherence as crisis precursor
4. **Ricci Curvature**: Network geometry for fragility
5. **Topological Persistence**: Homology for early warning
6. **Critical Transitions**: Slowing down, variance signals

In [None]:
!pip install pywt networkx GraphRicciCurvature gudhi -q

import pandas as pd
import numpy as np
from scipy import stats, signal
from scipy.ndimage import gaussian_filter1d
import pywt
import networkx as nx
import gudhi
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings('ignore')

from google.colab import drive
drive.mount('/content/drive')

# Location of the market panel on the mounted Google Drive.
MARKET_PATH = '/content/drive/MyDrive/CARIA/data/raw/yahoo_market.parquet'
df = pd.read_parquet(MARKET_PATH)
# Country codes; the '<code>_index' columns of df are looked up from this list.
COUNTRIES = ['USA', 'CHN', 'JPN', 'DEU', 'GBR', 'FRA', 'BRA', 'MEX', 'KOR', 'AUS', 'IND', 'ZAF']
print(f'Data: {df.shape}')

In [None]:
# === BASE DATA ===
# Keep only the country index columns that actually exist in df.
idx_cols = [f'{c}_index' for c in COUNTRIES if f'{c}_index' in df.columns]
# Period-over-period returns; dropna() removes every row where any country is missing.
ret = df[idx_cols].pct_change().dropna()
# Strip the '_index' suffix so the columns are bare country codes.
ret.columns = [c.replace('_index', '') for c in ret.columns]

# Crisis Factor
def compute_cf(r, w=20):
    """Rolling crisis factor: mean pairwise correlation times mean volatility.

    For every position ``i >= w`` the ``w`` rows strictly before ``i`` form the
    window, so the value stamped at ``r.index[i]`` does not use row ``i``
    itself.

    Args:
        r: DataFrame of returns, one column per asset.
        w: Number of rows in each trailing window.

    Returns:
        Series indexed by ``r.index[w:]`` holding
        ``mean off-diagonal correlation * mean column std * 100``.
    """
    def _window_value(block):
        corr = block.corr().values
        n_assets = len(corr)
        # The diagonal contributes exactly n_assets ones; remove it before averaging.
        off_diag_mean = (corr.sum() - n_assets) / (n_assets * (n_assets - 1))
        return off_diag_mean * block.std().mean() * 100

    values = [_window_value(r.iloc[end - w:end]) for end in range(w, len(r))]
    return pd.Series(values, index=r.index[w:])

# Crisis factor over the full return panel, using compute_cf's default window (w=20).
CF = compute_cf(ret)
print(f'CF: {len(CF)}')

---
# PART 1: ALTERNATIVE DATA SOURCES

In [None]:
# === 1A: Create proxy indicators from available data ===
print('=== Alternative Data Proxies ===')

# Every proxy is sampled on the Crisis Factor's dates.
cf_dates = CF.index

# Fear/uncertainty proxy: VIX level
vix = df['VIX'].loc[cf_dates]

# Dollar-stress proxy: DXY level
dxy = df['DXY'].loc[cf_dates]

# Geopolitical proxy: 20-row rolling std of oil returns, in percent
oil_returns = df['Oil'].pct_change()
oil_vol = oil_returns.rolling(20).std().loc[cf_dates] * 100

# Risk-off proxy: Gold/Oil ratio
gold_oil = (df['Gold'] / df['Oil']).loc[cf_dates]

# EM stress: cross-sectional mean of the 20-row rolling FX return std, in percent.
# Falls back to a constant zero series when no matching '<country>_fx' column exists.
em_fx_cols = [
    col for col in df.columns
    if col.endswith('_fx') and col.split('_')[0] in ['BRA', 'MEX', 'ZAF', 'IND']
]
if not em_fx_cols:
    em_stress = pd.Series(0, index=cf_dates)
else:
    em_fx_vol = df[em_fx_cols].pct_change().rolling(20).std()
    em_stress = em_fx_vol.mean(axis=1).loc[cf_dates] * 100

# Dispersion index: cross-sectional std of country returns (low = herding)
dispersion = ret.std(axis=1).loc[cf_dates]

# Assemble the proxies next to CF and keep only fully populated rows.
alt_columns = {
    'vix': vix,
    'dxy': dxy,
    'oil_vol': oil_vol,
    'gold_oil': gold_oil,
    'em_stress': em_stress,
    'dispersion': dispersion,
    'cf': CF,
}
alt_data = pd.DataFrame(alt_columns).dropna()

print(f'Alternative data: {alt_data.shape}')
print(alt_data.corr()['cf'].sort_values())

---
# PART 2: MULTI-SCALE WAVELET ANALYSIS

In [None]:
# === 2A: Wavelet decomposition ===
print('=== Wavelet Multi-scale Analysis ===')

def wavelet_decompose(series, wavelet='db4', level=5):
    """Split ``series`` into one time-domain reconstruction per wavelet band.

    Args:
        series: pandas Series to decompose (its ``.values`` are used).
        wavelet: Wavelet name passed to ``pywt.wavedec`` / ``pywt.waverec``.
        level: Decomposition depth passed to ``pywt.wavedec``.

    Returns:
        Dict mapping ``'level_<k>'`` to an array truncated to ``len(series)``,
        where ``k`` is the position of the coefficient array in the list
        returned by ``pywt.wavedec``.
    """
    coeffs = pywt.wavedec(series.values, wavelet, level=level)
    n_samples = len(series)
    bands = {}
    for band, kept in enumerate(coeffs):
        # Zero every coefficient array except this band's, then invert the transform.
        isolated = [
            kept if pos == band else np.zeros_like(other)
            for pos, other in enumerate(coeffs)
        ]
        # waverec may return one extra sample; trim to the input length.
        bands[f'level_{band}'] = pywt.waverec(isolated, wavelet)[:n_samples]
    return bands

# Decompose CF
cf_waves = wavelet_decompose(CF)

# Plot
# 3x2 grid: six panels, one per band produced by the default level=5 decomposition.
fig, axes = plt.subplots(3, 2, figsize=(14, 10))
for i, (name, wave) in enumerate(cf_waves.items()):
    if i < 6:  # the grid has only six panels; any further bands are not drawn
        ax = axes[i // 2, i % 2]
        # Both slices guard against a length mismatch between CF and the reconstruction.
        ax.plot(CF.index[:len(wave)], wave[:len(CF)], alpha=0.7)
        ax.set_title(f'{name} (frequency band {i})')
plt.suptitle('Crisis Factor Wavelet Decomposition')
plt.tight_layout()
plt.show()

In [None]:
# === 2B: Cross-correlation by frequency band ===
print('\n=== Cross-correlation by Frequency ===')

# Decompose each country (missing returns are treated as zero).
country_waves = {c: wavelet_decompose(ret[c].fillna(0)) for c in ret.columns}

# Compute correlation networks at each scale.
# The number of bands is read from the decomposition itself rather than hard-coded,
# so it stays correct if wavelet_decompose is called with a different level.
n_levels = len(next(iter(country_waves.values()))) if country_waves else 0
n_countries = len(ret.columns)

scale_networks = {}
for level in range(n_levels):
    # One row per country; every reconstruction is trimmed to len(ret), so rows align.
    band = np.vstack([country_waves[c][f'level_{level}'] for c in ret.columns])
    # A single corrcoef call yields the full matrix; atleast_2d keeps the
    # one-country case a 1x1 matrix instead of a scalar.
    corr_matrix = np.atleast_2d(np.corrcoef(band))
    scale_networks[level] = corr_matrix
    # Mean of the off-diagonal entries (the diagonal sums to n_countries).
    avg_corr = (corr_matrix.sum() - n_countries) / (n_countries * (n_countries - 1))
    print(f'Level {level}: avg correlation = {avg_corr:.3f}')

---
# PART 3: KURAMOTO SYNCHRONIZATION

In [None]:
# === 3A: Phase extraction via Hilbert transform ===
print('=== Kuramoto Synchronization Analysis ===')

def extract_phase(series):
    """Return the instantaneous phase of ``series`` as a NumPy array.

    The slow component (Gaussian smoothing with sigma=60 samples) is removed
    first; the phase is the angle of the analytic signal of the residual,
    in radians within [-pi, pi].
    """
    slow_trend = gaussian_filter1d(series.values, sigma=60)
    residual = series - slow_trend
    return np.angle(signal.hilbert(residual))

# Extract phases for each country
# Missing returns are replaced by zero before the transform; rows keep ret's index.
phases = pd.DataFrame({c: extract_phase(ret[c].fillna(0)) for c in ret.columns}, index=ret.index)

# Kuramoto order parameter r(t)
def kuramoto_order_param(phases, window=60):
    """Kuramoto order parameter r(t) = |mean_j exp(i * theta_j(t))| per row.

    Each value is computed from a single row of ``phases``; ``window`` is not
    an averaging span. It only sets how many leading rows are skipped.

    Args:
        phases: DataFrame of phase angles in radians, one column per oscillator.
        window: Number of initial rows to leave out of the result.

    Returns:
        Series indexed by ``phases.index[window:]``.
    """
    angles = phases.values
    order = [
        np.abs(np.exp(1j * angles[row]).mean())
        for row in range(window, len(phases))
    ]
    return pd.Series(order, index=phases.index[window:])

# Per-date synchronization across countries; the first 60 rows are skipped.
sync_order = kuramoto_order_param(phases, window=60)
print(f'Sync order range: {sync_order.min():.3f} - {sync_order.max():.3f}')

In [None]:
# === 3B: Plot synchronization vs crises ===

# Two stacked panels sharing the date axis: synchronization on top, VIX below.
fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)

# Sync order
axes[0].plot(sync_order.index, sync_order.values, 'b-', alpha=0.7)
# Reference line at the full-sample 90th percentile of r(t).
axes[0].axhline(sync_order.quantile(0.9), color='red', linestyle='--', label='90th percentile')
axes[0].set_ylabel('Kuramoto r(t)')
axes[0].set_title('Global Market Synchronization')
axes[0].legend()

# Mark crises
# `name` is not used here; only the date is drawn, on both panels.
for name, date in [('Lehman', '2008-09-15'), ('COVID', '2020-03-11')]:
    axes[0].axvline(pd.Timestamp(date), color='red', alpha=0.5)
    axes[1].axvline(pd.Timestamp(date), color='red', alpha=0.5)

# VIX for comparison
axes[1].plot(vix.index, vix.values, 'orange', alpha=0.7)
axes[1].set_ylabel('VIX')
axes[1].set_title('VIX (for comparison)')

plt.tight_layout()
plt.show()

# Correlation sync vs VIX
# Computed on the dates present in both series.
common = sync_order.index.intersection(vix.index)
corr_sync_vix = sync_order.loc[common].corr(vix.loc[common])
print(f'\nSync-VIX correlation: {corr_sync_vix:.3f}')

---
# PART 4: RICCI CURVATURE

In [None]:
# === 4A: Ollivier-Ricci curvature on correlation network ===
print('=== Ricci Curvature Analysis ===')

# GraphRicciCurvature is optional: when it cannot be imported the notebook falls
# back to a clustering-coefficient proxy. `except Exception` keeps that best-effort
# behaviour for any import-time failure while letting KeyboardInterrupt and
# SystemExit propagate (a bare `except:` would swallow those too).
try:
    from GraphRicciCurvature.OllivierRicci import OllivierRicci
    HAS_RICCI = True
except Exception:
    print('GraphRicciCurvature not available, using simplified version')
    HAS_RICCI = False

def compute_network_curvature(returns, window=60):
    """Average curvature of the correlation network built from ``returns``.

    Pairwise correlations are mapped to distances ``d = sqrt(2 * (1 - corr))``.
    Pairs with ``d < 1.5`` become edges weighted ``1 / (d + 0.01)``. When
    GraphRicciCurvature is available (``HAS_RICCI``) and the graph has edges,
    the mean Ollivier-Ricci edge curvature is returned. Otherwise, or if that
    computation fails, the average clustering coefficient is used as a proxy.

    Args:
        returns: DataFrame of returns, one column per node. The caller passes
            an already-windowed slice; all rows given are used.
        window: Unused by this function; kept so existing calls keep working.

    Returns:
        Mean edge curvature, or the average clustering coefficient, or 0 when
        the graph has at most two nodes.
    """
    corr = np.clip(returns.corr().values, -1, 1)
    dist = np.sqrt(2 * (1 - corr))
    np.fill_diagonal(dist, 0)

    # Build the graph from sufficiently close pairs only. Nodes without any such
    # pair are never added, so they do not enter the averages below.
    # NOTE(review): the edge weight grows as distance shrinks; confirm that this is
    # the weight convention OllivierRicci expects (it may treat weight as a length).
    G = nx.Graph()
    n = len(returns.columns)
    for i in range(n):
        for j in range(i + 1, n):
            if dist[i, j] < 1.5:  # Only close connections
                G.add_edge(i, j, weight=1 / (dist[i, j] + 0.01))

    if HAS_RICCI and G.number_of_edges() > 0:
        try:
            orc = OllivierRicci(G, alpha=0.5)
            orc.compute_ricci_curvature()
            curvatures = [d['ricciCurvature'] for _, _, d in orc.G.edges(data=True)]
            return np.mean(curvatures) if curvatures else 0
        except Exception:
            # Best effort: fall through to the clustering proxy. Catching Exception
            # rather than everything lets Ctrl-C stop a long rolling loop.
            pass

    # Simplified: use clustering coefficient as proxy
    if G.number_of_nodes() > 2:
        return nx.average_clustering(G)
    return 0

# Compute rolling curvature
window = 60
step = 20

# Each window ends just before row `end`; the result is stamped with row `end`'s date.
window_ends = range(window, len(ret), step)
curvatures = [
    compute_network_curvature(ret.iloc[end - window:end])
    for end in tqdm(window_ends)
]
dates = [ret.index[end] for end in window_ends]

curvature_series = pd.Series(curvatures, index=dates)
print(f'Curvature range: {curvature_series.min():.3f} - {curvature_series.max():.3f}')

In [None]:
# === 4B: Plot curvature vs crises ===

fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(curvature_series.index, curvature_series.values, 'g-', linewidth=1.5)
ax.axhline(curvature_series.mean(), color='gray', linestyle='--', label='Mean')

# Mark crises
# Only the Lehman line gets a legend label; the COVID line is passed an empty label.
for name, date in [('Lehman', '2008-09-15'), ('COVID', '2020-03-11')]:
    ax.axvline(pd.Timestamp(date), color='red', alpha=0.7, label=name if name=='Lehman' else '')

# NOTE(review): the axis label says "clustering proxy", but the series holds
# Ollivier-Ricci values whenever HAS_RICCI is True and that computation succeeds.
ax.set_ylabel('Network Curvature (clustering proxy)')
ax.set_title('Ricci Curvature of Market Network')
ax.legend()
plt.show()

# Before vs after crisis
# Mean curvature in the 180 calendar days before and after the Lehman date (exclusive bounds).
lehman = pd.Timestamp('2008-09-15')
pre_lehman = curvature_series[(curvature_series.index < lehman) & (curvature_series.index > lehman - pd.Timedelta(days=180))].mean()
post_lehman = curvature_series[(curvature_series.index > lehman) & (curvature_series.index < lehman + pd.Timedelta(days=180))].mean()
print(f'\nLehman: pre={pre_lehman:.3f}, post={post_lehman:.3f}, change={post_lehman-pre_lehman:.3f}')

---
# PART 5: TOPOLOGICAL PERSISTENCE (TDA)

In [None]:
# === 5A: Persistent homology of correlation network ===
print('=== Topological Data Analysis ===')

def _finite_lifetimes(intervals):
    """Return death - birth for every interval whose lifetime is finite."""
    if len(intervals) == 0:
        return np.array([])
    lifetimes = intervals[:, 1] - intervals[:, 0]
    return lifetimes[np.isfinite(lifetimes)]


def compute_persistence_features(returns):
    """Compute persistent-homology summaries of the correlation network.

    Correlations are mapped to distances ``sqrt(2 * (1 - corr))`` and a Rips
    complex (max edge length 2.5, simplices up to dimension 2) is built on the
    resulting distance matrix.

    Args:
        returns: DataFrame of returns, one column per node.

    Returns:
        Dict with
        ``h0_lifetime``: total finite lifetime of connected components,
        ``h1_lifetime``: total finite lifetime of loops,
        ``n_loops``: number of finite H1 intervals.
        All three are 0 when the persistence computation fails.
    """
    corr = np.clip(returns.corr().values, -1, 1)
    dist = np.sqrt(2 * (1 - corr))
    np.fill_diagonal(dist, 0)

    try:
        rips = gudhi.RipsComplex(distance_matrix=dist, max_edge_length=2.5)
        st = rips.create_simplex_tree(max_dimension=2)
        st.compute_persistence()

        # H0 (connected components). The component that never dies has an infinite
        # death value; summing it would make h0_lifetime infinite for every window,
        # so only finite lifetimes are counted, the same way as for H1.
        h0 = _finite_lifetimes(st.persistence_intervals_in_dimension(0))

        # H1 (loops)
        h1 = _finite_lifetimes(st.persistence_intervals_in_dimension(1))
    except Exception:
        # Best effort: a failed window contributes zeros. Catching Exception rather
        # than everything lets Ctrl-C stop the rolling loop.
        return {'h0_lifetime': 0, 'h1_lifetime': 0, 'n_loops': 0}

    return {
        'h0_lifetime': np.sum(h0) if len(h0) > 0 else 0,
        'h1_lifetime': np.sum(h1) if len(h1) > 0 else 0,
        'n_loops': len(h1),
    }

# Compute rolling TDA
window = 60
step = 10

# Each window ends just before row `end`; the features are stamped with row `end`'s date.
window_ends = range(window, len(ret), step)
tda_features = [
    compute_persistence_features(ret.iloc[end - window:end])
    for end in tqdm(window_ends)
]
tda_dates = [ret.index[end] for end in window_ends]

tda_df = pd.DataFrame(tda_features, index=tda_dates)
print(f'TDA features computed: {tda_df.shape}')

In [None]:
# === 5B: TDA as early warning ===

# Two stacked panels sharing the date axis: total H1 lifetime on top, loop count below.
fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)

axes[0].plot(tda_df.index, tda_df['h1_lifetime'], 'purple', alpha=0.7)
axes[0].set_ylabel('H1 Lifetime')
axes[0].set_title('Topological Complexity (Loops in Correlation Network)')

axes[1].plot(tda_df.index, tda_df['n_loops'], 'orange', alpha=0.7)
axes[1].set_ylabel('Number of Loops')

# Mark the crisis dates on both panels (`name` is unused).
for name, date in [('Lehman', '2008-09-15'), ('COVID', '2020-03-11')]:
    axes[0].axvline(pd.Timestamp(date), color='red', alpha=0.5)
    axes[1].axvline(pd.Timestamp(date), color='red', alpha=0.5)

plt.tight_layout()
plt.show()

---
# PART 6: CRITICAL TRANSITIONS

In [None]:
# === 6A: Early warning signals ===
print('=== Critical Transition Signals ===')

def compute_ews(series, window=120):
    """Rolling early-warning indicators for ``series``.

    Args:
        series: pandas Series to analyse.
        window: Number of observations in each rolling window.

    Returns:
        DataFrame on ``series``' index with columns ``ac1`` (lag-1
        autocorrelation), ``variance`` and ``skewness``. Rows before the first
        full window are NaN.
    """
    def _lag1_autocorr(chunk):
        return chunk.autocorr(lag=1)

    roll = series.rolling(window)
    indicators = {
        # raw=False so each window arrives as a Series, which provides autocorr().
        'ac1': roll.apply(_lag1_autocorr, raw=False),
        'variance': roll.var(),
        'skewness': roll.skew(),
    }
    return pd.DataFrame(indicators)

# Early-warning indicators of the Crisis Factor, using compute_ews' default window (120).
ews = compute_ews(CF)
print(f'EWS computed: {ews.dropna().shape}')

In [None]:
# === 6B: Plot EWS vs crises ===

# Three stacked panels sharing the date axis, one per indicator.
fig, axes = plt.subplots(3, 1, figsize=(14, 10), sharex=True)

axes[0].plot(ews.index, ews['ac1'], 'b-', alpha=0.7)
axes[0].set_ylabel('Autocorrelation(1)')
axes[0].set_title('Early Warning Signals for Crisis Factor')

axes[1].plot(ews.index, ews['variance'], 'g-', alpha=0.7)
axes[1].set_ylabel('Variance')

axes[2].plot(ews.index, ews['skewness'], 'r-', alpha=0.7)
axes[2].set_ylabel('Skewness')

# Mark crises
# Same two dates on every panel (`name` is unused here).
for ax in axes:
    for name, date in [('Lehman', '2008-09-15'), ('COVID', '2020-03-11')]:
        ax.axvline(pd.Timestamp(date), color='red', alpha=0.5)

plt.tight_layout()
plt.show()

# Check pre-crisis behavior
# Compares mean AC1 over the 180 and the 60 calendar days before each crisis date;
# a positive trend means autocorrelation rose as the crisis approached.
for name, date in [('Lehman', '2008-09-15'), ('COVID', '2020-03-11')]:
    d = pd.Timestamp(date)
    try:
        pre_180 = ews['ac1'].loc[(ews.index < d) & (ews.index > d - pd.Timedelta(days=180))].mean()
        pre_60 = ews['ac1'].loc[(ews.index < d) & (ews.index > d - pd.Timedelta(days=60))].mean()
    except (TypeError, ValueError) as exc:
        # e.g. an index that cannot be compared with a naive Timestamp. Report the
        # skipped crisis instead of dropping it silently.
        print(f'{name}: AC1 pre-crisis check skipped ({type(exc).__name__}: {exc})')
        continue
    print(f'{name}: AC1 pre-180d={pre_180:.3f}, pre-60d={pre_60:.3f}, trend={pre_60-pre_180:+.3f}')

---
# PART 7: INTEGRATED FRAGILITY INDEX

In [None]:
# === 7A: Combine all signals into unified index ===
print('=== Integrated Fragility Index ===')

# Align all signals
# Start from CF's dates and keep only those where every listed signal is non-NaN.
# ews['variance'] is not part of this loop.
common_idx = CF.index
for series in [sync_order, curvature_series, tda_df['h1_lifetime'], ews['ac1']]:
    common_idx = common_idx.intersection(series.dropna().index)

# Normalize each signal to [0,1]
def normalize(s):
    """Min-max scale ``s``; a 1e-8 term in the denominator avoids division by zero."""
    lowest = s.min()
    span = s.max() - lowest
    return (s - lowest) / (span + 1e-8)

# Source series per signal; each one is restricted to the common dates and then
# min-max scaled over that sample.
aligned = {
    'cf': CF,
    'sync': sync_order,
    'curv_inv': curvature_series,
    'tda': tda_df['h1_lifetime'],
    'ac1': ews['ac1'],
    'var': ews['variance'],
}
scaled = {key: normalize(src.loc[common_idx]) for key, src in aligned.items()}
# Invert: low curv = high fragility
scaled['curv_inv'] = 1 - scaled['curv_inv']
signals = pd.DataFrame(scaled)

# Integrated index = weighted average
weights = {'cf': 0.25, 'sync': 0.20, 'curv_inv': 0.15, 'tda': 0.15, 'ac1': 0.15, 'var': 0.10}
integrated_fragility = 0
for key, weight in weights.items():
    integrated_fragility = integrated_fragility + signals[key] * weight

print(f'Integrated Fragility computed: {len(integrated_fragility)} samples')

In [None]:
# === 7B: Plot integrated index ===

# Two stacked panels sharing the date axis: fragility index on top, USA index below.
fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)

# Integrated fragility
axes[0].fill_between(integrated_fragility.index, integrated_fragility.values, alpha=0.3, color='red')
axes[0].plot(integrated_fragility.index, integrated_fragility.values, 'r-', linewidth=1)
# Threshold lines are full-sample quantiles of the index itself.
axes[0].axhline(integrated_fragility.quantile(0.8), color='orange', linestyle='--', label='Warning (80%)')
axes[0].axhline(integrated_fragility.quantile(0.95), color='darkred', linestyle='--', label='Critical (95%)')
axes[0].set_ylabel('Integrated Fragility')
axes[0].set_title('Great Caria Moonshot: Integrated Systemic Fragility Index')
axes[0].legend()

# S&P 500 for reference
# Uses the 'USA_index' column on the fragility index's dates, drawn on a log scale.
sp500 = df['USA_index'].loc[integrated_fragility.index].dropna()
axes[1].plot(sp500.index, sp500.values, 'b-', alpha=0.7)
axes[1].set_ylabel('S&P 500')
axes[1].set_yscale('log')

# Mark crises
for ax in axes:
    for name, date in [('Lehman', '2008-09-15'), ('COVID', '2020-03-11')]:
        ax.axvline(pd.Timestamp(date), color='red', alpha=0.5)

plt.tight_layout()
plt.show()

In [None]:
# === 7C: Predictive power of integrated index ===
print('\n=== Integrated Index Prediction Test ===')

# Target: is the summed USA index return over the next 20 rows below -5%?
sp500_ret = df['USA_index'].pct_change()
sp500_20d = sp500_ret.rolling(20).sum().shift(-20)  # Forward 20d return
# Drop dates whose forward return is unknown BEFORE thresholding: `NaN < -0.05` is
# False, so thresholding first would label the last 20 rows as "no crisis".
fwd_known = sp500_20d.loc[integrated_fragility.index].dropna()
crisis_target = (fwd_known < -0.05).astype(int)

# Features
X = signals.loc[crisis_target.index]
y = crisis_target

# Purged split: leave a gap of PURGE rows between train and test so that the
# 20-row forward-looking labels at the end of the training set do not overlap
# the test period.
PURGE = 30
n = len(X)
train_end = int(n * 0.7)
test_start = train_end + PURGE

X_train, y_train = X.iloc[:train_end].values, y.iloc[:train_end].values
X_test, y_test = X.iloc[test_start:].values, y.iloc[test_start:].values

# Model
lr = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = lr.predict(X_test)
acc = accuracy_score(y_test, y_pred)

# Baseline (random): same model fitted on permuted training labels. A fixed seed
# makes the reported lift reproducible between runs.
y_shuf = np.random.default_rng(42).permutation(y_train)
lr_shuf = LogisticRegression(max_iter=1000).fit(X_train, y_shuf)
acc_shuf = accuracy_score(y_test, lr_shuf.predict(X_test))

print(f'Crisis prediction accuracy: {acc:.1%}')
print(f'Shuffle baseline: {acc_shuf:.1%}')
print(f'Lift: {(acc - acc_shuf)*100:.1f}pp')

In [None]:

# === 8A: Meta-Fragility Composite (New) ===
print('=== Meta-Fragility Composite (New) ===')

# Define crisis dates for lead calculations (Lehman and COVID events). Adjust or extend as needed.
crisis_dates = [pd.Timestamp('2008-09-15'), pd.Timestamp('2020-03-09')]

# Collect early-warning signals into a dictionary.
# Invert curvature (low curvature implies high fragility) and take absolute skewness.
# Include the Crisis Factor itself as a signal to capture its behaviour.
# NOTE: this rebinds `signals` (the DataFrame built in 7A) to a dict; re-run 7A
# before re-running 7C.
signals = {
    'CF': CF,
    'Sync': sync_order,
    'ACF1': ews['ac1'],
    'Variance': ews['variance'],
    'Skewness': ews['skewness'].abs(),
    'Curvature': 1 - curvature_series,
    'H1': tda_df['h1_lifetime'],
    'Loops': tda_df['n_loops']  # column name as produced by compute_persistence_features
}

# Align signals and drop any dates with missing values
signal_df = pd.concat(signals, axis=1).dropna()

# Normalize each series to zero mean and unit variance (add epsilon to avoid divide-by-zero)
signal_norm = (signal_df - signal_df.mean()) / (signal_df.std() + 1e-8)

# Compute average lead times: for each signal and each crisis, find the date of the
# maximum absolute deviation within the last 60 observations (rows of signal_norm,
# not calendar days) up to the crisis date. The lead is the number of calendar days
# between the crisis and that date.
lead_times = {}
for name, series in signal_norm.items():
    leads = []
    for cdate in crisis_dates:
        pre_crisis = series.loc[:cdate].tail(60)
        if not pre_crisis.empty:
            idx_max = pre_crisis.abs().idxmax()
            lead = max((cdate - idx_max).days, 1)  # ensure positive
            leads.append(lead)
    lead_times[name] = np.mean(leads) if leads else 1

# Convert lead times to weights by taking their inverse and normalizing
weights = {k: 1.0/v for k, v in lead_times.items()}
total_w = sum(weights.values())
weights = {k: w/total_w for k, w in weights.items()}
print('Signal weights (inverse mean lead time):')
print(weights)

# Construct meta-fragility composite as weighted sum of normalized signals
meta_fragility_new = pd.Series(0.0, index=signal_norm.index)
for k, w in weights.items():
    meta_fragility_new += w * signal_norm[k]

# Smooth the series with a 5-day centered rolling average to reduce noise
# (centered: each smoothed value also uses the two following observations).
meta_fragility_new = meta_fragility_new.rolling(5, center=True, min_periods=1).mean()

# Plot new meta fragility vs original integrated fragility
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(meta_fragility_new.index, meta_fragility_new, label='Meta‑Fragility (New)', linewidth=2)
ax.plot(integrated_fragility.index, integrated_fragility, label='Integrated Fragility (Original)', alpha=0.5)
for cdate in crisis_dates:
    ax.axvline(cdate, color='red', linestyle='--', alpha=0.5)
ax.set_ylabel('Composite Score')
ax.set_title('Meta‑Fragility Composite vs Integrated Fragility Index')
ax.legend()
plt.show()

# Evaluate predictive power: logistic regression on 20-day forward drawdowns (>5%) in S&P 500.
# The model is fitted and scored on the same sample, and the weights above were derived
# from the crisis dates, so this accuracy is in-sample.
print('\n=== Meta‑Fragility Prediction Test ===')
sp500_ret = df['USA_index'].pct_change()
fwd_20d = sp500_ret.rolling(20).sum().shift(-20)
# Drop dates with an unknown forward return before thresholding; otherwise the last
# 20 rows would be labelled 0 because `NaN < -0.05` is False.
target = (fwd_20d.dropna() < -0.05).astype(int)
common_idx = meta_fragility_new.dropna().index.intersection(target.index)
X = meta_fragility_new.loc[common_idx].values.reshape(-1, 1)
y = target.loc[common_idx].values
model = LogisticRegression(max_iter=1000)
model.fit(X, y)
preds = model.predict(X)
# Stored under its own name so the `acc` from the integrated-index test (7C), which
# the final summary prints together with `acc_shuf`, is not overwritten.
meta_acc = accuracy_score(y, preds)
print(f'Accuracy: {meta_acc * 100:.1f}%')


In [None]:
# === FINAL SUMMARY ===
banner = '='*60
implemented = (
    '  ✓ Alternative data proxies (VIX, DXY, Oil vol, EM stress, dispersion)',
    '  ✓ Wavelet multi-scale decomposition',
    '  ✓ Kuramoto synchronization (phase coherence)',
    '  ✓ Network curvature (Ricci proxy)',
    '  ✓ Topological persistence (H0, H1 homology)',
    '  ✓ Critical transition signals (AC1, variance, skewness)',
    '  ✓ Integrated Fragility Index',
)

print('\n' + banner)
print('GREAT CARIA MOONSHOT - COMPLETE')
print(banner)
print('\nComponents implemented:')
for line in implemented:
    print(line)
# Reports the accuracy and shuffle-baseline lift held in `acc` / `acc_shuf`.
print('\nCrisis prediction with integrated signals:')
print(f'  Accuracy: {acc:.1%}, Lift: {(acc-acc_shuf)*100:.1f}pp')
print(banner)