# 03 — Rigorous Stylized Facts Validation

Run an ensemble of independent simulations (identical parameters, different random
seeds) and check, run by run, whether the model reproduces the key stylized facts
of financial returns:
1. Fat tails (excess kurtosis)
2. Volatility clustering (ACF of |returns|)
3. No linear return autocorrelation
4. Tail index (Hill estimator) consistent with empirical data (the plot below shades α ∈ [2, 6] as the empirical range)

In [None]:
import sys
sys.path.insert(0, '..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from market_abm import MarketModel, DEFAULT_PARAMS
from market_abm.analytics import (
    compute_return_statistics,
    compute_autocorrelation,
    hill_estimator,
    validate_stylized_facts,
)
from market_abm.visualization import plot_autocorrelation_panel, plot_return_distribution

%matplotlib inline
# Set the default figure resolution to 100 DPI for figures created in this notebook.
plt.rcParams['figure.dpi'] = 100

## Run ensemble of simulations

In [None]:
# Ensemble driver: run N_RUNS simulations that share one configuration and
# differ only in their seed, collecting per-run return statistics and the
# pass/fail outcome of every stylized-fact check.
N_RUNS = 20
all_stats = []
all_facts = []

for i in range(N_RUNS):
    params = {
        **DEFAULT_PARAMS,
        'steps': 3000,
        'n_agents': 200,
        'seed': 1000 + i,  # distinct seed per ensemble member
    }
    model = MarketModel(params)
    results = model.run()
    returns = np.array(model.market_maker.return_history)

    # Per-run statistics, tagged with the run index and the Hill tail index.
    stats = compute_return_statistics(returns)
    stats.update(run=i, hill_index=hill_estimator(returns))
    all_stats.append(stats)

    # Keep only the boolean 'passed' flag of each stylized-fact check.
    facts = validate_stylized_facts(returns)
    row = {
        'run': i,
        **{name: outcome['passed'] for name, outcome in facts.items()},
    }
    all_facts.append(row)

# One row per run in both tables.
df_stats = pd.DataFrame(all_stats)
df_facts = pd.DataFrame(all_facts)
print(f'Completed {N_RUNS} simulations.')

## Summary statistics across runs

In [None]:
# Descriptive statistics (count, mean, std, quantiles, ...) of the headline
# metrics across all ensemble runs, rounded to four decimals. The bare final
# expression is what the notebook renders as the cell output.
display_cols = [
    'kurtosis',
    'skewness',
    'std',
    'jb_pvalue',
    'hill_index',
]
df_stats.loc[:, display_cols].describe().round(4)

## Stylized fact pass rates

In [None]:
# Report, for each stylized fact, the share and number of ensemble runs that
# passed its check. Every column of df_facts except 'run' is a pass/fail flag.
fact_cols = [c for c in df_facts.columns if c != 'run']
pass_rates = df_facts[fact_cols].mean()

# Count passes directly rather than reconstructing them as int(rate * N_RUNS):
# truncating a float product can under-count by one for some run counts
# (e.g. 0.29 * 100 == 28.999999999999996 -> 28).
pass_counts = df_facts[fact_cols].sum()
# Use the number of runs actually present in the table as the denominator so
# the printed fraction always matches the data being summarised.
n_runs_done = len(df_facts)

print('Pass rate across runs:')
print('-' * 40)
for fact, rate in pass_rates.items():
    # Traffic-light thresholds: >= 80% is a pass, >= 50% a warning, else a fail.
    if rate >= 0.8:
        status = '✅'
    elif rate >= 0.5:
        status = '⚠️'
    else:
        status = '❌'
    print(f'{status} {fact}: {rate:.0%} ({int(pass_counts[fact])}/{n_runs_done})')

## Distribution of kurtosis and tail index

In [None]:
# Three-panel figure showing how excess kurtosis, the Hill tail index and the
# JB-test p-value are distributed across the ensemble runs.
fig, axes = plt.subplots(1, 3, figsize=(14, 4))
ax_kurt, ax_hill, ax_jb = axes
hist_style = {'bins': 10, 'edgecolor': 'black', 'alpha': 0.7}

# Panel 1: excess kurtosis, with the Gaussian reference value (0) marked.
ax_kurt.hist(df_stats['kurtosis'], **hist_style)
ax_kurt.axvline(0, color='red', linestyle='--', label='Normal (κ=0)')
ax_kurt.set_title('Distribution of Excess Kurtosis')
ax_kurt.set_xlabel('Excess Kurtosis')
ax_kurt.legend()

# Panel 2: Hill tail index, with the [2, 6] reference band shaded.
ax_hill.hist(df_stats['hill_index'], **hist_style)
ax_hill.axvspan(2, 6, alpha=0.15, color='green', label='Empirical range [2,6]')
ax_hill.set_title('Distribution of Hill Tail Index')
ax_hill.set_xlabel('Tail Index (α)')
ax_hill.legend()

# Panel 3: JB-test p-values, with the 5% significance level marked.
ax_jb.hist(df_stats['jb_pvalue'], **hist_style)
ax_jb.axvline(0.05, color='red', linestyle='--', label='p=0.05')
ax_jb.set_title('JB Test p-values')
ax_jb.set_xlabel('p-value')
ax_jb.legend()

plt.tight_layout()
fig.savefig(
    '../figures/stylized_facts_ensemble.png',
    dpi=150,
    bbox_inches='tight',
)
plt.show()

## Detailed ACF analysis (single representative run)

In [None]:
# Run one additional, longer simulation (5000 steps, fixed seed 42) to serve
# as the representative run for the detailed autocorrelation plots.
model = MarketModel(
    {**DEFAULT_PARAMS, 'steps': 5000, 'n_agents': 200, 'seed': 42}
)
results = model.run()
returns = np.array(model.market_maker.return_history)

# Draw the autocorrelation panel onto three side-by-side axes (lags up to 100).
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(14, 4))
plot_autocorrelation_panel(returns, nlags=100, axes=axes)
plt.tight_layout()
fig.savefig(
    '../figures/acf_detailed.png',
    dpi=150,
    bbox_inches='tight',
)
plt.show()

In [None]:
# Return-distribution figure for the `returns` series currently in scope:
# two side-by-side axes handed to the plotting helper as its histogram axis
# and its `ax_qq` axis (presumably a Q-Q plot — confirm in
# market_abm.visualization).
fig, (ax_hist, ax_qq) = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))
plot_return_distribution(returns, ax_hist=ax_hist, ax_qq=ax_qq)
plt.tight_layout()
fig.savefig(
    '../figures/return_distribution.png',
    dpi=150,
    bbox_inches='tight',
)
plt.show()