# Rare Event Detection Experiment

This notebook demonstrates importance sampling for improving rare event detection.

## Objectives
1. Generate synthetic data with rare events
2. Compare sampling methods (uniform, stratified, importance, adaptive)
3. Evaluate detection sensitivity and mean time to detection (MTTD)
4. Show improvements vs baseline

In [None]:
import sys
sys.path.append('..')

from models.importance_sampling import ImportanceSampling
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Build a reproducible synthetic trip dataset and attach a binary
# rare-event label to every row.
np.random.seed(42)
n_samples = 10000

# Feature columns are drawn independently of each other. The dict order fixes
# the order in which the seeded generator is consumed, so do not rearrange it
# if the exact values need to stay reproducible.
feature_columns = {
    'trip_duration': np.random.uniform(10, 120, n_samples),
    'total_events': np.random.poisson(2, n_samples),
    'critical_events': np.random.poisson(0.1, n_samples),
    'avg_latency_ms': np.random.uniform(10, 200, n_samples),
    'safety_score': np.random.uniform(50, 100, n_samples),
}
df = pd.DataFrame(feature_columns)

# Baseline label: each row is flagged independently with probability
# rare_event_prob.
rare_event_prob = 0.01
df['has_rare_event'] = np.random.binomial(1, rare_event_prob, n_samples)

# Inject signal: rows whose latency exceeds 150 ms have their label redrawn
# at a 10% rate, replacing the baseline draw for those rows. Because of this
# overwrite, the overall rate printed below is expected to be higher than
# rare_event_prob.
high_latency = df['avg_latency_ms'] > 150
df.loc[high_latency, 'has_rare_event'] = np.random.binomial(
    1, 0.1, int(high_latency.sum())
)

print(f"Data shape: {df.shape}")
print(f"Rare event rate: {df['has_rare_event'].mean():.2%}")

In [None]:
# Run the sampling experiment on the synthetic dataset.
# NOTE(review): rare_event_prob is the base rate drawn before the high-latency
# signal was added, so it may differ from the observed rate in df. Confirm
# this is the value ImportanceSampling expects for rare_event_rate.
is_sampler = ImportanceSampling(rare_event_rate=rare_event_prob)
print("Running importance sampling experiment...")
results = is_sampler.run_experiment(df)

# The 'results' entry is the table that the plotting cell reads.
print("\nExperiment Results:", results['results'], sep="\n")

In [None]:
# Plot one bar chart per metric, side by side, with one bar per sampling method.
results_df = results['results']
method_labels = results_df['method']

fig, axes = plt.subplots(1, 3, figsize=(15, 5))
ax_sensitivity, ax_mttd, ax_improvement = axes

# (axis, results column, y-axis label, panel title) for each of the three panels.
panel_specs = [
    (ax_sensitivity, 'sensitivity', 'Sensitivity (TPR)', 'Detection Sensitivity'),
    (ax_mttd, 'mttd_hours', 'MTTD (hours)', 'Mean Time To Detection'),
    (ax_improvement, 'mttd_improvement_pct', 'MTTD Improvement (%)',
     'MTTD Improvement vs Baseline'),
]
for panel, column, y_label, heading in panel_specs:
    panel.bar(method_labels, results_df[column])
    panel.set_ylabel(y_label)
    panel.set_title(heading)
    panel.grid(True, alpha=0.3)

# Panel-specific touches: pin the sensitivity axis to the 0-1 range and draw a
# dashed zero line on the improvement panel to separate gains from regressions.
ax_sensitivity.set_ylim(0, 1)
ax_improvement.axhline(0, color='black', linestyle='--')

plt.tight_layout()
plt.show()