# Survival Analysis for Safety Regression

This notebook demonstrates how to use `SurvivalModel` to predict the time (in hours) until a safety regression occurs.

## Objectives
1. Prepare survival data
2. Fit Weibull survival model
3. Generate predictions with credible intervals
4. Perform posterior predictive checks

In [None]:
import sys
sys.path.append('..')

from models.survival_model import SurvivalModel
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import arviz as az

# Build a reproducible synthetic survival dataset
# (swap this out for a load from the analytics tables when real data is available).
np.random.seed(42)  # legacy global seed; fixes every draw made below
n_vehicles = 50
n_observations = 500

# Zero-padded identifiers: VH_00000 ... VH_00049
vehicle_ids = ["VH_{:05d}".format(idx) for idx in range(n_vehicles)]

# Draw the columns one at a time, in a fixed order, so the seeded random
# stream is consumed identically on every run.
sampled_vehicles = np.random.choice(vehicle_ids, n_observations)  # drawn with replacement
hours_to_event = np.random.weibull(1.5, n_observations) * 100     # Weibull draws (shape 1.5) scaled by 100
event_flags = np.random.binomial(1, 0.7, n_observations)          # 0/1 draws with P(1) = 0.7
# NOTE(review): presumably 1 = regression observed and 0 = censored —
# confirm against SurvivalModel.prepare_data.

df = pd.DataFrame(
    {
        'vehicle_id': sampled_vehicles,
        'time_to_event_hours': hours_to_event,
        'regression_occurred': event_flags,
    }
)

print(f"Data shape: {df.shape}")
event_rate = df['regression_occurred'].mean()
print(f"Event rate: {event_rate:.2%}")

In [None]:
# Build the model wrapper and convert the raw frame into the structure it fits on.
# NOTE(review): samples / tune / chains look like sampler settings (draws,
# warm-up steps, chain count) — confirm against SurvivalModel's constructor.
model = SurvivalModel(
    samples=1000,
    tune=500,
    chains=2,
)
data = model.prepare_data(df)

# Quick sanity check on what the model will actually see.
vehicle_count = data['n_vehicles']
print(f"Number of vehicles: {vehicle_count}")
observation_count = len(data['time'])
print(f"Number of observations: {observation_count}")

In [None]:
# Fit model
# Fits the survival model on the prepared data; the status messages bracket
# the call so the start and end of the fit are visible in the cell output.
# NOTE(review): `idata` is presumably the sampler's inference result (ArviZ is
# imported at the top of the notebook) — confirm against SurvivalModel.fit.
print("Fitting survival model...")
idata = model.fit(data, progressbar=True)  # progressbar=True is passed through to fit()
print("Model fitting complete!")

In [None]:
# Get diagnostics
# NOTE(review): `diagnostics` is treated as a table with an 'r_hat' column
# (presumably an ArviZ-style summary) — confirm against
# SurvivalModel.get_diagnostics.
diagnostics = model.get_diagnostics()
print("Model Diagnostics:")
print(diagnostics.head(10))

# Check convergence
# pandas' max() skips NaN by default, so any parameter whose R-hat could not
# be computed would silently drop out of the check. Count those entries and
# report them explicitly so a "Converged: True" line cannot hide them.
rhat_values = diagnostics['r_hat']
n_undefined_rhat = int(pd.isna(rhat_values).sum())
max_rhat = rhat_values.max()
print(f"\nMax R-hat: {max_rhat:.4f}")
if n_undefined_rhat:
    print(
        f"Warning: R-hat is undefined (NaN) for {n_undefined_rhat} parameter(s); "
        "they are excluded from the max above."
    )
# The convergence verdict uses a fixed R-hat cutoff of 1.01.
print(f"Converged: {max_rhat < 1.01}")

In [None]:
# Summarise the model's predicted time to event.
# NOTE(review): the interval is labelled "95% CI" here; the actual level is
# set inside SurvivalModel.predict_time_to_event — confirm it matches.
preds = model.predict_time_to_event()

mean_hours = preds['mean_time_to_event']
print(f"Mean time to event: {mean_hours:.2f} hours")

ci_lower_hours = preds['time_to_event_lower_ci']
ci_upper_hours = preds['time_to_event_upper_ci']
print(f"95% CI: [{ci_lower_hours:.2f}, {ci_upper_hours:.2f}] hours")