# USA Measles Validation (Incidence Mode)

This notebook validates the `incidence` mode of the SIRD model using historical USA measles data (1980-2020). We analyze the ability of the model to handle periods of low transmission and sporadic outbreaks.

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from epydemics import DataContainer, Model

%matplotlib inline

## 1. Load Data
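We use the Our World in Data (OWID) dataset for US measles cases. The population `N` is approximated as a constant 300 million for every year from 1980 to 2020.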

In [None]:
# Candidate locations for the OWID measles CSV (downloaded via
# examples/data/fetch_measles_data.py). Several are listed because the notebook
# may be run from the repository root or from the notebook directory.
possible_paths = [
    'examples/data/owid/measles_us_cases.csv',
    'data/owid/measles_us_cases.csv',
    '../examples/data/owid/measles_us_cases.csv'
]

# Constant population approximation for the demo (1980-2020 inclusive).
# Built outside the try block so it also exists on the dummy-data path;
# the preprocessing cell merges on it unconditionally.
df_pop = pd.DataFrame({'Year': range(1980, 2021), 'N': 300e6})

# First candidate path that exists on disk, or None when none does.
csv_path = next((p for p in possible_paths if os.path.exists(p)), None)

try:
    if csv_path is None:
        raise FileNotFoundError("Could not find measles_us_cases.csv")
    df_cases = pd.read_csv(csv_path)
except Exception as e:
    # Deliberate best-effort fallback: any load failure (missing or unreadable
    # file) is reported and replaced by a small synthetic dataset so the
    # remaining cells can still be exercised.
    print(f"Data not found or error: {e}")
    print("Please run examples/data/fetch_measles_data.py first.")
    print("Creating dummy data for testing...")
    df_cases = pd.DataFrame({'Year': range(1980, 1985), 'cases': [100.0]*5, 'Entity': ['United States']*5})


## 2. Preprocessing
Prepare the data for the DataContainer. We need `I` (Incident Cases), `D` (Deaths - we'll assume 0 for this validation if missing), and `N` (Population).

In [None]:
# Fail fast instead of silently skipping the cell: if the frame is missing,
# `df` would never be created and the next cell would fail with a NameError.
if df_cases is None:
    raise ValueError("df_cases is None; run the data loading cell first.")

# Keep only the United States rows when the file carries an 'Entity' column;
# otherwise treat the file as already country-specific.
if 'Entity' in df_cases.columns:
    df_usa = df_cases[df_cases['Entity'] == 'United States'].copy()
else:
    df_usa = df_cases.copy()

# Rename the case-count column to 'I'.
# OWID commonly uses 'Number of confirmed measles cases'.
# Only the first matching column is renamed so that several columns containing
# "cases" cannot all collapse into duplicate 'I' columns.
case_cols = [col for col in df_usa.columns if 'cases' in col.lower()]
if case_cols:
    df_usa = df_usa.rename(columns={case_cols[0]: 'I'})

# Merge with population. An inner join keeps only the years covered by the
# population table, so no row is left with a missing N.
df = pd.merge(df_usa, df_pop, on='Year', how='inner')

# Add dummy deaths if not present
if 'D' not in df.columns:
    df['D'] = 0

# Ensure sorted by date/year
df = df.sort_values('Year')

# Last expression of the cell: shown with the notebook's rich table display.
df.head()

## 3. Model Initialization (Incidence Mode)
Here we explicitly set `mode='incidence'` to tell the model that `I` represents incident cases per period, not cumulative cases.

In [None]:
# Wrap the preprocessed frame `df` in a DataContainer. mode='incidence' tells the
# library that column `I` holds incident cases per period, not cumulative cases.
container = DataContainer(df, mode='incidence')

# Build the Model from the container, then call create_model() before fitting.
# NOTE(review): what create_model() sets up internally is not visible here —
# confirm against the epydemics documentation.
model = Model(container)
model.create_model()

## 4. Forecasting and Simulation
We fit a VAR model to the rates and simulate forward.

In [None]:
# Fit the VAR model on the rates, passing max_lag=3.
# NOTE(review): max_lag is presumably the largest lag order considered — confirm.
model.fit_model(max_lag=3)
# Forecast 5 steps ahead.
# NOTE(review): with one row per year a step presumably equals one year — confirm.
model.forecast(steps=5)
# Run the simulations with n_jobs=1.
# NOTE(review): n_jobs=1 presumably means no parallel workers — confirm.
model.run_simulations(n_jobs=1)
# Collect the outputs; `results` is handed to visualize_results() in the next cell.
results = model.generate_result()

## 5. Visualization
Plot the historical and forecasted incident cases.

In [None]:
# Plot the `results` object returned by model.generate_result() in the previous cell.
model.visualize_results(results)