## 1. Import Required Libraries

In [None]:
# Standard library
import time
import warnings

# Third-party libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# epydemics components
from epydemics import DataContainer, Model, process_data_from_owid
from epydemics import visualize_results, evaluate_forecast
# Silence every warning raised from this point on to keep the demo output short.
warnings.filterwarnings('ignore')

# Display settings: no column limit, two-decimal floats, dark-grid plot style.
display_options = {
    'display.max_columns': None,
    'display.float_format': '{:.2f}'.format,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)
plt.style.use('seaborn-v0_8-darkgrid')

# The version shown is a literal string; it is not read from the installed package.
print("✓ Libraries imported successfully!", "✓ epydemics version: 0.7.0", sep="\n")

## 2. Load Data with Vaccination Information

We'll load OWID data that includes vaccination counts (the `people_vaccinated` column, which shows up as `V` in the loaded frame). The presence of that column triggers SIRDV mode.

In [None]:
# Choose the country to analyse by ISO code.
# Options: "USA", "MEX", "GBR", "CAN", etc.
iso_code = "USA"  # United States

raw_data = process_data_from_owid(iso_code=iso_code)

# Summarise what was loaded: covered dates, row count and column names.
first_day, last_day = raw_data.index.min(), raw_data.index.max()
print(f"✓ Data loaded for {iso_code}")
print(f"  Date range: {first_day} to {last_day}")
print(f"  Total days: {len(raw_data)}")
print(f"\n  Columns: {list(raw_data.columns)}")

# The rest of the notebook relies on a 'V' column; report whether it is there.
has_v = 'V' in raw_data.columns
print(f"\n  Vaccination column present: {has_v}")

if has_v:
    vaccinated_counts = raw_data['V']
    # Earliest index label whose V value is strictly positive.
    first_vaccination = raw_data.loc[vaccinated_counts > 0].index.min()
    print(f"  First vaccination date: {first_vaccination}")
    print(f"  Total vaccinated (latest): {vaccinated_counts.iloc[-1]:,.0f}")

# Show the last rows as the cell's rich output.
print("\nSample data (with V column):")
raw_data.tail()

## 3. Create DataContainer with SIRDV Support

The DataContainer automatically detects the V column and calculates:
- V compartment (vaccinated population)
- δ rate (vaccination rate = dV/S)
- Updated S calculation: S = N - I - R - D - V

In [None]:
# Create the container from the raw frame using window=7.
# (Per the section text above, vaccination is detected from the V column.)
container = DataContainer(raw_data, window=7)

print("✓ DataContainer created with SIRDV support!")
print(f"  Processed data shape: {container.data.shape}")

# Check for SIRDV-specific columns
sirdv_cols = ['V', 'delta', 'logit_delta']
present = [col for col in sirdv_cols if col in container.data.columns]
print(f"\n  SIRDV columns present: {present}")

# Verify conservation law: N = S + I + R + D + V on the last 10 rows.
# .copy() makes `sample` an independent frame. Without it, adding columns to
# the slice returned by tail() is a chained assignment: pandas raises
# SettingWithCopyWarning (hidden here by the blanket warnings filter) and the
# write target is ambiguous.
sample = container.data.tail(10).copy()
sample['N_check'] = sample['S'] + sample['I'] + sample['R'] + sample['D'] + sample['V']
# rtol=0.01 accepts a relative difference of up to 1% between N and the sum.
sample['Conservation'] = np.isclose(sample['N'], sample['N_check'], rtol=0.01)

print("\n  Conservation law (N = S + I + R + D + V) check:")
print(f"  All satisfied: {sample['Conservation'].all()}")

# Display processed data (last rows of the compartment and rate columns)
print("\nProcessed SIRDV data:")
container.data[['C', 'D', 'V', 'S', 'I', 'R', 'alpha', 'beta', 'gamma', 'delta']].tail()

## 4. Create and Fit SIRDV Model

The Model class automatically detects SIRDV mode from the presence of `logit_delta` in the data.

In [None]:
# Training window, chosen inside the period that has vaccination data.
start_date, stop_date = "2021-03-01", "2021-09-30"

# Build the model on that window.
model = Model(container, start=start_date, stop=stop_date)

# Read the vaccination flag once and derive the labels printed below from it.
uses_vaccination = model.has_vaccination
model_type = 'SIRDV' if uses_vaccination else 'SIRD'
rate_count = 4 if uses_vaccination else 3

print(f"✓ Model created for period: {start_date} to {stop_date}")
print(f"\n  Model type: {model_type}")
print(f"  Has vaccination: {uses_vaccination}")
print(f"  Number of rates: {rate_count}")

# Create the VAR model (4 rates: alpha, beta, gamma, delta), then fit it.
model.create_model()

# Note: SIRDV requires more observations or smaller max_lag
model.fit_model(max_lag=7, ic="aic")  # Reduced from 10 for SIRDV

# Report the fitted model's lag order and number of equations.
fitted_var = model.var_forecasting.logit_ratios_model_fitted
print("\n✓ VAR model fitted successfully!")
print(f"  Optimal lag order: {fitted_var.k_ar}")
print(f"  Number of equations: {fitted_var.neqs}")

## 5. Generate SIRDV Forecasts

Forecast 30 days ahead for all 4 rates (α, β, γ, δ).

In [None]:
# Number of steps to forecast past the training window.
forecast_steps = 30
model.forecast(steps=forecast_steps)

interval = model.forecasting_interval
print(f"✓ SIRDV forecast generated for {forecast_steps} days")
print(f"  Forecasting interval: {interval[0]} to {interval[-1]}")

# List the keys stored under each of the four rates.
forecast_box = model.var_forecasting.forecasting_box
print("\nForecast structure (4D):")
for label, rate in (
    ("Alpha (infection)", 'alpha'),
    ("Beta (recovery)", 'beta'),
    ("Gamma (mortality)", 'gamma'),
    ("Delta (vaccination)", 'delta'),
):
    print(f"  {label}: {list(forecast_box[rate].keys())}")

# First five values of each delta (vaccination rate) series.
delta_forecast = forecast_box['delta']
print("\nSample delta (vaccination rate) forecast:")
for label, level in (("Lower", 'lower'), ("Point", 'point'), ("Upper", 'upper')):
    print(f"  {label}:  {delta_forecast[level][:5].tolist()}")

## 6. Run 81-Scenario Monte Carlo Simulations

SIRDV runs 81 scenarios (3⁴) instead of 27 (3³) for SIRD.

**Performance tip**: Use `n_jobs=None` for parallel execution (recommended for SIRDV).

In [None]:
# Run simulations and time them.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring intervals; time.time() follows the wall clock and can jump if the
# system time is adjusted mid-run. `time` is imported in the notebook's top
# import cell.
start_time = time.perf_counter()
model.run_simulations(n_jobs=None)  # None = auto-detect cores
elapsed = time.perf_counter() - start_time

print(f"✓ SIRDV simulations completed in {elapsed:.2f} seconds")
print("\n  Total scenarios: 81 (3⁴ = 3 × 3 × 3 × 3)")
print("  Confidence levels: lower, point, upper for each rate")

# Show simulation structure (4D): walk one nesting level per rate, always
# following the 'lower' key, and list the keys found at each depth.
simulation = model.simulation
print("\n  Simulation structure (4D Box):")
print(f"    Alpha levels: {list(simulation.keys())}")
print(f"    Beta levels: {list(simulation['lower'].keys())}")
print(f"    Gamma levels: {list(simulation['lower']['lower'].keys())}")
print(f"    Delta levels: {list(simulation['lower']['lower']['lower'].keys())}")

# Sample scenario: the entry reached by choosing 'point' at all four levels.
sample_scenario = simulation['point']['point']['point']['point']
print(f"\n  Sample scenario shape: {sample_scenario.shape}")
print(f"  Sample scenario columns: {list(sample_scenario.columns)}")

## 7. Generate SIRDV Results

Results now include the V (Vaccinated) compartment.

In [None]:
# Produce the final results from the simulations.
model.generate_result()

print("✓ SIRDV results generated!")
print(f"\n  Available compartments: {list(model.results.keys())}")
print("  Expected: ['S', 'I', 'R', 'D', 'V', 'C', 'A']")

# Inspect the V compartment: its shape and its first ten column names.
vaccinated_results = model.results.V
first_columns = vaccinated_results.columns.tolist()[:10]
print(f"\n  V (Vaccinated) results shape: {vaccinated_results.shape}")
print(f"  Columns: {first_columns}...")

# Show the first rows of the four summary columns.
summary_columns = ['mean', 'median', 'gmean', 'hmean']
print("\nSample V (Vaccinated) forecast:")
print(vaccinated_results[summary_columns].head())

## 8. Visualize SIRDV Results

Visualize all compartments including the new V (Vaccinated) compartment.

In [None]:
# Rows of the processed data that fall on the forecast dates; passed to every
# plot and to the evaluation below as `testing_data`.
forecast_dates = model.forecasting_interval
testing_data = container.data.loc[forecast_dates]

# V (Vaccinated) compartment, with log_response disabled.
print("Vaccinated Population Forecast:")
model.visualize_results(
    "V",
    testing_data=testing_data,
    log_response=False,
)

In [None]:
# Confirmed cases (C) against the testing data, with log_response enabled.
print("Confirmed Cases Forecast (SIRDV):")
model.visualize_results(
    "C",
    testing_data=testing_data,
    log_response=True,
)

In [None]:
# Susceptible population (S), the compartment vaccination draws from;
# plotted with log_response disabled.
print("Susceptible Population Forecast (SIRDV):")
model.visualize_results(
    "S",
    testing_data=testing_data,
    log_response=False,
)

In [None]:
# Infected population (I) against the testing data, with log_response enabled.
print("Infected Population Forecast (SIRDV):")
model.visualize_results(
    "I",
    testing_data=testing_data,
    log_response=True,
)

## 9. Evaluate SIRDV Model Performance

Evaluate forecast accuracy including the V compartment.

In [None]:
# Compartments to score, mapped to the names shown in the report. One mapping
# drives both the evaluation call and the loop below, so the two lists cannot
# drift apart.
compartment_names = {
    "C": "Confirmed",
    "D": "Deaths",
    "I": "Infected",
    "V": "Vaccinated",  # Include V
}

# Evaluate forecast performance for SIRDV compartments
evaluation = model.evaluate_forecast(
    testing_data,
    compartment_codes=tuple(compartment_names),
)

# Display evaluation metrics for the "mean" forecast of each compartment.
print("SIRDV Model Evaluation Metrics:\n")
for compartment, name in compartment_names.items():
    # Label every compartment with its full name instead of repeating the
    # code for non-V compartments (which produced headings such as "C (C)").
    print(f"\n{compartment} ({name}) - Mean forecast:")
    metrics = evaluation[compartment]["mean"]
    print(f"  MAE:   {metrics['mae']:,.2f}")
    print(f"  RMSE:  {metrics['rmse']:,.2f}")
    print(f"  MAPE:  {metrics['mape']:.2f}%")
    print(f"  SMAPE: {metrics['smape']:.2f}%")

## 10. SIRDV vs SIRD Comparison

Compare key differences between SIRDV and SIRD models.

In [None]:
# Feature-by-feature comparison, one tuple per row:
# (feature, SIRD value, SIRDV value).
comparison_rows = [
    ('Compartments', 'S, I, R, D, C, A (6)', 'S, I, R, D, V, C, A (7)'),
    ('Rates', 'α, β, γ (3)', 'α, β, γ, δ (4)'),
    ('VAR equations', '3', '4'),
    ('Simulation scenarios', '27', '81'),
    ('Scenario combinations', '3³ = 3 × 3 × 3', '3⁴ = 3 × 3 × 3 × 3'),
    ('Conservation law', 'N = S + I + R + D', 'N = S + I + R + D + V'),
    ('Vaccination flow', 'None', 'vaccination = δ × S'),
    ('Data requirement', 'C, D, N columns', 'C, D, N, V columns'),
]
comparison = pd.DataFrame(comparison_rows, columns=['Feature', 'SIRD', 'SIRDV'])

# Print the table as plain text between two 80-character rules.
separator = "=" * 80
print("\nSIRDV vs SIRD Comparison:")
print(separator)
print(comparison.to_string(index=False))
print(separator)

## 11. Vaccination Impact Analysis

Analyze the impact of vaccination on epidemic dynamics.

In [None]:
# Compare the mean V forecast with the observed vaccination counts.
results_df = model.results.V[['mean']].copy()
results_df.columns = ['Vaccinated_forecast']

# Add actual vaccination data (the assignment aligns rows on the index).
results_df['Vaccinated_actual'] = testing_data['V']

# Forecast error: forecast minus actual, and the same as a percentage of the
# actual value. Zero actual counts are masked with NaN so they drop out of the
# percentage mean instead of producing infinite values.
results_df['Vaccination_error'] = results_df['Vaccinated_forecast'] - results_df['Vaccinated_actual']
nonzero_actual = results_df['Vaccinated_actual'].replace(0, np.nan)
results_df['Vaccination_error_pct'] = (results_df['Vaccination_error'] / nonzero_actual) * 100

# Both summaries average absolute values, so the second one is labelled as a
# mean *absolute* percentage error.
print("Vaccination Forecast Analysis:")
print(f"\nMean absolute error: {results_df['Vaccination_error'].abs().mean():,.0f}")
print(f"Mean absolute percentage error: {results_df['Vaccination_error_pct'].abs().mean():.2f}%")
# NOTE: this notebook fits no SIRD baseline, so nothing is printed here about
# accuracy relative to SIRD; such a claim needs an actual comparison run.

# Display sample
print("\nSample vaccination forecast vs actual:")
print(results_df.head(10))

## 12. Performance Considerations

SIRDV performance characteristics and optimization tips.

In [None]:
# Performance notes as (heading, bullets) pairs. They are printed under a
# title, with one blank line after the title and between groups.
performance_notes = [
    ("1. Simulation Time:", [
        "SIRDV: ~3x longer than SIRD (81 vs 27 scenarios)",
        "Recommendation: Use n_jobs=None for parallel execution",
    ]),
    ("2. VAR Model Requirements:", [
        "SIRDV requires more observations (4 equations vs 3)",
        "Use longer training periods or smaller max_lag",
        "Example: max_lag=7 instead of 10",
    ]),
    ("3. Memory Usage:", [
        "SIRDV uses 4D Box structure (more memory)",
        "81 scenarios × forecast_steps × compartments",
    ]),
    ("4. Data Requirements:", [
        "Requires 'people_vaccinated' column in OWID data",
        "Automatic fallback to SIRD if V column missing",
        "V column should have sufficient non-zero values",
    ]),
    ("5. Optimization Tips:", [
        "Use result caching for repeated analyses",
        "Set RESULT_CACHING_ENABLED=True in .env",
        "Use parallel simulations: n_jobs=None",
        "Filter date range to vaccination period",
    ]),
]

report_lines = ["SIRDV Performance Characteristics:", ""]
for heading, bullets in performance_notes:
    report_lines.append(heading)
    report_lines.extend(f"   - {bullet}" for bullet in bullets)
    report_lines.append("")
report_lines.pop()  # the final group is not followed by a blank line
print("\n".join(report_lines))

## 13. Summary

**SIRDV Model Key Features**:

1. **Automatic Detection**: DataContainer detects the V column, and Model switches to SIRDV mode when `logit_delta` is present
2. **4-Rate Forecasting**: VAR models α, β, γ, δ with logit transformation
3. **81 Scenarios**: Captures uncertainty across 3⁴ confidence level combinations
4. **Conservation Law**: N = S + I + R + D + V validated
5. **Vaccination Flow**: vaccination = δ × S removes susceptible individuals
6. **Full Compatibility**: All analysis tools support V compartment
7. **Performance**: 81 scenarios take roughly 3x longer than SIRD's 27, so parallel execution (`n_jobs=None`) is recommended

**Advantages over SIRD**:
- Captures vaccination campaign impact
- More accurate S (susceptible) forecasts
- Better long-term epidemic trajectory prediction
- Enables policy scenario analysis

**Next Steps**:
- Try different countries with varying vaccination rates
- Compare pre-vaccination vs vaccination periods
- Analyze vaccination policy effectiveness
- Use result caching for faster repeated analyses