## 1. Import Required Libraries

In [None]:
# Import epydemics components
from epydemics import DataContainer, Model, process_data_from_owid

# Import visualization and evaluation
from epydemics import visualize_results, evaluate_forecast

# Standard libraries
import pandas as pd
import matplotlib.pyplot as plt
import warnings
# Silence warnings so the tutorial output stays readable.
# NOTE(review): this suppresses *every* warning category, including
# deprecation notices from the libraries used below -- consider narrowing
# the filter once the noisy warnings have been identified.
warnings.filterwarnings('ignore')

# Display settings
pd.set_option('display.max_columns', None)  # show all columns of wide frames

# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*".
# Pick the first name this installation knows about so that older
# Matplotlib versions do not raise OSError; if neither is available the
# default style is kept (the style is purely cosmetic).
for _style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

print("Libraries imported successfully!")

## 2. Load Data

We'll use COVID-19 data from Our World in Data (OWID). The data includes confirmed cases, deaths, and population.

In [None]:
# Fetch the OWID series identified by the ISO code "OWID_WRL"
# (the global aggregate used throughout this notebook).
raw_data = process_data_from_owid(iso_code="OWID_WRL")

# Quick summary of what was loaded: length, covered dates, and columns.
print(f"Data loaded: {len(raw_data)} days")
date_index = raw_data.index
print(f"Date range: {date_index.min()} to {date_index.max()}")
print(f"\nColumns: {list(raw_data.columns)}")
print("\nFirst few rows:")

# Last expression of the cell, so the notebook renders the preview table.
raw_data.head()

## 3. Create DataContainer

The `DataContainer` handles all data preprocessing:
- 7-day moving average smoothing
- SIRD compartment calculation (S, I, R, D)
- Rate calculation (α, β, γ)
- Logit transformation for VAR modeling

In [None]:
# Wrap the raw series in a DataContainer, using a window of 7 for smoothing.
container = DataContainer(raw_data, window=7)

print("DataContainer created successfully!")

# Report the dimensions and column names of the processed frame.
processed = container.data
print(f"\nProcessed data shape: {processed.shape}")
print("\nAvailable columns:")
print(processed.columns.tolist())

# Last expression of the cell: render the final 10 processed rows.
processed.tail(10)

## 4. Create and Fit SIRD Model

We'll create a model for a specific time period and fit a VAR model to the logit-transformed rates.

In [None]:
# Training window passed to the model as its start/stop bounds.
start_date, stop_date = "2020-03-01", "2020-12-31"

# Build the model on the container, restricted to the training window.
model = Model(
    container,
    start=start_date,
    stop=stop_date,
)

print(f"Model created for period: {start_date} to {stop_date}")
print(f"Has vaccination: {model.has_vaccination}")

# Set up the VAR model, then fit it.
# NOTE(review): presumably max_lag caps the candidate lag orders and ic
# names the information criterion used to choose among them -- confirm
# against the epydemics documentation.
model.create_model()
model.fit_model(max_lag=10, ic="aic")

print("\nModel fitted successfully!")
fitted_var = model.var_forecasting.fitted_model
print(f"Optimal lag order: {fitted_var.k_ar}")

## 5. Generate Forecasts

Forecast 30 days ahead with confidence intervals.

In [None]:
# Forecast horizon, expressed in steps.
forecast_steps = 30
model.forecast(steps=forecast_steps)

print(f"Forecast generated for {forecast_steps} days")

# First and last entries of the forecasting interval.
horizon = model.forecasting_interval
print(f"\nForecasting interval: {horizon[0]} to {horizon[-1]}")

# Preview the first five values of each alpha (infection rate) series.
print("\nSample alpha (infection rate) forecast:")
alpha_forecast = model.forecasting_box.alpha
print(f"Lower bound: {alpha_forecast.lower[:5]}")
print(f"Point forecast: {alpha_forecast.point[:5]}")
print(f"Upper bound: {alpha_forecast.upper[:5]}")

## 6. Run Monte Carlo Simulations

Run 27 simulation scenarios (3³ combinations of lower/point/upper for α, β, γ).

In [None]:
# n_jobs=1 runs the scenarios sequentially; pass n_jobs=None instead to
# let the library parallelize automatically.
model.run_simulations(n_jobs=1)

print("Simulations completed!")
print("\nSimulation scenarios: 27 (3³)")

# NOTE(review): keys() lists only the top-level keys of model.simulation;
# whether that enumerates all 27 scenarios depends on how the library
# nests them -- confirm.
scenario_keys = list(model.simulation.keys())
print(f"Available scenarios: {scenario_keys}")

## 7. Generate Results

Aggregate simulation results with central tendency measures.

In [None]:
# Aggregate the simulation output into model.results.
model.generate_result()

print("Results generated!")
print(f"\nAvailable compartments: {list(model.results.keys())}")

# Results for the confirmed-cases compartment ("C").
confirmed = model.results.C
print(f"\nConfirmed cases (C) results shape: {confirmed.shape}")

# Preview the central-tendency columns of the confirmed-cases results.
print("\nSample confirmed cases forecast:")
central_columns = ['mean', 'median', 'gmean', 'hmean']
print(confirmed[central_columns].head())

## 8. Visualize Results

Visualize forecasts for different compartments.

In [None]:
# Select the processed rows that fall on the forecast dates; they are
# passed to the plots (and later cells) as the comparison data.
processed = container.data
testing_data = processed.loc[model.forecasting_interval]

# Plot the confirmed-cases ("C") forecast alongside the testing data.
print("Confirmed Cases Forecast:")
model.visualize_results(
    "C",
    testing_data=testing_data,
    log_response=True,
)

In [None]:
# Plot the deaths ("D") forecast alongside the testing data.
print("Deaths Forecast:")
model.visualize_results(
    "D",
    testing_data=testing_data,
    log_response=True,
)

In [None]:
# Plot the infected ("I") forecast alongside the testing data.
print("Infected Population Forecast:")
model.visualize_results(
    "I",
    testing_data=testing_data,
    log_response=True,
)

## 9. Evaluate Model Performance

Calculate evaluation metrics (MAE, MSE, RMSE, MAPE, SMAPE). The cell below prints MAE, RMSE, MAPE, and SMAPE for the mean forecast of each compartment.

In [None]:
# Compartments to score; the same codes drive both the evaluation call
# and the report loop below.
compartment_codes = ("C", "D", "I")

# Score the forecast against the testing data.
evaluation = model.evaluate_forecast(testing_data, compartment_codes=compartment_codes)

# Report the metrics of the "mean" forecast for each compartment.
print("Model Evaluation Metrics:\n")
for code in compartment_codes:
    print(f"\n{code} (Mean forecast):")
    mean_scores = evaluation[code]["mean"]
    print(f"  MAE:   {mean_scores['mae']:.2f}")
    print(f"  RMSE:  {mean_scores['rmse']:.2f}")
    print(f"  MAPE:  {mean_scores['mape']:.2f}%")
    print(f"  SMAPE: {mean_scores['smape']:.2f}%")

## 10. Summary

**Key Takeaways**:

1. **SIRD Model**: Models time-varying infection (α), recovery (β), and mortality (γ) rates
2. **VAR Forecasting**: Uses Vector Autoregression on logit-transformed rates
3. **Monte Carlo**: 27 scenarios capture uncertainty across confidence levels
4. **Evaluation**: Multiple metrics assess forecast accuracy
5. **Visualization**: Professional plots with central tendencies and actual data

**Next Steps**:
- Try different time periods or countries
- Experiment with different lag orders
- Use parallel simulations for faster execution
- Explore the SIRDV model with vaccination data