# HPV Agent-Based Model: Simulation Results Visualization

This notebook visualizes and compares the outputs of three vaccination scenarios:
1. **Baseline Uniform**: 60% coverage across all populations
2. **Race-Stratified Vaccination**: Different coverage rates by race (White 75%, Black 45%, Hispanic 55%)
3. **Race-Stratified with Screening**: Same race-stratified vaccination coverage, plus annual screening with uptake that varies by race (White 80%, Black 50%, Hispanic 60%)

We'll compare prevalence and cancer incidence trends to assess simulation performance.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
import json

# Plot styling: seaborn's white-grid theme and a large default figure size.
sns.set_style("whitegrid")
plt.rcParams["figure.figsize"] = (14, 10)

# Locations of the simulation outputs. The summary CSVs are read straight
# from the results directory; figures_dir names its "figures" subfolder.
results_dir = Path("results")
csv_dir = results_dir
figures_dir = results_dir / "figures"

# Show what is on disk so that missing inputs are obvious before loading.
print("Results directory:", results_dir)
available_csvs = list(csv_dir.glob("*.csv"))
print("Files available:", available_csvs)

## Load Summary Statistics

Load the summary CSV files for all three scenarios to compare prevalence and cancer incidence.

In [None]:
# Map each scenario's display name to the summary CSV produced for it.
scenarios = {
    "Baseline Uniform": "summary_stats_baseline_uniform.csv",
    "Race-Stratified Vaccination": "summary_stats_race_stratified_vaccination.csv",
    "Race-Stratified with Screening": "summary_stats_race_stratified_with_screening.csv",
}

# data maps scenario name -> DataFrame and only holds scenarios whose file exists.
data = {}
for scenario_name, filename in scenarios.items():
    filepath = csv_dir / filename
    if not filepath.exists():
        # A missing file is reported and skipped rather than aborting the load.
        print(f"✗ File not found: {filepath}")
        continue

    frame = pd.read_csv(filepath)
    data[scenario_name] = frame
    print(f"\n✓ Loaded {scenario_name}:")
    print(f"  Shape: {frame.shape}")
    print(f"  Columns: {list(frame.columns)}")
    print(f"  First few rows:\n{frame.head(3)}")

# Repeat the column names of the first loaded scenario as a quick reference
# for the plotting cells; nothing is printed when no file was loaded.
if data:
    first_df = next(iter(data.values()))
    print(f"\nAvailable columns: {list(first_df.columns)}")

## Plot 1: Disease Prevalence Over Time

Compare HPV prevalence trajectories across all three scenarios. Lower prevalence indicates better disease control.

In [None]:
# One panel per scenario, side by side, sharing the y-axis so that the
# prevalence levels can be compared directly between panels.
fig, axes = plt.subplots(1, 3, figsize=(18, 5), sharey=True)
colors = {
    'Baseline Uniform': '#1f77b4',
    'Race-Stratified Vaccination': '#ff7f0e',
    'Race-Stratified with Screening': '#2ca02c',
}

# Column names a summary file may use for prevalence, in order of preference.
prevalence_candidates = ('prevalence', 'Prevalence', 'infected', 'Infected')

for idx, scenario_name in enumerate(data):
    df = data[scenario_name]
    ax = axes[idx]

    # First candidate present in this scenario's frame, or None if there is none.
    prevalence_col = next(
        (name for name in prevalence_candidates if name in df.columns), None
    )

    if prevalence_col is None:
        # Explain the gap inside the otherwise empty panel.
        ax.text(0.5, 0.5, f'No prevalence column found.\nAvailable: {list(df.columns)}',
                ha='center', va='center', transform=ax.transAxes)
        continue

    # The x-axis is the frame's row index and the values are plotted as stored.
    # NOTE(review): the axis label says "%", while the summary cell multiplies
    # this same column by 100 before printing it; confirm the stored units.
    ax.plot(df.index, df[prevalence_col], linewidth=2.5,
            color=colors[scenario_name], label='Prevalence')
    ax.set_title(scenario_name, fontsize=12, fontweight='bold')
    ax.set_xlabel('Timestep (Months)', fontsize=10)
    if idx == 0:
        # With a shared y-axis only the left-most panel carries the label.
        ax.set_ylabel('Prevalence (% Population)', fontsize=10)
    ax.grid(True, alpha=0.3)
    ax.legend()

plt.tight_layout()
plt.savefig('prevalence_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

print("✓ Prevalence comparison plot created")

## Plot 2: Cancer Incidence Over Time

Track cumulative cancer cases across scenarios. Screening and vaccination should reduce progression to cancer.

In [None]:
# One panel per scenario, side by side, sharing the y-axis so that the
# cancer incidence levels can be compared directly between panels.
fig, axes = plt.subplots(1, 3, figsize=(18, 5), sharey=True)

# Column names a summary file may use for cancer incidence, in order of preference.
cancer_candidates = ('cancer_incidence', 'Cancer Incidence', 'cancerous', 'Cancerous')

for idx, scenario_name in enumerate(data):
    df = data[scenario_name]
    ax = axes[idx]

    # First candidate present in this scenario's frame, or None if there is none.
    cancer_col = next(
        (name for name in cancer_candidates if name in df.columns), None
    )

    if cancer_col is None:
        # Explain the gap inside the otherwise empty panel.
        ax.text(0.5, 0.5, f'No cancer column found.\nAvailable: {list(df.columns)}',
                ha='center', va='center', transform=ax.transAxes)
        continue

    # The x-axis is the frame's row index and the values are plotted as stored.
    # NOTE(review): the axis label says "%", while the summary cell multiplies
    # this same column by 100 before printing it; confirm the stored units.
    ax.plot(df.index, df[cancer_col], linewidth=2.5,
            color='#d62728', linestyle='--', label='Cancer Incidence')
    ax.set_title(scenario_name, fontsize=12, fontweight='bold')
    ax.set_xlabel('Timestep (Months)', fontsize=10)
    if idx == 0:
        # With a shared y-axis only the left-most panel carries the label.
        ax.set_ylabel('Cancer Incidence (% Population)', fontsize=10)
    ax.grid(True, alpha=0.3)
    ax.legend()

plt.tight_layout()
plt.savefig('cancer_incidence_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

print("✓ Cancer incidence comparison plot created")

## Plot 3: Combined Comparison (Overlay)

Overlay all three scenarios to directly compare their impact on disease control.

In [None]:
# Two panels side by side: prevalence on the left, cancer incidence on the
# right, each overlaying every loaded scenario.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

colors = {'Baseline Uniform': '#1f77b4', 'Race-Stratified Vaccination': '#ff7f0e', 'Race-Stratified with Screening': '#2ca02c'}
linestyles = {'Baseline Uniform': '-', 'Race-Stratified Vaccination': '--', 'Race-Stratified with Screening': '-.'}

# Column names a summary file may use for each metric, in order of preference.
prevalence_candidates = ('prevalence', 'Prevalence', 'infected', 'Infected')
cancer_candidates = ('cancer_incidence', 'Cancer Incidence', 'cancerous', 'Cancerous')


def _find_column(df, candidates):
    """Return the first name in candidates that is a column of df, else None."""
    return next((name for name in candidates if name in df.columns), None)


def _overlay_scenarios(ax, frames, candidates, metric):
    """Draw one line per scenario on ax and return the column used for each.

    The column is looked up separately in every frame, so scenarios whose
    files name the metric differently are still plotted. A scenario with none
    of the candidate columns is reported and left out of the overlay.

    Args:
        ax: Matplotlib axes to draw on.
        frames: Mapping of scenario name -> DataFrame.
        candidates: Column names to try, in order of preference.
        metric: Short metric name used in the "skipped" message.

    Returns:
        Dict mapping each plotted scenario name to the column that was used;
        empty when nothing could be plotted (including when frames is empty).
    """
    used = {}
    for scenario_name, df in frames.items():
        column = _find_column(df, candidates)
        if column is None:
            print(f"✗ No {metric} column found for {scenario_name}; skipped in overlay")
            continue
        # The x-axis is the frame's row index and the values are plotted as stored.
        ax.plot(df.index, df[column], linewidth=2.5,
                color=colors[scenario_name], linestyle=linestyles[scenario_name],
                label=scenario_name, alpha=0.8)
        used[scenario_name] = column
    return used


prevalence_cols = _overlay_scenarios(ax1, data, prevalence_candidates, 'prevalence')
cancer_cols = _overlay_scenarios(ax2, data, cancer_candidates, 'cancer')

# Kept for any later cell that reads these names: the column used by the
# first plotted scenario, or None when nothing was plotted.
prevalence_col = next(iter(prevalence_cols.values()), None)
cancer_col = next(iter(cancer_cols.values()), None)

# Plot 1: Prevalence overlay. Decorate the panel only when it has at least
# one line, so that an empty panel does not get a title and an empty legend.
# NOTE(review): the axis labels say "%", while the summary cell multiplies the
# same columns by 100 before printing them; confirm the stored units.
if prevalence_cols:
    ax1.set_title('HPV Prevalence Comparison', fontsize=13, fontweight='bold')
    ax1.set_xlabel('Time (Months)', fontsize=11)
    ax1.set_ylabel('Prevalence (% Population)', fontsize=11)
    ax1.legend(fontsize=10, loc='best')
    ax1.grid(True, alpha=0.3)

# Plot 2: Cancer incidence overlay
if cancer_cols:
    ax2.set_title('Cancer Incidence Comparison', fontsize=13, fontweight='bold')
    ax2.set_xlabel('Time (Months)', fontsize=11)
    ax2.set_ylabel('Cancer Incidence (% Population)', fontsize=11)
    ax2.legend(fontsize=10, loc='best')
    ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('combined_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

print("✓ Combined comparison plot created")

## Summary Statistics: Final Outcomes

Compare final and peak prevalence and cancer incidence across scenarios. Final values are taken from the last row of each summary file, i.e. the end of the simulation (month 120).

In [None]:
# Column names a summary file may use for each metric, in order of preference.
prevalence_candidates = ('prevalence', 'Prevalence', 'infected', 'Infected')
cancer_candidates = ('cancer_incidence', 'Cancer Incidence', 'cancerous', 'Cancerous')


def _find_column(df, candidates):
    """Return the first name in candidates that is a column of df, else None."""
    return next((name for name in candidates if name in df.columns), None)


def _final_and_peak(df, column):
    """Return the (final, peak) table entries for one metric of one scenario.

    Both values are multiplied by 100 and formatted with two decimals. The
    final value is the last row of the column and the peak is its maximum.
    ("N/A", "N/A") is returned when the column is missing or the frame has no
    rows, since an empty frame has no last row to read.
    """
    if column is None or df.empty:
        return "N/A", "N/A"
    series = df[column]
    return f"{series.iloc[-1]*100:.2f}", f"{series.max()*100:.2f}"


summary_table = []

for scenario_name, df in data.items():
    # Find columns dynamically, separately for every scenario.
    prev_col = _find_column(df, prevalence_candidates)
    cancer_col = _find_column(df, cancer_candidates)

    final_prev, peak_prev = _final_and_peak(df, prev_col)
    final_cancer, peak_cancer = _final_and_peak(df, cancer_col)

    summary_table.append({
        'Scenario': scenario_name,
        'Final Prevalence (%)': final_prev,
        'Final Cancer Incidence (%)': final_cancer,
        'Peak Prevalence (%)': peak_prev,
        'Peak Cancer (%)': peak_cancer,
    })

summary_df = pd.DataFrame(summary_table)
# NOTE(review): the heading hard-codes "Month 120", while the final values are
# simply the last row of each file; confirm that every run has that length.
print("\n" + "="*90)
print("SIMULATION OUTCOMES SUMMARY (Month 120)")
print("="*90)
print(summary_df.to_string(index=False))
print("="*90)