In [None]:
# Jupyter notebook with markdown cells explaining the analysis

# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Read the processed COVID-19 dataset; the 'date' column is parsed into
# datetimes so it can be used directly as a time axis.
df = pd.read_csv(
    '../data/processed/cleaned_covid_data.csv',
    parse_dates=['date'],
)

# Derived metrics, added as new columns:
#   death_rate        - total deaths divided by total cases (a fraction,
#                       not a percentage)
#   cases_per_million - total cases scaled to one million inhabitants
df = df.assign(
    death_rate=lambda frame: frame['total_deaths'].div(frame['total_cases']),
    cases_per_million=lambda frame: (
        frame['total_cases'].div(frame['population']).mul(1e6)
    ),
)

# Time series plot: one line per location showing cumulative cases by date.
fig, ax = plt.subplots(figsize=(12, 6))

# A single groupby pass splits the frame per location instead of re-filtering
# the whole DataFrame once per country. sort=False keeps locations in order
# of first appearance, so the legend order matches the data. Rows whose
# location is missing are excluded by groupby's default handling.
for country, country_data in df.groupby('location', sort=False):
    ax.plot(country_data['date'], country_data['total_cases'], label=country)

ax.set_title('Total COVID-19 Cases Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Cases')
ax.legend()
plt.show()

# Vaccination progress: one line per location showing the share of the
# population that has been vaccinated, as a percentage, by date.
fig, ax = plt.subplots(figsize=(12, 6))

# A single groupby pass splits the frame per location instead of re-filtering
# the whole DataFrame once per country. sort=False keeps locations in order
# of first appearance, so the legend order matches the data. Rows whose
# location is missing are excluded by groupby's default handling.
for country, country_data in df.groupby('location', sort=False):
    # Convert the vaccinated head-count into a percentage of the population.
    vaccinated_pct = (country_data['people_vaccinated'] /
                      country_data['population']) * 100
    ax.plot(country_data['date'], vaccinated_pct, label=country)

ax.set_title('COVID-19 Vaccination Progress')
ax.set_xlabel('Date')
ax.set_ylabel('% Population Vaccinated')
ax.legend()
plt.show()