# COVID-19 Global Trends Analysis Template

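This notebook provides a scaffold for loading, cleaning, and visualizing global COVID-19 data. It expects `owid-covid-data.csv` in the working directory. Set `selected_countries` to names exactly as they appear in the dataset's `location` column, then run the cells in order, top to bottom, to generate the suggested charts.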

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Use seaborn's 'whitegrid' style for the matplotlib figures drawn below.
sns.set_style('whitegrid')
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Read the COVID-19 CSV from the working directory, parsing the `date` column
# as datetimes, then preview the first rows as this cell's output.
df = pd.read_csv(
    'owid-covid-data.csv',
    parse_dates=['date'],
)
df.head()

In [None]:
# Define countries of interest.
# The plotting cells filter rows with df['location'] == country, so every entry
# must be spelled exactly as it appears in the `location` column
# (e.g. 'United States'). ISO codes such as 'USA' belong to the `iso_code`
# column and would match no rows here, producing empty charts.
selected_countries = ['United States', 'India', 'Kenya']  # replace with your list

## 1. Time-Series Trends

### 1.1 Total Cases & Total Deaths Over Time

In [None]:
# One figure with two y-axes sharing the date axis:
# solid lines (left axis) are total cases, dashed lines (right axis) are total deaths.
fig, ax1 = plt.subplots(figsize=(10, 6))
ax2 = ax1.twinx()

for country in selected_countries:
    country_rows = df.loc[df['location'] == country]
    dates = country_rows['date']
    ax1.plot(dates, country_rows['total_cases'], label=f'{country} Cases')
    ax2.plot(dates, country_rows['total_deaths'], label=f'{country} Deaths', linestyle='--')

ax1.set(xlabel='Date', ylabel='Total Cases')
ax2.set(ylabel='Total Deaths')
plt.title('Total Cases & Deaths Over Time')
# Figure-level legend so lines from both axes are listed together.
fig.legend(loc='upper left')
plt.show()

### 1.2 New Cases & New Deaths (7-day Smoothed)

In [None]:
# Smoothed daily new cases (solid) and new deaths (dashed) for each selected
# country, all drawn on a single shared axis.
plt.figure(figsize=(10, 6))
for country in selected_countries:
    is_country = df['location'] == country
    country_rows = df.loc[is_country]
    dates = country_rows['date']
    plt.plot(dates, country_rows['new_cases_smoothed'], label=f'{country} New Cases (7-day MA)')
    plt.plot(
        dates,
        country_rows['new_deaths_smoothed'],
        label=f'{country} New Deaths (7-day MA)',
        linestyle='--',
    )

plt.gca().set(
    xlabel='Date',
    ylabel='Count (7-day MA)',
    title='Smoothed New Cases & Deaths',
)
plt.legend()
plt.show()

## 2. Per-Capita & Normalized Comparisons

### 2.1 Total Cases / Deaths per Million (Latest)

In [None]:
# Grouped bar chart of the most recent per-million totals for each selected country.
#
# Each country's own last non-null value is used rather than the rows on the
# global maximum date: a country with no row (or a missing value) on that single
# date would otherwise shorten the value arrays and make ax.bar fail on a
# length mismatch with the tick positions.
metric_cols = ['total_cases_per_million', 'total_deaths_per_million']
latest_selected = (
    df[df['location'].isin(selected_countries)]
    .sort_values('date')
    .groupby('location')[metric_cols]
    .last()                        # last non-null value per column within each country
    .reindex(selected_countries)   # bar order must match tick-label order; unknown names -> NaN
)
cases = latest_selected['total_cases_per_million'].to_numpy()
deaths = latest_selected['total_deaths_per_million'].to_numpy()

fig, ax = plt.subplots(figsize=(10, 6))
x = np.arange(len(selected_countries))
width = 0.35  # bar width; the two bars of a country sit side by side around its tick
ax.bar(x - width/2, cases, width, label='Cases per Million')
ax.bar(x + width/2, deaths, width, label='Deaths per Million')
ax.set_xticks(x)
ax.set_xticklabels(selected_countries)
ax.set_ylabel('Per Million')
plt.title('Total Cases & Deaths per Million')
plt.legend()
plt.show()

### 2.2 Vaccination Progress (% Population)

In [None]:
# Vaccination coverage over time for each selected country:
# solid = people with at least one dose, dashed = fully vaccinated people.
plt.figure(figsize=(10, 6))
for country in selected_countries:
    data = df[df['location'] == country]
    # Drop missing values per series before plotting: matplotlib breaks a line
    # at every NaN, so gaps in reporting would fragment (or hide) the curve.
    one_dose = data[['date', 'people_vaccinated_per_hundred']].dropna()
    fully = data[['date', 'people_fully_vaccinated_per_hundred']].dropna()
    # `people_vaccinated_per_hundred` counts everyone with at least one dose
    # (fully vaccinated people included), so it is labelled accordingly.
    plt.plot(one_dose['date'], one_dose['people_vaccinated_per_hundred'], label=f'{country} At Least One Dose')
    plt.plot(fully['date'], fully['people_fully_vaccinated_per_hundred'], linestyle='--', label=f'{country} Fully Vaccinated')

plt.xlabel('Date')
plt.ylabel('% of Population')
plt.title('Vaccination Progress')
plt.legend()
plt.show()

## 3. Geospatial Visualizations

### 3.1 World Choropleth: Total Cases per Million (Latest)

In [None]:
# World map coloured by each location's most recent total cases per million.
#
# Rather than keeping only rows on the global maximum date (which drops every
# location that has no row or no value on that one day), keep, per iso_code,
# the newest row that actually has a value for the mapped metric.
latest = (
    df.dropna(subset=['iso_code', 'total_cases_per_million'])
    .sort_values('date')
    .drop_duplicates(subset='iso_code', keep='last')
)
fig = px.choropleth(
    latest,
    locations='iso_code',
    color='total_cases_per_million',
    hover_name='location',
    hover_data=['date'],  # values may come from different days; show which one
    title='Total Cases per Million (Latest)'
)
fig.show()

### 3.2 Stringency Index & Case Trajectory (Optional Bivariate Map)

In [None]:
# Placeholder cell: no bivariate map is implemented here; `pass` is a no-op.
# For an advanced bivariate map, consider merging stringency and case quartiles,
# then use px.choropleth with a custom color scale.
pass

## 4. Multivariate & Correlation Analysis

### 4.1 GDP per Capita vs. Vaccination Rate

In [None]:
# Scatter of GDP per capita (log x-axis) against vaccination rate, one point per
# location, with marker area proportional to population in millions.
#
# For each location, use its newest row in which all three plotted fields are
# present. Filtering on the global maximum date alone would discard every
# location whose values are missing on that particular day.
scatter_cols = ['gdp_per_capita', 'people_vaccinated_per_hundred', 'population']
latest = (
    df.dropna(subset=scatter_cols)
    .sort_values('date')
    .drop_duplicates(subset='location', keep='last')
)
plt.figure(figsize=(8, 6))
plt.scatter(
    latest['gdp_per_capita'],
    latest['people_vaccinated_per_hundred'],
    s=latest['population'] / 1e6,  # marker area = population in millions
    alpha=0.7
)
plt.xscale('log')
plt.xlabel('GDP per Capita (log)')
plt.ylabel('People Vaccinated per Hundred')
plt.title('GDP vs. Vaccination Rate')
plt.show()

### 4.2 Correlation Matrix of Key Metrics

In [None]:
# Pairwise correlations between the selected metrics, computed over every row
# of `df`, drawn as an annotated heatmap.
corr_cols = [
    'total_cases_per_million',
    'total_deaths_per_million',
    'stringency_index',
    'median_age',
    'hospital_beds_per_thousand',
    'human_development_index',
]
corr = df.loc[:, corr_cols].corr()

# Boolean mask that hides the diagonal and the upper triangle, leaving each
# pair of metrics shown only once.
mask = np.triu(np.ones(corr.shape, dtype=bool))

plt.figure(figsize=(8, 6))
sns.heatmap(corr, mask=mask, annot=True)
plt.title('Correlation Matrix')
plt.show()

## 5. Healthcare Capacity & Outcomes

### 5.1 ICU & Hospital Patients per Million Over Time

In [None]:
# ICU patients (solid) and hospital patients (dashed) per million over time,
# one pair of lines per selected country.
plt.figure(figsize=(10, 6))
for country in selected_countries:
    country_rows = df.loc[df['location'] == country]
    dates = country_rows['date']
    plt.plot(dates, country_rows['icu_patients_per_million'], label=f'{country} ICU Patients')
    plt.plot(
        dates,
        country_rows['hosp_patients_per_million'],
        label=f'{country} Hospital Patients',
        linestyle='--',
    )

plt.gca().set(
    xlabel='Date',
    ylabel='Patients per Million',
    title='Healthcare Load Over Time',
)
plt.legend()
plt.show()

### 5.2 Weekly ICU Admissions per Million

In [None]:
# Semi-transparent bars of weekly ICU admissions per million, one bar series per
# selected country, overlaid on the same axis.
plt.figure(figsize=(10, 6))
for country in selected_countries:
    data = df[df['location'] == country]
    # Keep only dates that actually have a value so NaN rows are not sent to bar().
    weekly = data[['date', 'weekly_icu_admissions_per_million']].dropna()
    if weekly.empty:
        # Nothing reported for this country in this column; it gets no bars.
        continue
    # Label each series: the overlaid bars are otherwise indistinguishable.
    plt.bar(weekly['date'], weekly['weekly_icu_admissions_per_million'], alpha=0.5, label=country)
plt.xlabel('Date')
plt.ylabel('Weekly ICU Admissions per Million')
plt.title('Weekly ICU Admissions')
plt.legend()
plt.show()

## 6. Testing & Positivity Analysis

### 6.1 New Tests per Thousand vs. Positive Rate

In [None]:
# Two y-axes sharing the date axis: solid lines (left) are new tests per
# thousand, dashed lines (right) are the positive rate.
fig, ax1 = plt.subplots(figsize=(10, 6))
ax2 = ax1.twinx()

for country in selected_countries:
    country_rows = df.loc[df['location'] == country]
    dates = country_rows['date']
    ax1.plot(dates, country_rows['new_tests_per_thousand'], label=f'{country} Tests/Thousand')
    ax2.plot(dates, country_rows['positive_rate'], label=f'{country} Positive Rate', linestyle='--')

ax1.set(xlabel='Date', ylabel='Tests per Thousand')
ax2.set(ylabel='Positive Rate')
plt.title('Testing Volume & Positivity Rate')
# Figure-level legend so lines from both axes are listed together.
fig.legend(loc='upper left')
plt.show()

## 7. (Optional) Interactive Dashboard

Use Streamlit or Plotly Dash to make an interactive app where users can select countries and timeframes.