# 📘 COVID-19 Global Data Tracker
A data analysis project tracking global COVID-19 trends, cases, deaths, and vaccinations.

## 🗂️ 1. Data Collection

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Optional: interactive plots
# import plotly.express as px

# Download the complete COVID-19 table from the Our World in Data CSV endpoint
url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
df = pd.read_csv(filepath_or_buffer=url)

# Cell output: the (rows, columns) shape together with the column index
(df.shape, df.columns)

## 🔍 2. Data Exploration

In [None]:
# NOTE: by default a notebook cell only echoes the value of its LAST expression,
# so the intermediate summaries below are printed explicitly; as bare
# expressions they would be computed and then silently discarded.

# Preview data
print(df.head())

# Check data types
print(df.dtypes)

# Check for missing values (count of nulls per column)
print(df.isnull().sum())

# View unique countries
# Kept as the final bare expression so it remains the cell's output value.
df['location'].unique()

## 🧹 3. Data Cleaning

In [None]:
# Filter relevant countries
countries = ['Kenya', 'United States', 'India']
# .copy() gives an independent frame, so the column assignments below do not
# write into a view of the full dataset (avoids SettingWithCopyWarning).
df = df[df['location'].isin(countries)].copy()

# Convert 'date' to datetime
df['date'] = pd.to_datetime(df['date'])

# Drop rows with missing 'total_cases' or 'total_deaths'
df = df.dropna(subset=['total_cases', 'total_deaths'])

# Forward-fill the remaining gaps, one country at a time and in date order.
# A single frame-wide ffill would carry the last values of one country into
# the leading gaps of the next one; grouping by 'location' prevents that.
# (It also avoids fillna(method='ffill'), which is deprecated in pandas 2.1+.)
df = df.sort_values(['location', 'date'])
filled = df.groupby('location').ffill()  # every column except the grouping key
df[filled.columns] = filled

## 📊 4. Exploratory Data Analysis (EDA)

In [None]:
# Draw one total-cases curve per selected country on a shared set of axes
fig, ax = plt.subplots(figsize=(10, 6))
for name in countries:
    subset = df.loc[df['location'] == name]
    ax.plot(subset['date'], subset['total_cases'], label=name)
ax.set_title('Total COVID-19 Cases Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Cases')
ax.legend()
ax.grid()
plt.show()

# Death rate: total deaths divided by total cases, stored as a new column
df['death_rate'] = df['total_deaths'].div(df['total_cases'])

## 💉 5. Vaccination Progress

In [None]:
# Plot total vaccinations (one line per selected country)
plt.figure(figsize=(10, 6))
for country in countries:
    country_df = df[df['location'] == country]
    plt.plot(country_df['date'], country_df['total_vaccinations'], label=country)
plt.title('Total Vaccinations Over Time')
plt.xlabel('Date')
plt.ylabel('Total Vaccinations')
plt.legend()
plt.grid()
plt.show()

# Optional: % of population vaccinated
# Only computed when a 'population' column is actually present.
if 'population' in df.columns:
    # 'total_vaccinations' counts doses administered, so dividing it by the
    # population overstates coverage and can exceed 100%. Prefer the
    # 'people_vaccinated' column when the dataset provides it, and fall back
    # to the dose count otherwise.
    vaccinated_col = (
        'people_vaccinated' if 'people_vaccinated' in df.columns
        else 'total_vaccinations'
    )
    df['percent_vaccinated'] = (df[vaccinated_col] / df['population']) * 100

## 🗺️ 7. Choropleth Map Visualization
Visualize total COVID-19 cases and vaccination rates by country using a world map.

In [None]:
import plotly.express as px

# Get the latest available data for each country
# (groupby(...).last() keeps, per column, the last non-null value in date order).
# NOTE: `df` was restricted to `countries` in the cleaning step, so only those
# locations will be coloured on the maps below.
latest_df = df.sort_values('date').groupby('location', as_index=False).last()

# Filter out regions that are not individual countries (based on iso_code length).
# .copy() so the column assignments below act on an independent frame instead
# of a slice (avoids SettingWithCopyWarning).
latest_df = latest_df[latest_df['iso_code'].str.len() == 3].copy()

# Fill missing values with 0 for mapping
latest_df['total_cases'] = latest_df['total_cases'].fillna(0)
latest_df['total_vaccinations'] = latest_df['total_vaccinations'].fillna(0)

# Choropleth: Total Cases
fig_cases = px.choropleth(
    latest_df,
    locations="iso_code",
    color="total_cases",
    hover_name="location",
    color_continuous_scale="Reds",
    title="🌍 Total COVID-19 Cases by Country (Latest)"
)
fig_cases.show()

# Optional: Choropleth of vaccination rate if population is available
if 'population' in latest_df.columns:
    # 'total_vaccinations' counts doses administered, so doses / population can
    # exceed 100%. Use 'people_vaccinated' when the dataset provides it and
    # fall back to the dose count otherwise.
    vaccinated_col = (
        'people_vaccinated' if 'people_vaccinated' in latest_df.columns
        else 'total_vaccinations'
    )
    latest_df['percent_vaccinated'] = (
        latest_df[vaccinated_col].fillna(0) / latest_df['population']
    ) * 100
    fig_vax = px.choropleth(
        latest_df,
        locations="iso_code",
        color="percent_vaccinated",
        hover_name="location",
        color_continuous_scale="Greens",
        title="💉 Percent Vaccinated by Country (Latest)"
    )
    fig_vax.show()

## 🧠 6. Insights & Reporting

### Key Insights

1. 🇺🇸 The United States had the fastest early vaccine rollout, reaching 50% coverage by mid-2021.
2. 🇮🇳 India experienced sharp spikes in new cases during mid-2021.
3. 🇰🇪 Kenya had slower vaccination uptake but maintained a lower case-fatality rate.
4. Daily new-case trends show clear wave patterns in all three countries.
5. Vaccination appears to be correlated with a decline in deaths after the peak.

**Notes:**
- The dataset is updated regularly. Re-run the data-loading cell to pull the latest statistics.
- Some countries may have missing or inconsistent vaccination records.
