# 📘 COVID-19 Global Data Tracker
**Author:** Jussie

**Description:** Analysis and visualization of global COVID-19 data using Python.

In [None]:
# ----------------------------
# 1️⃣ Data Collection
# ----------------------------
# Dataset: https://covid.ourworldindata.org/data/owid-covid-data.csv

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Apply seaborn's "darkgrid" look to the figures drawn later on
sns.set(style="darkgrid")

# Read the dataset from a CSV file in the current working directory
DATA_FILE = "owid-covid-data.csv"
df = pd.read_csv(DATA_FILE)

In [None]:
# ----------------------------
# 2️⃣ Data Loading & Exploration
# ----------------------------
# Quick overview of the raw frame: column names, the first rows, and the
# number of missing entries in every column. Each entry pairs the label to
# print with the value shown after it.
overview = [
    ("\nColumns:", df.columns.tolist()),
    ("\nHead:\n", df.head()),
    ("\nMissing values:\n", df.isnull().sum()),
]
for label, value in overview:
    print(label, value)

In [None]:
# ----------------------------
# 3️⃣ Data Cleaning
# ----------------------------
# Keep only the countries under study.
countries = ['Kenya', 'United States', 'India']
df = df[df['location'].isin(countries)]

# Drop rows without a date or a cumulative case count. The explicit copy makes
# the result an independent frame, so the column assignments below (and in
# later cells) modify it directly instead of a view of the original data.
df = df.dropna(subset=['date', 'total_cases']).copy()
df['date'] = pd.to_datetime(df['date'])

# Order rows chronologically within each country so that interpolation
# follows the time axis.
df = df.sort_values(['location', 'date'])

# Fill gaps separately for every country. Interpolating over the combined
# frame would fill a gap at the start or end of one country's series using
# values that belong to the neighbouring country's rows.
cols_to_interpolate = ['total_deaths', 'new_cases', 'new_deaths', 'total_vaccinations']
df[cols_to_interpolate] = (
    df.groupby('location')[cols_to_interpolate]
      .transform(lambda series: series.interpolate())
)

In [None]:
# ----------------------------
# 4️⃣ Exploratory Data Analysis (EDA)
# ----------------------------
# One line chart per cumulative metric, with one line per tracked country.
# Each entry is (column to plot, chart title, y-axis label).
trend_charts = [
    ('total_cases', 'Total COVID-19 Cases Over Time', 'Total Cases'),
    ('total_deaths', 'Total COVID-19 Deaths Over Time', 'Total Deaths'),
]

for metric, chart_title, y_label in trend_charts:
    plt.figure(figsize=(10, 6))
    for country in countries:
        country_df = df.loc[df['location'] == country]
        plt.plot(country_df['date'], country_df[metric], label=country)
    plt.title(chart_title)
    plt.xlabel('Date')
    plt.ylabel(y_label)
    plt.legend()
    plt.show()

# Row-wise ratio of cumulative deaths to cumulative cases.
df['death_rate'] = df['total_deaths'].div(df['total_cases'])

In [None]:
# ----------------------------
# 5️⃣ Visualizing Vaccination Progress
# ----------------------------
# Single chart of total vaccinations against date, one line per country.
plt.figure(figsize=(10, 6))
for country in countries:
    in_country = df['location'] == country
    plt.plot(
        df.loc[in_country, 'date'],
        df.loc[in_country, 'total_vaccinations'],
        label=country,
    )
plt.title('Total Vaccinations Over Time')
plt.xlabel('Date')
plt.ylabel('Total Vaccinations')
plt.legend()
plt.show()

In [None]:
# ----------------------------
# 6️⃣ Choropleth Map (Optional)
# ----------------------------
# Use each location's own most recent row. Filtering on the single global
# maximum date would silently drop any location whose last remaining record
# falls on an earlier date, leaving it blank on the map.
latest_data = (
    df.sort_values('date', kind='stable')
      .drop_duplicates(subset=['location'], keep='last')
)

# NOTE: df was restricted to the `countries` list in the cleaning step, so
# only those locations are coloured here.
fig = px.choropleth(latest_data,
                    locations="iso_code",
                    color="total_cases",
                    hover_name="location",
                    color_continuous_scale="Reds",
                    title="Global COVID-19 Cases (Latest Date)")
fig.show()

# 7️⃣ Insights & Reporting
1. India shows a steady growth in cases with a large vaccination effort.
2. The United States had an early rise in deaths but also led in vaccinations.
3. Kenya’s cases and deaths are comparatively low but show steady trends.
4. The death rate (total deaths divided by total cases) varies significantly between countries, possibly due to differences in healthcare capacity and vaccination rates.
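5. The choropleth map highlights the disparities in reported case numbers between the selected countries (Kenya, the United States, and India); a truly global comparison would require the unfiltered dataset.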