# 🦠 COVID-19 Data Analysis Project
---
This notebook walks step by step through loading, cleaning, analyzing,
and visualizing COVID-19 data from the `owid-covid-data.csv` dataset.

In [None]:
# 1️⃣ Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Global plot styling.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*"
# and later releases dropped the old "seaborn" name, so choose whichever
# name the installed version actually provides.
seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
plt.style.use(seaborn_style)
sns.set_palette("Set2")

In [None]:
# 2️⃣ Load the Dataset
# Read the CSV from the working folder.
try:
    df = pd.read_csv("owid-covid-data.csv")
except FileNotFoundError:
    print("❌ Dataset not found! Please place 'owid-covid-data.csv' in the working folder.")
    # Re-raise so the cell stops here with the real cause; otherwise the next
    # statement fails with an unrelated NameError because `df` was never bound.
    raise
else:
    print("✅ Data loaded successfully!")

# Preview the first rows (rendered as the cell output in a notebook).
df.head()

In [None]:
# Explore dataset: column names, dtypes/non-null counts, and missing values.
print("Columns:", df.columns.tolist())
print("\nDataset Info:")
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
df.info()
print("\nMissing values per column:")
# Only the first 15 columns are shown to keep the output short.
print(df.isnull().sum().head(15))

In [None]:
# 3️⃣ Data Cleaning
# Keep only the countries of interest, parse dates, drop rows that lack a
# date or a case count, and fill the remaining gaps.
countries = ["Kenya", "United States", "India"]

# .copy() makes the filtered frame independent of the original, so the column
# assignment below does not trigger pandas' SettingWithCopyWarning.
df = df[df['location'].isin(countries)].copy()
df['date'] = pd.to_datetime(df['date'])
df = df.dropna(subset=['date', 'total_cases'])

# Cumulative series can have reporting gaps. Filling those gaps with 0 would
# draw a running total as dropping to zero and back, so carry the last known
# value forward within each country first.
df = df.sort_values(['location', 'date'])
cumulative_cols = [
    col for col in ('total_deaths', 'total_vaccinations') if col in df.columns
]
if cumulative_cols:
    df[cumulative_cols] = df.groupby('location')[cumulative_cols].ffill()

# Whatever is still missing (e.g. values before a country's first report)
# is set to 0, as before.
df = df.fillna(0)

print("✅ Data cleaned successfully!")
df.head()

In [None]:
# 4️⃣ Exploratory Data Analysis (EDA)
# Line chart of total cases over time, one line per selected country.
ax = plt.figure(figsize=(12, 6)).add_subplot()
for name in countries:
    subset = df.loc[df['location'] == name]
    ax.plot(subset['date'], subset['total_cases'], label=name)
ax.set_title("Total COVID-19 Cases Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Total Cases")
ax.legend()
plt.show()

In [None]:
# Total deaths over time
# Line chart of total deaths, one line per selected country.
ax = plt.figure(figsize=(12, 6)).add_subplot()
for name in countries:
    subset = df.loc[df['location'] == name]
    ax.plot(subset['date'], subset['total_deaths'], label=name)
ax.set_title("Total COVID-19 Deaths Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Total Deaths")
ax.legend()
plt.show()

In [None]:
# Death rate analysis
# death_rate = total_deaths / total_cases, computed row by row.
# Rows with zero cases would produce inf (or NaN for 0/0) and distort the
# y-axis, so non-positive denominators are masked to NaN; matplotlib then
# leaves a gap at those dates.
case_counts = df['total_cases']
df['death_rate'] = df['total_deaths'] / case_counts.where(case_counts > 0)

plt.figure(figsize=(12,6))
for c in countries:
    country_data = df[df['location'] == c]
    plt.plot(country_data['date'], country_data['death_rate'], label=c)
plt.title("COVID-19 Death Rate Over Time")
plt.xlabel("Date")
plt.ylabel("Death Rate")
plt.legend()
plt.show()

In [None]:
# 5️⃣ Vaccination Analysis
# Line chart of total vaccinations over time, one line per selected country.
ax = plt.figure(figsize=(12, 6)).add_subplot()
for name in countries:
    subset = df.loc[df['location'] == name]
    ax.plot(subset['date'], subset['total_vaccinations'], label=name)
ax.set_title("Cumulative Vaccinations Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Total Vaccinations")
ax.legend()
plt.show()

In [None]:
# 6️⃣ Choropleth Map (Optional)
# Use each location's most recent row. Filtering on the single global maximum
# date would drop every country whose last report falls on an earlier day.
latest = df.sort_values('date').groupby('location').tail(1)

# Colour each country (matched by its iso_code) by its latest total_cases.
fig = px.choropleth(
    latest,
    locations="iso_code",
    color="total_cases",
    hover_name="location",
    color_continuous_scale="Reds",
    title="🌍 Global COVID-19 Total Cases (Latest)"
)
fig.show()

In [None]:
# 7️⃣ Insights & Reporting
# Hand-written takeaways; these are static strings, not computed from `df`.
insights = [
    "🇮🇳 India had the steepest vaccination growth in early 2021.",
    "🇰🇪 Kenya's death rate stayed consistently lower compared to USA.",
    "🇺🇸 The United States recorded the highest total cases among the selected countries.",
]

# Print a heading followed by the insights as a 1-based numbered list.
print("🔎 Key Insights:")
print("\n".join(f"{rank}. {text}" for rank, text in enumerate(insights, 1)))