# 🦠 COVID-19 Global Data Tracker
A comprehensive analysis of global COVID-19 trends and vaccination progress.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Chart appearance: seaborn-style whitegrid theme with the viridis palette.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('viridis')

# Show every column and allow wide output when DataFrames are displayed.
pd.options.display.max_columns = None
pd.options.display.width = 1000

# Notebook banner: title, underline, one-line description.
print(
    "COVID-19 Global Data Tracker",
    "============================",
    "This notebook analyzes global COVID-19 trends including cases, deaths, and vaccinations.",
    sep="\n",
)

## 1. Data Collection & Loading

In [None]:
# Load the dataset into `df`: try the remote CSV on ourworldindata.org first,
# then fall back to a file named "owid-covid-data.csv" in the working directory.
url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
print(f"Downloading data from: {url}")

try:
    df = pd.read_csv(url)
except Exception as e:
    # Deliberately broad: any download or parse failure should trigger the
    # local fallback rather than abort the notebook.
    print(f"Error: {e}")
    print("Attempting to load from local file...")
    try:
        df = pd.read_csv("owid-covid-data.csv")
    except Exception as local_err:
        print("❌ Could not load data.")
        # Stop here with the real cause attached. Continuing would leave `df`
        # unbound (or stale from an earlier run) and make every later cell
        # fail with a misleading NameError.
        raise RuntimeError(
            "Could not load the COVID-19 dataset from the URL or from "
            "'owid-covid-data.csv'."
        ) from local_err
    else:
        print("✅ Loaded from local file.")
else:
    print("✅ Data loaded successfully!")

## 2. Data Exploration

In [None]:
# Quick overview of the loaded data: dimensions, date span, and the number of
# distinct values in the 'location' column.
print(
    f"Dataset shape: {df.shape}",
    f"Time period: {df['date'].min()} to {df['date'].max()}",
    f"Number of locations: {df['location'].nunique()}",
    sep="\n",
)

# Preview the first rows (rendered as the cell's output).
df.head()

In [None]:
# List every column name available in the dataset.
print("\nColumns:", df.columns.tolist(), sep="\n")

# Headline case, death, and vaccination columns summarised below.
key_metrics = [
    'total_cases',
    'new_cases',
    'total_deaths',
    'new_deaths',
    'total_vaccinations',
    'people_vaccinated',
    'people_fully_vaccinated',
]

# Descriptive statistics for the headline columns only.
df.loc[:, key_metrics].describe()

In [None]:
# Per-metric count of missing values, plus that count expressed as a
# percentage of all rows (rounded to two decimals).
missing_df = df[key_metrics].isnull().sum().to_frame(name='Missing Values')
missing_df['Percentage'] = (missing_df['Missing Values'] / len(df) * 100).round(2)
missing_df

## 3. Data Cleaning

In [None]:
# Parse the date column so rows can be ordered chronologically.
df['date'] = pd.to_datetime(df['date'])

countries_of_interest = ['World', 'United States', 'India', 'Brazil', 'United Kingdom',
                         'South Africa', 'Kenya', 'Australia', 'China', 'Germany']

# Take an explicit copy: new columns are assigned to this frame below, and
# assigning to a boolean-mask slice of `df` is the chained-assignment pattern
# pandas warns about (SettingWithCopyWarning).
filtered_df = df[df['location'].isin(countries_of_interest)].copy()

# Latest values per location, ordered by total cases (largest first).
# Sort by date first so "last" means most recent instead of depending on the
# file's row order; GroupBy.last() takes the last non-null entry per column.
# NOTE(review): 'World' appears in countries_of_interest, so the location
# column evidently contains aggregate rows as well as countries; the top-10
# below may be led by aggregates - confirm whether that is intended.
latest_data = (
    df.sort_values(['location', 'date'])
      .groupby('location')
      .last()
      .reset_index()
      .sort_values('total_cases', ascending=False)
)

# Derived percentages: deaths per confirmed case, and fully vaccinated people
# per head of population.
filtered_df['case_fatality_rate'] = (filtered_df['total_deaths'] / filtered_df['total_cases']) * 100
filtered_df['vaccination_rate'] = (filtered_df['people_fully_vaccinated'] / filtered_df['population']) * 100

# Ten locations with the highest total case counts.
latest_data[['location', 'total_cases', 'total_deaths']].head(10)