COVID-19 Global Data Analysis: Trends, Vaccinations, and Insights

In [None]:
# Importing libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the Our World in Data COVID-19 export located next to this notebook
df = pd.read_csv(filepath_or_buffer='owid-covid-data.csv')

# List the available columns before narrowing them down in the next cell
print(df.columns)

# Show the first five rows as the cell's rich output
df.head(5)


In [None]:
# Columns the rest of the notebook actually uses
columns = ['location', 'date', 'total_cases', 'new_cases', 'total_deaths',
           'new_deaths', 'total_vaccinations', 'people_vaccinated', 'population']

# Countries compared throughout the analysis
countries = ['Kenya', 'United States', 'India']

# Select rows and columns in one step; .copy() detaches the result from the
# raw frame so the assignments below do not raise SettingWithCopyWarning.
df = df.loc[df['location'].isin(countries), columns].copy()

# Convert date column to datetime
df['date'] = pd.to_datetime(df['date'])

# Order chronologically within each country so "previous value" is well defined
df = df.sort_values(['location', 'date'])

# Forward-fill missing values *per country*. A frame-wide ffill would carry the
# last values of one country into the leading gaps of the next one, and
# fillna(method='ffill') is deprecated in recent pandas releases.
# NOTE(review): daily columns (new_cases, new_deaths) are forward-filled as in
# the original cell; confirm that repeating the previous day's value is intended.
value_columns = [name for name in columns if name not in ('location', 'date')]
df[value_columns] = df.groupby('location')[value_columns].ffill()

# Rows that still lack the core metrics (e.g. dates before a country's first
# report) are dropped; reset_index yields a fresh, independently owned frame.
df = df.dropna(subset=['total_cases', 'total_deaths', 'date']).reset_index(drop=True)

df.head()


In [None]:
# Cumulative confirmed cases, one line per country
fig, ax = plt.subplots(figsize=(12, 6))
for country in countries:
    country_rows = df.loc[df['location'] == country]
    ax.plot(country_rows['date'], country_rows['total_cases'], label=country)
ax.set_title('Total COVID-19 Cases Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Cases')
ax.legend()
plt.show()


In [None]:
# Cumulative deaths, one line per country
fig, ax = plt.subplots(figsize=(12, 6))
for country in countries:
    country_rows = df.loc[df['location'] == country]
    ax.plot(country_rows['date'], country_rows['total_deaths'], label=country)
ax.set_title('Total COVID-19 Deaths Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Deaths')
ax.legend()
plt.show()


In [None]:
# Daily reported new cases, one line per country
fig, ax = plt.subplots(figsize=(12, 6))
for country in countries:
    country_rows = df.loc[df['location'] == country]
    ax.plot(country_rows['date'], country_rows['new_cases'], label=country)
ax.set_title('Daily New COVID-19 Cases')
ax.set_xlabel('Date')
ax.set_ylabel('New Cases')
ax.legend()
plt.show()


In [None]:
# Death rate = cumulative deaths as a fraction of cumulative confirmed cases.
# Masking non-positive case counts keeps the ratio NaN instead of inf when a
# row has no recorded cases. assign() returns a new frame, so re-running the
# cell is safe and no SettingWithCopyWarning is raised.
df = df.assign(
    death_rate=df['total_deaths'] / df['total_cases'].where(df['total_cases'] > 0)
)

# Most recent row per country. Sorting explicitly means "last row" really is
# the latest date rather than whatever order the frame happens to be in;
# .copy() makes `latest` an independent frame that later cells can extend.
latest = df.sort_values(['location', 'date']).groupby('location').tail(1).copy()
latest[['location', 'death_rate']]


In [None]:
# Cumulative vaccine doses administered, one line per country
fig, ax = plt.subplots(figsize=(12, 6))
for country in countries:
    country_rows = df.loc[df['location'] == country]
    ax.plot(country_rows['date'], country_rows['total_vaccinations'], label=country)
ax.set_title('Total Vaccinations Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Vaccinations')
ax.legend()
plt.show()


In [None]:
# Share of each country's population recorded in people_vaccinated, in percent.
# assign() builds a new frame instead of writing into `latest` in place, which
# avoids SettingWithCopyWarning when `latest` is a slice of another frame and
# keeps the cell safe to re-run.
latest = latest.assign(
    vaccinated_percent=latest['people_vaccinated'] / latest['population'] * 100
)
latest[['location', 'vaccinated_percent']]


In [None]:
🧠 Key Insights:
Of the three countries compared, the United States had the highest cumulative cases and deaths throughout the pandemic.

India experienced sharp waves of cases, especially in mid-2021.

Kenya’s case and vaccination numbers were significantly lower than those of the other two countries, and they changed more gradually over time.

The death rate (total deaths divided by total cases) was higher in Kenya than in India, possibly due to fewer healthcare resources.

The United States led the vaccination rollout among the three countries, with over 60% of its population vaccinated (people vaccinated divided by population).