In [None]:
import warnings

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import seaborn as sns

In [None]:
 df = pd.read_csv("covid_19_clean_complete.csv")

In [None]:
# List the column labels so the names used in later cells can be checked against the file.
df.columns

In [None]:
# Preview the first rows of the loaded frame as a quick sanity check.
df.head()

In [None]:
# Tally the missing values in each column.
df.isna().sum()

In [None]:
# Discard rows that have no date; every analysis below is keyed on 'Date'.
df = df.dropna(axis='index', subset=['Date'])


In [None]:
# Convert the 'Date' column to pandas datetimes.
df['Date'] = pd.to_datetime(df['Date'])

# Snapshot of the rows that fall on the most recent date in the data.
latest_date = df['Date'].max()
latest_df = df.loc[df['Date'].eq(latest_date)]


In [None]:
# Countries to compare. Each name must match df['Country/Region'] exactly,
# because the filter below uses isin() and silently ignores unknown labels.
# NOTE(review): the JHU-derived dataset is expected to label the United States
# as 'US' (not 'USA') — confirm with df['Country/Region'].unique().
countries = ['Kenya', 'India', 'US']

In [None]:
# Warn about requested countries that never occur in the data; otherwise isin()
# drops them silently and the charts below are just missing that country.
known_countries = set(df['Country/Region'].dropna().unique())
missing_countries = [name for name in countries if name not in known_countries]
if missing_countries:
    warnings.warn(
        f"No rows found for {missing_countries}; check the spelling against "
        "df['Country/Region'].unique()."
    )

# Independent copy, so adding a column here does not touch `df`.
filtered_df = df[df['Country/Region'].isin(countries)].copy()

# Deaths as a percentage of confirmed cases. Rows with zero confirmed cases are
# set to NaN rather than divided by a stand-in 1: the stand-in reported them as
# 0% (or Deaths x 100% if any deaths were recorded), which distorts averages.
confirmed = filtered_df['Confirmed']
filtered_df['Death Rate (%)'] = (filtered_df['Deaths'] / confirmed.where(confirmed > 0)) * 100


In [None]:
# Total confirmed cases per (date, country) pair, returned as a flat table.
cases_over_time = filtered_df.groupby(
    ['Date', 'Country/Region'], as_index=False
)['Confirmed'].sum()


In [None]:
# Line chart of confirmed cases over time, one line per country.
cases_fig, cases_ax = plt.subplots(figsize=(12, 6))
sns.lineplot(
    data=cases_over_time,
    x='Date',
    y='Confirmed',
    hue='Country/Region',
    ax=cases_ax,
)
cases_ax.set_title('Total COVID-19 Cases Over Time')
cases_ax.set_xlabel('Date')
cases_ax.set_ylabel('Confirmed Cases')
cases_ax.tick_params(axis='x', rotation=45)  # slant the date labels
cases_ax.legend(title='Country')
cases_ax.grid(True)
cases_fig.tight_layout()
plt.show()


In [None]:
# Recompute the death rate so this cell is self-contained. Rows with zero
# confirmed cases become NaN (instead of being divided by a stand-in 1), so
# they are skipped by mean() rather than counted as 0% observations.
confirmed = filtered_df['Confirmed']
filtered_df['Death Rate (%)'] = (filtered_df['Deaths'] / confirmed.where(confirmed > 0)) * 100

# Average of the per-row death rates for each country (NaN rows are ignored).
death_rate_avg = filtered_df.groupby('Country/Region')['Death Rate (%)'].mean().reset_index()

# Bar chart of the averages.
plt.figure(figsize=(8,5))
sns.barplot(data=death_rate_avg, x='Country/Region', y='Death Rate (%)')
plt.title('Average COVID-19 Death Rate by Country (%)')
plt.ylabel('Death Rate (%)')
plt.xlabel('Country')
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Pairwise correlation between the four case-count columns, drawn as an annotated heatmap.
count_columns = ['Confirmed', 'Deaths', 'Recovered', 'Active']
correlations = filtered_df[count_columns].corr()

heatmap_fig, heatmap_ax = plt.subplots(figsize=(8, 6))
sns.heatmap(correlations, annot=True, cmap='YlOrRd', ax=heatmap_ax)
heatmap_ax.set_title('Correlation Heatmap')
heatmap_fig.tight_layout()
plt.show()


In [None]:
# Rows recorded on the most recent date in the data (input for the map below).
latest_date = df['Date'].max()
latest_df = df.loc[df['Date'].eq(latest_date)]


In [None]:
# Confirmed cases per country on the latest date, as a flat two-column table.
map_df = latest_df.groupby('Country/Region', as_index=False)['Confirmed'].sum()


In [None]:
# World map shaded by confirmed cases on the latest date.
map_title = f'Total COVID-19 Confirmed Cases as of {latest_date.date()}'

fig = px.choropleth(
    data_frame=map_df,
    locations='Country/Region',    # matched by name; an ISO code column could be used instead if available
    locationmode='country names',
    color='Confirmed',
    hover_name='Country/Region',
    color_continuous_scale='OrRd',
    title=map_title,
)
fig.show()


In [None]:
# Insight 1 is derived from the data so it cannot drift from the dataset: the
# earlier hard-coded text named a fixed country and a fixed date that were not
# tied to `latest_date` or to the per-country totals in `map_df`.
top_country = map_df.loc[map_df['Confirmed'].idxmax(), 'Country/Region']
insight_1 = (
    f"1. {top_country} reported the highest number of confirmed cases "
    f"as of {latest_date.date()}."
)

# NOTE(review): insights 2 and 3 are manual observations, not computed values —
# verify them against the charts above (including that the data actually covers
# the period mentioned) before sharing.
insight_2 = "2. India showed steady case growth with a comparatively lower death rate."
insight_3 = "3. Kenya maintained lower total cases but experienced noticeable spikes in July 2021."

# Print the insights
for insight in (insight_1, insight_2, insight_3):
    print(insight)
