**Importing modules & Dataset**

In [2]:
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt 


In [3]:
# Country-level aggregate CSV from the `datasets/covid-19` GitHub repository.
# It is fetched over the network from the `master` branch, so the contents
# (and every number derived below) can change between runs.
dataset_url = 'https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv'
df = pd.read_csv(dataset_url)

In [None]:
# Show the loaded frame (a bare last expression triggers the notebook's rich display).
df

Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths
0,2020-01-22,Afghanistan,0,0,0
1,2020-01-23,Afghanistan,0,0,0
2,2020-01-24,Afghanistan,0,0,0
3,2020-01-25,Afghanistan,0,0,0
4,2020-01-26,Afghanistan,0,0,0
...,...,...,...,...,...
149089,2022-02-08,Zimbabwe,230603,0,5366
149090,2022-02-09,Zimbabwe,230740,0,5367
149091,2022-02-10,Zimbabwe,231040,0,5373
149092,2022-02-11,Zimbabwe,231214,0,5374


**Preprocessing**

In [4]:
# Drop rows whose Confirmed count is still zero.
# .copy() makes the filtered result an independent frame, so adding columns to
# `df` in later cells does not raise SettingWithCopyWarning.
df = df[df.Confirmed > 0].copy()
df

Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths
33,2020-02-24,Afghanistan,5,0,0
34,2020-02-25,Afghanistan,5,0,0
35,2020-02-26,Afghanistan,5,0,0
36,2020-02-27,Afghanistan,5,0,0
37,2020-02-28,Afghanistan,5,0,0
...,...,...,...,...,...
149287,2022-02-09,Zimbabwe,230740,0,5367
149288,2022-02-10,Zimbabwe,231040,0,5373
149289,2022-02-11,Zimbabwe,231214,0,5374
149290,2022-02-12,Zimbabwe,231299,0,5374


In [None]:
# Preview the rows that belong to India.
df.loc[df['Country'] == 'India']

Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths
60248,2020-01-30,India,1,0,0
60249,2020-01-31,India,1,0,0
60250,2020-02-01,India,1,0,0
60251,2020-02-02,India,2,0,0
60252,2020-02-03,India,3,0,0
...,...,...,...,...,...
60988,2022-02-08,India,42410976,0,505279
60989,2022-02-09,India,42478060,0,506520
60990,2022-02-10,India,42536137,0,507177
60991,2022-02-11,India,42586544,0,507981


**Global Spread of COVID-19**


In [None]:
# Animated world map: one frame per Date, countries shaded by Confirmed count.
fig = px.choropleth(
    df,
    locations='Country',
    locationmode='country names',
    color='Confirmed',
    animation_frame='Date',
)
fig.update_layout(title_text='Global Spread of COVID-19')
fig.show()

**Calculating change in cases per day for India**


In [None]:
# India-only series restricted to the two columns used by the rate analysis.
# A single .loc selection followed by .copy() yields an independent frame, so
# the column assignments made on df_India in later cells do not raise
# SettingWithCopyWarning.
df_India = df.loc[df['Country'] == 'India', ['Date', 'Confirmed']].copy()
df_India

Unnamed: 0,Date,Confirmed
60248,2020-01-30,1
60249,2020-01-31,1
60250,2020-02-01,1
60251,2020-02-02,2
60252,2020-02-03,3
...,...,...
60988,2022-02-08,42410976
60989,2022-02-09,42478060
60990,2022-02-10,42536137
60991,2022-02-11,42586544


In [None]:
# Day-over-day change (first difference) of the Confirmed column.
# The first row has no predecessor, so its value is NaN.
daily_change = df_India.Confirmed.diff()
df_India['Infection Rate'] = daily_change

px.line(data_frame=df_India, x='Date', y='Infection Rate')

In [None]:
# Largest single-day change in India's Confirmed count

df_India['Infection Rate'].max()

414188.0

In [None]:
# Date(s) on which India's largest day-over-day change was recorded.
# The peak is read from the column instead of being pasted in as a literal, so
# the cell stays correct when the upstream CSV is refreshed.
peak_infection_rate = df_India['Infection Rate'].max()

df_India.loc[df_India['Infection Rate'] == peak_infection_rate, 'Date']

60710    2021-05-06
Name: Date, dtype: object

**Calculating the maximum infection rate for every country**

In [None]:
# Day-over-day change in Confirmed, computed separately for each country.
# A plain df['Confirmed'].diff() would, on the first row of every country,
# subtract the last value of whichever country precedes it in the frame.
df['Infection Rate'] = df.groupby('Country')['Confirmed'].diff()

In [None]:
countries = list(df.Country.unique())

# Largest day-over-day increase in Confirmed for each country.
# One groupby pass replaces re-filtering the whole frame once per country;
# sort=False keeps keys in order of first appearance, i.e. the order of `countries`.
max_Infection_rate = {
  country: group['Confirmed'].diff().max()
  for country, group in df.groupby('Country', sort=False)
}

max_Infection_rate

In [6]:
# Tabulate the per-country maxima: one row per country, two columns.
df_MIR = pd.DataFrame(
    list(max_Infection_rate.items()),
    columns=['Country', 'Max Infection Rate'],
)

In [None]:
# Date(s) on which each country recorded its largest day-over-day increase.
# The daily change is derived per country inside the loop, so a country's first
# row is never compared against another country's totals, and the cell does not
# rely on a precomputed rate column or lookup dict.

date_max_Infection = {}

for country, group in df.groupby('Country', sort=False):
  daily_change = group['Confirmed'].diff()
  # Series of Date values: several entries on ties, empty when no change exists.
  date_max_Infection[country] = group.loc[daily_change == daily_change.max(), 'Date']

date_max_Infection

In [None]:
# Mean of the per-country *maximum* daily increases (not a global average daily rate)
df_MIR['Max Infection Rate'].mean()

45475.51515151515

In [None]:
# One bar per country, showing its largest single-day increase.
px.bar(
    df_MIR,
    x='Country',
    y='Max Infection Rate',
    title='Global Infection Rates',
    color='Country',
)

In [None]:
# Keep the countries whose peak daily increase is at or above the cross-country mean.
mean_peak = df_MIR['Max Infection Rate'].mean()
is_at_or_above_mean = df_MIR['Max Infection Rate'].ge(mean_peak)
df_MIR_filtered = df_MIR[is_at_or_above_mean]

px.bar(
    df_MIR_filtered,
    x='Country',
    y='Max Infection Rate',
    title='Global Infection Rates',
    color='Country',
)

**How the National Lockdown Impacted COVID-19 Transmission in India**

In [None]:
# Date markers as 'YYYY-MM-DD' strings, the same format as the Date column.
# NOTE(review): the two first_lockdown_* values are not referenced by any cell
# visible in this notebook -- confirm whether a 2020 plot was intended.
first_lockdown_start_date = '2020-03-24'
first_lockdown_a_month_later = '2020-04-24'

# Used as the x-positions of the vertical lines in the lockdown plot.
lockdown_start_date_2021 = '2021-04-05'
lockdown_end_date_2021 = '2021-06-05'

In [None]:
# Check which columns df_India currently holds before plotting.
df_India.columns

In [None]:
# Infection-rate curve with vertical markers at the 2021 lockdown start and end.

fig = px.line(df_India, x='Date', y='Infection Rate', title='Before & After Lockdown')

# Both markers run from zero up to the tallest point of the curve.
peak_rate = df_India['Infection Rate'].max()
lockdown_markers = (
    (lockdown_start_date_2021, 'red'),
    (lockdown_end_date_2021, 'yellow'),
)
for marker_date, marker_color in lockdown_markers:
    fig.add_shape(
        type='line',
        x0=marker_date,
        y0=0,
        x1=marker_date,
        y1=peak_rate,
        line=dict(color=marker_color, width=2),
    )

# Ending on the figure displays it as the cell output.
fig

**Death Rate vs Infection Rate**

In [None]:
# Day-over-day change in India's Deaths column; the assignment aligns on the
# row labels that df_India shares with df.
india_deaths = df.loc[df['Country'] == 'India', 'Deaths']
df_India['Death Rate'] = india_deaths.diff()
df_India

Unnamed: 0,Date,Confirmed,Infection Rate,Death Rate
60248,2020-01-30,1,,
60249,2020-01-31,1,0.0,0.0
60250,2020-02-01,1,0.0,0.0
60251,2020-02-02,2,1.0,0.0
60252,2020-02-03,3,1.0,0.0
...,...,...,...,...
60988,2022-02-08,42410976,71365.0,1217.0
60989,2022-02-09,42478060,67084.0,1241.0
60990,2022-02-10,42536137,58077.0,657.0
60991,2022-02-11,42586544,50407.0,804.0


In [None]:
# Daily change in cases and in deaths, drawn as two lines on one shared axis.
px.line(
    df_India,
    x='Date',
    y=['Infection Rate', 'Death Rate'],
)

In [None]:
# Scale both rate columns by their own maximum so that each peaks at 1.0 and the
# two curves become comparable on a single axis.

for rate_column in ('Infection Rate', 'Death Rate'):
  df_India[rate_column] = df_India[rate_column] / df_India[rate_column].max()
df_India

Unnamed: 0,Date,Confirmed,Infection Rate,Death Rate
60248,2020-01-30,1,,
60249,2020-01-31,1,0.000000,0.000000
60250,2020-02-01,1,0.000000,0.000000
60251,2020-02-02,2,0.000002,0.000000
60252,2020-02-03,3,0.000002,0.000000
...,...,...,...,...
60988,2022-02-08,42410976,0.172301,0.165039
60989,2022-02-09,42478060,0.161965,0.168294
60990,2022-02-10,42536137,0.140219,0.089097
60991,2022-02-11,42586544,0.121701,0.109032


In [None]:
# Plot the 'Infection Rate' and 'Death Rate' columns as two lines on one axis,
# using whatever values df_India holds when this cell runs.
px.line(
    df_India,
    x='Date',
    y=['Infection Rate', 'Death Rate'],
)