### COVID 19 INDIA DATA VISUALISATION AND ANALYSIS USING PLOTLY OFFLINE

Datasets available on Kaggle website, you can download it from __[here](https://www.kaggle.com/sudalairajkumar/covid19-in-india)__.

In [1]:
# importing necessary libraries
import pandas as pd
import numpy as np
import plotly.offline as pyo
import plotly.graph_objects as go

In [2]:
# reading into the datset
df = pd.read_csv('covid_19_india_27_10.csv')
df.head()

Unnamed: 0,Sno,Date,Time,State/UnionTerritory,ConfirmedIndianNational,ConfirmedForeignNational,Cured,Deaths,Confirmed
0,1,30/01/20,6:00 PM,Kerala,1,0,0,0,1
1,2,31/01/20,6:00 PM,Kerala,1,0,0,0,1
2,3,01/02/20,6:00 PM,Kerala,2,0,0,0,2
3,4,02/02/20,6:00 PM,Kerala,3,0,0,0,3
4,5,03/02/20,6:00 PM,Kerala,3,0,0,0,3


### BASIC TREND ANALYSIS

In [3]:
#Cleaning data and performing some basic datetime conversion. Also dataset messes the name of Telagana quite a
#few times tried cleaning and accounting for the same.

df.drop(['ConfirmedIndianNational', 'ConfirmedForeignNational'], axis = 1, inplace = True)
df.rename(columns = {'State/UnionTerritory':'state_UT'}, inplace = True)
df['Date'] = pd.to_datetime(df['Date'], dayfirst = True)
df['month'] = pd.to_datetime(df['Date'], format='%m').dt.month_name().str.slice(stop=3)
df['state_UT'] = df['state_UT'].str.replace('Telengana***', 'Telengana', regex = False)
df['state_UT'] = df['state_UT'].str.replace('Telangana***', 'Telengana', regex = False)
df['state_UT'] = df['state_UT'].str.replace('Telangana', 'Telengana', regex = False)
df.drop(['Sno'], axis = 1, inplace = True)
df.head()

Unnamed: 0,Date,Time,state_UT,Cured,Deaths,Confirmed,month
0,2020-01-30,6:00 PM,Kerala,0,0,1,Jan
1,2020-01-31,6:00 PM,Kerala,0,0,1,Jan
2,2020-02-01,6:00 PM,Kerala,0,0,2,Feb
3,2020-02-02,6:00 PM,Kerala,0,0,3,Feb
4,2020-02-03,6:00 PM,Kerala,0,0,3,Feb


In [4]:
#plotting the confirmed cases comparision between states
states = df['state_UT'].unique()

data_states = []
n = 0
for state in states:
    data = go.Scatter(x = df[df['state_UT'] == state].Date,
                  y = df[df['state_UT'] == state].Confirmed,
                  name = state)
    data_states.append(data)
    n +=1
    
layout = go.Layout(
    title = dict(text = "<b>Confirmed Cases Compariosn between states",
                 font = dict(family = 'arial', color = 'black', size = 20),
                 xref = 'paper',
                 x = 0.5
                ),
    plot_bgcolor='rgba(0,0,0,0)',
    xaxis = dict(rangeslider_visible = True,
                 title = dict(text = 'Dates')),
    yaxis = dict(fixedrange = False,
                title = dict(text = 'No of confirmed cases')),
    legend = dict(itemclick = "toggleothers",
                  itemdoubleclick = "toggle"
                 ),
    autosize = True
)

fig = go.Figure(data = data_states, layout = layout)

pyo.plot(fig, filename = 'Comparison_states.html')

'Comparison_states.html'

In [5]:
df1 = df
df1 = df1.groupby('Date').sum()
df1.head()

Unnamed: 0_level_0,Cured,Deaths,Confirmed
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-30,0,0,1
2020-01-31,0,0,1
2020-02-01,0,0,2
2020-02-02,0,0,3
2020-02-03,0,0,3


In [6]:
#determine active cases based on available cured, confirmed and death cases.
df1['active'] = df1['Confirmed']-df1['Cured']-df1['Deaths']
df1.head()

Unnamed: 0_level_0,Cured,Deaths,Confirmed,active
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-30,0,0,1,1
2020-01-31,0,0,1,1
2020-02-01,0,0,2,2
2020-02-02,0,0,3,3
2020-02-03,0,0,3,3


In [7]:
#plotting daywise covid cases across india
data_sets = []
series = list(df1.columns)

for col in series:
    data = go.Scatter(x = df1.index,
                  y = df1[col],
                  name = col,
                  fill='tozeroy')
    data_sets.append(data)

layout = go.Layout(
    title = dict(text = "<b>Comparision between different case statuses",
                 font = dict(family = 'arial', color = 'black', size = 20),
                 xref = 'paper',
                 x = 0.5
                ),
    plot_bgcolor='rgba(0,0,0,0)',
    xaxis = dict(rangeslider_visible = True,
                 title = dict(text = 'Dates')
                ),
    yaxis = dict(fixedrange = False,
                 title = dict(text = 'No of Cases')
                ),
    legend = dict(itemclick = "toggleothers",
                  itemdoubleclick = "toggle"
                 ),
    autosize = True
)

fig = go.Figure(data = data_sets, layout = layout)

pyo.plot(fig, filename = 'Comparison_cases.html')

'Comparison_cases.html'

In [8]:
# Identifying per day changes in the active, death and confirmed cases
df1['active_change'] = df1['active'].diff()
df1['confirmed_change'] = df1['Confirmed'].diff()
df1['deaths_change'] = df1['Deaths'].diff()
df1['cured_change'] = df1['Cured'].diff()
df1.head()

Unnamed: 0_level_0,Cured,Deaths,Confirmed,active,active_change,confirmed_change,deaths_change,cured_change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-30,0,0,1,1,,,,
2020-01-31,0,0,1,1,0.0,0.0,0.0,0.0
2020-02-01,0,0,2,2,1.0,1.0,0.0,0.0
2020-02-02,0,0,3,3,1.0,1.0,0.0,0.0
2020-02-03,0,0,3,3,0.0,0.0,0.0,0.0


In [9]:
changes = list(df1.columns)[4:]
changes

['active_change', 'confirmed_change', 'deaths_change', 'cured_change']

In [10]:
#plotting per day changes across the complete time period
data_change = []
changes = list(df1.columns)[4:]

for change in changes:
    data = go.Scatter(x = df1.index,
                  y = df1[change],
                  name = change,
                  fill='tozeroy')
    data_change.append(data)

layout = go.Layout(
    plot_bgcolor='white',
    autosize = True,
    xaxis = dict(rangeslider_visible = True,
                 title = dict(text = 'Dates')
                ),
    yaxis = dict(title = 'Per day cases', fixedrange = False),
    title = dict(text = '<b>Daily COVID cases', xref = 'paper', x = 0.5, xanchor = 'center',
                 font = dict(family = 'arial', size = 20, color = 'black')
                ),
    legend = dict(itemclick = "toggleothers",
                  itemdoubleclick = "toggle"
                 ),
)

fig = go.Figure(data = data_change, layout = layout)

pyo.plot(fig, filename = 'change_cases.html')

'change_cases.html'

In [11]:
#ploting moving average (3 Day) for all the cases change
data_change = []
changes = list(df1.columns)[4:]

for change in changes:
    data = go.Scatter(x = df1.index,
                  y = df1[change].rolling(3).mean().round(2),
                  name = change,
                  fill='tozeroy',
                  text = df1[change].rolling(3).mean().round(2),
                  hoverinfo = 'x+text+name')
    data_change.append(data)

layout = go.Layout(
    plot_bgcolor='white',
    autosize = True,
    xaxis = dict(rangeslider_visible = True,
                 title = dict(text = 'Dates')
                ),
    yaxis = dict(title = 'Per day cases', fixedrange = False),
    title = dict(text = '<b>Daily COVID cases (3 Day Moving Average)', xref = 'paper', x = 0.5, xanchor = 'center',
                 font = dict(family = 'arial', size = 20, color = 'black')
                ),
    legend = dict(itemclick = "toggleothers",
                  itemdoubleclick = "toggle"
                 ),
)

fig = go.Figure(data = data_change, layout = layout)

pyo.plot(fig, filename = 'change_cases_3ma.html')

'change_cases_3ma.html'

In [12]:
columns = list(df1.columns)
stacks = []
stacks.append(columns[0])
stacks.append(columns[1])
stacks.append(columns[3])
stacks

['Cured', 'Deaths', 'active']

In [13]:
#plotting break up of confirmed cases across the time period.
bar_change = []

for stack in stacks:
    data = go.Bar(x = df1.index,
                  y = df1[stack],
                  name = stack)
    bar_change.append(data)
    
data = go.Scatter(x = df1.index,
                  y = df1.Confirmed,
                  line = dict(dash = 'dashdot', color = 'black'),
                  name = 'Confirmed')
bar_change.append(data)

layout = go.Layout(
    plot_bgcolor='rgba(0,0,0,0)',
    autosize = True,
    xaxis = dict(rangeslider_visible = True,
                 title = dict(text = 'Dates')),
    yaxis = dict(title = 'Per day case', fixedrange = False),
    title = dict(text = '<b>Daily breakup of Confirmed COVID cases', xref = 'paper', x = 0.5, xanchor = 'center',
                 font = dict(family = 'arial', size = 20, color = 'black')),
    legend = dict(itemclick = "toggleothers",
                  itemdoubleclick = "toggle"
                 ),
    barmode = 'stack')
                 
fig = go.Figure(data = bar_change, layout = layout)

for i in range(0, df1.Confirmed.iloc[-1], 1000000):
    fig.add_shape(type = 'line',
                  yref = 'paper',
                  y0 = 0,
                  y1 = 1,
                  x0 = str(df1.loc[(df1.Confirmed) >= i].index.values[0]),
                  x1 = str(df1.loc[(df1.Confirmed) >= i].index.values[0]),
                  line = dict(width = 3, color = 'black',  dash = 'dash')
                 )
    fig.add_annotation(x = str(df1.loc[(df1.Confirmed) >= i].index.values[0]),
                        y = 0,
                        yref = 'y',
                        xref = 'x',
                        ax = -15,
                        ay = -100,
                        text = '<b>' + str(i) + ' confirmed cases, ' + str(df1.loc[(df1.Confirmed) >= i].index.values[0])[0:10],
                        valign = 'top',
                        bgcolor="white",
                        opacity = 0.8,
                        font = dict(family = 'Arial', size = 14, color = 'black'),
                        textangle=-90,
                      )

pyo.plot(fig, filename = 'Confirmed_breakup.html')

'Confirmed_breakup.html'

In [14]:
#plotting per day change in different types of cases on log scale
data_change = []
changes = list(df1.columns)[4:]

for change in changes:
    data = go.Scatter(x = df1.index,
                  y = np.log(df1[change]),
                  name = change,
                  fill='tozeroy',
                  text = df1[change],
                  hoverinfo = 'x + text + name')
    data_change.append(data)

layout = go.Layout(
    plot_bgcolor='white',
    autosize = True,
    xaxis = dict(rangeslider_visible = True,
                 title = dict(text = 'Dates')),
    yaxis = dict(title = 'Per day case', fixedrange = False),
    title = dict(text = '<b>Daily COVID cases (Log Scale)', xref = 'paper', x = 0.5, xanchor = 'center',
                 font = dict(family = 'arial', size = 20, color = 'black')),
    legend = dict(itemclick = "toggleothers",
                  itemdoubleclick = "toggle"
                 ),
)

fig = go.Figure(data = data_change, layout = layout)

pyo.plot(fig, filename = 'log_change_cases.html')


divide by zero encountered in log


invalid value encountered in log



'log_change_cases.html'

In [15]:
#plotting per day change (3 Day moving average) in different types of cases on log scale
data_change = []
changes = list(df1.columns)[4:]

for change in changes:
    data = go.Scatter(x = df1.index,
                  y = np.log(df1[change].rolling(3).mean()),
                  name = change,
                  fill='tozeroy',
                  text = df1[change].rolling(3).mean().round(2),
                  hoverinfo = 'x + text + name')
    data_change.append(data)

layout = go.Layout(
    plot_bgcolor='white',
    autosize = True,
    xaxis = dict(rangeslider_visible = True,
                 title = dict(text = 'Dates')),
    yaxis = dict(title = 'Per day case', fixedrange = False),
    title = dict(text = 'Daily COVID cases on log scale (3 Day Moving Average)', xref = 'paper', x = 0.5, xanchor = 'center',
                 font = dict(family = 'arial', size = 20, color = 'black')),
    legend = dict(itemclick = "toggleothers",
                  itemdoubleclick = "toggle"
                 ),
)

fig = go.Figure(data = data_change, layout = layout)

pyo.plot(fig, filename = 'log_change_cases_3ma.html')

'log_change_cases_3ma.html'

In [16]:
states = list(df.state_UT.unique())
states

['Kerala',
 'Telengana',
 'Delhi',
 'Rajasthan',
 'Uttar Pradesh',
 'Haryana',
 'Ladakh',
 'Tamil Nadu',
 'Karnataka',
 'Maharashtra',
 'Punjab',
 'Jammu and Kashmir',
 'Andhra Pradesh',
 'Uttarakhand',
 'Odisha',
 'Puducherry',
 'West Bengal',
 'Chhattisgarh',
 'Chandigarh',
 'Gujarat',
 'Himachal Pradesh',
 'Madhya Pradesh',
 'Bihar',
 'Manipur',
 'Mizoram',
 'Andaman and Nicobar Islands',
 'Goa',
 'Unassigned',
 'Assam',
 'Jharkhand',
 'Arunachal Pradesh',
 'Tripura',
 'Nagaland',
 'Meghalaya',
 'Dadar Nagar Haveli',
 'Cases being reassigned to states',
 'Sikkim',
 'Daman & Diu',
 'Dadra and Nagar Haveli and Daman and Diu']

In [17]:
#grouping the cases based on states and dates to create separate timelines for each state.
df_new = df.groupby(['state_UT', 'Date']).sum().diff().clip(0)
df_new.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Cured,Deaths,Confirmed
state_UT,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Andaman and Nicobar Islands,2020-03-26,,,
Andaman and Nicobar Islands,2020-03-27,0.0,0.0,0.0
Andaman and Nicobar Islands,2020-03-28,0.0,0.0,5.0
Andaman and Nicobar Islands,2020-03-29,0.0,0.0,3.0
Andaman and Nicobar Islands,2020-03-30,0.0,0.0,0.0


In [18]:
df_new['Active'] = df_new['Confirmed'] - df_new['Cured'] - df_new['Deaths']
df_new.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Cured,Deaths,Confirmed,Active
state_UT,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Andaman and Nicobar Islands,2020-03-26,,,,
Andaman and Nicobar Islands,2020-03-27,0.0,0.0,0.0,0.0
Andaman and Nicobar Islands,2020-03-28,0.0,0.0,5.0,5.0
Andaman and Nicobar Islands,2020-03-29,0.0,0.0,3.0,3.0
Andaman and Nicobar Islands,2020-03-30,0.0,0.0,0.0,0.0


In [19]:
#plotting heat map for complete time period for each state on log scale

for column in list(df_new.columns):
    data = go.Heatmap(z = np.log(df_new[column]),
                      x = df_new.index.get_level_values(1),
                      y = df_new.index.get_level_values(0),
                      text = df_new[column],
                      hoverinfo = 'x+y+text',
                      ygap = 2,
                      xgap = 2,
                      colorscale = 'Portland',
                      colorbar = dict(title = dict(text = 'Increasing No of Cases', side = 'right',
                                                   font = dict(family = 'arial', color = 'black', size = 14)
                                                  ))
                     )

    layout = go.Layout(
        title = dict(text = '<b>State-wise Daily ' + str(column) + ' Cases on Log Scale',
                     font = dict(family = 'arial', size = 20, color = 'black'),
                     x = 0.5,
                     xref = 'paper'),
        height = 1000,
        autosize = True,
        yaxis = dict(autorange = 'reversed', fixedrange = False,
                     title = dict(text = 'States')
                    ),
        xaxis = dict(type = 'date',
                     dtick = 'M1', rangeslider_visible = True,
                     title = dict(text = 'Dates')
                    ))
    fig = go.Figure(data = data, layout = layout)
    pyo.plot(fig, filename = 'daily_covid_' + str(column) + '_heatmap.html')


divide by zero encountered in log


invalid value encountered in log



In [20]:
df_ma = df_new.copy()
df_ma.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Cured,Deaths,Confirmed,Active
state_UT,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Andaman and Nicobar Islands,2020-03-26,,,,
Andaman and Nicobar Islands,2020-03-27,0.0,0.0,0.0,0.0
Andaman and Nicobar Islands,2020-03-28,0.0,0.0,5.0,5.0
Andaman and Nicobar Islands,2020-03-29,0.0,0.0,3.0,3.0
Andaman and Nicobar Islands,2020-03-30,0.0,0.0,0.0,0.0


In [21]:
#finding 3 day moving average af all category of cases.
for state in states:
    df_ma[df_ma.index.get_level_values(0) == state] = df_ma[df_ma.index.get_level_values(0) == state].rolling(3).mean().round(2)
df_ma.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Cured,Deaths,Confirmed,Active
state_UT,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Andaman and Nicobar Islands,2020-03-26,,,,
Andaman and Nicobar Islands,2020-03-27,,,,
Andaman and Nicobar Islands,2020-03-28,,,,
Andaman and Nicobar Islands,2020-03-29,0.0,0.0,2.67,2.67
Andaman and Nicobar Islands,2020-03-30,0.0,0.0,2.67,2.67


In [22]:
#plotting heat map for complete time period for each state on log scale (3 Day Moving Average)
for column in list(df_ma.columns):
    data = go.Heatmap(z = np.log(df_ma[column]),
                      x = df_ma.index.get_level_values(1),
                      y = df_ma.index.get_level_values(0),
                      text = df_ma[column],
                      hoverinfo = 'x+y+text',
                      ygap = 2,
                      xgap = 2,
                      colorscale = 'Portland',                
                      colorbar = dict(title = dict(text = 'Increasing No of Cases', side = 'right',
                                                   font = dict(family = 'arial', color = 'black', size = 14)
                                                  ))
                     )

    layout = go.Layout(
        title = dict(text = '<b>State-wise Daily ' + str(column) + ' Cases on Log Scale (3 Day Moving Average)',
                     font = dict(family = 'arial', size = 20, color = 'black'),
                     x = 0.5,
                     xref = 'paper'),
        height = 1000,
        autosize = True,
        yaxis = dict(autorange = 'reversed', fixedrange = False,
                     title = dict(text = 'States')
                    ),
        xaxis = dict(type = 'date',
                     dtick = 'M1', rangeslider_visible = True,
                     title = dict(text = 'Dates')
                    ))
    fig = go.Figure(data = data, layout = layout)
    pyo.plot(fig, filename = '3dma_daily_covid_' + str(column) + '_heatmap.html')


divide by zero encountered in log


invalid value encountered in log



### RATE CALCULATION

In [23]:
#calculating the recovery and case fatality rates and plotting
df_rate = df[['Date', 'state_UT', 'Cured', 'Deaths', 'Confirmed']].groupby(['Date']).sum()
df_rate.head()

Unnamed: 0_level_0,Cured,Deaths,Confirmed
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-30,0,0,1
2020-01-31,0,0,1
2020-02-01,0,0,2
2020-02-02,0,0,3
2020-02-03,0,0,3


In [24]:
df_rate['recovery_rate'] = round(df_rate.Cured/df_rate.Confirmed, 4)*100
df_rate['case_fatality_rate'] = round(df_rate.Deaths/df_rate.Confirmed, 4)*100
df_rate.tail()

Unnamed: 0_level_0,Cured,Deaths,Confirmed,recovery_rate,case_fatality_rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-10-23,6948497,117306,7761312,89.53,1.51
2020-10-24,7016046,117956,7814682,89.78,1.51
2020-10-25,7078123,118534,7864811,90.0,1.51
2020-10-26,7137228,119014,7909959,90.23,1.5
2020-10-27,7201070,119502,7946429,90.62,1.5


In [25]:
#Plotting recovery rate and fatality rate across the timeperiod for India
rate_data = []
text1 = []
text2 = []
for row in df_rate.itertuples(): 
    text1.append((
        'Date: {index:%Y-%m-%d}<br>' +
        'Recovery Rate: {rate:.2f}%<br>' +
        'No of Confirmed Cases: {case:,}').format(index = row[0], 
                                                rate = row[4], 
                                                case = row[3]))
    text2.append((
        'Date: {index:%Y-%m-%d}<br>' +
        'Case Fatality Rate: {rate:.2f}%<br>' +
        'No of Confirmed Cases: {case:,}').format(index = row[0], 
                                                rate = row[5], 
                                                case = row[3]))

    
data1 = go.Scatter(x = df_rate.index,
                  y = df_rate.recovery_rate,
                  line = dict(dash = 'dashdot', color = 'black'),
                  name = 'Recovery Rate',
                  text = text1,
                  hoverinfo = 'text')

data2 = go.Scatter(x = df_rate.index,
                   y = df_rate.case_fatality_rate,
                   line = dict(dash = 'dash', color = 'red'),
                   name = 'Case Fatality Rate',
                   text = text2,
                   hoverinfo = 'text')
rate_data.append(data1)
rate_data.append(data2)

layout = go.Layout(
    plot_bgcolor='white',
    autosize = True,
    xaxis = dict(rangeslider_visible = True,
                 title = dict(text = 'Dates')
                ),
    yaxis = dict(title = 'Rates', ticksuffix = '%', fixedrange = False),
    title = dict(text = '<b>Recovery and Case Fatality Rate', x = 0.5, xref = 'paper',
                 font = dict(family = 'arial', size = 20, color = 'black')
                ),
    legend = dict(itemclick = "toggleothers",
                  itemdoubleclick = "toggle"
                 )
)

fig = go.Figure(data = rate_data, layout = layout)

pyo.plot(fig, filename = 'covid_rate_trend.html')

'covid_rate_trend.html'

In [26]:
#calculating rates state wise
df_project_rate = df[['Date', 'state_UT', 'Cured', 'Deaths', 'Confirmed']].groupby(['state_UT', 'Date']).sum()
df_project_rate.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Cured,Deaths,Confirmed
state_UT,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Andaman and Nicobar Islands,2020-03-26,0,0,1
Andaman and Nicobar Islands,2020-03-27,0,0,1
Andaman and Nicobar Islands,2020-03-28,0,0,6
Andaman and Nicobar Islands,2020-03-29,0,0,9
Andaman and Nicobar Islands,2020-03-30,0,0,9


In [27]:
df_project_rate['recovery_rate'] = round(df_project_rate.Cured/df_project_rate.Confirmed, 4)*100
df_project_rate['case_fatality_rate'] = round(df_project_rate.Deaths/df_project_rate.Confirmed, 4)*100
df_project_rate.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Cured,Deaths,Confirmed,recovery_rate,case_fatality_rate
state_UT,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
West Bengal,2020-10-23,294911,6308,337283,87.44,1.87
West Bengal,2020-10-24,298587,6368,341426,87.45,1.87
West Bengal,2020-10-25,302340,6427,345574,87.49,1.86
West Bengal,2020-10-26,306197,6487,349701,87.56,1.86
West Bengal,2020-10-27,310086,6546,353822,87.64,1.85


In [28]:
#plotting state wise case fatality rate
state_data = []
  
for state in list(df_project_rate.index.get_level_values(0).unique()):
    text = []
    for row in df_project_rate[df_project_rate.index.get_level_values(0) == state].itertuples():
        text.append((
            'State/ UT: {state}<br>' +
            'Date: {date:%Y-%m-%d}<br>' +
            'Case Fatality Rate: {rate:.2f}%<br>' +
            'No of Confirmed Cases: {case:,}').format(state = row[0][0],
                                                      date = row[0][1],
                                                      rate = row[5],
                                                      case = row[3]))
   
    data = go.Scatter(x = df_project_rate.loc[df_project_rate.index.get_level_values(0) == state].index.get_level_values(1),
                   y = df_project_rate.loc[df_project_rate.index.get_level_values(0) == state].case_fatality_rate,
                   name = str(state),
                   text = text,
                   hoverinfo = 'text')
    state_data.append(data)
    
layout = go.Layout(
    plot_bgcolor='white',
    autosize = True,
    xaxis = dict(rangeslider_visible = True, title = 'Dates'),
    yaxis = dict(title = 'Rates', ticksuffix = '%', fixedrange = False),
    title = dict(text = '<b>State-wise Case Fatality Rate', x = 0.5, xref = 'paper',
                 font = dict(family = 'arial', size = 20, color = 'black')
                ),
    legend = dict(itemclick = "toggleothers",
                  itemdoubleclick = "toggle"
                 ))

fig = go.Figure(data = state_data, layout = layout)

pyo.plot(fig, filename = 'state_wise_fatality_rate.html')

'state_wise_fatality_rate.html'

In [29]:
#plotting state wise recovery rate
state_data = []
    
for state in list(df_project_rate.index.get_level_values(0).unique()):
    text = []
    for row in df_project_rate[df_project_rate.index.get_level_values(0) == state].itertuples():
        text.append((
            'State/ UT: {state}<br>' +
            'Date: {date:%Y-%m-%d}<br>' +
            'Recovery Rate: {rate:.2f}%<br>' +
            'No of Confirmed Cases: {case:,}').format(state = row[0][0],
                                                      date = row[0][1],
                                                      rate = row[4],
                                                      case = row[3]))
    data = go.Scatter(x = df_project_rate.loc[df_project_rate.index.get_level_values(0) == state].index.get_level_values(1),
                   y = df_project_rate.loc[df_project_rate.index.get_level_values(0) == state].recovery_rate,
                   name = str(state),
                   text = text,
                   hoverinfo = 'text')
    state_data.append(data)
    
layout = go.Layout(
    plot_bgcolor='white',
    autosize = True,
    yaxis = dict(title = 'Rates', ticksuffix = '%', fixedrange = False),
    xaxis = dict(rangeslider_visible = True, title = 'Dates'),
    title = dict(text = 'State-wise Recovery Rate', x = 0.5, xref = 'paper',
                 font = dict(family = 'arial', size = 20, color = 'black')
                ),
    legend = dict(itemclick = "toggleothers",
                  itemdoubleclick = "toggle"
                 ))

fig = go.Figure(data = state_data, layout = layout)

pyo.plot(fig, filename = 'state_wise_recovery_rate.html')

'state_wise_recovery_rate.html'

## Test vs Confirmed Comparison

In [30]:
#Analysing testing across the states
df_test = pd.read_csv('StatewiseTestingDetails_27_10.csv')
df_test.head()

Unnamed: 0,Date,State,TotalSamples,Negative,Positive
0,2020-04-17,Andaman and Nicobar Islands,1403.0,1210.0,12.0
1,2020-04-24,Andaman and Nicobar Islands,2679.0,,27.0
2,2020-04-27,Andaman and Nicobar Islands,2848.0,,33.0
3,2020-05-01,Andaman and Nicobar Islands,3754.0,,33.0
4,2020-05-16,Andaman and Nicobar Islands,6677.0,,33.0


In [31]:
df_test.drop(columns = ['Negative', 'Positive'], inplace = True)
df_test.head()

Unnamed: 0,Date,State,TotalSamples
0,2020-04-17,Andaman and Nicobar Islands,1403.0
1,2020-04-24,Andaman and Nicobar Islands,2679.0
2,2020-04-27,Andaman and Nicobar Islands,2848.0
3,2020-05-01,Andaman and Nicobar Islands,3754.0
4,2020-05-16,Andaman and Nicobar Islands,6677.0


In [32]:
df_test['Date'] = pd.to_datetime(df_test['Date'], dayfirst = True)
df_test['State'] = df_test['State'].str.replace('Telengana***', 'Telengana', regex = False)
df_test['State'] = df_test['State'].str.replace('Telangana***', 'Telengana', regex = False)
df_test['State'] = df_test['State'].str.replace('Telangana', 'Telengana', regex = False)
df_test.head()

Unnamed: 0,Date,State,TotalSamples
0,2020-04-17,Andaman and Nicobar Islands,1403.0
1,2020-04-24,Andaman and Nicobar Islands,2679.0
2,2020-04-27,Andaman and Nicobar Islands,2848.0
3,2020-05-01,Andaman and Nicobar Islands,3754.0
4,2020-05-16,Andaman and Nicobar Islands,6677.0


In [33]:
#plotting testing numbers across the states
states = df_test['State'].unique()

data_states = []
n = 0
for state in states:
    data = go.Scatter(x = df_test[df_test['State'] == state].Date,
                  y = df_test[df_test['State'] == state].TotalSamples,
                  name = state)
    data_states.append(data)
    n +=1
    
layout = go.Layout(
    plot_bgcolor='rgba(0,0,0,0)',
    title = dict(text = '<b>Total Samples collected State-wise', xref = 'paper', x = 0.5,
                 font = dict(family = 'arial', size = 20, color = 'black')
                ),
    xaxis = dict(rangeslider_visible = True, title = 'Dates'),
    yaxis = dict(fixedrange = False, title = 'No of Samples collected'),
    autosize = True,
    legend = dict(itemclick = "toggleothers",
                  itemdoubleclick = "toggle"
                 )
)

fig = go.Figure(data = data_states, layout = layout)

pyo.plot(fig, filename = 'Test_Comparison_states.html')

'Test_Comparison_states.html'

In [34]:
df_test_group = df_test.groupby(['Date']).sum()
df_test_group.head()

Unnamed: 0_level_0,TotalSamples
Date,Unnamed: 1_level_1
2020-04-01,11245.0
2020-04-02,14906.0
2020-04-03,20130.0
2020-04-04,10786.0
2020-04-05,56418.0


In [35]:
#plotting cummulative testing/ sample data
data = go.Scatter(x = df_test_group.index,
                  y = df_test_group.TotalSamples,
                  name = 'Total Samples collected',
                  fill='tozeroy')

layout = go.Layout(
    plot_bgcolor='rgba(0,0,0,0)',
    title = dict(text = '<b>Total Cummulative Test Samples Collected',  xref = 'paper', x = 0.5,
                 font = dict(family = 'arial', size = 20, color = 'black')
                ),
    xaxis = dict(rangeslider_visible = True, title = "Dates"),
    yaxis = dict(fixedrange = False, title = 'No of Tests'),
    autosize = True,
    legend = dict(itemclick = "toggleothers",
                  itemdoubleclick = "toggle"
                 )
)

fig = go.Figure(data = data, layout = layout)

pyo.plot(fig, filename = 'Test_cummulative_data.html')

'Test_cummulative_data.html'

In [36]:
df_test_group['test_change'] = df_test_group.TotalSamples.diff().clip(0)
df_test_group.head()

Unnamed: 0_level_0,TotalSamples,test_change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-04-01,11245.0,
2020-04-02,14906.0,3661.0
2020-04-03,20130.0,5224.0
2020-04-04,10786.0,0.0
2020-04-05,56418.0,45632.0


In [37]:
#plotting per day testing data
data = go.Bar(x = df_test_group.index,
              y = df_test_group.test_change,
              name = 'Per Day Change'
             )

layout = go.Layout(bargap = 0.2,
                   plot_bgcolor = 'white',
                   title = dict(text = '<b>Per Day Test Samples collected', xref = 'paper', x = 0.5,
                   font = dict(family = 'arial', size = 20, color = 'black')),
                   xaxis = dict(title = 'Date', rangeslider_visible = True),
                   yaxis = dict(title = 'Per Day Samples Collected', fixedrange = False),
                   autosize = True,
                   legend = dict(itemclick = "toggleothers",
                   itemdoubleclick = "toggle")
                   )

fig = go.Figure(data = data, layout = layout)
pyo.plot(fig, filename = 'Test_change.html')

'Test_change.html'

In [38]:
#plotting per day testing data (3 Day moving average)
data = go.Bar(x = df_test_group.index,
              y = df_test_group.test_change.rolling(3).mean().round(2),
              name = 'Per Day Change (3 Day Moving Average)'
             )

layout = go.Layout(bargap = 0.2,
                   plot_bgcolor = 'white',
                   title = dict(text = 'Per Day Test Samples collected (3 Day Moving Average)', xref = 'paper', x = 0.5,
                   font = dict(family = 'arial', size = 20, color = 'black')),
                   xaxis = dict(title = 'Date', rangeslider_visible = True),
                   yaxis = dict(title = 'Per Day Samples Collected', fixedrange = False),
                   autosize = True,
                   legend = dict(itemclick = "toggleothers",
                   itemdoubleclick = "toggle")
                   )

fig = go.Figure(data = data, layout = layout)
pyo.plot(fig, filename = 'Test_change_3dma.html')

'Test_change_3dma.html'

In [39]:
#plotting comparision between per day testing and per day positive cases 
data1 = go.Bar(x = df_test_group.index,
              y = df_test_group.test_change,
              name = 'Per Day Tests'
             )

data2 = go.Scatter(x = df1.index,
               y = df1.confirmed_change,
               name = 'Per Day Confirmed cases',
               yaxis = 'y2',
                   mode = 'lines'
            )

data_trace = [data1, data2]

layout = go.Layout(bargap = 0.2,
                   barmode = 'group',
                   plot_bgcolor = 'white',
                   title = dict(text = '<b>Per Day Test Samples Collected v/s Confirmed Cases', xref = 'paper', x = 0.5,
                   font = dict(family = 'arial', size = 20, color = 'black')),
                   xaxis = dict(title = 'Date', rangeslider_visible = True),
                   yaxis = dict(title = 'Per Day Tests done', fixedrange = False),
                   yaxis2 = dict(title = 'Per Day Confirmed Cases reported', 
                                 fixedrange = False,
                                 side = 'right',
                                 overlaying = 'y'
                                ),
                   autosize = True,
                   )

fig = go.Figure(data = data_trace, layout = layout)
pyo.plot(fig, filename = 'test_comparison_change.html')

'test_comparison_change.html'

In [40]:
#plotting comparision between per day testing and per day positive cases (3 Day moving average)
data1 = go.Bar(x = df_test_group.index,
              y = df_test_group.test_change.rolling(3).mean().round(2),
              name = 'Per Day Tests (3 Day Moving Average)'
             )

data2 = go.Scatter(x = df1.index,
               y = df1.confirmed_change.rolling(3).mean().round(2),
               name = 'Per Day Confirmed cases (3 Day Moving Average)',
               yaxis = 'y2',
                   mode = 'lines'
            )

data_trace = [data1, data2]

layout = go.Layout(bargap = 0.2,
                   barmode = 'group',
                   plot_bgcolor = 'white',
                   title = dict(text = '<b>Per Day Test Samples Collected v/s Confirmed Cases (3 Day Moving Average)', xref = 'paper', x = 0.5,
                   font = dict(family = 'arial', size = 20, color = 'black')),
                   xaxis = dict(title = 'Date', rangeslider_visible = True),
                   yaxis = dict(title = 'Per Day Tests done', fixedrange = False),
                   yaxis2 = dict(title = 'Per Day Confirmed Cases reported', 
                                 fixedrange = False,
                                 side = 'right',
                                 overlaying = 'y'
                                ),
                   autosize = True,
                   
                  )

fig = go.Figure(data = data_trace, layout = layout)
pyo.plot(fig, filename = 'test_comparison_change_3dma.html')

'test_comparison_change_3dma.html'

In [41]:
df_test_new = df_test.groupby(['State', 'Date']).sum().diff().clip(0)
df_test_new.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,TotalSamples
State,Date,Unnamed: 2_level_1
Andaman and Nicobar Islands,2020-04-17,
Andaman and Nicobar Islands,2020-04-24,1276.0
Andaman and Nicobar Islands,2020-04-27,169.0
Andaman and Nicobar Islands,2020-05-01,906.0
Andaman and Nicobar Islands,2020-05-16,2923.0


In [42]:
states = list(df_test_new.index.get_level_values(0).unique())

In [43]:
for state in states:
    df_test_new[df_test_new.index.get_level_values(0) == state] = df_test_new[df_test_new.index.get_level_values(0) == state].rolling(3).mean().round(2)
df_test_new.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,TotalSamples
State,Date,Unnamed: 2_level_1
Andaman and Nicobar Islands,2020-04-17,
Andaman and Nicobar Islands,2020-04-24,
Andaman and Nicobar Islands,2020-04-27,
Andaman and Nicobar Islands,2020-05-01,783.67
Andaman and Nicobar Islands,2020-05-16,1332.67


In [44]:
#plotting per day testing heat map (3 Day moving average) on log scale
data = go.Heatmap(z = np.log(df_test_new.TotalSamples),
                 x = df_test_new.index.get_level_values(1),
                 y = df_test_new.index.get_level_values(0),
                 text = df_test_new.TotalSamples,
                 hoverinfo = 'x+y+text',
                 ygap = 2,
                 xgap = 2,
                 colorscale = 'Portland',
                 colorbar = dict(title = dict(text = 'Increasing No of Tests', side = 'right',
                                                   font = dict(family = 'arial', color = 'black', size = 14)
                                                  ))
                 )
layout = go.Layout(
        title = dict(text = '<b>State-wise Daily Test Cases on Log Scale (3 Day Moving Average)',
                     x = 0.5,
                     xref = 'paper', font = dict(family = "arial", size = 20, color = 'black')),
        height = 1000,
        autosize = True,
        yaxis = dict(autorange = 'reversed', fixedrange = False),
        xaxis = dict(type = 'date',
                     dtick = 'M1', rangeslider_visible = True))
fig = go.Figure(data = data, layout = layout)
pyo.plot(fig, filename = 'test_3dma_daily_covid_heatmap.html')

'test_3dma_daily_covid_heatmap.html'

In [45]:
df_test_group.head()

Unnamed: 0_level_0,TotalSamples,test_change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-04-01,11245.0,
2020-04-02,14906.0,3661.0
2020-04-03,20130.0,5224.0
2020-04-04,10786.0,0.0
2020-04-05,56418.0,45632.0


In [46]:
df1.head()

Unnamed: 0_level_0,Cured,Deaths,Confirmed,active,active_change,confirmed_change,deaths_change,cured_change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-30,0,0,1,1,,,,
2020-01-31,0,0,1,1,0.0,0.0,0.0,0.0
2020-02-01,0,0,2,2,1.0,1.0,0.0,0.0
2020-02-02,0,0,3,3,1.0,1.0,0.0,0.0
2020-02-03,0,0,3,3,0.0,0.0,0.0,0.0


In [47]:
df_combine = pd.concat([df_test_group, df1[['Confirmed', 'confirmed_change']]], axis = 1, join = 'outer')

In [48]:
df_combine.head()

Unnamed: 0_level_0,TotalSamples,test_change,Confirmed,confirmed_change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-30,,,1,
2020-01-31,,,1,0.0
2020-02-01,,,2,1.0
2020-02-02,,,3,1.0
2020-02-03,,,3,0.0


In [49]:
df_combine['positive_rate'] = round((df_combine.Confirmed/df_combine.TotalSamples)*100, 2)
df_combine.tail()

Unnamed: 0_level_0,TotalSamples,test_change,Confirmed,confirmed_change,positive_rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-10-23,108806121.0,1639519.0,7761312,54366.0,7.13
2020-10-24,109763264.0,957143.0,7814682,53370.0,7.12
2020-10-25,110764736.0,1001472.0,7864811,50129.0,7.1
2020-10-26,111383719.0,618983.0,7909959,45148.0,7.1
2020-10-27,57201860.0,0.0,7946429,36470.0,13.89


In [50]:
df_combine['daily_positive_rate'] = round((df_combine.confirmed_change/df_combine.test_change)*100, 2).clip(0)
df_combine.tail()

Unnamed: 0_level_0,TotalSamples,test_change,Confirmed,confirmed_change,positive_rate,daily_positive_rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-10-23,108806121.0,1639519.0,7761312,54366.0,7.13,3.32
2020-10-24,109763264.0,957143.0,7814682,53370.0,7.12,5.58
2020-10-25,110764736.0,1001472.0,7864811,50129.0,7.1,5.01
2020-10-26,111383719.0,618983.0,7909959,45148.0,7.1,7.29
2020-10-27,57201860.0,0.0,7946429,36470.0,13.89,inf


In [51]:
#plotting cummulative and per day test positive rate
data1 = go.Bar(x = df_combine.index,
               y = df_combine.daily_positive_rate,
               name = 'Daily Test Positive Rate'
              )

data2 = go.Scatter(x = df_combine.index,
                   y = df_combine.positive_rate,
                   name = 'Cummulative Test Positive Rate'
                  )

data_trace = [data1, data2]

layout = go.Layout(plot_bgcolor = 'white',
                   bargap = 0.2,
                   autosize = True,
                   title = dict(text = '<b>Daily and Cummulative Test Positive Rate',
                                xref = 'paper', x = 0.5,
                                font = dict(family = 'arial', size = 20, color = 'black')
                               ),
                   yaxis = dict(title = 'Rates', fixedrange = False,
                                ticksuffix = '%'
                               ),
                   xaxis = dict(title = 'Dates', rangeslider_visible = True)
                  )
    
fig = go.Figure(data = data_trace, layout = layout)
pyo.plot(fig, filename = 'test_positive_rate.html')

'test_positive_rate.html'

In [52]:
#plotting cummulative and per day test positive rate (3 Day moving average)
data1 = go.Bar(x = df_combine.index,
               y = df_combine.daily_positive_rate.rolling(3).mean(),
               name = 'Daily Test Positive Rate'
              )

data2 = go.Scatter(x = df_combine.index,
                   y = df_combine.positive_rate.rolling(3).mean(),
                   name = 'Cummulative Test Positive Rate'
                  )

data_trace = [data1, data2]

layout = go.Layout(plot_bgcolor = 'white',
                   bargap = 0.2,
                   autosize = True,
                   title = dict(text = '<b>Daily and Cummulative Test Positive Rate (3 Day Moving Average)',
                                xref = 'paper', x = 0.5,
                                font = dict(family = 'arial', size = 20, color = 'black')
                               ),
                   yaxis = dict(title = 'Rates', fixedrange = False,
                                ticksuffix = '%'
                               ),
                   xaxis = dict(title = 'Dates', rangeslider_visible = True)
                  )
    
fig = go.Figure(data = data_trace, layout = layout)
pyo.plot(fig, filename = 'test_3dma_positive_rate.html')

'test_3dma_positive_rate.html'

#### Note: You may find some discrepancy in covid test details, this I owe to the testing dataset, since few major datapoints are missing.

### Thank you for going through this notebook.

-Minhaj Ahmed Ansari