# COVID-19 INDIA ANALYSIS

## Updated till 5th May 2020

### Importing packages

In [1]:
# Core numerical / tabular libraries.
import numpy as np
import pandas as pd

# Lift pandas' display truncation so frames are rendered with every column and row.
pd.set_option("display.max_columns",None) 
pd.set_option("display.max_rows",None) 

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including pandas deprecation / FutureWarning messages.
warnings.filterwarnings("ignore")

# Plotly: graph objects, the express API, I/O settings and the subplot factory.
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
# Make "seaborn" the default template for Plotly figures.
pio.templates.default = "seaborn"
from plotly.subplots import make_subplots

In [2]:
# Offline Plotly helpers for rendering figures inside the notebook.
# NOTE(review): `plot` and `iplot` are not referenced in the cells visible here.
from plotly.offline import plot, iplot, init_notebook_mode
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook — confirm against the Plotly documentation.
init_notebook_mode(connected=True)

### Reading dataset

In [3]:
# Load the state-wise records; the 'Date' column is parsed as a day-first date.
india_df = pd.read_csv(
    "covid_19_india.csv",
    parse_dates=['Date'],
    dayfirst=True,
)

In [4]:
# Show the last five rows (the default for tail) of the raw frame.
india_df.tail()

Unnamed: 0,Sno,Date,Time,State/UnionTerritory,ConfirmedIndianNational,ConfirmedForeignNational,Cured,Deaths,Confirmed
1633,1634,2020-05-05,5:00 PM,Telengana,-,-,585,29,1085
1634,1635,2020-05-05,5:00 PM,Tripura,-,-,2,0,29
1635,1636,2020-05-05,5:00 PM,Uttarakhand,-,-,39,1,60
1636,1637,2020-05-05,5:00 PM,Uttar Pradesh,-,-,944,53,2859
1637,1638,2020-05-05,5:00 PM,West Bengal,-,-,218,133,1259


In [5]:
# Discard the serial number, time-of-day and nationality-split columns.
unused_columns = ['Sno', 'Time', 'ConfirmedIndianNational', 'ConfirmedForeignNational']
india_df = india_df.drop(columns=unused_columns)

In [6]:
# Preview the first rows of the trimmed frame.
india_df.head()

Unnamed: 0,Date,State/UnionTerritory,Cured,Deaths,Confirmed
0,2020-01-30,Kerala,0,0,1
1,2020-01-31,Kerala,0,0,1
2,2020-02-01,Kerala,0,0,2
3,2020-02-02,Kerala,0,0,3
4,2020-02-03,Kerala,0,0,3


In [7]:
# Normalise every column label to lower case.
india_df.columns = list(map(str.lower, india_df.columns))

In [8]:
# Rename the state and recovery columns in a single pass.
india_df = india_df.rename(columns={
    'state/unionterritory': 'state_or_ut',
    'cured': 'recovered',
})
# Map the '#'-suffixed spellings of two states onto their plain names.
india_df['state_or_ut'] = india_df['state_or_ut'].replace({
    'Jharkhand#': 'Jharkhand',
    'Nagaland#': 'Nagaland',
})

In [9]:
# Check the renamed columns on the first rows.
india_df.head()

Unnamed: 0,date,state_or_ut,recovered,deaths,confirmed
0,2020-01-30,Kerala,0,0,1
1,2020-01-31,Kerala,0,0,1
2,2020-02-01,Kerala,0,0,2
3,2020-02-02,Kerala,0,0,3
4,2020-02-03,Kerala,0,0,3


In [10]:
# Count missing values per column.
india_df.isna().sum()

date           0
state_or_ut    0
recovered      0
deaths         0
confirmed      0
dtype: int64

In [11]:
# Print the frame summary: column dtypes, non-null counts and memory usage.
india_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1638 entries, 0 to 1637
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   date         1638 non-null   datetime64[ns]
 1   state_or_ut  1638 non-null   object        
 2   recovered    1638 non-null   int64         
 3   deaths       1638 non-null   int64         
 4   confirmed    1638 non-null   int64         
dtypes: datetime64[ns](1), int64(3), object(1)
memory usage: 64.1+ KB


In [12]:
# Row counts per date in chronological order; the two ends of the index give
# the period covered by the data.
a = india_df['date'].value_counts().sort_index()
first_day, last_day = a.index[0], a.index[-1]
print('Starting date:', first_day)
print('Ending date:', last_day)

Starting date: 2020-01-30 00:00:00
Ending date: 2020-05-05 00:00:00


In [13]:
# Active cases are what remains of the confirmed count once deaths and
# recoveries are taken out.
closed_cases = india_df['deaths'] + india_df['recovered']
india_df['active'] = india_df['confirmed'] - closed_cases

In [14]:
# Row/column count after adding the 'active' column.
india_df.shape

(1638, 6)

### Confirmed cases over time

In [15]:
# Nation-wide confirmed count per day: sum the state-level rows of each date.
# Only 'confirmed' is selected. 'date' is already the grouping key, and indexing
# a GroupBy with a bare tuple of column names (['date', 'confirmed'] without an
# inner list) is deprecated in pandas 1.x and raises in pandas 2.x.
# The grouping key keeps its datetime dtype, so no re-parsing is needed.
total_cases = india_df.groupby('date')['confirmed'].sum().reset_index()

# Line chart of the cumulative confirmed count over time.
fig = go.Figure()
fig.add_trace(go.Scatter(x=total_cases['date'], y=total_cases['confirmed'],
                   mode='lines+markers', line = dict(color='orangered', width=2)))
fig.update_layout(title='Confirmed Cases Over Time in India',
                   xaxis_title='Date',
                   yaxis_title='Total Cases')
fig.show()

### Confirmed cases in each state or union territory

In [16]:
# Keep only the rows of the most recent reporting date.
latest_date = india_df['date'].max()
top = india_df[india_df['date'] == latest_date]

# Confirmed cases per state/UT on that date, largest first.
top_casualities = (
    top.groupby(by='state_or_ut')['confirmed']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
)
top_casualities

Unnamed: 0,state_or_ut,confirmed
0,Maharashtra,14541
1,Gujarat,5804
2,Delhi,4898
3,Tamil Nadu,3550
4,Rajasthan,3061
5,Madhya Pradesh,3049
6,Uttar Pradesh,2859
7,Andhra Pradesh,1717
8,West Bengal,1259
9,Punjab,1233


### Top 15 states or union territories having most confirmed cases

In [17]:
# Horizontal bar chart of the first 15 rows of the confirmed-cases ranking.
top15_confirmed = top_casualities.head(15)
fig = px.bar(
    top15_confirmed,
    x='confirmed',
    y='state_or_ut',
    orientation='h',
    color='state_or_ut',
    color_discrete_sequence=px.colors.qualitative.Plotly,
)
fig.update_layout(
    title='Top 15 states or union territories having most confirmed cases',
    xaxis_title='Total cases',
    yaxis_title='State or Union Territory',
    showlegend=False,
)
fig.show()

### Active cases in each state or union territory

In [18]:
# Rows of the most recent reporting date.
latest_date = india_df['date'].max()
top = india_df[india_df['date'] == latest_date]

# Active cases per state/UT on that date, largest first.
top_actives = (
    top.groupby(by='state_or_ut')['active']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
)
top_actives

Unnamed: 0,state_or_ut,active
0,Maharashtra,11493
1,Gujarat,4290
2,Delhi,3403
3,Tamil Nadu,2110
4,Madhya Pradesh,1873
5,Uttar Pradesh,1862
6,Rajasthan,1590
7,Andhra Pradesh,1092
8,Punjab,1082
9,West Bengal,908


### Top 15 states or union territories having most active cases

In [19]:
# Horizontal bar chart of the first 15 rows of the active-cases ranking.
fig = px.bar(top_actives.head(15) , x='active', y='state_or_ut', orientation='h', 
             color='state_or_ut', color_discrete_sequence=px.colors.qualitative.Plotly)
# The x-axis shows active cases, so label it as such rather than 'Total cases',
# which reads as the confirmed total.
fig.update_layout(title='Top 15 states or union territories having most active cases',
                   xaxis_title='Active cases',
                   yaxis_title='State or Union Territory', showlegend=False)
fig.show()

### Deaths in each state or union territory

In [20]:
# Deaths per state/UT in the latest-date slice `top`, largest first.
deaths_by_state = top.groupby(by='state_or_ut')['deaths'].sum()
top_deaths = deaths_by_state.sort_values(ascending=False).reset_index()
top_deaths

Unnamed: 0,state_or_ut,deaths
0,Maharashtra,583
1,Gujarat,319
2,Madhya Pradesh,176
3,West Bengal,133
4,Rajasthan,77
5,Delhi,64
6,Uttar Pradesh,53
7,Andhra Pradesh,36
8,Tamil Nadu,31
9,Telengana,29


### Top 15 states or union territories having most deaths

In [21]:
# Horizontal bar chart of the first 15 rows of the deaths ranking.
fig = px.bar(top_deaths.head(15) , x='deaths', y='state_or_ut', orientation='h', 
             color='state_or_ut', color_discrete_sequence=px.colors.qualitative.Plotly)
# The x-axis shows deaths, not case counts, so it is labelled accordingly.
fig.update_layout(title='Top 15 states or union territories having most deaths',
                   xaxis_title='Total deaths',
                   yaxis_title='State or Union Territory', showlegend=False)
fig.show()

### Recovered cases in each state or union territory

In [22]:
# Recoveries per state/UT in the latest-date slice `top`, largest first.
recovered_by_state = top.groupby(by='state_or_ut')['recovered'].sum()
top_recovered = recovered_by_state.sort_values(ascending=False).reset_index()
top_recovered

Unnamed: 0,state_or_ut,recovered
0,Maharashtra,2465
1,Delhi,1431
2,Tamil Nadu,1409
3,Rajasthan,1394
4,Gujarat,1195
5,Madhya Pradesh,1000
6,Uttar Pradesh,944
7,Andhra Pradesh,589
8,Telengana,585
9,Kerala,462


### Top 15 states or union territories having most recovered cases

In [23]:
# Horizontal bar chart of the first 15 rows of the recovered-cases ranking.
fig = px.bar(top_recovered.head(15) , x='recovered', y='state_or_ut', orientation='h', 
             color='state_or_ut', color_discrete_sequence=px.colors.qualitative.Plotly)
# The x-axis shows recovered cases, so label it as such rather than 'Total cases',
# which reads as the confirmed total.
fig.update_layout(title='Top 15 states or union territories having most recovered cases',
                   xaxis_title='Recovered cases',
                   yaxis_title='State or Union Territory', showlegend=False)
fig.show()

### Mortality and recovery rate in each state or union territory

In [24]:
# Per state/UT totals from the latest-date slice `top`, plus the share of
# confirmed cases that recovered or died, as percentages rounded to 2 decimals.
# The columns are selected with a list: tuple-style GroupBy indexing
# (['recovered', 'confirmed', 'deaths'] without the inner list) is deprecated
# in pandas 1.x and raises in pandas 2.x.
rate = top.groupby(by='state_or_ut')[['recovered', 'confirmed', 'deaths']].sum().reset_index()
rate['recovery_percentage'] = (rate['recovered'] / rate['confirmed'] * 100).round(2)
rate['death_percentage'] = (rate['deaths'] / rate['confirmed'] * 100).round(2)
rate.head()

Unnamed: 0,state_or_ut,recovered,confirmed,deaths,recovery_percentage,death_percentage
0,Andaman and Nicobar Islands,32,33,0,96.97,0.0
1,Andhra Pradesh,589,1717,36,34.3,2.1
2,Arunachal Pradesh,1,1,0,100.0,0.0
3,Assam,32,43,1,74.42,2.33
4,Bihar,130,529,4,24.57,0.76


### Mortality rate in each state or union territory

In [25]:
# Death percentage per state/UT, highest first.
death_pct_by_state = rate.groupby(by='state_or_ut')['death_percentage'].sum()
mortality = death_pct_by_state.sort_values(ascending=False).reset_index()
mortality

Unnamed: 0,state_or_ut,death_percentage
0,West Bengal,10.56
1,Meghalaya,8.33
2,Madhya Pradesh,5.77
3,Gujarat,5.5
4,Karnataka,4.25
5,Maharashtra,4.01
6,Telengana,2.67
7,Jharkhand,2.61
8,Rajasthan,2.52
9,Himachal Pradesh,2.44


### Top 20 states or union territories having highest mortality rate

In [26]:
# Horizontal bar chart of the first 20 rows of the mortality ranking.
top20_mortality = mortality.head(20)
fig = px.bar(
    top20_mortality,
    x='death_percentage',
    y='state_or_ut',
    orientation='h',
    color='state_or_ut',
    color_discrete_sequence=px.colors.qualitative.Plotly,
)
fig.update_layout(
    title='Top 20 states or union territories having highest mortality rate',
    xaxis_title='Mortality rate in percentage',
    yaxis_title='State or Union Territory',
    showlegend=False,
)
fig.show()

### Recovery rate in each state or union territory

In [27]:
# Recovery percentage per state/UT, highest first.
recovery_pct_by_state = rate.groupby(by='state_or_ut')['recovery_percentage'].sum()
recovery = recovery_pct_by_state.sort_values(ascending=False).reset_index()
recovery

Unnamed: 0,state_or_ut,recovery_percentage
0,Manipur,100.0
1,Arunachal Pradesh,100.0
2,Goa,100.0
3,Andaman and Nicobar Islands,96.97
4,Himachal Pradesh,92.68
5,Kerala,92.4
6,Meghalaya,83.33
7,Assam,74.42
8,Puducherry,66.67
9,Uttarakhand,65.0


### Top 20 states or union territories having highest recovery rate

In [28]:
# Horizontal bar chart of the first 20 rows of the recovery-rate ranking.
top20_recovery = recovery.head(20)
fig = px.bar(
    top20_recovery,
    x='recovery_percentage',
    y='state_or_ut',
    orientation='h',
    color='state_or_ut',
    color_discrete_sequence=px.colors.qualitative.Plotly,
)
fig.update_layout(
    title='Top 20 states or union territories having highest recovery rate',
    xaxis_title='Recovery rate in percentage',
    yaxis_title='State or Union Territory',
    showlegend=False,
)
fig.show()