In [1]:
import numpy as np
import pandas as pd
import plotly as py
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots


In [33]:
# Load the state-wise COVID-19 report for India and preview the first rows
df = pd.read_csv(filepath_or_buffer='covid_19_india.csv')
df.head()

Unnamed: 0,Sno,Date,Time,State/UnionTerritory,ConfirmedIndianNational,ConfirmedForeignNational,Cured,Deaths,Confirmed
0,1,30/01/20,6:00 PM,Kerala,1,0,0,0,1
1,2,31/01/20,6:00 PM,Kerala,1,0,0,0,1
2,3,01/02/20,6:00 PM,Kerala,2,0,0,0,2
3,4,02/02/20,6:00 PM,Kerala,3,0,0,0,3
4,5,03/02/20,6:00 PM,Kerala,3,0,0,0,3


In [34]:
# Summarise the column dtypes and non-null counts of the loaded frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8486 entries, 0 to 8485
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Sno                       8486 non-null   int64 
 1   Date                      8486 non-null   object
 2   Time                      8486 non-null   object
 3   State/UnionTerritory      8486 non-null   object
 4   ConfirmedIndianNational   8486 non-null   object
 5   ConfirmedForeignNational  8486 non-null   object
 6   Cured                     8486 non-null   int64 
 7   Deaths                    8486 non-null   int64 
 8   Confirmed                 8486 non-null   int64 
dtypes: int64(4), object(5)
memory usage: 596.8+ KB


In [37]:
# Parse the day/month/two-digit-year strings in 'Date' into datetimes,
# then list the column dtypes to confirm the conversion
parsed_dates = pd.to_datetime(df['Date'], format='%d/%m/%y')
df['Date'] = parsed_dates
df.dtypes

Sno                                  int64
Date                        datetime64[ns]
Time                                object
State/UnionTerritory                object
ConfirmedIndianNational             object
ConfirmedForeignNational            object
Cured                                int64
Deaths                               int64
Confirmed                            int64
dtype: object

In [38]:
# Count the rows recorded under each State/UnionTerritory label
df['State/UnionTerritory'].value_counts()

Kerala                                      292
Delhi                                       260
Rajasthan                                   259
Uttar Pradesh                               258
Haryana                                     258
Tamil Nadu                                  255
Ladakh                                      255
Karnataka                                   253
Jammu and Kashmir                           253
Maharashtra                                 252
Punjab                                      252
Andhra Pradesh                              250
Uttarakhand                                 247
Odisha                                      246
West Bengal                                 244
Puducherry                                  244
Chhattisgarh                                243
Gujarat                                     242
Chandigarh                                  242
Himachal Pradesh                            241
Madhya Pradesh                          

In [39]:
# Normalise inconsistent State/UnionTerritory labels to one spelling each
state_name_fixes = {
    'Chandigarh***': 'Chandigarh',
    'Punjab***': 'Punjab',
    'Maharashtra***': 'Maharashtra',
    'Telengana***': 'Telangana',
    'Telangana***': 'Telangana',
    'Telengana': 'Telangana',
    'Daman & Diu': 'Dadra and Nagar Haveli and Daman and Diu',
    'Dadar Nagar Haveli': 'Dadra and Nagar Haveli and Daman and Diu',
    'Jammu and Kashmir': 'Jammu & Kashmir',
}
df = df.replace({'State/UnionTerritory': state_name_fixes})


In [40]:
# Drop bookkeeping rows that do not correspond to a real state or territory.
# .copy() makes the result an independent frame, so assigning new columns to
# `df` afterwards does not trigger pandas' SettingWithCopyWarning.
placeholder_labels = ['Cases being reassigned to states', 'Unassigned']
df = df[~df['State/UnionTerritory'].isin(placeholder_labels)].copy()

In [41]:
# Re-count the rows per State/UnionTerritory label to check the clean-up
df['State/UnionTerritory'].value_counts()

Kerala                                      292
Telangana                                   260
Delhi                                       260
Rajasthan                                   259
Uttar Pradesh                               258
Haryana                                     258
Tamil Nadu                                  255
Ladakh                                      255
Punjab                                      253
Karnataka                                   253
Maharashtra                                 253
Jammu & Kashmir                             253
Andhra Pradesh                              250
Uttarakhand                                 247
Odisha                                      246
West Bengal                                 244
Puducherry                                  244
Chandigarh                                  243
Chhattisgarh                                243
Gujarat                                     242
Himachal Pradesh                        

In [42]:
# Derive active cases: confirmed cases that are neither cured nor deceased
closed_cases = df['Deaths'] + df['Cured']
df['Active'] = df['Confirmed'] - closed_cases

df.head()

Unnamed: 0,Sno,Date,Time,State/UnionTerritory,ConfirmedIndianNational,ConfirmedForeignNational,Cured,Deaths,Confirmed,Active
0,1,2020-01-30,6:00 PM,Kerala,1,0,0,0,1,1
1,2,2020-01-31,6:00 PM,Kerala,1,0,0,0,1,1
2,3,2020-02-01,6:00 PM,Kerala,2,0,0,0,2,2
3,4,2020-02-02,6:00 PM,Kerala,3,0,0,0,3,3
4,5,2020-02-03,6:00 PM,Kerala,3,0,0,0,3,3


### COVID-19 in India

In [67]:
# Keep only the rows belonging to the most recent report date
latest_report_date = max(df['Date'])
is_latest_report = df['Date'] == latest_report_date
last_update = df[is_latest_report]

# Collapse that date's per-state rows into a single row of totals
count_columns = ["Confirmed","Active","Cured","Deaths"]
table_data = (
    last_update
    .groupby(["Date"])[count_columns]
    .sum()
    .reset_index()
)
table_data.head()

Unnamed: 0,Date,Confirmed,Active,Cured,Deaths
0,2020-11-16,8845127,465478,8249579,130070


In [68]:
# Render the single row of totals in `table_data` as a plotly table
labels = ["Last Update","Confirmed","Active","Cured","Deaths"]
summary_columns = ["Date","Confirmed","Active","Cured","Deaths"]

summary_table = go.Table(
    header=dict(values=labels),
    cells=dict(values=table_data.loc[0, summary_columns]),
)
fig = go.Figure(data=[summary_table])

# Centre the title above the table
fig.update_layout(title_text='COVID-19 in India: ', title_x=0.5)

fig.show()

### Statewise Distribution of the Confirmed Cases in India

In [59]:
# Order every daily report chronologically, oldest first
df_states = df.sort_values(by='Date')
df_states

Unnamed: 0,Sno,Date,Time,State/UnionTerritory,ConfirmedIndianNational,ConfirmedForeignNational,Cured,Deaths,Confirmed,Active
0,1,2020-01-30,6:00 PM,Kerala,1,0,0,0,1,1
1,2,2020-01-31,6:00 PM,Kerala,1,0,0,0,1,1
2,3,2020-02-01,6:00 PM,Kerala,2,0,0,0,2,2
3,4,2020-02-02,6:00 PM,Kerala,3,0,0,0,3,3
4,5,2020-02-03,6:00 PM,Kerala,3,0,0,0,3,3
...,...,...,...,...,...,...,...,...,...,...
8463,8464,2020-11-16,8:00 AM,Himachal Pradesh,-,-,22392,442,29713,6879
8464,8465,2020-11-16,8:00 AM,Jammu & Kashmir,-,-,95342,1589,102619,5688
8465,8466,2020-11-16,8:00 AM,Jharkhand,-,-,102188,924,106064,2952
8467,8468,2020-11-16,8:00 AM,Kerala,-,-,448207,1869,524998,74922


In [60]:
# Static choropleth of confirmed cases per state for the latest report.
#
# df_states holds one row per state per report date. Only the newest date is
# mapped, so each state contributes exactly one value and the colour scale is
# derived from the current counts rather than from the whole history.
map_date = df_states['Date'].max()
map_data = df_states[df_states['Date'] == map_date]

fig = go.Figure(data=go.Choropleth(
    geojson="https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson",
    featureidkey='properties.ST_NM',
    locationmode='geojson-id',
    locations=map_data['State/UnionTerritory'],
    z=map_data['Confirmed'],
    autocolorscale=False,
    colorscale='Reds',
    marker_line_color='peachpuff',
))

# Hide the base map and frame India with a conic conformal projection
fig.update_geos(
    visible=False,
    projection=dict(
        type='conic conformal',
        parallels=[12.472944444, 35.172805555556],
        rotation={'lat': 24, 'lon': 80}
    ),
    lonaxis={'range': [68, 98]},
    lataxis={'range': [6, 38]}
)

# The title is built from the mapped date so it cannot drift from the data.
# No projection_type is set here: it would switch the projection type away
# from the conic conformal one configured in update_geos above.
fig.update_layout(
    title_text='Confirmed Cases as of ' + pd.to_datetime(map_date).strftime('%B %d, %Y'),
    title_x=0.5,
    geo=dict(
        showframe=False,
        showcoastlines=False,
    )
)

fig.show()

### Covid-19 spread in India

In [64]:
# Store 'Date' as plain strings, then list the dtypes to confirm the change
date_labels = df_states['Date'].astype(str)
df_states['Date'] = date_labels
df_states.dtypes

Sno                          int64
Date                        object
Time                        object
State/UnionTerritory        object
ConfirmedIndianNational     object
ConfirmedForeignNational    object
Cured                        int64
Deaths                       int64
Confirmed                    int64
Active                       int64
dtype: object

In [65]:
# Animated choropleth: one frame per 'Date' value, coloured by confirmed cases.
# A single colour range is fixed for every frame so that shades stay
# comparable from one date to the next.
confirmed_range = [0, df_states['Confirmed'].max()]

fig = px.choropleth(
    df_states,
    geojson="https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson",
    featureidkey='properties.ST_NM',
    locationmode='geojson-id',
    locations='State/UnionTerritory',
    color="Confirmed",
    hover_name='State/UnionTerritory',
    animation_frame="Date",
    range_color=confirmed_range,
)

# Hide the base map and frame India with a conic conformal projection
fig.update_geos(
    visible=False,
    projection=dict(
        type='conic conformal',
        parallels=[12.472944444, 35.172805555556],
        rotation={'lat': 24, 'lon': 80}
    ),
    lonaxis={'range': [68, 98]},
    lataxis={'range': [6, 38]}
)

fig.update_layout(
    title_text='Spread of COVID-19 in India',
    title_x=0.5,
    geo=dict(
        showframe=False,
        showcoastlines=False,
    )
)

fig.show()

### Proportion of Active Cases by State/UnionTerritory


In [69]:
# Pie chart of each state's share of active cases in the latest report
fig = px.pie(last_update, values='Active', names='State/UnionTerritory', height=600)

# Put the percentage and the state name inside each slice
fig.update_traces(textposition='inside', textinfo='percent+label')

# Only title settings are needed: geo options apply to map figures, not pies
fig.update_layout(
    title_text='Proportion of Active Cases by State/UnionTerritory',
    title_x=0.5,
)

fig.show()

### Proportion of Cured Cases by State/UnionTerritory


In [72]:
# Treemap of cured cases per state in the latest report
fig = px.treemap(last_update, path=['State/UnionTerritory'], values='Cured', height=600, width=1000)

# Only title settings are needed: geo options apply to map figures, not treemaps
fig.update_layout(
    title_text='Cured Cases by State/UnionTerritory',
    title_x=0.5,
)

fig.show()

### Top 5 Most Infected States
States are ranked by confirmed cases in the latest report; the same chart can be drawn for active and cured cases.

In [73]:
# Rank the latest per-state rows by confirmed cases, highest first
bar_data = last_update.sort_values(by='Confirmed', ascending=False)
top_five = bar_data[0:5]

# Vertical bar chart of the five leading rows, one colour per state
fig = px.bar(
    top_five,
    x="State/UnionTerritory",
    y="Confirmed",
    color='State/UnionTerritory',
    text='Confirmed',
    orientation='v',
    height=600,
    title='Most 5 Infected State/UnionTerritory',
)
fig.show()

### Confirmed, Cured, Active Cases and Deaths Over Time in India

In [103]:
# Nationwide daily totals: sum the per-state counts for each report date.
# Only the four case-count columns are aggregated; summing the whole frame
# would also aggregate the text columns and the Sno row number.
case_columns = ['Confirmed', 'Cured', 'Deaths', 'Active']
daily_totals = (
    df.groupby('Date')[case_columns]
    .sum()
    .reset_index()
    .sort_values('Date', ascending=True)
)

# Reshape to long form (one row per date and case type) for px.line
line_data = daily_totals.melt(id_vars='Date',
                              value_vars=case_columns,
                              var_name='Covid cases',
                              value_name='Cumulative no. of Cases')

# One line per case type
fig = px.line(line_data,
              x="Date", y="Cumulative no. of Cases",
              color='Covid cases',
              title='Confirmed Cases, Cured Cases, and Death Over Time')
fig.show()

In [104]:
# Display the long-form frame that was passed to the line chart
line_data

Unnamed: 0,Date,Covid cases,Cumulative no. of Cases
0,2020-01-30,Confirmed,1
1,2020-01-31,Confirmed,1
2,2020-02-01,Confirmed,2
3,2020-02-02,Confirmed,3
4,2020-02-03,Confirmed,3
...,...,...,...
1163,2020-11-12,Active,489294
1164,2020-11-13,Active,484547
1165,2020-11-14,Active,480719
1166,2020-11-15,Active,479216


### Confirmed Cases & active cases in top 5 states in India

In [113]:
# Rows for the five states plotted in the per-state trend charts.
# NOTE(review): this list is hard-coded; confirm it still matches the five
# states with the most confirmed cases in the latest report.
top5_state_names = ['Maharashtra','Karnataka','Andhra Pradesh','Tamil Nadu','Uttar Pradesh']
is_top5_state = df['State/UnionTerritory'].isin(top5_state_names)
top5_states = df.loc[is_top5_state]

In [115]:
# Preview the first rows of the five-state subset
top5_states.head()

Unnamed: 0,Sno,Date,Time,State/UnionTerritory,ConfirmedIndianNational,ConfirmedForeignNational,Cured,Deaths,Confirmed,Active
39,40,2020-03-04,6:00 PM,Uttar Pradesh,6,0,0,0,6,6
50,51,2020-03-05,6:00 PM,Uttar Pradesh,7,0,0,0,7,7
55,56,2020-03-06,6:00 PM,Uttar Pradesh,7,0,0,0,7,7
58,59,2020-03-07,6:00 PM,Uttar Pradesh,7,0,0,0,7,7
61,62,2020-03-07,6:00 PM,Tamil Nadu,1,0,0,0,1,1


### Confirmed Cases over time in top 5 states in India

In [118]:
# Long-form confirmed counts: one row per state and date
top5_states_confirmed = pd.melt(
    top5_states,
    id_vars=['State/UnionTerritory','Date'],
    value_vars=['Confirmed'],
    var_name='Covid cases',
    value_name='Cumulative no. of Confirmed Cases',
)
top5_states_confirmed.head()

Unnamed: 0,State/UnionTerritory,Date,Covid cases,Cumulative no. of Confirmed Cases
0,Uttar Pradesh,2020-03-04,Confirmed,6
1,Uttar Pradesh,2020-03-05,Confirmed,7
2,Uttar Pradesh,2020-03-06,Confirmed,7
3,Uttar Pradesh,2020-03-07,Confirmed,7
4,Tamil Nadu,2020-03-07,Confirmed,1


In [120]:
# Line chart of confirmed cases over time, one line per state
fig = px.line(
    data_frame=top5_states_confirmed,
    x="Date",
    y="Cumulative no. of Confirmed Cases",
    color='State/UnionTerritory',
    title='Confirmed Cases Over Time for Top 5 States in India',
)
fig.show()

### Active Cases over time in top 5 states in India

In [121]:
# Long-form active counts: one row per state and date
top5_states_active = pd.melt(
    top5_states,
    id_vars=['State/UnionTerritory','Date'],
    value_vars=['Active'],
    var_name='Covid cases',
    value_name='Cumulative no. of Active Cases',
)

In [122]:
# Line chart of active cases over time, one line per state
fig = px.line(
    data_frame=top5_states_active,
    x="Date",
    y="Cumulative no. of Active Cases",
    color='State/UnionTerritory',
    title='Active Cases Over Time for Top 5 states in India',
)
fig.show()