### Social analysis of the C, R, D, T (Confirmed, Recovered, Deceased, Tested) factors of COVID-19 in India

In [6]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import requests

import plotly.io as pio
pio.templates.default = "plotly_dark"
pio.renderers.default = 'iframe'

pd.set_option('display.float_format', '{:.2f}'.format)

In [67]:
# Download the state-wise daily time series and flatten it into one row per
# (state, date) holding the daily "delta" count of every tracked metric.
# A timeout is set so that an unresponsive server cannot hang the notebook.
response = requests.get("https://api.covid19india.org/v4/min/timeseries.min.json", timeout=30)
if response.status_code == 200:
    crdt_api_json = response.json()

    # Metrics read from each day's "delta" object. A metric that is absent for
    # a day (or a day without a "delta" object at all) is recorded as 0; any
    # other problem with the payload is allowed to surface as an error.
    delta_metrics = ["confirmed", "recovered", "deceased", "tested", "vaccinated1", "vaccinated2"]

    records = []
    for state, state_payload in crdt_api_json.items():
        for date, day_payload in state_payload["dates"].items():
            day_delta = day_payload.get("delta", {})
            record = {"state_abbv": state, "date": date}
            for metric in delta_metrics:
                record["delta_" + metric] = day_delta.get(metric, 0)
            records.append(record)

    # Explicit column list keeps the column order stable even if `records` is empty.
    crdt_api_data = pd.DataFrame(
        records,
        columns=["state_abbv", "date"] + ["delta_" + metric for metric in delta_metrics],
    )

else:
    # Both the status code and the reason phrase are reported.
    print("Error while calling API: {} {}".format(response.status_code, response.reason))

In [8]:
crdt_api_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19125 entries, 0 to 19124
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   state_abbv         19125 non-null  object
 1   date               19125 non-null  object
 2   delta_confirmed    19125 non-null  int64 
 3   delta_recovered    19125 non-null  int64 
 4   delta_deceased     19125 non-null  int64 
 5   delta_tested       19125 non-null  int64 
 6   delta_vaccinated1  19125 non-null  int64 
 7   delta_vaccinated2  19125 non-null  int64 
dtypes: int64(6), object(2)
memory usage: 1.2+ MB


In [68]:
# Write the flattened API data to disk without the row index.
# NOTE(review): the file name says 2020, but the previews in this notebook also
# show 2021 dates — confirm whether the name should change.
crdt_api_data.to_csv('../task-1-datasets/crdt_statewise_india_2020.csv', index=False)

In [69]:
# crdt_api_data = pd.read_csv('../task-1-datasets/crdt_statewise_india_2020.csv')

In [9]:
# Parse the date strings into datetime values so the `.dt` accessor can be used on the column.
crdt_api_data['date'] = pd.to_datetime(crdt_api_data['date'])
crdt_api_data.head()

Unnamed: 0,state_abbv,date,delta_confirmed,delta_recovered,delta_deceased,delta_tested,delta_vaccinated1,delta_vaccinated2
0,AN,2020-03-26,1,0,0,0,0,0
1,AN,2020-03-27,5,0,0,0,0,0
2,AN,2020-03-28,3,0,0,0,0,0
3,AN,2020-03-29,0,0,0,0,0,0
4,AN,2020-03-30,1,0,0,0,0,0


In [10]:
crdt_api_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19125 entries, 0 to 19124
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   state_abbv         19125 non-null  object        
 1   date               19125 non-null  datetime64[ns]
 2   delta_confirmed    19125 non-null  int64         
 3   delta_recovered    19125 non-null  int64         
 4   delta_deceased     19125 non-null  int64         
 5   delta_tested       19125 non-null  int64         
 6   delta_vaccinated1  19125 non-null  int64         
 7   delta_vaccinated2  19125 non-null  int64         
dtypes: datetime64[ns](1), int64(6), object(1)
memory usage: 1.2+ MB


In [11]:
crdt_api_data.shape

(19125, 8)

In [12]:
# Derive separate month / day / year columns from the datetime `date` column.

def datetime_split(df):
    """Add calendar-part columns taken from ``df['date']``, modifying ``df`` in place.

    The frame gains (or has overwritten) the columns ``month``, ``day`` and
    ``year``, in that order. ``df['date']`` must be datetime-like so that the
    ``.dt`` accessor is available. Nothing is returned.
    """
    stamps = df['date'].dt
    for part in ('month', 'day', 'year'):
        df[part] = getattr(stamps, part)

In [13]:
datetime_split(crdt_api_data)

In [14]:
crdt_api_data.head()

Unnamed: 0,state_abbv,date,delta_confirmed,delta_recovered,delta_deceased,delta_tested,delta_vaccinated1,delta_vaccinated2,month,day,year
0,AN,2020-03-26,1,0,0,0,0,0,3,26,2020
1,AN,2020-03-27,5,0,0,0,0,0,3,27,2020
2,AN,2020-03-28,3,0,0,0,0,0,3,28,2020
3,AN,2020-03-29,0,0,0,0,0,0,3,29,2020
4,AN,2020-03-30,1,0,0,0,0,0,3,30,2020


In [15]:
# State/UT-level rows only: every record except those coded 'TT' (the country-wide total).
is_state_row = crdt_api_data['state_abbv'].ne('TT')
crdt_api_data_without_total = crdt_api_data[is_state_row]
crdt_api_data_without_total

Unnamed: 0,state_abbv,date,delta_confirmed,delta_recovered,delta_deceased,delta_tested,delta_vaccinated1,delta_vaccinated2,month,day,year
0,AN,2020-03-26,1,0,0,0,0,0,3,26,2020
1,AN,2020-03-27,5,0,0,0,0,0,3,27,2020
2,AN,2020-03-28,3,0,0,0,0,0,3,28,2020
3,AN,2020-03-29,0,0,0,0,0,0,3,29,2020
4,AN,2020-03-30,1,0,0,0,0,0,3,30,2020
...,...,...,...,...,...,...,...,...,...,...,...
19120,WB,2021-08-19,731,781,12,45115,224184,76579,8,19,2021
19121,WB,2021-08-20,758,767,9,47262,139769,64154,8,20,2021
19122,WB,2021-08-21,678,709,10,39117,220607,129432,8,21,2021
19123,WB,2021-08-22,561,686,8,36563,65760,36351,8,22,2021


In [16]:
# Country-level rows only: the 'TT' code carries the nationwide C, R, D, T totals.
is_total_row = crdt_api_data['state_abbv'].eq('TT')
crdt_api_data_with_total = crdt_api_data[is_total_row]
crdt_api_data_with_total

Unnamed: 0,state_abbv,date,delta_confirmed,delta_recovered,delta_deceased,delta_tested,delta_vaccinated1,delta_vaccinated2,month,day,year
16929,TT,2020-01-30,1,0,0,0,0,0,1,30,2020
16930,TT,2020-02-02,1,0,0,0,0,0,2,2,2020
16931,TT,2020-02-03,1,0,0,0,0,0,2,3,2020
16932,TT,2020-02-14,0,3,0,0,0,0,2,14,2020
16933,TT,2020-03-02,2,0,0,0,0,0,3,2,2020
...,...,...,...,...,...,...,...,...,...,...,...
17468,TT,2021-08-19,36600,36457,543,2398862,4383862,1409193,8,19,2021
17469,TT,2021-08-20,34308,36285,376,1876456,2817431,1018431,8,20,2021
17470,TT,2021-08-21,31023,38577,401,1680081,3452188,1919839,8,21,2021
17471,TT,2021-08-22,25420,44103,385,1295160,757884,302334,8,22,2021


In [17]:
"""
Aggregating 'crdt_api_data' before combining it with state_names
"""
# Totals per month number across all states. Grouping is by `month` alone, so
# the same calendar month of different years is summed together.
monthly_delta_columns = [
    'delta_confirmed',
    'delta_recovered',
    'delta_deceased',
    'delta_tested',
    'delta_vaccinated1',
    'delta_vaccinated2',
]
crdt_api_data_without_total.groupby('month')[monthly_delta_columns].sum()

Unnamed: 0_level_0,delta_confirmed,delta_recovered,delta_deceased,delta_tested,delta_vaccinated1,delta_vaccinated2
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,472318,552275,5410,27905978,3706723,0
2,353430,350564,2766,21560981,7500222,2409858
3,1110292,688089,5813,28655584,43097155,6218888
4,6969711,4209649,49986,53113803,71081609,17997388
5,9172468,10349303,124323,68957247,44093433,16758027
6,2632028,3798070,79583,67892289,104884457,14710554
7,2355236,2078921,44040,73579597,90698185,43852525
8,2809910,2641434,39637,68553498,89188128,29346018
9,2622324,2432634,33273,34953260,0,0
10,1873130,2219578,23443,34975119,0,0


In [18]:
# crdt_api_data_without_total.groupby('state_abbv').count().head().reset_index()

In [19]:
# C/R/D/T totals per state and month number.
# Columns are selected with a list: the tuple-style selection used before is
# deprecated (see the warning in this cell's output) and rejected by pandas 2.x.
crdt_api_data_without_total.groupby(['state_abbv', 'month'])[['delta_confirmed', 'delta_recovered', 'delta_deceased', 'delta_tested']].sum()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0_level_0,Unnamed: 1_level_0,delta_confirmed,delta_recovered,delta_deceased,delta_tested
state_abbv,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AN,1,49,102,0,41497
AN,2,26,24,0,45362
AN,3,73,24,0,51365
AN,4,889,741,5,53880
AN,5,1056,1035,48,20575
...,...,...,...,...,...
WB,8,108067,102915,1882,1953054
WB,9,94271,91489,1730,1339827
WB,10,116615,104178,1883,1328963
WB,11,109820,120825,1583,1316508


In [20]:
# State-wise and year-wise totals for all covid factors.
# Columns are selected with a list: the tuple-style selection used before is
# deprecated and rejected by pandas 2.x.
crdt_api_data_without_total.groupby(['state_abbv', 'year'])[['delta_confirmed', 'delta_recovered', 'delta_deceased', 'delta_tested', 'delta_vaccinated1', 'delta_vaccinated2']].sum()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0_level_0,Unnamed: 1_level_0,delta_confirmed,delta_recovered,delta_deceased,delta_tested,delta_vaccinated1,delta_vaccinated2
state_abbv,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AN,2020,4945,4826,62,181640,0,0
AN,2021,2614,2594,67,291163,241644,101276
AP,2020,882286,871916,7108,11825566,0,0
AP,2021,1121056,1103532,6627,14314368,19537294,7080678
AR,2020,16719,16564,56,378151,0,0
...,...,...,...,...,...,...,...
UP,2021,1124160,1123513,14440,46786208,54026148,10201807
UT,2020,90920,83506,1509,1777371,0,0
UT,2021,251851,245524,5868,4967440,5958754,1868470
WB,2020,552063,530366,9712,7110430,0,0


In [21]:
# Totals per (state, month, year), flattened back to ordinary columns.
# Columns are selected with a list: the tuple-style selection used before is
# deprecated and rejected by pandas 2.x.
state_month_year_wise_factors_df = (
    crdt_api_data_without_total
    .groupby(['state_abbv', 'month', 'year'])[[
        'delta_confirmed',
        'delta_recovered',
        'delta_deceased',
        'delta_tested',
        'delta_vaccinated1',
        'delta_vaccinated2',
    ]]
    .sum()
    .reset_index()
)


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [22]:
state_month_year_wise_factors_df

Unnamed: 0,state_abbv,month,year,delta_confirmed,delta_recovered,delta_deceased,delta_tested,delta_vaccinated1,delta_vaccinated2
0,AN,1,2021,49,102,0,41497,2727,0
1,AN,2,2021,26,24,0,45362,3407,2422
2,AN,3,2020,10,0,0,0,0,0
3,AN,3,2021,63,24,0,51365,10346,3186
4,AN,4,2020,23,16,0,2848,0,0
...,...,...,...,...,...,...,...,...,...
631,WB,8,2021,15477,17019,235,958819,5611013,1441781
632,WB,9,2020,94271,91489,1730,1339827,0,0
633,WB,10,2020,116615,104178,1883,1328963,0,0
634,WB,11,2020,109820,120825,1583,1316508,0,0


In [23]:
# state_wise_throughout

In [24]:
# Lookup from the two-letter code stored in `state_abbv` to a display name.
# 'UN' is mapped to the placeholder name 'Unnamed'.
state_name = {'AN': 'Andaman and Nicobar Islands', 'AP': 'Andhra Pradesh', 'AR': 'Arunachal Pradesh',
             'AS': 'Assam', 'BR': 'Bihar', 'CH': 'Chandigarh', 'CT': 'Chhattisgarh', 'DL': 'Delhi',
             'DN': 'Dadra and Nagar Haveli', 'GA': 'Goa', 'GJ': 'Gujarat', 'HP': 'Himachal Pradesh',
             'HR': 'Haryana', 'JH': 'Jharkhand', 'JK': 'Jammu and Kashmir', 'KA': 'Karnataka', 'KL': 'Kerala',
             'LA': 'Ladakh', 'LD': 'Lakshadweep', 'MH': 'Maharashtra', 'ML': 'Meghalaya', 'MN': 'Manipur',
             'MP': 'Madhya Pradesh', 'MZ': 'Mizoram', 'NL': 'Nagaland', 'OR': 'Orissa', 'PB': 'Punjab',
             'PY': 'Pondicherry', 'RJ': 'Rajasthan', 'SK': 'Sikkim', 'TG': 'Telangana', 'TN': 'Tamil Nadu',
             'TR': 'Tripura', 'UP': 'Uttar Pradesh', 'UT': 'Uttarakhand', 'WB': 'West Bengal', 'UN': 'Unnamed'}

In [25]:
# state_wise_throughout['state_name'] = state_wise_throughout['state_abbv'].map(state_name)
# state_wise_throughout

In [26]:
# Attach the display name for each state code; a code missing from `state_name` would map to NaN.
state_month_year_wise_factors_df['state_name'] = state_month_year_wise_factors_df['state_abbv'].map(state_name)
state_month_year_wise_factors_df

Unnamed: 0,state_abbv,month,year,delta_confirmed,delta_recovered,delta_deceased,delta_tested,delta_vaccinated1,delta_vaccinated2,state_name
0,AN,1,2021,49,102,0,41497,2727,0,Andaman and Nicobar Islands
1,AN,2,2021,26,24,0,45362,3407,2422,Andaman and Nicobar Islands
2,AN,3,2020,10,0,0,0,0,0,Andaman and Nicobar Islands
3,AN,3,2021,63,24,0,51365,10346,3186,Andaman and Nicobar Islands
4,AN,4,2020,23,16,0,2848,0,0,Andaman and Nicobar Islands
...,...,...,...,...,...,...,...,...,...,...
631,WB,8,2021,15477,17019,235,958819,5611013,1441781,West Bengal
632,WB,9,2020,94271,91489,1730,1339827,0,0,West Bengal
633,WB,10,2020,116615,104178,1883,1328963,0,0,West Bengal
634,WB,11,2020,109820,120825,1583,1316508,0,0,West Bengal


In [27]:
# pd.get_dummies(state_month_year_wise_factors_df['year'], prefix='year')

In [28]:
# Monthly figures for a single state in a single year (here: code 'AN', year 2020).
state_month_year_wise_factors_df.query("state_abbv == 'AN' and year == 2020")

Unnamed: 0,state_abbv,month,year,delta_confirmed,delta_recovered,delta_deceased,delta_tested,delta_vaccinated1,delta_vaccinated2,state_name
2,AN,3,2020,10,0,0,0,0,0,Andaman and Nicobar Islands
4,AN,4,2020,23,16,0,2848,0,0,Andaman and Nicobar Islands
6,AN,5,2020,0,17,0,4858,0,0,Andaman and Nicobar Islands
8,AN,6,2020,64,12,0,8003,0,0,Andaman and Nicobar Islands
10,AN,7,2020,451,169,5,8329,0,0,Andaman and Nicobar Islands
12,AN,8,2020,2584,2433,41,9447,0,0,Andaman and Nicobar Islands
14,AN,9,2020,703,961,7,25278,0,0,Andaman and Nicobar Islands
15,AN,10,2020,497,492,6,29690,0,0,Andaman and Nicobar Islands
16,AN,11,2020,378,450,2,40936,0,0,Andaman and Nicobar Islands
17,AN,12,2020,235,276,1,52251,0,0,Andaman and Nicobar Islands


In [29]:
# Grand totals over every state row in this dataset (whole of India).
# The six delta columns are named explicitly; they are the ones at positions 3..8 of the frame.
state_month_year_wise_factors_df[[
    'delta_confirmed',
    'delta_recovered',
    'delta_deceased',
    'delta_tested',
    'delta_vaccinated1',
    'delta_vaccinated2',
]].sum()

delta_confirmed       32473763
delta_recovered       31712656
delta_deceased          435141
delta_tested         547549228
delta_vaccinated1    454249912
delta_vaccinated2    131293258
dtype: int64

In [30]:
"""
Pivot table
"""
# One row per state and one column per (metric, year) pair, holding the summed deltas.
# The aggregation is given as the string 'sum': recent pandas versions deprecate
# passing the NumPy callable `np.sum` here and recommend the string form, which
# computes the same result.
pd.pivot_table(
    state_month_year_wise_factors_df,
    index=['state_abbv'],
    columns=['year'],
    values=[
        'delta_confirmed',
        'delta_recovered',
        'delta_deceased',
        'delta_tested',
        'delta_vaccinated1',
        'delta_vaccinated2',
    ],
    aggfunc='sum',
)

Unnamed: 0_level_0,delta_confirmed,delta_confirmed,delta_deceased,delta_deceased,delta_recovered,delta_recovered,delta_tested,delta_tested,delta_vaccinated1,delta_vaccinated1,delta_vaccinated2,delta_vaccinated2
year,2020,2021,2020,2021,2020,2021,2020,2021,2020,2021,2020,2021
state_abbv,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
AN,4945.0,2614.0,62.0,67.0,4826.0,2594.0,181640.0,291163.0,0.0,241644.0,0.0,101276.0
AP,882286.0,1121056.0,7108.0,6627.0,871916.0,1103532.0,11825566.0,14314368.0,0.0,19537294.0,0.0,7080678.0
AR,16719.0,35589.0,56.0,203.0,16564.0,34323.0,378151.0,643722.0,0.0,711695.0,0.0,213308.0
AS,216211.0,368923.0,1045.0,4542.0,211907.0,359645.0,5997450.0,15044261.0,0.0,13029750.0,0.0,2797117.0
BR,252792.0,472804.0,1397.0,8253.0,246685.0,469149.0,18336722.0,22260984.0,0.0,27418434.0,0.0,5390034.0
CH,19748.0,45320.0,317.0,495.0,19045.0,45175.0,181186.0,473162.0,0.0,777153.0,0.0,276138.0
CT,279575.0,724608.0,3371.0,10183.0,264769.0,725151.0,3514707.0,8659162.0,0.0,10458702.0,0.0,3047371.0
DL,625369.0,811965.0,10536.0,14543.0,609322.0,802559.0,8659830.0,16483023.0,0.0,8943199.0,0.0,3531692.0
DN,3364.0,7298.0,2.0,2.0,3320.0,7300.0,72410.0,0.0,0.0,608777.0,0.0,113668.0
GA,51066.0,122155.0,739.0,2447.0,49388.0,119772.0,399206.0,768945.0,0.0,1142024.0,0.0,371291.0


In [31]:
# state_month_year_wise_factors_df['state_abbv'].value_counts()

In [32]:
"""
Importing states name dataset for map graph
"""
# Reference table read from a local CSV; the preview shows state code, name, coordinates and population.
states_abbv_names_df = pd.read_csv('../task-1-datasets/states_abbv_names.csv')
states_abbv_names_df.head()

Unnamed: 0,state,state_name,longitude,latitude,state_population
0,AN,Andaman and Nicobar Islands,11.53,92.69,397000
1,AP,Andhra Pradesh,14.94,79.08,52221000
2,AR,Arunachal Pradesh,28.57,94.99,1504000
3,AS,Assam,26.51,92.66,34293000
4,BR,Bihar,25.64,85.85,119520000


In [33]:
states_abbv_names_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37 entries, 0 to 36
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   state             37 non-null     object 
 1   state_name        37 non-null     object 
 2   longitude         37 non-null     float64
 3   latitude          37 non-null     float64
 4   state_population  37 non-null     int64  
dtypes: float64(2), int64(1), object(2)
memory usage: 1.6+ KB


In [34]:
# combined_crdt_states_name_df = pd.concat([state_month_year_wise_factors_df, states_abbv_names_df], axis=1, join='inner')
# combined_crdt_states_name_df

In [35]:
# Per-state coordinate lookup, mapped into the `longitude` column further down.
# NOTE(review): these values (roughly 9-35) look like latitudes for India, not
# longitudes — the names of this dict and `states_latitude` appear to be swapped.
# Confirm against the code that plots these columns before renaming anything.
states_longitude = {'AN': 11.53481, 'AP': 14.94011, 'AR': 28.57062, 'AS': 26.50557, 'BR': 25.63716, 'CH': 30.70101, 'CT': 22.15589, 'DL': 28.34020, 'DN': 20.32083, 'GA': 15.29303, 
                    'GJ': 23.16044, 'HP': 32.06414, 'HR': 29.09708, 'JH': 23.68760, 'JK': 33.69450, 'KA': 14.71898, 'KL': 9.27600, 'LA': 34.60545, 'LD': 10.09318, 'MH': 19.48013, 
                    'ML': 25.58248, 'MN': 24.59341, 'MP': 23.50034, 'MZ': 23.42287, 'NL': 25.99085, 'OR': 20.44459, 'PB': 30.27896, 'PY': 11.86268, 'RJ': 26.74981, 'SK': 27.39195, 
                    'TG': 17.84484, 'TN': 11.51220, 'TR': 23.70733, 'UP': 27.87953, 'UT': 30.00196, 'WB': 23.01753, 'UN': 0.0}

In [36]:
# Per-state coordinate lookup, mapped into the `latitude` column further down.
# NOTE(review): these values (roughly 70-95) look like longitudes for India, not
# latitudes — the names of this dict and `states_longitude` appear to be swapped.
# Confirm against the code that plots these columns before renaming anything.
states_latitude = {'AN': 92.68677, 'AP': 79.08234, 'AR': 94.99054, 'AS': 92.66144, 'BR': 85.84991, 'CH': 76.85348, 'CT': 82.30122, 'DL': 77.18631, 'DN': 72.96830, 'GA': 73.92505,
                   'GJ': 70.89401, 'HP': 76.83722, 'HR': 76.30233, 'JH': 85.05866, 'JK': 75.50990, 'KA': 75.74763, 'KL': 76.71835, 'LA': 77.24699, 'LD': 73.63830, 'MH': 75.65281, 
                   'ML': 91.02999, 'MN': 93.79062, 'MP': 77.16580, 'MZ': 92.79441, 'NL': 94.61091, 'OR': 84.36696, 'PB': 75.18346, 'PY': 79.95504, 'RJ': 73.48627, 'SK': 88.57663, 
                   'TG': 79.34792, 'TN': 78.82475, 'TR': 91.70517, 'UP': 79.74650, 'UT': 78.95560, 'WB': 87.09422, 'UN': 0.0}

In [37]:
# Population per state code; the placeholder code 'UN' is given 0.
# NOTE(review): the source and reference year of these figures are not recorded here.
states_pop = {'AN': 397000, 'AP': 52221000, 'AR': 1504000, 'AS': 34293000, 'BR': 119520000, 'CH': 1179000, 'CT': 28724000, 'DL': 19814000, 'DN': 959000, 'GA': 1540000,
              'GJ': 67936000, 'HP': 7300000, 'HR': 28672000, 'JH': 37403000, 'JK': 13203000, 'KA': 65798000, 'KL': 35125000, 'LA': 293000, 'LD': 68000, 'MH': 122153000, 
              'ML': 3224000, 'MN': 3103000, 'MP': 82232000, 'MZ': 1192000, 'NL': 2150000, 'OR': 43671000, 'PB': 29859000, 'PY': 1504000, 'RJ': 77264000, 'SK': 664000, 
              'TG': 37220000, 'TN': 75695000, 'TR': 3992000, 'UP': 224979000, 'UT': 11141000, 'WB': 96906000, 'UN': 0}

In [38]:
# Look up each row's state code in `states_longitude` and store the result as `longitude`.
state_month_year_wise_factors_df['longitude'] = state_month_year_wise_factors_df['state_abbv'].map(states_longitude)

In [39]:
# Look up each row's state code in `states_latitude` and store the result as `latitude`.
state_month_year_wise_factors_df['latitude'] = state_month_year_wise_factors_df['state_abbv'].map(states_latitude)

In [40]:
# Look up each row's state code in `states_pop` and store the result as `state_population`.
state_month_year_wise_factors_df['state_population'] = state_month_year_wise_factors_df['state_abbv'].map(states_pop)

In [41]:
state_month_year_wise_factors_df.head()

Unnamed: 0,state_abbv,month,year,delta_confirmed,delta_recovered,delta_deceased,delta_tested,delta_vaccinated1,delta_vaccinated2,state_name,longitude,latitude,state_population
0,AN,1,2021,49,102,0,41497,2727,0,Andaman and Nicobar Islands,11.53,92.69,397000
1,AN,2,2021,26,24,0,45362,3407,2422,Andaman and Nicobar Islands,11.53,92.69,397000
2,AN,3,2020,10,0,0,0,0,0,Andaman and Nicobar Islands,11.53,92.69,397000
3,AN,3,2021,63,24,0,51365,10346,3186,Andaman and Nicobar Islands,11.53,92.69,397000
4,AN,4,2020,23,16,0,2848,0,0,Andaman and Nicobar Islands,11.53,92.69,397000


In [42]:
# Totals per state over the whole period covered by the data.
# Columns are selected with a list: the tuple-style selection used before is
# deprecated and rejected by pandas 2.x.
state_wise_throughout = (
    crdt_api_data_without_total
    .groupby(['state_abbv'])[[
        'delta_confirmed',
        'delta_recovered',
        'delta_deceased',
        'delta_tested',
        'delta_vaccinated1',
        'delta_vaccinated2',
    ]]
    .sum()
    .reset_index()
)


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [43]:
state_wise_throughout.sample(10)

Unnamed: 0,state_abbv,delta_confirmed,delta_recovered,delta_deceased,delta_tested,delta_vaccinated1,delta_vaccinated2
0,AN,7559,7420,129,472803,241644,101276
1,AP,2003342,1975448,13735,26139934,19537294,7080678
34,UP,1709126,1685972,22792,70729377,54026148,10201807
15,KA,2939767,2882331,37155,42071388,28701547,8835665
16,KL,3827688,3653008,19584,30319067,19323498,6948318
23,MZ,52472,46789,200,790315,666271,227831
3,AS,585134,571552,5587,21041711,13029750,2797117
25,OR,1001698,986334,7479,17544497,15841387,4901145
20,ML,74070,69859,1281,941086,1003694,287084
35,UT,342771,329030,7377,6744811,5958754,1868470


In [44]:
# Remove the placeholder 'UN' row, which is not a real state.
# The row is selected by its code rather than by position (previously the 4th row
# from the end), so re-running this cell cannot drop a further, genuine state and
# the result no longer depends on how many codes sort after 'UN'.
# NOTE(review): position -4 corresponds to 'UN' given the alphabetical order of the
# codes seen in this notebook — confirm that 'UN' was the intended row.
state_wise_throughout = state_wise_throughout[state_wise_throughout['state_abbv'] != 'UN'].reset_index(drop=True)

In [45]:
# Enrich the per-state totals with display name, coordinates and population,
# each looked up from the state code.
state_wise_throughout['state_name'] = state_wise_throughout['state_abbv'].map(state_name)
state_wise_throughout['longitude'] = state_wise_throughout['state_abbv'].map(states_longitude)
state_wise_throughout['latitude'] = state_wise_throughout['state_abbv'].map(states_latitude)
state_wise_throughout['population'] = state_wise_throughout['state_abbv'].map(states_pop)

state_wise_throughout

Unnamed: 0,state_abbv,delta_confirmed,delta_recovered,delta_deceased,delta_tested,delta_vaccinated1,delta_vaccinated2,state_name,longitude,latitude,population
0,AN,7559,7420,129,472803,241644,101276,Andaman and Nicobar Islands,11.53,92.69,397000
1,AP,2003342,1975448,13735,26139934,19537294,7080678,Andhra Pradesh,14.94,79.08,52221000
2,AR,52308,50887,259,1021873,711695,213308,Arunachal Pradesh,28.57,94.99,1504000
3,AS,585134,571552,5587,21041711,13029750,2797117,Assam,26.51,92.66,34293000
4,BR,725596,715834,9650,40597706,27418434,5390034,Bihar,25.64,85.85,119520000
5,CH,65068,64220,812,654348,777153,276138,Chandigarh,30.7,76.85,1179000
6,CT,1004183,989920,13554,12173869,10458702,3047371,Chhattisgarh,22.16,82.3,28724000
7,DL,1437334,1411881,25079,25142853,8943199,3531692,Delhi,28.34,77.19,19814000
8,DN,10662,10620,4,72410,608777,113668,Dadra and Nagar Haveli,20.32,72.97,959000
9,GA,173221,169160,3186,1168151,1142024,371291,Goa,15.29,73.93,1540000


In [46]:
"""
Computing the ratio of total confirmed cases to each state's population:
"""
# Keep only the states whose population is at least 30 million, then count them.
is_high_population = state_wise_throughout['population'] >= 30000000
state_wise_throughout_high_pop = state_wise_throughout[is_high_population]
len(state_wise_throughout_high_pop)

15

In [47]:
# Renumber the rows from 0. Without `drop=True` the previous index is kept as an extra `index` column.
state_wise_throughout_high_pop = state_wise_throughout_high_pop.reset_index()

In [48]:
# Keep only the display name, the population and the six delta totals, in this order.
state_wise_throughout_high_pop = state_wise_throughout_high_pop[['state_name', 
                                                                 'population', 
                                                                 'delta_confirmed', 
                                                                 'delta_recovered', 
                                                                 'delta_deceased', 
                                                                 'delta_tested', 
                                                                 'delta_vaccinated1', 
                                                                 'delta_vaccinated2']]

In [49]:
state_wise_throughout_high_pop

Unnamed: 0,state_name,population,delta_confirmed,delta_recovered,delta_deceased,delta_tested,delta_vaccinated1,delta_vaccinated2
0,Andhra Pradesh,52221000,2003342,1975448,13735,26139934,19537294,7080678
1,Assam,34293000,585134,571552,5587,21041711,13029750,2797117
2,Bihar,119520000,725596,715834,9650,40597706,27418434,5390034
3,Gujarat,67936000,825316,815066,10079,26973685,32619025,10592128
4,Jharkhand,37403000,347748,342451,5132,12830138,9446948,2356283
5,Karnataka,65798000,2939767,2882331,37155,42071388,28701547,8835665
6,Kerala,35125000,3827688,3653008,19584,30319067,19323498,6948318
7,Maharashtra,122153000,6428294,6238794,136067,52445689,39635783,14269913
8,Madhya Pradesh,82232000,792104,781504,10516,16043313,33592729,6601502
9,Orissa,43671000,1001698,986334,7479,17544497,15841387,4901145


In [50]:
# Total confirmed cases expressed as a percentage of the state's population.
state_wise_throughout_high_pop['pop_total_confirmed_%'] = (state_wise_throughout_high_pop['delta_confirmed'] / state_wise_throughout_high_pop['population']) * 100

In [51]:
# Rank the states from highest to lowest confirmed-case percentage and renumber the rows.
state_wise_throughout_high_pop = state_wise_throughout_high_pop.sort_values(by='pop_total_confirmed_%', ascending=False).reset_index(drop=True)
state_wise_throughout_high_pop

Unnamed: 0,state_name,population,delta_confirmed,delta_recovered,delta_deceased,delta_tested,delta_vaccinated1,delta_vaccinated2,pop_total_confirmed_%
0,Kerala,35125000,3827688,3653008,19584,30319067,19323498,6948318,10.9
1,Maharashtra,122153000,6428294,6238794,136067,52445689,39635783,14269913,5.26
2,Karnataka,65798000,2939767,2882331,37155,42071388,28701547,8835665,4.47
3,Andhra Pradesh,52221000,2003342,1975448,13735,26139934,19537294,7080678,3.84
4,Tamil Nadu,75695000,2602489,2548868,34734,41059465,23488856,5552882,3.44
5,Orissa,43671000,1001698,986334,7479,17544497,15841387,4901145,2.29
6,Telangana,37220000,655343,645174,3861,24016637,12670665,4327906,1.76
7,Assam,34293000,585134,571552,5587,21041711,13029750,2797117,1.71
8,West Bengal,96906000,1543496,1515789,18371,16689293,26583407,10126854,1.59
9,Rajasthan,77264000,954030,944917,8954,13521785,30289979,9677263,1.23


In [52]:
state_wise_throughout_high_pop

Unnamed: 0,state_name,population,delta_confirmed,delta_recovered,delta_deceased,delta_tested,delta_vaccinated1,delta_vaccinated2,pop_total_confirmed_%
0,Kerala,35125000,3827688,3653008,19584,30319067,19323498,6948318,10.9
1,Maharashtra,122153000,6428294,6238794,136067,52445689,39635783,14269913,5.26
2,Karnataka,65798000,2939767,2882331,37155,42071388,28701547,8835665,4.47
3,Andhra Pradesh,52221000,2003342,1975448,13735,26139934,19537294,7080678,3.84
4,Tamil Nadu,75695000,2602489,2548868,34734,41059465,23488856,5552882,3.44
5,Orissa,43671000,1001698,986334,7479,17544497,15841387,4901145,2.29
6,Telangana,37220000,655343,645174,3861,24016637,12670665,4327906,1.76
7,Assam,34293000,585134,571552,5587,21041711,13029750,2797117,1.71
8,West Bengal,96906000,1543496,1515789,18371,16689293,26583407,10126854,1.59
9,Rajasthan,77264000,954030,944917,8954,13521785,30289979,9677263,1.23


In [53]:
"""
Bar chart using Plotly
"""

# Grouped bar chart: two bars per high-population state, placed side by side.
fig = go.Figure()
# First bar: the state's population.
fig.add_trace(go.Bar(
    x=state_wise_throughout_high_pop.state_name,
    y=state_wise_throughout_high_pop.population,
    name='State Population',
    marker_color='indianred'
))
# Second bar: the summed `delta_vaccinated2` count, labelled as fully vaccinated.
fig.add_trace(go.Bar(
    x=state_wise_throughout_high_pop.state_name,
    y=state_wise_throughout_high_pop.delta_vaccinated2,
    name='Fully vaccinated population',
    marker_color='lightsalmon'
))

# Here we modify the tickangle of the xaxis, resulting in rotated labels.
fig.update_layout(barmode='group', 
                  xaxis_tickangle=-45, 
                  title='Visual ratio of populated states (more or equals to 30M) VS fully vaccinated population')
fig.show()

In [54]:
# mh_2020 = state_month_year_wise_factors_df.loc[state_month_year_wise_factors_df['state_name'] == 'Maharashtra' & state_month_year_wise_factors_df['year'] == 2020]
# mh_2020

In [55]:
# Monthly per-state rows for the year 2020 only.
covid_total_cases_2020 = state_month_year_wise_factors_df.query('year == 2020')
covid_total_cases_2020

Unnamed: 0,state_abbv,month,year,delta_confirmed,delta_recovered,delta_deceased,delta_tested,delta_vaccinated1,delta_vaccinated2,state_name,longitude,latitude,state_population
2,AN,3,2020,10,0,0,0,0,0,Andaman and Nicobar Islands,11.53,92.69,397000
4,AN,4,2020,23,16,0,2848,0,0,Andaman and Nicobar Islands,11.53,92.69,397000
6,AN,5,2020,0,17,0,4858,0,0,Andaman and Nicobar Islands,11.53,92.69,397000
8,AN,6,2020,64,12,0,8003,0,0,Andaman and Nicobar Islands,11.53,92.69,397000
10,AN,7,2020,451,169,5,8329,0,0,Andaman and Nicobar Islands,11.53,92.69,397000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
630,WB,8,2020,92590,85896,1647,994235,0,0,West Bengal,23.02,87.09,96906000
632,WB,9,2020,94271,91489,1730,1339827,0,0,West Bengal,23.02,87.09,96906000
633,WB,10,2020,116615,104178,1883,1328963,0,0,West Bengal,23.02,87.09,96906000
634,WB,11,2020,109820,120825,1583,1316508,0,0,West Bengal,23.02,87.09,96906000


In [56]:
covid_total_cases_2020.isna().sum()

state_abbv           0
month                0
year                 0
delta_confirmed      0
delta_recovered      0
delta_deceased       0
delta_tested         0
delta_vaccinated1    0
delta_vaccinated2    0
state_name           0
longitude            0
latitude             0
state_population     0
dtype: int64

In [57]:
# Line chart of confirmed cases per month in 2020, one line per state.
# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv')
# months = ['Jan', 'Feb', 'March', 'Apr', 'May', 'June', 'July', 'Aug', 'Sep', 'Oct', 'Nov', 'December']
fig = px.line(covid_total_cases_2020, x='month', y='delta_confirmed', color='state_name')
# fig = px.line(covid_total_cases_2020, x='month', y='delta_recovered')

fig

In [58]:
# Collapse the 2020 monthly rows to one row per state: total confirmed cases and
# the number of monthly rows behind that total, sorted from most to fewest cases.
# NOTE: this overwrites `covid_total_cases_2020`. From here on it no longer holds
# monthly data, and its `month` column is a row count, not a calendar month.
covid_total_cases_2020 = covid_total_cases_2020.groupby('state_name').agg({'delta_confirmed': 'sum', 'month': 'count'})
covid_total_cases_2020 = covid_total_cases_2020.sort_values(by='delta_confirmed', ascending=False).reset_index()
covid_total_cases_2020

Unnamed: 0,state_name,delta_confirmed,month
0,Maharashtra,1932112,10
1,Karnataka,919496,10
2,Andhra Pradesh,882286,10
3,Tamil Nadu,818014,10
4,Kerala,760934,12
5,Delhi,625369,10
6,Uttar Pradesh,584966,10
7,West Bengal,552063,10
8,Orissa,329621,10
9,Rajasthan,308243,10


In [59]:
# Bar chart of total confirmed cases per state in 2020. Bars are coloured by the
# total, and the hover shows the `month` column of the aggregated frame.
px.bar(covid_total_cases_2020, 
       x=covid_total_cases_2020.delta_confirmed, 
       y=covid_total_cases_2020.state_name, 
       title="Total Confirmed Covid cases in 2020 state-wise", 
       color="delta_confirmed", 
       hover_data=['month'],
       height=800)

In [60]:
# Month-wise trend of confirmed cases in 2020, one line per state.
# The monthly rows are taken from `state_month_year_wise_factors_df` directly:
# by this point `covid_total_cases_2020` has been collapsed to one row per state
# and its `month` column holds a row count, so plotting it would show no trend.
monthly_confirmed_2020 = state_month_year_wise_factors_df.query('year == 2020')
fig = px.line(monthly_confirmed_2020, x="month", y="delta_confirmed", color='state_name', title="Visual trend of confirmed cases in all states month-wise")
fig.update_traces(mode="markers+lines")

fig.show()

In [61]:
state_month_year_wise_factors_df.query('year == 2021')

Unnamed: 0,state_abbv,month,year,delta_confirmed,delta_recovered,delta_deceased,delta_tested,delta_vaccinated1,delta_vaccinated2,state_name,longitude,latitude,state_population
0,AN,1,2021,49,102,0,41497,2727,0,Andaman and Nicobar Islands,11.53,92.69,397000
1,AN,2,2021,26,24,0,45362,3407,2422,Andaman and Nicobar Islands,11.53,92.69,397000
3,AN,3,2021,63,24,0,51365,10346,3186,Andaman and Nicobar Islands,11.53,92.69,397000
5,AN,4,2021,866,725,5,51032,77013,5273,Andaman and Nicobar Islands,11.53,92.69,397000
7,AN,5,2021,1056,1018,48,15717,15591,4300,Andaman and Nicobar Islands,11.53,92.69,397000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
623,WB,4,2021,241451,132587,1015,1259954,4101369,1640943,West Bengal,23.02,87.09,96906000
625,WB,5,2021,548011,570390,4197,1998424,2172453,1569600,West Bengal,23.02,87.09,96906000
627,WB,6,2021,123406,187702,2167,1787215,5998458,1177345,West Bengal,23.02,87.09,96906000
629,WB,7,2021,28236,37280,428,1512282,4138979,3628154,West Bengal,23.02,87.09,96906000


In [62]:
# Same summary as for 2020, applied to the 2021 rows: one row per state with the
# total confirmed cases and the number of monthly rows, sorted from most to fewest.
covid_total_cases_2021 = state_month_year_wise_factors_df.query('year == 2021')
covid_total_cases_2021 = covid_total_cases_2021.groupby('state_name').agg({'delta_confirmed': 'sum', 'month': 'count'})
covid_total_cases_2021 = covid_total_cases_2021.sort_values(by='delta_confirmed', ascending=False).reset_index()

# Bar chart of the 2021 totals; the hover shows the per-state row count stored in `month`.
px.bar(covid_total_cases_2021, 
       x=covid_total_cases_2021.delta_confirmed, 
       y=covid_total_cases_2021.state_name, 
       title="Total Confirmed Covid cases in 2021 state-wise", 
       color="delta_confirmed", 
       hover_data=['month'],
       height=800)