In [1]:
import requests, json, pandas as pd
from collections import defaultdict
from bokeh.plotting import figure,show
from bokeh.io import output_notebook
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

ImportError: No module named bokeh.plotting

In [7]:
statewise_uri = "https://api.covid19india.org/v3/timeseries.json"

def getTimeSeriesDataStateWise():
    req = requests.get(statewise_uri, verify=False)
    return req.json()

In [8]:
def createTableFromJson(jsondata):
    # col = ['date', 'state','delta_confirmed', 'delta_recovered', 'delta_tested',
    #        'total_confirmed', 'total_deceased', 'total_recovered', 'total_tested']

    d = defaultdict(list)

    for state, data in jsondata.items():
        for date, v in data.items():
            d['state'].append(state)
            d['date'].append(date)
            delta = v.get('delta', {})
            d['delta_confirmed'].append(delta.get('confirmed', 0))
            d['delta_deceased'].append(delta.get('deceased', 0))
            d['delta_recovered'].append(delta.get('recovered', 0))
            d['delta_tested'].append(delta.get('tested', 0))
            total = v.get('total', {})
            d['total_confirmed'].append(total.get('confirmed', 0))
            d['total_deceased'].append(total.get('deceased', 0))
            d['total_recovered'].append(total.get('recovered', 0))
            d['total_tested'].append(total.get('tested', 0))

    df = pd.DataFrame.from_dict(d)
    return df

In [9]:

def cleanData(df):
    mask_curr_date = (df['date'] > '2020-06-26') & (df['state'] != 'TT')
    new_df = df.loc[mask_curr_date].sort_values(by=['total_confirmed'], ascending=False)
    top_states = []
    for state in new_df[['state']].head(5).values.tolist():
        top_states.append(state[0])
    print(top_states)

    basic_mask = (df['date'] > '2020-04-15') & (df['state'] != 'TT') & (df['state'] != 'UN')
    top_states_mask, remaining_states_mask = False, basic_mask
    for state in top_states:
        top_states_mask = top_states_mask | (df['state'] == state)
        remaining_states_mask = remaining_states_mask & (df['state'] != state)

    top_states_mask = basic_mask & top_states_mask
    df['delta_confirmed_sma'] = df.iloc[:, 2].rolling(window=3).mean()
    top_state_df = pd.pivot_table(df.loc[top_states_mask], values='delta_confirmed_sma', index=['state'], 
                                  columns='date')
    rem_state_df = pd.pivot_table(df.loc[top_states_mask], values='delta_confirmed_sma', index=['state'], 
                                  columns='date')
    return (top_state_df, rem_state_df)

In [10]:
with open('raw_data/timeseries.json') as file:
    sj = json.load(file)
df = createTableFromJson(sj)
df.head()

Unnamed: 0,date,delta_confirmed,delta_deceased,delta_recovered,delta_tested,state,total_confirmed,total_deceased,total_recovered,total_tested
0,2020-05-25,0,0,0,407,DN,2,0,1,10085
1,2020-05-24,0,0,0,380,DN,2,0,1,9678
2,2020-05-27,0,0,0,484,DN,2,0,1,10872
3,2020-05-26,0,0,0,303,DN,2,0,1,10388
4,2020-05-21,0,0,0,319,DN,1,0,1,8456


In [11]:
top_state_df, rem_state_df = cleanData(df)

[u'MH', u'DL', u'TN', u'GJ', u'UP']


In [12]:
top_state_df.head()

date,2020-04-16,2020-04-17,2020-04-18,2020-04-19,2020-04-20,2020-04-21,2020-04-22,2020-04-23,2020-04-24,2020-04-25,...,2020-06-18,2020-06-19,2020-06-20,2020-06-21,2020-06-22,2020-06-23,2020-06-24,2020-06-25,2020-06-26,2020-06-27
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
DL,4.0,3.666667,3.0,2.666667,1.666667,1.333333,32.0,5.333333,1.0,1.333333,...,43.666667,22.0,34.0,53.0,66.0,23.666667,57.0,44.0,42.333333,43.0
GJ,3.0,3.0,15.666667,11.666667,10.0,12.0,18.333333,14.666667,14.666667,10.666667,...,25.0,23.0,24.333333,26.333333,22.0,9.666667,27.0,15.0,14.333333,12.0
MH,9.0,12.333333,32.666667,29.333333,13.666667,14.0,73.0,39.0,21.333333,22.333333,...,80.666667,56.333333,73.666667,113.333333,130.0,91.666667,153.333333,147.333333,127.666667,114.0
TN,0.666667,0.666667,1.0,1.0,1.333333,1.666667,7.0,4.666667,1.0,1.0,...,30.0,13.666667,15.333333,31.666667,42.666667,13.0,27.333333,28.0,26.333333,38.0
UP,1.0,0.666667,2.666667,2.666667,1.333333,2.333333,6.333333,6.0,1.666667,1.333333,...,14.0,6.333333,13.0,17.0,20.666667,6.666667,13.0,11.333333,9.0,12.666667


In [13]:
rem_state_df.head()

date,2020-04-16,2020-04-17,2020-04-18,2020-04-19,2020-04-20,2020-04-21,2020-04-22,2020-04-23,2020-04-24,2020-04-25,...,2020-06-18,2020-06-19,2020-06-20,2020-06-21,2020-06-22,2020-06-23,2020-06-24,2020-06-25,2020-06-26,2020-06-27
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
DL,4.0,3.666667,3.0,2.666667,1.666667,1.333333,32.0,5.333333,1.0,1.333333,...,43.666667,22.0,34.0,53.0,66.0,23.666667,57.0,44.0,42.333333,43.0
GJ,3.0,3.0,15.666667,11.666667,10.0,12.0,18.333333,14.666667,14.666667,10.666667,...,25.0,23.0,24.333333,26.333333,22.0,9.666667,27.0,15.0,14.333333,12.0
MH,9.0,12.333333,32.666667,29.333333,13.666667,14.0,73.0,39.0,21.333333,22.333333,...,80.666667,56.333333,73.666667,113.333333,130.0,91.666667,153.333333,147.333333,127.666667,114.0
TN,0.666667,0.666667,1.0,1.0,1.333333,1.666667,7.0,4.666667,1.0,1.0,...,30.0,13.666667,15.333333,31.666667,42.666667,13.0,27.333333,28.0,26.333333,38.0
UP,1.0,0.666667,2.666667,2.666667,1.333333,2.333333,6.333333,6.0,1.666667,1.333333,...,14.0,6.333333,13.0,17.0,20.666667,6.666667,13.0,11.333333,9.0,12.666667


In [None]:
for mask in [top_states_mask, remaining_states_mask]:
        heatmap_table = pd.pivot_table(df.loc[mask], values='delta_confirmed_sma', index=['state'], columns='date')
        ax = sns.heatmap(heatmap_table, xticklabels=True, yticklabels=True, linewidths=.2, cmap='YlGn')
        plt.yticks(fontsize=6)
        plt.xticks(fontsize=6, rotation=45)
        plt.show(ax)