# Covid Data

In [2]:
# Importing modules
import pandas as pd
import numpy as np
import json
from datetime import datetime
import plotly.graph_objects as go

In [148]:
# Collect JSON
def url(i):
    """Return the address of the numbered raw_data JSON dump `i`."""
    prefix = "https://api.covid19india.org/raw_data"
    suffix = ".json"
    return "".join((prefix, str(i), suffix))

def get_df(i):
    """Download raw_data dump `i` and flatten its 'raw_data' records into a DataFrame."""
    payload = pd.read_json(url(i))
    records = payload['raw_data']
    return pd.json_normalize(records)

# Fetch dumps 1..7 in order; each frame keeps its own name because the
# later cells treat them differently.
df1, df2, df3, df4, df5, df6, df7 = (get_df(part) for part in range(1, 8))

In [349]:
# Shapes of the seven raw frames, space-separated on one line.
print(*(frame.shape for frame in (df1, df2, df3, df4, df5, df6, df7)))

(17364, 21) (10819, 21) (10020, 20) (18220, 20) (20438, 20) (23423, 20) (8508, 8)


In [150]:
# Keeping only required Columns
# cols = [df1.columns.sort_values(), df2.columns.sort_values(), 
#         df3.columns.sort_values(), df4.columns.sort_values(), df5.columns.sort_values(), df6.columns.sort_values()]
# cols = list(set(cols[0]).intersection(*cols))

# Columns selected from every frame before the frames are concatenated.
cols = [
    'dateannounced',
    'numcases',
    'statecode',
    'detectedstate',
    'detecteddistrict',
    'detectedcity',
    'nationality',
    'currentstatus',
]

In [151]:
# Arranging old formatted data : df1, df2
# The deaths/recoveries feed uses its own column names: pick the columns of
# interest, rename them to the raw_data names, add a per-row count of 1 and
# reduce to `cols`.
dr = pd.json_normalize(
    pd.read_json('https://api.covid19india.org/deaths_recoveries.json')['deaths_recoveries']
)
dr = dr[['date', 'gender', 'statecode', 'state', 'district', 'city', 'nationality', 'patientstatus']]
dr = dr.rename(
    columns={
        'date': 'dateannounced',
        'state': 'detectedstate',
        'district': 'detecteddistrict',
        'city': 'detectedcity',
        'patientstatus': 'currentstatus',
    }
).assign(numcases=1)[cols]

# Every row of the combined old-format data is counted as exactly one case.
prev_data = pd.concat([df1[cols], df2[cols], dr[cols]]).assign(numcases=1)
prev_data.shape

(35590, 8)

In [203]:
# Latest data has empty rows at the end, Removing them
# Turn empty strings into NaN so that rows missing a date or a case count
# can be dropped with dropna.
df7 = (
    df7.replace("", float("NaN"))[cols]
    .dropna(subset=['dateannounced', 'numcases'])
)
df7.shape

(8508, 8)

In [217]:
# Combining everything and getting the final data
def make_int(x):
    """Convert `x` to an int, returning 0 when it cannot be converted.

    Only the errors `int()` raises for unconvertible values are treated as
    "not a number": TypeError (e.g. None), ValueError (e.g. '' or NaN) and
    OverflowError (infinity). Anything else, such as KeyboardInterrupt,
    propagates instead of being silently turned into 0.
    """
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        return 0

# Stack the old-format and new-format frames into one table with a fresh
# 0..n-1 index.
data = pd.concat(
    [prev_data[cols], df3[cols], df4[cols], df5[cols], df6[cols], df7[cols]],
    ignore_index=True,
)
# Dates arrive as day/month/year strings; parse them so the sort is chronological.
data['dateannounced'] = data['dateannounced'].apply(lambda x: datetime.strptime(x, "%d/%m/%Y"))
data = data.sort_values(by='dateannounced')
# Keep only the three statuses analysed below. The explicit .copy() makes the
# filtered frame independent of its parent, so the column assignment that
# follows does not raise SettingWithCopyWarning.
data = data[data['currentstatus'].isin(['Hospitalized', 'Recovered', 'Deceased'])].copy()
# Case counts that cannot be parsed are counted as 0 (see make_int).
data['numcases'] = data['numcases'].apply(make_int)
data.head(2)

Unnamed: 0,dateannounced,numcases,statecode,detectedstate,detecteddistrict,detectedcity,nationality,currentstatus
0,2020-01-30,1,KL,Kerala,Thrissur,Thrissur,India,Recovered
1,2020-02-02,1,KL,Kerala,Alappuzha,Alappuzha,India,Recovered


In [254]:
# Separating active, recovered, deceased cases
# One frame per status value, in the order Hospitalized / Recovered / Deceased.
h_data, r_data, d_data = (
    data[data['currentstatus'] == status]
    for status in ('Hospitalized', 'Recovered', 'Deceased')
)

## Daily Total Cases

In [276]:
# Datewise number and cumsum
def date_wise(df, group_by_col='dateannounced', num_case='numcases'):
    """Sum `num_case` per `group_by_col` value and add a running total.

    Returns a DataFrame indexed by the group key with two columns: the
    per-group sum (named `num_case`) and its cumulative sum, 'total_cases'.
    """
    per_group = df.groupby(by=group_by_col)[[num_case]].sum()
    running_total = per_group[num_case].cumsum()
    return per_group.assign(total_cases=running_total)

# Per-date sums and running totals for each status frame.
h_num, r_num, d_num = map(date_wise, (h_data, r_data, d_data))
tuple(counts.shape for counts in (h_num, r_num, d_num))

((112, 2), (117, 2), (100, 2))

In [281]:
# Plot daily case

fig = go.Figure()

# One line per status. The 'Hospitalized' series is labelled "Confirmed" to
# match add_active, which treats that same count as Confirmed and derives
# Active as Confirmed - Recovered - Deceased.
for counts, label in (
    (h_num, 'Confirmed Cases'),
    (r_num, 'Recovered Cases'),
    (d_num, 'Deceased Cases'),
):
    fig.add_trace(
        go.Scatter(
            x=counts.index,
            y=counts.numcases,
            name=label
        ))
fig.update_layout(
    title="Daily Cases of COVID",
    xaxis_title="Date announced",
    yaxis_title="Cases per day",
)
fig.show()


In [348]:
# Plot Cumulative case

fig = go.Figure()

# One line per status. The 'Hospitalized' series is labelled "Confirmed" to
# match add_active, which treats that same count as Confirmed and derives
# Active as Confirmed - Recovered - Deceased.
for counts, label in (
    (h_num, 'Confirmed Cases'),
    (r_num, 'Recovered Cases'),
    (d_num, 'Deceased Cases'),
):
    fig.add_trace(
        go.Scatter(
            x=counts.index,
            y=counts.total_cases,
            name=label
        ))
fig.update_layout(
    title="Cumulative Cases of COVID",
    xaxis_title="Date announced",
    yaxis_title="Cumulative cases",
)
fig.show()


## Statewise

In [333]:
def add_active(full_data, num_case='numcases'):
    """Return a copy of `full_data` with Confirmed and Active columns.

    The `num_case` column is renamed to 'Confirmed', NaN values are replaced
    by 0, 'Recovered' and 'Deceased' are converted to int, and 'Active' is
    added as Confirmed - Recovered - Deceased. The frame passed in keeps its
    original column names.
    """
    counts = full_data.rename(columns={num_case: 'Confirmed'}).replace(float("NaN"), 0)

    for status in ('Recovered', 'Deceased'):
        counts[status] = counts[status].apply(int)

    counts['Active'] = counts['Confirmed'].sub(counts['Recovered']).sub(counts['Deceased'])
    return counts

def combine_data(group_by, num_case='numcases'):
    """Build a Confirmed / Recovered / Deceased / Active table per `group_by` key.

    Reads the notebook-level frames h_data, r_data and d_data, sums
    `num_case` per group in each, and passes the combined table to
    add_active.
    """
    def totals(frame):
        return frame.groupby(by=group_by)[[num_case]].sum()

    # The table starts from the h_data groups. Column assignment aligns on
    # that index, so groups that appear only in r_data or d_data are not added.
    combined = totals(h_data)
    combined['Recovered'] = totals(r_data)
    combined['Deceased'] = totals(d_data)

    return add_active(combined, num_case)

state_data = combine_data(group_by='detectedstate')
state_data

Unnamed: 0_level_0,Confirmed,Recovered,Deceased,Active
detectedstate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
,1,36,0,-35
Andaman and Nicobar Islands,45,52,0,-7
Andhra Pradesh,10888,4990,136,5762
Arunachal Pradesh,160,38,1,121
Assam,6371,3959,9,2403
Bihar,8380,6106,56,2218
Chandigarh,420,322,6,92
Chhattisgarh,2411,1609,12,790
Dadra and Nagar Haveli and Daman and Diu,145,30,0,115
Delhi,70386,41439,2366,26581


### Statistics based on population

## District Wise

In [331]:
# Case totals per (statecode, detecteddistrict) for each status frame.
h_dist, r_dist, d_dist = (
    frame.groupby(by=['statecode', 'detecteddistrict'])[['numcases']].sum()
    for frame in (h_data, r_data, d_data)
)

In [345]:
def dist_data(h_dist, r_dist, d_dist, statecode, num_case='numcases'):
    """Build the per-district Confirmed / Recovered / Deceased / Active table for one state.

    Parameters
    ----------
    h_dist, r_dist, d_dist : DataFrame
        Single-column case totals whose index has the state code as its
        first level (as produced by grouping on
        ['statecode', 'detecteddistrict']).
    statecode : str
        State code to select.
    num_case : str
        Name of the count column in `h_dist`; add_active renames it to
        'Confirmed'.

    Raises
    ------
    KeyError
        If `statecode` has no rows in `h_dist`. A state that is missing from
        `r_dist` or `d_dist` is treated as having zero recovered / deceased
        cases instead of raising.
    """
    def counts_for_state(counts):
        # A boolean mask (rather than .loc[statecode]) yields an empty Series
        # when the state is absent, which add_active then fills with 0.
        in_state = counts.index.get_level_values(0) == statecode
        return counts[in_state].droplevel(0).iloc[:, 0]

    # Copy the slice so adding columns neither touches h_dist nor raises
    # SettingWithCopyWarning.
    state_counts = h_dist.loc[statecode].copy()
    state_counts['Recovered'] = counts_for_state(r_dist)
    state_counts['Deceased'] = counts_for_state(d_dist)

    return add_active(state_counts, num_case)

district_data = dist_data(h_dist, r_dist, d_dist, 'WB', 'numcases')
district_data

Unnamed: 0_level_0,Confirmed,Recovered,Deceased,Active
detecteddistrict,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
,361,0,-5,366
Alipurduar,178,72,0,106
Bankura,221,177,0,44
Birbhum,289,250,2,37
Cooch Behar,291,266,0,25
Dakshin Dinajpur,162,77,0,85
Darjeeling,359,191,6,162
Hooghly,815,643,19,153
Howrah,2144,1577,86,481
Jalpaiguri,313,200,0,113


# Testing

In [357]:
# Load the ICMR tested-numbers CSV and show its first five rows.
icmr_test = pd.read_csv(
    'https://api.covid19india.org/csv/latest/tested_numbers_icmr_data.csv'
)
icmr_test.iloc[:5]

Unnamed: 0,Update Time Stamp,Tested As Of,Total Samples Tested,Total Individuals Tested,Total Positive Cases,Tests conducted by Private Labs,Sample Reported today,Positive cases from samples reported,Source,Source 1,Test positivity rate,Individuals Tested Per Confirmed Case,Tests Per Confirmed Case,Tests per million
0,13/03/2020 00:00:00,13/03/2020,6500.0,5900.0,78,,,,Press_Release_ICMR_13March2020.pdf,,1.20%,75.64,83.33,5.0
1,18/03/2020 18:00:00,18/03/2020,13125.0,12235.0,150,,,,ICMR_website_update_18March_6PM_IST.pdf,,1.14%,81.57,87.5,10.0
2,19/03/2020 10:00:00,19/03/2020,13316.0,12426.0,168,,,,ICMR_website_update_19March_10AM_IST_V2.pdf,,1.26%,73.96,79.26,10.0
3,19/03/2020 18:00:00,19/03/2020,14175.0,13285.0,182,,,,ICMR_website_update_19March_6PM_IST.pdf,,1.28%,72.99,77.88,11.0
4,20/03/2020 10:00:00,20/03/2020,14376.0,13486.0,206,,,,ICMR_website_update_20March_10AM_IST.pdf,,1.43%,65.47,69.79,11.0


In [370]:
# Last two rows of the ICMR table.
icmr_test.iloc[-2:]

Unnamed: 0,Update Time Stamp,Tested As Of,Total Samples Tested,Total Individuals Tested,Total Positive Cases,Tests conducted by Private Labs,Sample Reported today,Positive cases from samples reported,Source,Source 1,Test positivity rate,Individuals Tested Per Confirmed Case,Tests Per Confirmed Case,Tests per million
105,24/06/2020 09:00:00,23/06/2020,7352911.0,,,,215195,,https://twitter.com/ICMRDELHI/status/127563260...,https://t.me/Covid19india_Auxiliary_Test_Data/628,,,,5516.0
106,25/06/2020 09:00:00,24/06/2020,7560782.0,,,,207871,,https://twitter.com/ICMRDELHI/status/127599592...,,,,,5672.0


In [390]:
test2 = pd.read_csv(
    'https://api.covid19india.org/csv/latest/statewise_tested_numbers_data.csv'
)
# Set the dates properly: 'Updated On' arrives as day/month/year strings.
test2['Updated On'] = test2['Updated On'].map(
    lambda stamp: datetime.strptime(stamp, "%d/%m/%Y")
)
test2.head()

Unnamed: 0,Updated On,State,Total Tested,Tag (Total Tested),Positive,Negative,Unconfirmed,Cumulative People In Quarantine,Total People Currently in Quarantine,Tag (People in Quarantine),...,Num Calls State Helpline,Source1,Unnamed: 21,Source2,Unnamed: 23,Test positivity rate,Tests per thousand,Tests per million,Tests per positive case,Population NCP 2019 Projection
0,2020-04-17,Andaman and Nicobar Islands,1403.0,Samples Sent,12.0,1210.0,181.0,,,,...,,https://t.me/indiacovid/2550,,,,0.86%,3.53,3534.0,117.0,397000.0
1,2020-04-24,Andaman and Nicobar Islands,2679.0,Samples Sent,27.0,,246.0,,614.0,Institutional,...,280.0,https://t.me/indiacovid/3147?single,,,,1.01%,6.75,6748.0,99.0,397000.0
2,2020-04-27,Andaman and Nicobar Islands,2848.0,Samples Sent,33.0,,106.0,,724.0,Institutional,...,298.0,https://t.me/indiacovid/3365?single,,,,1.16%,7.17,7174.0,86.0,397000.0
3,2020-05-01,Andaman and Nicobar Islands,3754.0,Samples Sent,33.0,,199.0,,643.0,Institutional,...,340.0,https://t.me/indiacovid/3781,,,,0.88%,9.46,9456.0,114.0,397000.0
4,2020-05-16,Andaman and Nicobar Islands,6677.0,Samples Sent,33.0,,136.0,,16.0,Institutional,...,471.0,https://t.me/indiacovid/4925,,,,0.49%,16.82,16819.0,202.0,397000.0


In [391]:
# Sum the numeric columns per (State, Updated On). numeric_only=True makes
# the exclusion of text columns (tags, source links, percentage strings)
# explicit instead of relying on pandas silently dropping them, which newer
# pandas versions no longer do.
st_test = test2.groupby(by=['State', 'Updated On']).sum(numeric_only=True)

In [392]:
# Odisha's rows, ordered by update date.
st_test.xs('Odisha', level='State').sort_values(by='Updated On')

Unnamed: 0_level_0,Total Tested,Positive,Unconfirmed,Cumulative People In Quarantine,Total People Currently in Quarantine,Total People Released From Quarantine,People in ICU,People on Ventilators,Num Isolation Beds,Num ICU Beds,Num Ventilators,Total PPE,Total N95 Masks,Corona Enquiry Calls,Num Calls State Helpline,Unnamed: 23,Tests per thousand,Tests per million,Tests per positive case,Population NCP 2019 Projection
Updated On,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2020-04-03,1395.0,20.0,267.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,32.0,70.0,43671000.0
2020-04-07,2441.0,42.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,56.0,58.0,43671000.0
2020-04-08,2441.0,42.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,56.0,58.0,43671000.0
2020-04-09,3249.0,48.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07,74.0,68.0,43671000.0
2020-04-10,3547.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,81.0,71.0,43671000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-06-20,219774.0,4856.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,342.0,0.0,0.0,0.0,0.0,0.0,0.0,5.03,5032.0,45.0,43671000.0
2020-06-21,224402.0,5160.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,342.0,0.0,0.0,0.0,0.0,0.0,0.0,5.14,5138.0,43.0,43671000.0
2020-06-22,227860.0,5303.0,0.0,0.0,0.0,0.0,0.0,0.0,5547.0,342.0,0.0,0.0,0.0,0.0,0.0,0.0,5.22,5218.0,43.0,43671000.0
2020-06-23,231356.0,5470.0,0.0,0.0,0.0,0.0,0.0,0.0,5547.0,342.0,0.0,0.0,0.0,0.0,0.0,0.0,5.30,5298.0,42.0,43671000.0
