In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from datetime import date
from datetime import timedelta
import time
%matplotlib inline
from pathlib import Path 
import os
from matplotlib.ticker import ScalarFormatter
from matplotlib import ticker

In [2]:
#Yesterday's date, formatted DD-MM-YYYY, names the data file to load
today=date.today()
yesterday=today-timedelta(days=1)
ydate=yesterday.strftime("%d-%m-%Y")

#Specify Data folder
data_folder = Path("../data")

#Compute filename
data_file = ydate + ".csv"

#Compute complete filepath for the day
file_path = data_folder / data_file

#Read file to data frame
df=pd.read_csv(file_path)

#Dates in the file are day-first strings; parse them to datetime64
df['Date']= pd.to_datetime(df['Date'], dayfirst=True)

#Drop today's data. The column holds datetime64 values, so compare against a
#Timestamp rather than a datetime.date: pandas warns that it will stop coercing
#datetime.date in such comparisons, after which the two would never compare equal.
df=df[df['Date']!=pd.Timestamp(today)]

#Split the records by case status
df_h=df[df['Status']=='Hospitalized']
df_r=df[df['Status']=='Recovered']
df_d=df[df['Status']=='Deceased']

'datetime.date' is coerced to a datetime. In the future pandas will
not coerce, and 'the values will not compare equal to the
'datetime.date'. To retain the current behavior, convert the
'datetime.date' to a datetime with 'pd.Timestamp'.


In [3]:
#Daily new hospitalized cases in Tamil Nadu, summed per (Date, District)
tamilnadu = (
    df_h.loc[df_h['State'] == 'Tamil Nadu']
    .groupby(['Date', 'District'])['Num_Cases']
    .sum()
    .to_frame('Daily New Cases')
)

#One row per date and one column per district; dates with no report count as 0
dist_daily = tamilnadu.pivot_table(index='Date', columns='District', fill_value=0)
dist_daily.tail()

Unnamed: 0_level_0,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases,Daily New Cases
District,Airport Quarantine,Ariyalur,Chengalpattu,Chennai,Coimbatore,Cuddalore,Dharmapuri,Dindigul,Erode,Kallakurichi,...,Thiruvarur,Thoothukkudi,Tiruchirappalli,Tirunelveli,Tirupathur,Tiruppur,Tiruvannamalai,Vellore,Viluppuram,Virudhunagar
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2020-05-10,0,4,43,509,0,1,0,0,0,0,...,0,0,0,10,1,0,0,3,6,2
2020-05-11,0,33,90,538,0,0,2,1,0,0,...,0,3,0,0,0,0,10,1,0,1
2020-05-12,4,36,35,510,0,1,0,2,0,2,...,0,2,2,3,0,0,13,1,1,4
2020-05-13,5,4,25,380,0,17,0,0,0,0,...,0,1,0,5,0,0,23,0,7,0
2020-05-14,0,0,9,363,0,0,0,1,0,0,...,0,3,0,16,0,0,8,0,0,0


In [4]:
#Cumulative cases per Tamil Nadu district: running total of the daily counts,
#with the top column level relabelled to match
dist_cum = dist_daily.cumsum().rename(
    columns={'Daily New Cases': 'Cumulative Cases'}
)
dist_cum.tail()

Unnamed: 0_level_0,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases,Cumulative Cases
District,Airport Quarantine,Ariyalur,Chengalpattu,Chennai,Coimbatore,Cuddalore,Dharmapuri,Dindigul,Erode,Kallakurichi,...,Thiruvarur,Thoothukkudi,Tiruchirappalli,Tirunelveli,Tirupathur,Tiruppur,Tiruvannamalai,Vellore,Viluppuram,Virudhunagar
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2020-05-10,0,265,261,3834,146,394,4,107,70,55,...,33,30,63,93,28,114,83,33,308,40
2020-05-11,0,298,351,4372,146,394,6,108,70,55,...,33,33,63,93,28,114,93,34,308,41
2020-05-12,4,334,386,4882,146,395,6,110,70,57,...,33,35,65,96,28,114,106,35,309,45
2020-05-13,9,338,411,5262,146,412,6,110,70,57,...,33,36,65,101,28,114,129,35,316,45
2020-05-14,9,338,420,5625,146,412,6,111,70,57,...,33,39,65,117,28,114,137,35,316,45


In [5]:
#Arrange districts in descending order of latest cumulative cases
trans=dist_cum.transpose()

#After the transpose the columns are the datetime64 Date labels, so look
#yesterday up with a Timestamp instead of relying on pandas coercing a
#datetime.date key.
latest=trans[pd.Timestamp(yesterday)]
sort_dist=pd.DataFrame(latest.sort_values(ascending=False))

#Ranked row labels as a list of ('Cumulative Cases', district name) tuples
mylist=sort_dist.index.tolist()

In [11]:
#Each entry of mylist is a tuple whose second element is the district name
dist_list = [entry[1] for entry in mylist]

#Split the ranked districts into groups of ten; `last` holds everything after rank 30
top10, second10, third10 = dist_list[:10], dist_list[10:20], dist_list[20:30]
last = dist_list[30:]

In [14]:
#Cumulative case series for the top 10 districts, in rank order.
#Built in one step from the list itself rather than range(10), so it does not
#raise IndexError when fewer than ten districts are available.
top10cum=pd.DataFrame(
    {district: dist_cum[('Cumulative Cases', district)] for district in top10}
)
#top10cum.tail()

Unnamed: 0_level_0,Chennai,Thiruvallur,Chengalpattu,Cuddalore,Ariyalur,Viluppuram,Kancheepuram,Perambalur,Coimbatore,Tiruvannamalai
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-05-10,3834,346,261,394,265,308,121,113,146,83
2020-05-11,4372,443,351,394,298,308,129,114,146,93
2020-05-12,4882,470,386,395,334,309,153,141,146,106
2020-05-13,5262,495,411,412,338,316,157,142,146,129
2020-05-14,5625,510,420,412,338,316,165,146,146,137


In [16]:
#Cumulative case series for the districts ranked 11-20, in rank order.
#Built in one step from the list itself rather than range(10), so it does not
#raise IndexError when this group holds fewer than ten districts.
second10cum=pd.DataFrame(
    {district: dist_cum[('Cumulative Cases', district)] for district in second10}
)
#second10cum.tail()

Unnamed: 0_level_0,Madurai,Tirunelveli,Tiruppur,Dindigul,Namakkal,Ranipet,Theni,Erode,Thanjavur,Tiruchirappalli
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-05-10,118,93,114,107,77,65,60,70,62,63
2020-05-11,122,93,114,108,77,66,60,70,65,63
2020-05-12,122,96,114,110,77,75,67,70,65,65
2020-05-13,124,101,114,110,77,75,72,70,66,65
2020-05-14,133,117,114,111,77,75,73,70,66,65


In [17]:
#Daily new case series for the top 10 districts (top10 is ranked by cumulative
#cases, not by daily cases). Built from the list itself rather than range(10),
#so it does not raise IndexError when fewer than ten districts are available.
top10new=pd.DataFrame(
    {district: dist_daily[('Daily New Cases', district)] for district in top10}
)

In [18]:
#Daily new case series for the districts ranked 11-20 (second10 is ranked by
#cumulative cases). Built from the list itself rather than range(10), so it
#does not raise IndexError when this group holds fewer than ten districts.
second10new=pd.DataFrame(
    {district: dist_daily[('Daily New Cases', district)] for district in second10}
)

In [19]:
#7-day rolling average of daily new cases for the top 10 districts.
#Rolling over the whole frame handles every column at once, with no hard-coded
#column count; the first six rows are NaN until a full 7-day window exists.
top10new7dra=top10new.rolling(window=7).mean()

In [50]:
#Copy of the first ten rolling-average columns with the Date index replaced
#by a plain 0..n-1 row counter
top10new7dra2 = pd.DataFrame(
    {top10new7dra.columns[k]: top10new7dra.iloc[:, k] for k in range(10)}
).reset_index(drop=True)

In [51]:
#For each district, keep its 7-day average from the first day on which that
#average lies strictly between 1 and 6 cases (the 1-6 band is used so that
#every district is picked up). Districts that never enter the band are skipped.
#The whole column is searched, so there is no hard-coded row limit to overrun.
start_slices={}
for name, avg in top10new7dra2.items():
    in_band=(avg > 1.0) & (avg < 6.0) #NaN rows of the rolling average compare False
    if in_band.any():
        #idxmax on a boolean Series gives the label of the first True row
        start_slices[name]=avg.loc[in_band.idxmax():]

#Combine on the shared row labels, so a district that enters the band later
#than another has NaN at the top; then replace the labels with 0..n-1
top10new7dra_dat=pd.DataFrame(start_slices).reset_index(drop=True)

#Remove NaN at top and align rows for different districts: row 0 of every
#column is that district's own first in-band day. dropna/reset_index return
#new Series here, instead of relying on in-place edits of a column selection
#reaching the parent frame.
top10new7dra_dat2=pd.DataFrame({
    name: col.dropna().reset_index(drop=True)
    for name, col in top10new7dra_dat.items()
})

In [52]:
#Graph start date is 23 March 2020; x is the number of days from then to today, as an integer
x=(today-date(2020,3,23)).days

In [57]:
#State-wide daily new hospitalized cases for Tamil Nadu, one row per date
tn = df_h.loc[df_h['State'] == 'Tamil Nadu']
tn_daily = (
    tn.groupby('Date')['Num_Cases']
    .sum()
    .to_frame('Daily New Cases')
)

Unnamed: 0_level_0,Daily New Cases
Date,Unnamed: 1_level_1
2020-05-10,669
2020-05-11,798
2020-05-12,716
2020-05-13,509
2020-05-14,447


In [59]:
#State-wide cumulative cases for Tamil Nadu: running total of the daily counts
tn_cum = tn_daily.cumsum().rename(
    columns={'Daily New Cases': 'Cumulative Cases'}
)

Unnamed: 0_level_0,Cumulative Cases
Date,Unnamed: 1_level_1
2020-05-10,7204
2020-05-11,8002
2020-05-12,8718
2020-05-13,9227
2020-05-14,9674


# Plots Start Here