In [None]:
import pandas as pd
import numpy as np
from collections import defaultdict
from datetime import datetime
import matplotlib.pyplot as plt

In [None]:
## Dataset from the incovid19 website

In [None]:
# Both raw incovid19 inputs live under the same directory; build the two
# paths from that shared prefix.
_incovid19_dir = '/mnt/d/books/iitm/agentBased/data/tn/incovid19'
cases_file = _incovid19_dir + '/cases/original.csv'
vaccine_file = _incovid19_dir + '/vaccine/original.csv'

In [None]:
# Column dtypes for the raw cases CSV: the identifying columns are read as
# pandas strings and the counters as nullable Int64.
text_columns = ("Date", "State", "District")
count_columns = ("Confirmed", "Recovered", "Deceased", "Other", "Tested")
types = {
    **dict.fromkeys(text_columns, "string"),
    **dict.fromkeys(count_columns, "Int64"),
}
# read_csv already splits on commas by default.
df_cases = pd.read_csv(cases_file, dtype=types)

In [None]:
# Keep only Tamil Nadu rows, switch to snake_case column names (cumulative
# counters get a cum_ prefix), and order the rows by the date column.
column_names = {
    "Confirmed": "cum_confirmed",
    "Recovered": "cum_recovered",
    "Deceased": "cum_deceased",
    "Tested": "cum_tested",
    "State": "state",
    "District": "district",
    "Date": "date",
    "Other": "other",
}
tn_cases = (
    df_cases[df_cases['State'] == 'Tamil Nadu']
    .rename(columns=column_names)
    .sort_values('date')
    .reset_index(drop=True)
)
tn_cases.head(2)

In [None]:
# Line chart of the cumulative confirmed count against date for Chennai.
fig, ax = plt.subplots(figsize=(15, 8))
is_chennai = tn_cases['district'] == 'Chennai'
tn_cases.loc[is_chennai].plot(x='date', y='cum_confirmed', kind='line', ax=ax)

In [None]:
# Erode rows that actually carry a cumulative tested figure.
has_tested = tn_cases['cum_tested'].notna()
is_erode = tn_cases['district'] == 'Erode'
tn_cases[has_tested & is_erode]

In [None]:
def get_daily_cases(row, key, prev_cnt):
    """Turn one row's cumulative count into a per-row increment.

    Parameters
    ----------
    row : mapping-like
        Must provide ``row[key]`` (the cumulative value) and ``row['district']``.
    key : str
        Name of the cumulative column to difference.
    prev_cnt : mutable mapping
        Last cumulative value seen per district. It is updated in place, and
        lookups for unseen districts must succeed (the notebook passes a
        ``defaultdict(int)``).

    Returns
    -------
    The increase over the district's previous cumulative value, or 0 when the
    cumulative value is missing, zero, or lower than the previous one.
    """
    cumulative = row[key]
    # Missing or zero readings carry no information: report no new cases and
    # leave the district's running total untouched.
    if pd.isnull(cumulative) or cumulative == 0:
        return 0

    district = row['district']
    delta = cumulative - prev_cnt[district]
    # A drop in the cumulative series is reported as 0 rather than a negative
    # count; the running total below still moves to the new (lower) value.
    increment = delta if delta >= 0 else 0
    prev_cnt[district] = cumulative
    return increment

In [None]:
# Running per-district cumulative confirmed total; get_daily_cases reads and
# updates it as apply walks the rows, so the result depends on row order.
prev_confirmed_cnt = defaultdict(int)
tn_cases['new_confirmed'] = tn_cases.apply(
    get_daily_cases,
    axis=1,
    args=('cum_confirmed', prev_confirmed_cnt),
)

In [None]:
# Running per-district cumulative recovered total; get_daily_cases reads and
# updates it as apply walks the rows, so the result depends on row order.
prev_recovered_cnt = defaultdict(int)
tn_cases['new_recovered'] = tn_cases.apply(
    get_daily_cases,
    axis=1,
    args=('cum_recovered', prev_recovered_cnt),
)

In [None]:
# Running per-district cumulative deceased total; get_daily_cases reads and
# updates it as apply walks the rows, so the result depends on row order.
prev_deceased_cnt = defaultdict(int)
tn_cases['new_deceased'] = tn_cases.apply(
    get_daily_cases,
    axis=1,
    args=('cum_deceased', prev_deceased_cnt),
)

In [None]:
# Running per-district cumulative tested total; get_daily_cases reads and
# updates it as apply walks the rows, so the result depends on row order.
prev_tested_cnt = defaultdict(int)
tn_cases['new_tested'] = tn_cases.apply(
    get_daily_cases,
    axis=1,
    args=('cum_tested', prev_tested_cnt),
)

In [None]:
# Preview the first two rows; for a closer spot check, slice a single
# district instead (e.g. the Chennai rows 325-375).
tn_cases.head(2)

In [None]:
# Write the Tamil Nadu frame to CSV without the index column; the comma
# separator and the header row are to_csv defaults.
refactor_file = '/mnt/d/books/iitm/agentBased/data/tn/incovid19/cases/cases_district_refactor.csv'
tn_cases.to_csv(refactor_file, index=False)