In [1]:
from collections import defaultdict
import pandas as pd
from os.path import isfile, join
from datetime import datetime
import numpy as np

In [2]:
# Seed the legacy global NumPy RNG so every np.random draw in this notebook is repeatable.
np.random.seed(1)
# Never truncate cell contents when a DataFrame is displayed.
pd.options.display.max_colwidth = None

In [None]:
# Location of the combined patient-level CSV that is loaded into `pdf`.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
pat_file = '/mnt/d/books/iitm/agentBased/data/tn/covid_war_room/patient_data/combined_csv.csv'

In [None]:
# Explicit column dtypes for the patient CSV. 'Int64' (capital I) and "string"
# are pandas' nullable extension dtypes, so missing entries load as <NA>
# instead of forcing the column to float/object.
# NOTE(review): the keys must match the CSV header exactly; "Survellence ID"
# presumably mirrors a misspelling in the source file - verify before "fixing".
types = {
    "Survellence ID": 'float64',
    "Test ID": 'Int64',
    "Lab Patient ID": "string",
    "Age": 'Int64',
    "Gender": "string",
    "Address": "string",
    "Date of Confirmation": "string",
    "Nationality": "string",
    "Total Contact": 'float64',
    "Trace Contact": 'float64',
    "Outcome Status": "string",
    "Name": "string",
    "Contact Number": 'Int64',
}
# Load the patient records using the dtypes declared above.
pdf = pd.read_csv(pat_file, sep=',', dtype=types)

In [None]:
# Display the first two rows of the freshly loaded patient frame.
pdf.head(n=2)

In [None]:
# Scratch check: parse a sample timestamp that includes seconds and show it as a date only.
dstr = '24-05-2021 09:36:59'
format(datetime.strptime(dstr, '%d-%m-%Y %H:%M:%S'), '%d-%m-%Y')

In [None]:
def convert_to_date(date_str, date_frmt):
    """Re-format a timestamp string as a ``YYYY-MM-DD`` date string.

    Args:
        date_str: Timestamp text to parse. Non-string values (``None``, NaN,
            ``pd.NA``) are accepted and treated as unparseable.
        date_frmt: ``datetime.strptime`` format that ``date_str`` must match.

    Returns:
        The date formatted as ``'%Y-%m-%d'``, or ``None`` when ``date_str``
        cannot be parsed with ``date_frmt``.
    """
    try:
        return datetime.strptime(date_str, date_frmt).strftime('%Y-%m-%d')
    except (ValueError, TypeError):
        # ValueError: the text does not match the format.
        # TypeError: the input is not a string (missing value).
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        return None

def get_time(df):
    """Convert every row's 'Date of Confirmation' to a ``YYYY-MM-DD`` string.

    Each value is tried against the minute-resolution format first and then
    the second-resolution format; the first successful conversion wins.

    Args:
        df: DataFrame with a 'Date of Confirmation' column.

    Returns:
        A list with one entry per row, in row order: the converted date
        string, or ``np.nan`` when neither format matches.
    """
    formats = ('%d-%m-%Y %H:%M', '%d-%m-%Y %H:%M:%S')
    converted = []
    for _, record in df.iterrows():
        result = None
        for fmt in formats:
            result = convert_to_date(record['Date of Confirmation'], fmt)
            if result is not None:
                break
        converted.append(np.nan if result is None else result)
    return converted

In [None]:
# One normalised date string (or NaN) per patient row, in row order.
dates = get_time(pdf)

In [None]:
# Attach the normalised dates as a new column (mutates `pdf` in place).
pdf['dates'] = dates

In [None]:
# Bucket ages into 5-year bins (0-4 -> 0, 5-9 -> 1, ...); every age of 75 or
# more lands in the last bin, 15. Floor-division on the nullable Int64 column
# keeps a missing Age as <NA>, whereas the earlier row-wise lambda raised
# "boolean value of NA is ambiguous" on such rows. For non-negative ages
# `// 5` gives the same bin as int(age / 5).
pdf['age_bin'] = (pdf['Age'] // 5).clip(upper=15)

In [None]:
# Display the first two rows again to inspect the added 'dates' and 'age_bin' columns.
pdf.head(n=2)

In [None]:
# Persist the enriched patient frame (with 'dates' and 'age_bin') as CSV,
# without the index column, so later cells can reload it directly.
# NOTE(review): absolute, machine-specific path.
refactor_file = '/mnt/d/books/iitm/agentBased/data/tn/covid_war_room/patient_data/refactored.csv'
pdf.to_csv(refactor_file, sep=',', index=False, header=True)

In [None]:
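## Run simulations from here: the cells below reload refactored.csv, so the preprocessing cells above can be skipped once that file exists.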

In [3]:
# Reload the enriched patient data written by the preprocessing cells.
# NOTE(review): absolute, machine-specific path.
refactor_file = '/mnt/d/books/iitm/agentBased/data/tn/covid_war_room/patient_data/refactored.csv'
# Same schema as the raw patient CSV plus the two derived columns
# ('age_bin', 'dates'). 'Int64' and "string" are pandas' nullable dtypes.
types = {
    "Survellence ID": 'float64',
    "Test ID": 'Int64',
    "Lab Patient ID": "string",
    "Age": 'Int64',
    "Gender": "string",
    "Address": "string",
    "Date of Confirmation": "string",
    "Nationality": "string",
    "Total Contact": 'float64',
    "Trace Contact": 'float64',
    "Outcome Status": "string",
    "Name": "string",
    "Contact Number": 'Int64',
    "age_bin": "Int64",
    "dates": "string"
}
# `pdf` is rebound here to the reloaded frame.
pdf = pd.read_csv(refactor_file, sep=',', dtype=types)

In [4]:
# Display the first two rows of the reloaded frame.
# NOTE(review): the saved output of this cell shows patient address values -
# consider clearing or redacting outputs before sharing the notebook.
pdf.head(n=2)

Unnamed: 0,Survellence ID,Test ID,Lab Patient ID,Age,Gender,Address,Date of Confirmation,Nationality,Total Contact,Trace Contact,Outcome Status,Name,Contact Number,dates,age_bin
0,26933579.0,311629016,COV-4567,22,F,2/25 KOIL ST IDAYAR PALAYAM,24-05-2021 09:36,India,,,Discharge,,,2021-05-24,4
1,26933567.0,311628852,CMCH-21-AN-470,60,M,"7/304, BANNARI AMMAN NAGAR KEERANATHAM (PO) COFEE KADAI",24-05-2021 09:35,India,,,Discharge,,,2021-05-24,12


In [5]:
# District-level case counts file.
# NOTE(review): absolute, machine-specific path.
tn_cases_file = '/mnt/d/books/iitm/agentBased/data/tn/incovid19/cases/cases_district_refactor.csv'
# Column dtypes for the district file; counts use nullable 'Int64' so missing
# values load as <NA>. Note that this rebinds the `types` name used for the
# patient schema.
types = {
    "date":"string",
    "state":"string",
    "district":"string",
    "cum_confirmed":"Int64",
    "cum_recovered":"Int64",
    "cum_deceased":"Int64",
    "cum_tested":"Int64",
    "other":"Int64",
    "new_confirmed":"Int64",
    "new_recovered":"Int64",
    "new_deceased":"Int64",
    "new_tested":"Int64"
}
tn_cases = pd.read_csv(tn_cases_file, sep=',', dtype=types)

In [6]:
def get_per_case_age_cnt(df):
    """Total the 'count' column per date and outcome status.

    Args:
        df: DataFrame with 'dates', 'Outcome Status' and 'count' columns.

    Returns:
        A dict keyed by date whose values are ``defaultdict(int)`` objects
        mapping outcome status to the summed count for that date.
    """
    totals = {}
    for _, record in df.iterrows():
        per_status = totals.setdefault(record['dates'], defaultdict(int))
        per_status[record['Outcome Status']] += record['count']
    return totals

def normalize(df):
    """Rescale 'prob' in place so it sums to 1 within each outcome status.

    Args:
        df: DataFrame with 'Outcome Status' and 'prob' columns. Its 'prob'
            column is overwritten with each value divided by the total
            'prob' of the row's outcome status.

    Returns:
        None; ``df`` is modified in place.
    """
    status_total = defaultdict(int)
    for _, record in df.iterrows():
        status_total[record['Outcome Status']] += record['prob']

    def _share(record):
        # Fraction of this status' total carried by the current row.
        return record['prob'] / status_total[record['Outcome Status']]

    df['prob'] = df.apply(_share, axis=1)

def case_rate(df, frm, to='2021-12-31'):
    """Estimate the age-bin distribution of cases for each outcome status.

    Steps:
      1. Keep rows whose 'dates' lies in ``[frm, to]`` and whose 'Test ID' is
         present. Dates are compared as strings.
      2. Count rows per (outcome status, date, age bin).
      3. Turn each count into that age bin's share of the day's total for the
         same outcome status.
      4. Sum the daily shares per (outcome status, age bin) and rescale them
         so they sum to 1 within each status.
      5. Pivot to one row per outcome status and one column per age bin,
         filling absent combinations with 0.

    Args:
        df: Patient frame with 'dates', 'Test ID', 'Outcome Status' and
            'age_bin' columns.
        frm: Inclusive lower date bound (string).
        to: Inclusive upper date bound (string).

    Returns:
        DataFrame with an 'Outcome Status' column followed by one column per
        age bin present in the window.
    """
    in_window = (df['dates'] >= frm) & (df['dates'] <= to)
    selected = df[in_window & df['Test ID'].notna()]

    date_cnt = (
        selected.groupby(['Outcome Status', 'dates', 'age_bin'])['Test ID']
        .agg(['size'])
        .rename(columns={'size': 'count'})
        .reset_index()
    )

    # Per-date, per-status totals used as the denominator of the daily share.
    day_totals = get_per_case_age_cnt(date_cnt)

    def _daily_share(rec):
        return rec['count'] / day_totals[rec['dates']][rec['Outcome Status']]

    date_cnt['daily_prob'] = date_cnt.apply(_daily_share, axis=1)

    age_prob = (
        date_cnt.groupby(['Outcome Status', 'age_bin'])['daily_prob']
        .agg(['sum'])
        .rename(columns={'sum': 'prob'})
        .reset_index()
    )
    normalize(age_prob)

    wide = age_prob.groupby(['Outcome Status', 'age_bin'])['prob'].agg('first').unstack()
    final_age_prob = wide.fillna(0).reset_index(level=0)
    final_age_prob.columns.name = ''
    return final_age_prob

In [7]:
# Age-bin distribution per outcome status for 2020-04-01 .. 2020-10-31
# (the same window used for the `first_*` frames further down).
case_rate(pdf, '2020-04-01', '2020-10-31')

Unnamed: 0,Outcome Status,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,Active,0.013582,0.0115,0.019719,0.030027,0.074775,0.098734,0.099586,0.099184,0.104207,0.091024,0.099927,0.090169,0.0633,0.0432,0.0272,0.033868
1,Death,0.002332,0.000571,0.00106,0.002061,0.006771,0.009588,0.015824,0.021428,0.035416,0.063286,0.099193,0.131593,0.135605,0.146,0.140373,0.188901
2,Discharge,0.013662,0.017866,0.025228,0.042644,0.088568,0.109626,0.10301,0.099208,0.095046,0.093329,0.086901,0.073249,0.057332,0.040849,0.027526,0.025957
3,Migrated,0.012896,0.011201,0.018267,0.024644,0.0951,0.114142,0.119264,0.106898,0.097974,0.07985,0.090526,0.067501,0.053159,0.044202,0.031055,0.033322
4,Not Updated,0.004304,0.007405,0.015026,0.0197,0.0746,0.181263,0.168763,0.075254,0.10872,0.089815,0.081845,0.077884,0.042476,0.016604,0.012628,0.023713
5,Referred,0.004946,0.00873,0.015818,0.025231,0.068473,0.114175,0.097665,0.097247,0.085383,0.128797,0.092062,0.105449,0.061714,0.04397,0.026132,0.024208


In [8]:
# Age-bin distribution per outcome status for 2021-03-01 .. 2021-08-31
# (the same window used for the `second_*` frames further down).
case_rate(pdf, '2021-03-01', '2021-08-31')

Unnamed: 0,Outcome Status,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,Active,0.008533,0.017045,0.026257,0.048191,0.0786,0.096563,0.101312,0.095409,0.088186,0.090003,0.086773,0.077943,0.066593,0.050394,0.035096,0.033104
1,Death,0.001893,0.00133,0.00195,0.002726,0.005921,0.006105,0.018868,0.032315,0.038666,0.056784,0.087445,0.107647,0.130067,0.154694,0.139851,0.213739
2,Discharge,0.008881,0.016531,0.026918,0.044911,0.080747,0.099876,0.102161,0.098471,0.088719,0.086549,0.083509,0.075201,0.067568,0.048521,0.035817,0.035619
3,Migrated,0.007814,0.015997,0.0223,0.031034,0.073204,0.100208,0.119976,0.108137,0.098076,0.078126,0.080204,0.070651,0.064663,0.049465,0.040494,0.039651
4,Not Updated,0.006889,0.012932,0.020782,0.041626,0.078609,0.097636,0.101614,0.102949,0.092065,0.076667,0.069659,0.075416,0.078326,0.06379,0.035472,0.045569
5,Referred,0.002421,0.004495,0.010299,0.018335,0.046714,0.056254,0.077607,0.085271,0.086367,0.086928,0.096206,0.102392,0.126442,0.063345,0.061198,0.075727


In [7]:
# Lookup used by create_data: for each series name, element 0 is the
# 'Outcome Status' label in the patient data and element 1 is the matching
# count column in the district cases data.
map_keys = {
    'active': ['Active', 'new_confirmed'],
    'death': ['Death', 'new_deceased']
}

def _bin_probability(prob, g):
    """Return ``prob[g]`` as a plain float, unwrapping a one-element Series."""
    value = prob[g]
    if isinstance(value, pd.Series):
        # A one-row DataFrame yields a length-1 Series per column. Calling
        # float() on such a Series is deprecated in pandas, so take the
        # element explicitly.
        if len(value) != 1:
            raise ValueError(
                f"expected exactly one probability for age bin {g}, got {len(value)}"
            )
        value = value.iloc[0]
    return float(value)


def get_num_cases(df, prob, key):
    """Split each row's count across the 16 age bins with Poisson draws.

    For every row of ``df`` (in order) and every age bin ``g`` in 0..15 (in
    order), one value is drawn from ``np.random.poisson(count * p_g)`` using
    the global NumPy RNG, where ``count`` is ``int(row[key])`` and ``p_g`` is
    the probability of bin ``g``.

    Args:
        df: DataFrame holding the counts in column ``key``.
        prob: Object indexable by the age bins 0..15. Each entry may be a
            scalar (dict / Series) or a one-element Series, as obtained from
            a one-row DataFrame whose columns are the age bins.
        key: Name of the count column in ``df``.

    Returns:
        DataFrame with columns 0..15 and one row per row of ``df``, holding
        the drawn counts.

    Raises:
        ValueError: If an entry of ``prob`` is a Series that does not hold
            exactly one value.
    """
    n_bins = 16
    age_group = {g: [] for g in range(n_bins)}
    if len(df) == 0:
        # Nothing to draw; as before, `prob` is not inspected in this case.
        return pd.DataFrame(age_group)

    # The probabilities do not depend on the row, so read them once.
    bin_probs = [_bin_probability(prob, g) for g in range(n_bins)]
    for _, row in df.iterrows():
        new_items = int(row[key])
        for g, p in enumerate(bin_probs):
            age_group[g].append(np.random.poisson(new_items * p))
    return pd.DataFrame(age_group)

def create_data(incovid_df, hosp_df, key, frm, to):
    """Build per-district daily counts split into age bins for one series.

    The age-bin distribution is estimated from the patient data with
    ``case_rate`` for the window ``[frm, to]``, restricted to the outcome
    status mapped from ``key``. The district counts in the same window are
    then split across age bins by ``get_num_cases``.

    Args:
        incovid_df: District cases frame with 'date', 'district' and the
            count column named in ``map_keys[key][1]``.
        hosp_df: Patient frame passed to ``case_rate``.
        key: Entry of ``map_keys`` to use ('active' or 'death').
        frm: Inclusive lower date bound (string).
        to: Inclusive upper date bound (string).

    Returns:
        DataFrame with 'date', 'district', the count column, and one column
        per age bin 0..15 holding the sampled counts.
    """
    age_rates = case_rate(hosp_df, frm, to)
    status_label = map_keys[key][0]
    status_rates = age_rates[age_rates['Outcome Status'] == status_label]

    cases_col = map_keys[key][1]
    in_window = (incovid_df['date'] >= frm) & (incovid_df['date'] <= to)
    wave = incovid_df[in_window][['date', 'district', cases_col]].reset_index(drop=True)

    per_age = get_num_cases(wave, status_rates, cases_col)
    return pd.concat([wave, per_age], axis=1)

In [8]:
# Age-split series for the 2020-04-01 .. 2020-10-31 window.
# Both calls draw from the global NumPy RNG (via get_num_cases), so the
# values depend on the seed and on the order in which these calls run.
frm, to = '2020-04-01', '2020-10-31'
first_active = create_data(tn_cases, pdf, 'active', frm, to)
first_death = create_data(tn_cases, pdf, 'death', frm, to)

# Optional CSV export of the two frames above (currently disabled).
# first_active_file = '/mnt/d/books/iitm/agentBased/data/tn/incovid19/cases/first_active.csv'
# first_active.to_csv(first_active_file, sep=',', index=False, header=True)

# first_death_file = '/mnt/d/books/iitm/agentBased/data/tn/incovid19/cases/first_death.csv'
# first_death.to_csv(first_death_file, sep=',', index=False, header=True)

In [9]:
# Age-split series for the 2021-03-01 .. 2021-08-31 window.
# Both calls draw from the global NumPy RNG (via get_num_cases), so the
# values depend on the seed and on the order in which these calls run.
frm, to = '2021-03-01', '2021-08-31'
second_active = create_data(tn_cases, pdf, 'active', frm, to)
second_death = create_data(tn_cases, pdf, 'death', frm, to)

# Optional CSV export of the two frames above (currently disabled).
# second_active_file = '/mnt/d/books/iitm/agentBased/data/tn/incovid19/cases/second_active.csv'
# second_active.to_csv(second_active_file, sep=',', index=False, header=True)

# second_death_file = '/mnt/d/books/iitm/agentBased/data/tn/incovid19/cases/second_death.csv'
# second_death.to_csv(second_death_file, sep=',', index=False, header=True)