In [1]:
import math
import json
import requests
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nx
import numpy as np

In [15]:
# Load the raw capacity records; the columns used below are Date, Zip_Code,
# Center_ID, Center_Name and Available_Capacity.
df = pd.read_csv('data/chennai2.csv')

In [16]:
# Preview the loaded frame (pandas elides the middle rows of a long frame).
display(df)

Unnamed: 0,Date,Zip_Code,Center_ID,Center_Name,Available_Capacity
0,01-05-2022,600034,603898,Kanchi Kamakoti Child Trust,9
1,01-05-2022,600089,617607,Srushti Hospital Pvt Ltd,29
2,01-05-2022,600089,617607,Srushti Hospital Pvt Ltd,57
3,01-05-2022,600116,855907,Srushti Clinic Porur,25
4,01-05-2022,600116,855907,Srushti Clinic Porur,27
...,...,...,...,...,...
164,30-05-2022,600093,580753,Sooriya Hospital,74
165,31-05-2022,600024,580742,Medway Hospitals,59
166,31-05-2022,600089,617607,Srushti Hospital Pvt Ltd,29
167,31-05-2022,600089,617607,Srushti Hospital Pvt Ltd,30


In [17]:
# Inspect the column dtypes as parsed from the CSV.
df.dtypes

Date                  object
Zip_Code               int64
Center_ID              int64
Center_Name           object
Available_Capacity     int64
dtype: object

In [18]:
# Store the four identifying columns as pandas categoricals; the numeric
# Available_Capacity column is left untouched.
key_columns = ['Date', 'Zip_Code', 'Center_ID', 'Center_Name']
df = df.astype({column: 'category' for column in key_columns})

In [19]:
# Confirm that the four key columns are now categorical.
df.dtypes

Date                  category
Zip_Code              category
Center_ID             category
Center_Name           category
Available_Capacity       int64
dtype: object

In [20]:
# Give every row the summed capacity of all rows that share its
# (Date, Zip_Code, Center_ID, Center_Name) key.
# The keys are categorical, so observed=True restricts grouping to the key
# combinations that actually occur instead of the full cartesian product of
# categories; the transformed values are the same, without the wasted work and
# without the FutureWarning newer pandas emits for the implicit default.
group_keys = ['Date', 'Zip_Code', 'Center_ID', 'Center_Name']
df['Total_available_capacity'] = (
    df.groupby(group_keys, observed=True)['Available_Capacity'].transform('sum')
)

In [21]:
# Keep the first row for each (Date, Zip_Code, Center_ID, Center_Name) key and
# detach the result from df with an explicit copy.
dedup_keys = ['Date', 'Zip_Code', 'Center_ID', 'Center_Name']
hosp_df = df.drop_duplicates(subset=dedup_keys).copy()

In [22]:
# Remove the per-row capacity column, leaving Total_available_capacity, and
# preview the result.
hosp_df = hosp_df.drop(columns='Available_Capacity')
hosp_df

Unnamed: 0,Date,Zip_Code,Center_ID,Center_Name,Total_available_capacity
0,01-05-2022,600034,603898,Kanchi Kamakoti Child Trust,9
1,01-05-2022,600089,617607,Srushti Hospital Pvt Ltd,86
3,01-05-2022,600116,855907,Srushti Clinic Porur,52
5,02-05-2022,600037,604136,MMRV Hospital,19
6,02-05-2022,600085,1037942,Srushti Hospital Kottupuram,29
...,...,...,...,...,...
162,30-05-2022,600089,617607,Srushti Hospital Pvt Ltd,58
164,30-05-2022,600093,580753,Sooriya Hospital,74
165,31-05-2022,600024,580742,Medway Hospitals,59
166,31-05-2022,600089,617607,Srushti Hospital Pvt Ltd,59


In [95]:
# Persist the de-duplicated per-date, per-center capacity table to disk.
hosp_df.to_pickle('data/Hospital_Data_Chennai_May_2022.pkl')

In [96]:
# Number of distinct centers and distinct dates in hosp_df.
# dropna=False keeps the count equal to len(unique()), which includes a missing
# value if one is present.
total_centers = hosp_df['Center_ID'].nunique(dropna=False)
total_dates = hosp_df['Date'].nunique(dropna=False)

In [97]:
# Distinct zip codes appearing in hosp_df, as a plain Python list.
# Zip_Code is categorical, so unique() returns a Categorical; tolist() is the
# supported conversion across pandas versions (to_list() on a Categorical is
# deprecated in recent releases).
zip_codes = hosp_df['Zip_Code'].unique().tolist()
total_zips = len(zip_codes)

In [98]:
# Order the zip codes ascending.
zip_codes = sorted(zip_codes)

In [99]:
# Load the precomputed distance table.
# NOTE(review): unpickling can execute arbitrary code; only read files you trust.
# presumably each row is (zip_a, zip_b, distance), judging by how the rows are
# indexed when the lookup dict is built from it; confirm against the file.
distance_dict_df = pd.read_pickle('data/Distance_Data.pkl')

In [100]:
# Turn the distance table into a lookup keyed by the first two fields of each
# row, with the third field as the value.
distance_list = distance_dict_df.values.tolist()
distance_dict = {(row[0], row[1]): row[2] for row in distance_list}

In [101]:
# Inspect the lookup: keys are (zip, zip) tuples, values are distances.
# NOTE(review): this prints every entry; consider showing only a small sample.
distance_dict

{(600004, 600006): 4251,
 (600004, 600018): 2206,
 (600004, 600020): 4867,
 (600004, 600024): 6399,
 (600004, 600028): 2442,
 (600004, 600032): 10993,
 (600004, 600034): 5633,
 (600004, 600035): 3715,
 (600004, 600037): 14994,
 (600004, 600083): 9641,
 (600004, 600085): 5174,
 (600004, 600086): 2543,
 (600004, 600089): 12974,
 (600004, 600093): 10940,
 (600004, 600096): 10173,
 (600004, 600102): 8677,
 (600004, 600106): 9870,
 (600004, 600116): 17681,
 (600006, 600018): 3105,
 (600006, 600020): 8056,
 (600006, 600024): 3410,
 (600006, 600028): 4467,
 (600006, 600032): 7933,
 (600006, 600034): 2963,
 (600006, 600035): 3708,
 (600006, 600037): 12005,
 (600006, 600083): 6653,
 (600006, 600085): 4788,
 (600006, 600086): 2026,
 (600006, 600089): 12537,
 (600006, 600093): 7951,
 (600006, 600096): 12848,
 (600006, 600102): 6007,
 (600006, 600106): 6881,
 (600006, 600116): 17245,
 (600018, 600020): 6356,
 (600018, 600024): 5158,
 (600018, 600028): 2345,
 (600018, 600032): 8856,
 (600018, 60003

In [108]:
# Distinct Center_IDs in sorted order. The frame is sorted first because
# list.sort() returns None and therefore cannot be chained onto list(...).
centers_by_id = hosp_df.sort_values(by='Center_ID')
nodes = list(centers_by_id['Center_ID'].unique())

In [109]:
# Lookups keyed by Center_ID. If an ID occurs on several rows, the value from
# the last such row is the one kept.
hosp_zip_dict = {
    center_id: zip_code
    for center_id, zip_code in zip(hosp_df['Center_ID'], hosp_df['Zip_Code'])
}
center_id_hosp_name_dict = {
    center_id: center_name
    for center_id, center_name in zip(hosp_df['Center_ID'], hosp_df['Center_Name'])
}

In [110]:
# Build the weighted edge list between centers.
#   * centers in the same zip code are linked with weight 0;
#   * centers in different zip codes are linked only when the zip-to-zip
#     distance is at most MAX_LINK_DISTANCE, with weight distance / 1000.
# Each unordered pair is visited once, as (larger id, smaller id).
# presumably distances are in metres and weights in kilometres; confirm
# against the distance data.
MAX_LINK_DISTANCE = 5000

edges = []
for x in nodes:
    for y in nodes:
        if x <= y:
            continue
        x_zip = hosp_zip_dict[x]
        y_zip = hosp_zip_dict[y]

        if x_zip == y_zip:
            edges.append((x, y, 0))
            continue

        # distance_dict keys are looked up with the smaller zip code first.
        pair_distance = distance_dict[(min(x_zip, y_zip), max(x_zip, y_zip))]
        if pair_distance <= MAX_LINK_DISTANCE:
            edges.append((x, y, pair_distance / 1000))

In [111]:
# Create an empty undirected graph for the center network.
G = nx.Graph()

In [112]:
# Add one node per Center_ID, so centers without any edge still appear in G.
G.add_nodes_from(nodes)

In [113]:
# Each (u, v, w) triple in edges becomes an edge u-v with attribute weight=w.
G.add_weighted_edges_from(edges)

In [114]:
# elarge = [(u, v) for (u, v, d) in G.edges(data=True) if d["weight"] > 5]
# esmall = [(u, v) for (u, v, d) in G.edges(data=True) if d["weight"] <= 5]

# pos = nx.spring_layout(G, weight='weight', k=0.7)  # positions for all nodes - seed for reproducibility

# # nodes
# nx.draw_networkx_nodes(G, pos, node_size=50)

# # edges
# nx.draw_networkx_edges(G, pos, edgelist=elarge, width=1)
# nx.draw_networkx_edges(G, pos, edgelist=esmall, width=1, alpha=0.5, edge_color="b", style="dashed")

# # node labels
# nx.draw_networkx_labels(G, pos, font_size=10, font_family="sans-serif")

# # edge weight labels
# edge_labels = nx.get_edge_attributes(G, "weight")
# nx.draw_networkx_edge_labels(G, pos, edge_labels)

# fig, ax = plt.subplots(figsize=(10,10))
# # ax = plt.gca()
# # ax.margins(0.08)
# # plt.axis("off")
# # plt.tight_layout()
# plt.show()
# plt.savefig('test123.png')
# # plt.savefig('foo.pdf')

In [115]:
# Work on a copy so the per-day aggregation does not add columns to hosp_df.
daily_supply = hosp_df.copy()

In [116]:
# Give every row the summed Total_available_capacity of its Date.
# Date is categorical; observed=True groups only the dates that occur and
# avoids the FutureWarning newer pandas emits for the implicit default. The
# transformed values are unchanged.
daily_supply['Total_available_capacity_per_day'] = (
    daily_supply
    .groupby('Date', observed=True)['Total_available_capacity']
    .transform('sum')
)

In [117]:
# Keep a single row per date (the first one encountered).
daily_supply = daily_supply.drop_duplicates(subset='Date')

In [118]:
# Retain only the date and its per-day total.
daily_supply = daily_supply.loc[:, ['Date', 'Total_available_capacity_per_day']]

In [119]:
# Renumber the rows 0..n-1, discarding the old index, and preview the table.
daily_supply = daily_supply.reset_index(drop=True)
daily_supply

Unnamed: 0,Date,Total_available_capacity_per_day
0,01-05-2022,147
1,02-05-2022,189
2,03-05-2022,178
3,04-05-2022,214
4,05-05-2022,80
5,06-05-2022,537
6,07-05-2022,495
7,08-05-2022,109
8,09-05-2022,170
9,10-05-2022,220


In [120]:
# Persist the per-day capacity totals to disk.
daily_supply.to_pickle('data/daily_supply.pkl')

In [121]:
# Work on a copy so the per-center aggregation does not add columns to hosp_df.
overall_quota = hosp_df.copy()

In [122]:
# Give every row the summed Total_available_capacity of its center across all
# dates.
# Center_ID is categorical; observed=True groups only the centers that occur
# and avoids the FutureWarning newer pandas emits for the implicit default.
# The transformed values are unchanged.
overall_quota['Total_available_capacity_per_hosp'] = (
    overall_quota
    .groupby('Center_ID', observed=True)['Total_available_capacity']
    .transform('sum')
)

In [123]:
# Keep a single row per center (the first one encountered).
overall_quota = overall_quota.drop_duplicates(subset='Center_ID')

In [124]:
# Retain only the center identity and its overall total.
quota_columns = ['Center_ID', 'Center_Name', 'Total_available_capacity_per_hosp']
overall_quota = overall_quota.loc[:, quota_columns]

In [125]:
# Order the centers by Center_ID (kept categorical, not cast back to int64),
# renumber the rows 0..n-1 and preview the table.
overall_quota = (
    overall_quota
    .sort_values(by='Center_ID')
    .reset_index(drop=True)
)
overall_quota

Unnamed: 0,Center_ID,Center_Name,Total_available_capacity_per_hosp
0,580742,Medway Hospitals,59
1,580753,Sooriya Hospital,74
2,603197,Prime Indian Hospital,30
3,603362,Apollo Spec Hosp,29
4,603579,Apollo Speciality Hospitals,5
5,603702,Trinity Acute Care Hospital,189
6,603731,Apollo Spectra Hospitals,40
7,603898,Kanchi Kamakoti Child Trust,9
8,604136,MMRV Hospital,108
9,604384,Fortis Malar Hospital,1193


In [126]:
# Persist the per-center capacity totals to disk.
overall_quota.to_pickle('data/overall_quota.pkl')

In [127]:
# Random 0/1 matrix with one row per agent (10000) and one column per day (30).
# A locally seeded Generator makes the saved file reproducible across kernel
# restarts; the seed value itself is arbitrary.
# NOTE(review): the capacity data shown above runs through 31-05-2022 while
# only 30 day columns are generated here; confirm whether 30 is intended.
availability_rng = np.random.default_rng(42)
availability = availability_rng.integers(2, size=(10000, 30))

In [128]:
# Label the 30 availability columns day_1 ... day_30.
day_labels = [f"day_{day}" for day in range(1, 31)]
availability_df = pd.DataFrame(availability, columns=day_labels)

In [129]:
# Persist the agent-by-day availability table to disk.
availability_df.to_pickle('data/availability_df.pkl')

In [131]:
# All-zero agent-by-center matrix: 10000 rows, one column per Center_ID.
belongingness = np.zeros(shape=(10000, total_centers), dtype=int)
belongingness_df = pd.DataFrame(data=belongingness, columns=nodes)

In [132]:
# For every agent, draw one center uniformly at random and flag that center
# together with all of its neighbours in G.
# The frame is cleared first so re-running this cell does not stack a second
# random neighbourhood on top of rows that are already filled.
# A locally seeded Generator makes the result reproducible; the seed value is
# arbitrary and should stay distinct from seeds used by other random draws.
belongingness_df.loc[:, :] = 0
belongingness_rng = np.random.default_rng(43)
for i in belongingness_df.index:
    i_hosp = belongingness_rng.choice(nodes)
    belongingness_df.at[i, i_hosp] = 1
    for j_hosp in G.neighbors(i_hosp):
        belongingness_df.at[i, j_hosp] = 1

In [133]:
# Preview the agent-by-center membership flags.
belongingness_df

Unnamed: 0,580742,580753,603197,603362,603579,603702,603731,603898,604136,604384,...,619367,620736,624694,625837,631848,634485,700076,725814,855907,1037942
0,0,0,0,0,1,1,1,0,0,0,...,0,0,1,0,0,0,1,1,0,1
1,1,0,0,0,1,0,0,1,0,0,...,0,1,1,1,0,0,1,0,0,0
2,1,0,1,0,1,0,0,1,0,0,...,1,1,1,1,0,0,1,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,1,1,0,0,0,0,0,0,0,0
4,1,0,0,0,1,0,0,1,0,0,...,0,1,1,1,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
9996,1,0,0,0,1,0,0,1,0,0,...,0,1,1,1,0,0,1,0,0,0
9997,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
9998,1,0,1,0,1,0,0,1,0,0,...,1,1,1,1,0,0,1,0,0,0


In [134]:
# Persist the agent-by-center membership table to disk.
belongingness_df.to_pickle('data/belongingness_df.pkl')

In [2]:
# Probabilities of the three age groups (indices 0, 1, 2). The last entry is
# the remainder, so the three values sum to 1.
first_share = 0.669527489162747
second_share = 0.204899888606438
age_group_fractions = [first_share, second_share, 1 - first_share - second_share]
age_group_fractions

[0.669527489162747, 0.204899888606438, 0.12557262223081503]

In [3]:
# Draw an age group (0, 1 or 2) for each of the 10000 agents with
# probabilities age_group_fractions.
# One vectorized draw replaces 10000 scalar calls, and the locally seeded
# Generator makes the saved file reproducible; the seed value is arbitrary.
# The result keeps the original shape: a list of single-element lists.
age_rng = np.random.default_rng(44)
age_draws = age_rng.choice(len(age_group_fractions), size=10000, p=age_group_fractions)
agents_type = [[i_age] for i_age in age_draws.tolist()]

In [5]:
# Wrap the sampled age groups in a single-column frame, one row per agent.
agents_type_df = pd.DataFrame(agents_type, columns=['Age_group'])

In [6]:
# Persist the agent age-group table to disk.
agents_type_df.to_pickle('data/agents_type.pkl')

In [7]:
# Preview the sampled age groups.
agents_type_df

Unnamed: 0,Age_group
0,0
1,2
2,1
3,2
4,0
...,...
9995,1
9996,0
9997,0
9998,0
