In [1]:
import pandas as pd
with open('./coronavirusdataset/PatientInfo.csv', 'rb') as infile:
    patient_info = pd.read_csv(infile)
    
with open('./coronavirusdataset/PatientRoute.csv', 'rb') as infile:
    patient_route = pd.read_csv(infile)

In [2]:
len(patient_info.patient_id.unique())

3519

In [3]:
len(patient_route.patient_id.unique())

1319

In [4]:
data = pd.merge(patient_info, patient_route, on='patient_id')
data = data[data['province_x'] == 'Seoul']

In [5]:
data

Unnamed: 0,patient_id,global_num_x,sex,birth_year,age,country,province_x,city_x,disease,infection_case,...,released_date,deceased_date,state,global_num_y,date,province_y,city_y,type,latitude,longitude
0,1000000001,2.0,male,1964.0,50s,Korea,Seoul,Gangseo-gu,,overseas inflow,...,2020-02-05,,released,2.0,2020-01-22,Gyeonggi-do,Gimpo-si,airport,37.615246,126.715632
1,1000000001,2.0,male,1964.0,50s,Korea,Seoul,Gangseo-gu,,overseas inflow,...,2020-02-05,,released,2.0,2020-01-24,Seoul,Jung-gu,hospital,37.567241,127.005659
2,1000000002,5.0,male,1987.0,30s,Korea,Seoul,Jungnang-gu,,overseas inflow,...,2020-03-02,,released,5.0,2020-01-25,Seoul,Seongbuk-gu,etc,37.592560,127.017048
3,1000000002,5.0,male,1987.0,30s,Korea,Seoul,Jungnang-gu,,overseas inflow,...,2020-03-02,,released,5.0,2020-01-26,Seoul,Seongbuk-gu,store,37.591810,127.016822
4,1000000002,5.0,male,1987.0,30s,Korea,Seoul,Jungnang-gu,,overseas inflow,...,2020-03-02,,released,5.0,2020-01-26,Seoul,Seongdong-gu,public_transportation,37.563992,127.029534
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3257,1000000711,10987.0,,,,Korea,Seoul,Nowon-gu,,overseas inflow,...,,,isolated,10987.0,2020-05-13,Seoul,Jungnang-gu,hospital,37.612772,127.098167
3258,1000000712,,,,,Korea,Seoul,Gangbuk-gu,,,...,,,isolated,,2020-04-30,Seoul,Yeongdeungpo-gu,etc,37.539111,126.903394
3259,1000000712,,,,,Korea,Seoul,Gangbuk-gu,,,...,,,isolated,,2020-05-09,Seoul,Yeongdeungpo-gu,beauty_salon,37.535056,126.899351
3260,1000000712,,,,,Korea,Seoul,Gangbuk-gu,,,...,,,isolated,,2020-05-12,Seoul,Yeongdeungpo-gu,hospital,37.533121,126.903804


In [6]:
len(data.patient_id.unique())

640

In [7]:
pd.isna(data['infected_by']).sum()

2733

In [8]:
import networkx as nx

#instantiation of the undirected graph
G = nx.Graph()
G.add_nodes_from(list(data['patient_id'].unique()))


for idx in data.index:
    if not(pd.isna(data['infected_by'][idx])):
        G.add_edge(data['patient_id'][idx],data['infected_by'][idx], weight=float(1))
        


len(G), G.size()

(644, 111)

In [9]:
## making groups of people who may have been infecting each other
groups = data.groupby(['latitude', 'longitude', 'type'])['patient_id'].unique().apply(list).tolist()

In [28]:
groups[143]

[1000000350, 1000000351]

In [31]:
data[data.patient_id == 1000000350][['latitude', 'longitude', 'type']]

Unnamed: 0,latitude,longitude,type
1807,37.456256,126.705206,airport
1808,37.460191,126.440696,airport
1809,37.500014,126.923893,hospital
1810,37.504569,126.940919,hospital
1811,37.474912,126.980874,lodging
1812,37.473636,126.981142,etc
1813,37.473697,126.981411,etc
1814,37.474912,126.980874,lodging


In [33]:
data[data.patient_id == 1000000351][['latitude', 'longitude', 'type']]

Unnamed: 0,latitude,longitude,type
1815,37.456256,126.705206,airport
1816,37.460191,126.440696,airport
1817,37.500014,126.923893,hospital
1818,37.504569,126.940919,hospital
1819,37.474912,126.980874,lodging
1820,37.473636,126.981142,etc
1821,37.473697,126.981411,etc
1822,37.474912,126.980874,lodging


In [11]:
for elt in groups:
    if len(elt) > 1:
        for i in range(len(elt)-1):
            G.add_edge(elt[i], elt[i++1], t=float(0.1))
            
len(G), G.size()

(644, 1170)

In [15]:
import numpy as np

char_path_length = 'not connected'
pos_max = np.argmax([len(listElem) for listElem in sorted(nx.connected_components(G))])
char_path_length_component = [nx.average_shortest_path_length(G.subgraph(g)) for g in sorted(nx.connected_components(G))][pos_max]

density = nx.density(G)
degree = nx.average_neighbor_degree(G)
degree = sum(degree.values())/len(degree.values())
transitivity = nx.transitivity(G)
clustering_coef = nx.average_clustering(G)

n_connected_comp = nx.number_connected_components(G)
size_connected_comp = sum([len(listElem) for listElem in sorted(nx.connected_components(G))])/len(sorted(nx.connected_components(G)))

print(f'original characteristic path length {char_path_length}, \noriginal characteristic path length of biggest component {char_path_length_component},\noriginal network density {density},\noriginal network average degree {degree},\noriginal transitivity {transitivity}, \noriginal average clustering coefficient {clustering_coef}, \noriginal number of connected components {n_connected_comp}, \noriginal average size of a connected component {size_connected_comp}')

original characteristic path length not connected, 
original characteristic path length of biggest component 5.6461423956740555,
original network density 0.005650918153453822,
original network average degree 4.256744183180517,
original transitivity 0.06745737583395107, 
original average clustering coefficient 0.07180198166223016, 
original number of connected components 14, 
original average size of a connected component 46.0


In [18]:
import Visualizer

In [34]:
def simulate_sirs(G,beta,gamma,xi,f,lst_initial_immune,lst_initial_infected):
    attDict = {}
    for node in lst_initial_immune:
        attDict[node] = [0]
    for node in lst_initial_infected:
        attDict[node] = [1]
    t = 0
    D = []
    D.append([len(lst_initial_immune), len(lst_initial_infected), 0])
    while t < f:
        S = 0
        I = 0
        R = 0
        for node in list(attDict.keys()):
            if attDict[node][t] == 0:
                if np.random.random() < beta:
                    attDict[node].append(1)
                    I += 1
                else:
                    attDict[node].append(0)
                    S += 1
            if attDict[node][t] == 1:
                if np.random.random() < gamma:
                    attDict[node].append(2)
                    R += 1
                else:
                    attDict[node].append(1)
                    I += 1
            if attDict[node][t] == 2:
                if np.random.random() < xi:
                    attDict[node].append(0)
                    S += 1
                else:
                    attDict[node].append(2)
                    R += 1
        D.append([S,I,R])
        t+=1
    nx.set_node_attributes(G,attDict,"state")
    return D

In [45]:
D = simulate_sirs(G, 2.5, 0.4, 0.1, 10, list(G.nodes)[:640], list(G.nodes)[640:])

In [None]:
Visualizer.showSIRS(G, "test_simulation.html", 2.5, 0.4, 0.1, 10, D)

In [None]:
Visualizer.showData(D, "test_data.html", 2.5, 0.4, 0.1)

In [50]:
import ndlib.models.ModelConfig as mc
import ndlib.models.epidemics as ep

# Model selection
model = ep.SIRModel(G)

# Model Configuration
cfg = mc.Configuration()
cfg.add_model_parameter('beta', 0.01)
cfg.add_model_parameter('gamma', 0.005)
cfg.add_model_parameter("fraction_infected", 0.05)
model.set_initial_status(cfg)

# Simulation execution
iterations = model.iteration_bunch(200)

100%|███████████████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 237.30it/s]


In [57]:
data[['date', 'released_date', 'deceased_date']]

Unnamed: 0,date,released_date,deceased_date
0,2020-01-22,2020-02-05,
1,2020-01-24,2020-02-05,
2,2020-01-25,2020-03-02,
3,2020-01-26,2020-03-02,
4,2020-01-26,2020-03-02,
...,...,...,...
3257,2020-05-13,,
3258,2020-04-30,,
3259,2020-05-09,,
3260,2020-05-12,,
