In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx

from pprint import pprint
from copy import deepcopy as copy
import numba as nb
jit = nb.jit

print(nb.__version__)
%matplotlib inline

0.42.0


In [2]:
def read_US_states(fname):
    """Parse a headerless ``name,abbreviation`` CSV of states.

    Parameters
    ----------
    fname : str
        Path to the CSV file. Every line must contain at least two
        comma-separated fields: the state name and its abbreviation.

    Returns
    -------
    tuple
        ``(name_lookup, abbr_lookup, ordered_abbrs)`` where
        ``name_lookup[name] == (abbr, row_index)``,
        ``abbr_lookup[abbr] == name`` and ``ordered_abbrs`` lists the
        abbreviations in file order.
    """
    name_lookup = {}
    abbr_lookup = {}
    ordered_abbrs = []
    with open(fname, "r") as handle:
        for row_index, raw in enumerate(handle):
            fields = raw.strip().split(",")
            full_name, code = fields[0], fields[1]
            # row_index doubles as the state's position in the ordered list.
            name_lookup[full_name] = (code, row_index)
            abbr_lookup[code] = full_name
            ordered_abbrs.append(code)

    return name_lookup, abbr_lookup, ordered_abbrs

def read_travel_network(fname, states_abb_dict, states_abb_rev_dict, normalization=1000):
    """Load one travel CSV into an adjacency list and a dense matrix.

    The first line of the file is skipped as a header. Each remaining line
    is read as ``origin_name,destination_name,count``; the stored weight is
    ``count * 10 / normalization``.

    Parameters
    ----------
    fname : str
        Path to the travel CSV.
    states_abb_dict : dict
        Maps state name -> ``(abbreviation, matrix_index)``.
    states_abb_rev_dict : dict
        Its keys (abbreviations) become the keys of the adjacency list.
    normalization : number, optional
        Divisor applied to every weight (default 1000).

    Returns
    -------
    tuple
        ``(adjacency_list, A)``: ``adjacency_list[abbr]`` is a list of
        ``(dest_abbr, weight)`` pairs and ``A`` is a 50x50 float array with
        ``A[orig_idx, dest_idx] = weight``.
    """
    num_states = 50

    adjacency_list = {abb: [] for abb in states_abb_rev_dict}
    A = np.zeros((num_states, num_states))

    with open(fname, "r") as handle:
        next(handle, None)  # discard the header row
        for raw in handle:
            fields = raw.strip().split(",")
            orig, dest = fields[0], fields[1]
            weight = float(fields[2]) * 10 / normalization
            if orig == dest:
                # Self-loops are not part of the network.
                continue
            try:
                src_abb, src_idx = states_abb_dict[orig]
                dst_abb, dst_idx = states_abb_dict[dest]
                adjacency_list[src_abb].append((dst_abb, weight))
            except KeyError:
                # Rows naming an unknown place are ignored.
                continue
            A[src_idx, dst_idx] = weight

    return adjacency_list, A

def read_deaths_data(fname):
    """Sum the deaths column per state, year and quarter.

    Reads the CSV at ``fname``, groups by ``State``, ``Year`` and ``Quarter``
    and sums ``DeathsFromPneumoniaAndInfluenza`` within each group.

    Returns
    -------
    dict
        Nested mapping ``deaths_dict[state][year][quarter] -> summed deaths``.
    """
    column = "DeathsFromPneumoniaAndInfluenza"
    frame = pd.read_csv(fname)
    grouped = frame.groupby(['State', 'Year', 'Quarter'])[[column]].sum()
    # After the transpose each (state, year, quarter) tuple is a column key.
    totals = grouped.T.to_dict()

    deaths_dict = {}
    for (state, year, quarter), record in totals.items():
        per_year = deaths_dict.setdefault(state, {})
        per_quarter = per_year.setdefault(year, {})
        per_quarter[quarter] = record[column]
    return deaths_dict

def read_population_dict(fname):
    """Read yearly population counts per state from a CSV.

    The first line is skipped as a header. On every other line the second
    field is used as the state key, and the fields from the third onward are
    parsed as integers and assigned to consecutive years starting at 2009.

    Returns
    -------
    dict
        ``population_dict[state][year] -> int``.
    """
    first_year = 2009
    population_dict = {}
    with open(fname, "r") as handle:
        next(handle, None)  # discard the header row
        for raw in handle:
            fields = raw.strip().split(",")
            state = fields[1]
            population_dict[state] = {}
            for offset, count in enumerate(fields[2:]):
                population_dict[state][first_year + offset] = int(count)
    return population_dict

In [3]:
# Locations of the cleaned input files.
data_dir = "../Data/Clean/"
deaths_fname = "deaths_NCHS_processed.csv"
population_fname = "population.csv"

states_abb_dict, states_abb_rev_dict, stats_abb_ord_list = read_US_states(
    data_dir + "states_abb.csv"
)
deaths_dict = read_deaths_data(data_dir + deaths_fname)
population_dict = read_population_dict(data_dir + population_fname)

adj_list = {}

# A[year][quarter] holds the dense travel matrix for that quarter.
# Files are read for 2009 Q4, every quarter of 2010-2018, and 2019 Q1.
A = {
    2009: {
        4: read_travel_network(data_dir + "2009_Q4.csv", states_abb_dict, states_abb_rev_dict)[-1],
    },
}
for year in range(2010, 2019):
    A[year] = {}
    for quarter in (1, 2, 3, 4):
        network_fname = "{}_Q{}.csv".format(year, quarter)
        A[year][quarter] = read_travel_network(
            data_dir + network_fname, states_abb_dict, states_abb_rev_dict
        )[-1]
A[2019] = {
    1: read_travel_network(data_dir + "2019_Q1.csv", states_abb_dict, states_abb_rev_dict)[-1],
}

In [4]:
@jit(nopython=True, nogil=True)
def travel_and_infect_kernel(A, pop_vector, pop_vector_new, p_transfer, approx_binomial=None, verbose=False):
    """Spread infection between states for one step (Numba nopython kernel).

    For each origin, the mean of ``pop_vector[orig]`` is used as its infected
    fraction (assumes the entries are 0/1 flags -- TODO confirm). That
    fraction times ``A[orig, dest]``, truncated to an int, is the number of
    infected travellers. Every person in ``dest`` then reads one randomly
    chosen entry of ``approx_binomial[travel_pop_inf]``; a value >= 1 marks
    that person as 1 in ``pop_vector_new``.

    Parameters
    ----------
    A : 2-D array
        Travel weights, indexed ``A[orig, dest]``.
    pop_vector : sequence of 1-D arrays
        Per-state population state; only read here.
    pop_vector_new : sequence of 1-D arrays
        Same layout as ``pop_vector``; receives the new infections.
    p_transfer : float
        Not used inside the kernel; the transfer probability is already
        baked into ``approx_binomial`` by the caller.
    approx_binomial : sequence of 1-D arrays
        Pre-sampled binomial draws. Must be supplied (the ``None`` default is
        not usable) and must have a row for every possible traveller count,
        each row with at least 30 entries.
    verbose : bool, optional
        When true, print the infected fraction of every tenth origin.

    Returns
    -------
    ``pop_vector_new`` after the in-place updates.
    """
    N = A.shape[0]
    for orig in range(N):
        infected_fraction = np.mean(pop_vector[orig])
        # Progress output is opt-in; previously it was printed unconditionally.
        if verbose and orig % 10 == 0:
            print(orig, infected_fraction)
        for dest in range(N):
            if orig == dest:
                continue

            # Travel the people
            travel_pop_all = A[orig, dest]
            travel_pop_inf = int(travel_pop_all * infected_fraction)

            # Infect people at destination.
            # The table lookup stands in for
            # np.random.binomial(travel_pop_inf, p_transfer) > 0.
            for person_id in range(len(pop_vector[dest])):
                if approx_binomial[travel_pop_inf][np.random.randint(30)] >= 1:
                    pop_vector_new[dest][person_id] = 1

    return pop_vector_new

# https://stackoverflow.com/questions/3679694
# Numba can't handle probabilities in np.random.choice
@jit(nopython=True, nogil=True)
def custom_random_choice(values, probabilities):
    """Pick one of ``values`` using ``probabilities`` as weights.

    Draws a uniform number in [0, 1) and returns the first value whose
    cumulative probability exceeds it. The probabilities are assumed to sum
    to 1; if the cumulative sum never exceeds the draw, 0 is returned.
    """
    draw = np.random.rand()
    cumulative = 0.0
    # Walk only as far as both sequences go, like zip() would.
    n_options = min(len(values), len(probabilities))
    for idx in range(n_options):
        cumulative += probabilities[idx]
        if cumulative > draw:
            return values[idx]
    # Fallback; not expected to be reached when the probabilities sum to 1.
    return 0

@jit(nopython=True, nogil=True)
def recover_kernel(pop_vector, SIR_params):
    """Resample the state of every non-zero person, in place.

    Each entry of each state array that is not 0 is replaced by a value
    drawn from ``[0, 1, 2]`` with weights ``SIR_params``. Entries equal to 0
    are left untouched.

    Returns
    -------
    The same ``pop_vector`` object that was passed in.
    """
    for state_id in range(len(pop_vector)):
        residents = pop_vector[state_id]
        for person_id in range(len(residents)):
            if residents[person_id] != 0:
                residents[person_id] = custom_random_choice([0, 1, 2], SIR_params)
    return pop_vector

# @jit(nopython=True, nogil=True)
def remove_deceased_kernel(pop_vector):
    """Drop deceased people from every state and count them.

    People are encoded per state as 1 (infected) or 2 (deceased); every
    other value is treated as not infected. Each state is rebuilt as a
    float64 array holding the infected (1.0) first and then everyone else
    (0.0), so the original ordering of individuals is not preserved.

    Parameters
    ----------
    pop_vector : list of array-like
        Per-state population state. Modified in place.

    Returns
    -------
    tuple
        ``(pop_vector, num_deceased)`` where ``num_deceased[i]`` is the
        number of entries equal to 2 that were removed from state ``i``.
        A state whose whole population died becomes an empty array.
    """
    num_deceased = []
    for state_id, state in enumerate(pop_vector):
        state = np.asarray(state)
        num_inf = int(np.count_nonzero(state == 1))
        num_dead = int(np.count_nonzero(state == 2))
        survivors = len(state) - num_dead
        # survivors - num_inf >= 0 because infected people are survivors;
        # both counts may be zero, which yields a valid empty array.
        pop_vector[state_id] = np.concatenate((
            np.ones(num_inf, dtype=np.float64),
            np.zeros(survivors - num_inf, dtype=np.float64),
        ))
        num_deceased.append(num_dead)
    return pop_vector, num_deceased

# @jit(nopython=True, nogil=True)
def inject_population_kernel(pop_vector, new_population):
    """Resize every state to its next-generation population, in place.

    Parameters
    ----------
    pop_vector : list of array-like
        Per-state population state, where 1 marks an infected person and
        every other value is treated as not infected.
    new_population : sequence of int
        Target population per state, in the same order as ``pop_vector``
        (the ordering must be guaranteed by the caller). Negative targets
        are treated as 0.

    Returns
    -------
    The same ``pop_vector`` object. A growing state gets zeros appended. A
    shrinking state is rebuilt as ones followed by zeros, with the removals
    shared between infected and non-infected in proportion to their numbers,
    so the result always has exactly the target length. States already at
    the target size are left untouched.
    """
    for state_id, state in enumerate(pop_vector):
        state = np.asarray(state)
        current_pop = len(state)
        target_pop = max(int(new_population[state_id]), 0)
        delta = target_pop - current_pop
        if delta > 0:
            # Newcomers arrive uninfected.
            pop_vector[state_id] = np.concatenate((state, np.zeros(delta)))
        elif delta < 0:
            # Population decreased: remove infected and non-infected at the
            # same rate. delta < 0 with target_pop >= 0 implies
            # current_pop > 0, so the division below is safe.
            to_remove = -delta
            num_inf = int(np.count_nonzero(state == 1))
            num_safe = current_pop - num_inf
            # Flooring keeps num_inf_to_remove <= num_inf, and the remainder
            # never exceeds num_safe because to_remove <= current_pop.
            num_inf_to_remove = (to_remove * num_inf) // current_pop
            num_safe_to_remove = to_remove - num_inf_to_remove
            pop_vector[state_id] = np.concatenate((
                np.ones(num_inf - num_inf_to_remove, dtype=state.dtype),
                np.zeros(num_safe - num_safe_to_remove, dtype=state.dtype),
            ))
    return pop_vector

def random_infection_kernel(pop_vector, p_inf):
    """Overwrite every state with fresh Bernoulli(``p_inf``) infection draws.

    Each state is replaced by a new array of the same length whose entries
    are 1 with probability ``p_inf`` and 0 otherwise; the previous contents
    are discarded. The list is modified in place and also returned.
    """
    for idx in range(len(pop_vector)):
        headcount = len(pop_vector[idx])
        pop_vector[idx] = np.random.binomial(1, p_inf, headcount)
    return pop_vector

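Find p_die: estimate the probability that an infected person dies, using the average yearly death count and the average total population.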

In [5]:
# Average total population: sum every yearly column over all states, then
# average those yearly totals.
pop_data = pd.read_csv(data_dir + population_fname).drop(['State', 'Abbr'], axis=1)
avg_pop = pop_data.sum(axis=0).mean()

# Average yearly deaths over the years strictly between 2009 and 2019.
death_data = pd.read_csv(data_dir + deaths_fname).drop(['Week', 'Quarter'], axis=1)
death_data = death_data[(death_data.Year > 2009) & (death_data.Year < 2019)]
# Select the deaths column explicitly so the result is a scalar and the
# string-valued 'State' column never takes part in the sum.
deaths_avg = death_data.groupby('Year')['DeathsFromPneumoniaAndInfluenza'].sum().mean()

In [6]:
# Number of states simulated (used as the loop bound when seeding pop_vec).
N = 50
# Probability used for the random (re-)infection draws.
p_inf = 0.05
# Success probability of the binomial draws behind approx_binomial.
p_transfer = 0.1
# Death probability: average yearly deaths per capita scaled by 0.125.
# NOTE(review): 0.125 is presumably the midpoint of the 5-20% yearly flu
# incidence quoted below; it multiplies the per-capita rate rather than
# dividing it -- confirm this is the intended calibration.
p_die = np.float64(deaths_avg*(0.125)/avg_pop) # previously 0.01; https://www.webmd.com/cold-and-flu/flu-statistics --> 5-20% of people get the flu each year
p_rec = 1-p_die
p_stay = 0
# Weights for the outcomes [0, 1, 2] drawn in recover_kernel, in that order.
SIR = np.array([p_rec, p_stay, p_die], dtype=np.float64)

In [7]:
p_die

7.540044190323758e-05

In [8]:
# Regroup the population table by year: all_pop[year] is a list with one
# entry per state, in the iteration order of population_dict.
years = list(range(2009, 2019))
all_pop = {year: [] for year in years}

for state, data in population_dict.items():
    for year, pop in data.items():
        all_pop[year].append(pop)

In [9]:
# The simulation cells below refer to the population lists as all_population ...
all_population = all_pop
# ... and to the per-year, per-quarter travel matrices as A_dict.
A_dict = A

In [10]:
# Instantiate a random infection
pop_vec = []
year_0 = 2009
for state_id in range(N):
    pop_vec.append(np.random.binomial(1, p_inf, all_population[year_0][state_id]))

In [None]:
from time import time

# Propagate infection.
# num_deceased[year][quarter] is the per-state list of deaths returned by
# remove_deceased_kernel for that quarter.
num_deceased = {}

# For each year:
for year in years:
    t = time()
    num_deceased[year] = {}
    # For each quarter:
    for quarter in range(1, 5):
        # 1. travel and infect
        # 2. Recover
        # 3. Remove deceased and store it
        # 4. Randomly infect

        # Travel matrices only exist from 2009 Q4 through 2019 Q1.
        if year == 2009 and quarter != 4:
            continue
        elif year == 2019 and quarter != 1:
            continue

        print(year, quarter)

        pop_vec = [x.astype(int) for x in pop_vec]

        # Pre-sample binomial draws for every possible number of infected
        # travellers. The kernel can index up to max_inf inclusive, so the
        # table needs max_inf + 1 rows. Each row has 30 samples, matching the
        # np.random.randint(30) column pick inside the kernel.
        max_inf = int(np.max(A_dict[year][quarter]))
        approx_binomial = [
            np.random.binomial(i, p_transfer, 30) for i in range(max_inf + 1)
        ]

        # The kernel reads the old state and writes into this copy.
        pop_vec_new = copy(pop_vec)

        pop_vec = travel_and_infect_kernel(
            A_dict[year][quarter], pop_vec, pop_vec_new, p_transfer,
            approx_binomial=approx_binomial, verbose=True,
        )
        print("travel_and_infect_kernel")

        pop_vec = recover_kernel(pop_vec, SIR)
        print("recover_kernel")

        pop_vec, dead_peeps = remove_deceased_kernel(pop_vec)
        print("remove_deceased_kernel")

        num_deceased[year][quarter] = dead_peeps
        pop_vec = random_infection_kernel(pop_vec, p_inf)

    # Inject population. Checking membership explicitly (instead of catching
    # KeyError) skips only the "no data for next year" case and lets any
    # error raised inside the kernel surface.
    if year + 1 in all_population:
        pop_vec = inject_population_kernel(pop_vec, all_population[year + 1])

    t = time() - t
    print("Elapsed time for {}: {:.3f}".format(year, t))

2009 4
0 0.05002776412807397
10 0.049789228174887526
20 0.04998379007774779
30 0.05028078330637931


In [None]:
# Inspect the simulated deaths: num_deceased[year][quarter] -> per-state list.
pprint(num_deceased)

In [None]:
# Inspect the population lists: all_population[year] -> one entry per state.
pprint(all_population)

In [12]:
# One row per simulated (year, quarter): the year, the quarter, then the
# per-state death counts in state order.
deceased_df = pd.DataFrame.from_records([
    [year_key, quarter_key] + state_deaths
    for year_key, by_quarter in num_deceased.items()
    for quarter_key, state_deaths in by_quarter.items()
])

In [13]:
# Persist the simulated death counts next to the notebook.
deceased_df.to_csv('deceased_df_real_data_2.csv')

In [None]:
# Spot check: travel weight from index 9 to index 8 in 2009 Q4, scaled by
# the mean of state 9's current population vector.
A_dict[2009][4][9][8] * np.mean(pop_vec[9])

In [None]:
# Print the mean of each state's population vector, one line per state.
for state_pop in pop_vec:
    print(np.mean(state_pop))