In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx

from pprint import pprint
from copy import deepcopy as copy

%matplotlib inline

## Read data files

**Note:** We keep only the 50 states. DC is not included, and self-loops (rows whose origin and destination are the same state) are dropped.

In [2]:
def read_US_states(fname):
    """Load the state name / abbreviation table from a comma-separated file.

    Every line is split on commas; the first field is taken as the state
    name and the second as its abbreviation. No header row is skipped.

    Returns a 3-tuple:
      * dict mapping state name -> (abbreviation, 0-based line position)
      * dict mapping abbreviation -> state name
      * list of abbreviations in file order
    """
    name_to_abbr_idx = {}
    abbr_to_name = {}
    abbr_in_order = []
    with open(fname, "r") as handle:
        # Every line read appends exactly one abbreviation, so the line
        # position doubles as the state's index in the ordered list.
        for position, raw in enumerate(handle):
            fields = raw.strip().split(",")
            state_name, state_abbr = fields[0], fields[1]
            name_to_abbr_idx[state_name] = (state_abbr, position)
            abbr_to_name[state_abbr] = state_name
            abbr_in_order.append(state_abbr)

    return name_to_abbr_idx, abbr_to_name, abbr_in_order

def read_travel_network(fname, states_abb_dict, states_abb_rev_dict, normalization=1000, num_states=50):
    """Read one origin/destination travel file into an adjacency list and matrix.

    The first line of the file is skipped as a header. Each remaining line is
    split on commas into origin name, destination name and a weight; the
    weight is divided by ``normalization``.

    Rows are ignored when origin and destination are identical (self loops)
    or when either endpoint is not a key of ``states_abb_dict``.

    Parameters
    ----------
    fname : path of the comma-separated travel file.
    states_abb_dict : dict mapping state name -> (abbreviation, matrix index).
    states_abb_rev_dict : dict whose keys are the abbreviations; each gets an
        (initially empty) entry in the adjacency list.
    normalization : divisor applied to every weight (default 1000).
    num_states : side length of the square matrix (default 50).

    Returns
    -------
    (adjacency_list, A) where ``adjacency_list[abbr]`` is a list of
    ``(dest_abbr, weight)`` tuples and ``A[orig_idx][dest_idx]`` is the weight.
    If a pair occurs more than once, the list keeps every row while the
    matrix keeps only the last weight.

    Raises
    ------
    IndexError if a state index is outside the ``num_states`` matrix, or a
    non-blank row has fewer than three fields. ValueError if a weight is not
    numeric.
    """
    adjacency_list = {abb: [] for abb in states_abb_rev_dict}
    A = np.zeros((num_states, num_states))

    with open(fname, "r") as f:
        for idx, line in enumerate(f):
            if idx == 0:
                # header row
                continue
            line = line.strip()
            if not line:
                # tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            fields = line.split(",")
            orig = fields[0]
            dest = fields[1]
            weight = float(fields[2]) / normalization
            if orig == dest:
                continue
            # Endpoints that are not in the state table are skipped on purpose
            # rather than being treated as an error.
            if orig not in states_abb_dict or dest not in states_abb_dict:
                continue
            orig_abb, orig_idx = states_abb_dict[orig]
            dest_abb, dest_idx = states_abb_dict[dest]
            if orig_abb not in adjacency_list:
                continue
            adjacency_list[orig_abb].append((dest_abb, weight))
            A[orig_idx][dest_idx] = weight

    return adjacency_list, A

def read_deaths_data(fname):
    """Aggregate pneumonia/influenza deaths per state, year and quarter.

    Reads the CSV at ``fname``, sums the ``DeathsFromPneumoniaAndInfluenza``
    column over every (State, Year, Quarter) group, and returns the totals as
    a nested dict: ``deaths_dict[state][year][quarter] -> total deaths``.
    """
    column = "DeathsFromPneumoniaAndInfluenza"
    table = pd.read_csv(fname)
    totals = table.groupby(['State', 'Year', 'Quarter'])[[column]].sum()
    # Transposing makes each (state, year, quarter) tuple a column label, so
    # to_dict() yields {(state, year, quarter): {column: total}}.
    per_group = totals.T.to_dict()

    deaths_dict = {}
    for (state, year, quarter), record in per_group.items():
        by_year = deaths_dict.setdefault(state, {})
        by_quarter = by_year.setdefault(year, {})
        by_quarter[quarter] = record[column]
    return deaths_dict

def read_population_dict(fname):
    """Read yearly state populations from a comma-separated file.

    The first line is skipped as a header. On every other line the second
    field is the state key and the fields from the third onward are integer
    populations, assigned to consecutive years starting at 2009.

    Returns ``population_dict[state][year] -> int``.
    """
    first_year = 2009
    population_dict = {}
    with open(fname, "r") as handle:
        next(handle, None)  # header row carries no data
        for raw in handle:
            fields = raw.strip().split(",")
            state = fields[1]
            population_dict[state] = {
                first_year + offset: int(count)
                for offset, count in enumerate(fields[2:])
            }
    return population_dict

In [3]:
# Locations of the cleaned input files
data_dir = "../Data/Clean/"
deaths_fname = "deaths_NCHS_processed.csv"
population_fname = "population.csv"

# State lookup tables, observed deaths and yearly populations.
# The third name is spelled `stats_abb_ord_list` and is kept unchanged here.
states_abb_dict, states_abb_rev_dict, stats_abb_ord_list = read_US_states(data_dir + "states_abb.csv")
deaths_dict = read_deaths_data(data_dir + deaths_fname)
population_dict = read_population_dict(data_dir + population_fname)

adj_list = {}

# A[year][quarter] holds the (adjacency_list, matrix) pair returned by
# read_travel_network. Coverage: 2009 Q4, every quarter of 2010-2018, 2019 Q1.
A = {2009: {4: read_travel_network(data_dir + "2009_Q4.csv", states_abb_dict, states_abb_rev_dict)}}
for year in range(2010, 2019):
    A[year] = {}
    for quarter in (1, 2, 3, 4):
        network_fname = "{}_Q{}.csv".format(year, quarter)
        A[year][quarter] = read_travel_network(data_dir + network_fname, states_abb_dict, states_abb_rev_dict)
A[2019] = {1: read_travel_network(data_dir + "2019_Q1.csv", states_abb_dict, states_abb_rev_dict)}

## Simulation


1. Randomly infect people in each state with probability `p_inf`
    - Calculate the fraction of infected people
2. Move people between states by air travel
    - The infected fraction among air travelers is the same as in their origin state
    - Each infected traveler has a chance to infect every person in the destination state with probability `p_transfer`
3. In each state, every infected person recovers, dies, or remains infected with probabilities `p_rec`, `p_die`, `p_stay`
4. Increment, or decrement, the population between years (not between quarters)

In total, there are 4 parameters: `p_inf`, `p_transfer`, `p_rec`, `p_die`

Note `p_stay = 1 - p_rec - p_die`

This can be reduced to a 3-parameter model by setting `p_stay = 0`, i.e., between quarters, people either heal completely or die.

### Goal: Find the best parameters that represent the number of deaths observed

### Kernels for simulation

In [4]:
def travel_and_infect_kernel(A, pop_vector, p_transfer, verbose=False):
    """Spread infection between states along the travel matrix.

    For every ordered pair (orig, dest) with orig != dest, the number of
    infected travellers is ``int(A[orig, dest] * infected_fraction(orig))``.
    Each person at the destination then becomes infected (value 1) when at
    least one of those travellers transmits, i.e. when a
    ``Binomial(infected travellers, p_transfer)`` draw is positive.

    Parameters
    ----------
    A : square array; ``A[orig, dest]`` is the number of travellers.
    pop_vector : list with one per-person status sequence per state.
        The infected fraction is the mean of that sequence.
    p_transfer : per-traveller transmission probability.
    verbose : when true, print a line for every route carrying no infected
        travellers.

    Returns
    -------
    The same ``pop_vector`` object, updated in place.

    Notes
    -----
    States are updated in place while looping over origins, so infections
    seeded earlier in the same call count toward the infected fraction of
    origins processed later.
    """
    N = A.shape[0]
    for orig in range(N):
        # An emptied-out state has nobody to send. np.mean of an empty
        # sequence is NaN and int(NaN) raises, so treat its fraction as zero.
        if len(pop_vector[orig]) == 0:
            infected_fraction = 0.0
        else:
            infected_fraction = np.mean(pop_vector[orig])
        for dest in range(N):
            if orig == dest:
                continue

            # Travel the people
            travel_pop_all = A[orig, dest]
            travel_pop_inf = int(travel_pop_all * infected_fraction)

            # Infect people at destination
            if travel_pop_inf == 0:
                if verbose:
                    print("{} -> {}: not infecting".format(orig, dest))
                continue

            # One binomial draw per destination resident, taken in a single
            # vectorised call instead of a Python-level loop of scalar draws.
            exposures = np.random.binomial(travel_pop_inf, p_transfer, size=len(pop_vector[dest]))
            for person_id in np.flatnonzero(exposures > 0):
                pop_vector[dest][person_id] = 1

    return pop_vector

def recover_kernel(pop_vector, SIR_params):
    """Resolve the outcome of every person whose status is non-zero.

    Each such person is reassigned a status drawn from ``[0, 1, 2]`` with
    probabilities ``SIR_params`` (in that order). People whose status is 0
    are left untouched. ``pop_vector`` is modified in place and returned.
    """
    outcomes = [0, 1, 2]
    for state_id in range(len(pop_vector)):
        members = pop_vector[state_id]
        for person_id in range(len(members)):
            if members[person_id] != 0:
                members[person_id] = np.random.choice(outcomes, p=SIR_params)
    return pop_vector

def remove_deceased_kernel(pop_vector):
    """Drop people with status 2 from every state and count them.

    Each state is rebuilt as an array with its status-1 people first (as 1s)
    followed by 0s for everybody else who was not removed. A state with
    nobody left becomes an empty array.

    Returns ``(pop_vector, num_deceased)`` where ``pop_vector`` is the same
    list, updated in place, and ``num_deceased[i]`` is the number of people
    removed from state ``i``.
    """
    num_deceased = []
    for state_id, state in enumerate(pop_vector):
        infected = int(np.count_nonzero(state == 1))
        dead = int(np.count_nonzero(state == 2))
        survivors = len(state) - dead
        if survivors <= 0:
            pop_vector[state_id] = np.array([])
        else:
            pop_vector[state_id] = np.array([1] * infected + [0] * (survivors - infected))
        num_deceased.append(dead)
    return pop_vector, num_deceased
    
def inject_population_kernel(pop_vector, new_population):
    """Resize every state to its target population.

    Parameters
    ----------
    pop_vector : list with one per-person status array per state.
    new_population : sequence of target head counts, indexed by the same
        state position as ``pop_vector``.

    Growth appends susceptible people (zeros). Shrinkage removes infected and
    susceptible people in proportion to their share of the state, so the
    infected fraction is preserved up to rounding and the resulting size
    matches the target exactly. A shrunken state is rebuilt with its
    remaining infected people (1s) first, followed by 0s.

    Returns the same ``pop_vector`` object, updated in place.
    """
    for state_id, state in enumerate(pop_vector):
        current_pop = len(state)
        # int() keeps the list-repetition arithmetic below on plain Python ints
        nextgen_pop = int(new_population[state_id])
        extra_peeps = nextgen_pop - current_pop
        if extra_peeps > 0:
            pop_vector[state_id] = np.concatenate((state, np.zeros(extra_peeps)))
        elif extra_peeps < 0:
            # Population decreased: remove infected and susceptible at the
            # same rate. Never remove more people than the state holds.
            to_remove = min(-extra_peeps, current_pop)
            num_inf = int(np.count_nonzero(state == 1))
            num_safe = current_pop - num_inf
            # Floor of the proportional share. Because to_remove <= current_pop,
            # this never exceeds num_inf and the remainder never exceeds num_safe.
            num_inf_to_remove = (to_remove * num_inf) // current_pop if current_pop else 0
            num_safe_to_remove = to_remove - num_inf_to_remove
            new_state = [1] * (num_inf - num_inf_to_remove) + [0] * (num_safe - num_safe_to_remove)
            pop_vector[state_id] = np.array(new_state)
    return pop_vector

def random_infection_kernel(pop_vector, p_inf):
    """Redraw every state's infection status from scratch.

    Each state is replaced by a fresh array of the same length whose entries
    are independent Bernoulli(``p_inf``) draws (1 = infected, 0 = not).
    ``pop_vector`` is modified in place and returned.

    NOTE(review): the previous statuses are overwritten, so anybody still
    infected before this call is re-randomised too. Confirm this is intended
    when a non-zero "stay infected" probability is used.
    """
    for state_id in range(len(pop_vector)):
        head_count = len(pop_vector[state_id])
        pop_vector[state_id] = np.random.binomial(1, p_inf, head_count)
    return pop_vector

### Simple 5 x 5 model

In [5]:
# Fix parameters of the model
p_inf = 0.1
p_transfer = 0.1
p_rec = 0.99
p_die = 0.01
p_stay = 0
# Order matches the outcome codes [0, 1, 2] drawn in recover_kernel:
# probability of ending at status 0, of staying at 1, and of ending at 2.
SIR = [p_rec, p_stay, p_die]

# Seed the global NumPy RNG so the toy networks and populations generated
# below are the same after every Restart & Run All.
SEED = 0
np.random.seed(SEED)

# Fix population and travel networks
N = 5
num_years = 3

# A_dict[year][quarter] is an N x N integer matrix of traveller counts in
# [1, 19] with a zero diagonal (no self loops). The matrix is bound to a local
# name so the travel-network dict `A` built in the data-loading cell survives.
A_dict = {}
for year in range(num_years):
    A_dict[year] = {}
    for quarter in range(4):
        travel_matrix = np.random.randint(1, 20, (N, N))
        np.fill_diagonal(travel_matrix, 0)
        A_dict[year][quarter] = travel_matrix

# all_population[year] lists one head count per state. Year 0 is a random
# multiple of 10 in [100, 240]; each later year adds a random change in
# [epsilon_minus, epsilon_plus) to the previous year's value.
all_population = {}
epsilon_plus = 50
epsilon_minus = -10
for year in range(num_years):
    if year == 0:
        all_population[year] = [np.random.randint(10, 25) * 10 for _ in range(N)]
    else:
        all_population[year] = [
            all_population[year - 1][state_id] + np.random.randint(epsilon_minus, epsilon_plus)
            for state_id in range(N)
        ]

In [7]:
# Display the randomly generated travel matrices, keyed as A_dict[year][quarter]
A_dict

{0: {0: array([[ 0,  1,  9, 19, 14],
         [10,  0,  9, 10,  8],
         [ 9,  5,  0, 10, 18],
         [11, 11, 13,  0, 10],
         [ 7,  7,  1,  8,  0]]), 1: array([[ 0, 14,  3, 17, 19],
         [ 3,  0,  8, 10, 15],
         [16, 10,  0, 11, 18],
         [ 8, 18, 10,  0, 16],
         [ 9, 18, 14, 10,  0]]), 2: array([[ 0,  5,  3, 16, 15],
         [ 6,  0, 12,  9,  8],
         [14, 11,  0,  2,  1],
         [16, 16,  8,  0,  9],
         [13, 18,  2,  5,  0]]), 3: array([[ 0, 19, 10,  5, 15],
         [12,  0, 17,  4, 11],
         [ 7,  7,  0,  6,  5],
         [14,  8,  4,  0,  2],
         [ 2,  3, 13,  8,  0]])}, 1: {0: array([[ 0,  7,  4, 17,  8],
         [ 8,  0, 16,  5,  4],
         [ 5, 13,  0,  6, 16],
         [19,  9,  9,  0,  4],
         [12,  2, 14, 15,  0]]), 1: array([[ 0,  1,  9, 18,  6],
         [14,  0, 14,  4, 19],
         [ 2, 16,  0, 14, 13],
         [15,  7,  2,  0, 15],
         [10,  8,  6,  2,  0]]), 2: array([[ 0,  5,  6,  5, 12],
         [

In [None]:
# Instantiate a random infection: one Bernoulli(p_inf) status array per state,
# sized by that state's population in the first year.
pop_vec = []
year_0 = 0
for state_id in range(N):
    pop_vec.append(np.random.binomial(1, p_inf, all_population[year_0][state_id]))

# Propagate infection.
# num_deceased[year][quarter] is the per-state list of deaths for that quarter.
num_deceased = {}

# For each year:
for year in range(num_years):
    num_deceased[year] = {}
    # For each quarter:
    for quarter in range(4):
        # 1. travel and infect
        # 2. Recover
        # 3. Remove deceased and store it
        # 4. Randomly infect
        pop_vec = travel_and_infect_kernel(A_dict[year][quarter], pop_vec, p_transfer)
        pop_vec = recover_kernel(pop_vec, SIR)
        pop_vec, dead_peeps = remove_deceased_kernel(pop_vec)
        num_deceased[year][quarter] = dead_peeps
        pop_vec = random_infection_kernel(pop_vec, p_inf)

    # Inject population for the following year. After the last year there is
    # no further data, so nothing is injected. An explicit membership test is
    # used so that a KeyError raised inside the kernel is not silently hidden.
    if year + 1 in all_population:
        pop_vec = inject_population_kernel(pop_vec, all_population[year + 1])

In [None]:
# Show the simulated deaths (num_deceased[year][quarter] -> per-state list),
# followed by the per-year state populations used for the run.
pprint(num_deceased)
pprint(all_population)
