In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx

from numba import jit, prange, jitclass
from numba.types import int32
from time import time
from pprint import pprint
from copy import deepcopy as copy


%matplotlib inline

## Read data files

**Note** We are keeping only the 50 states. DC not included. Self loops not included

In [2]:
def read_US_states(fname):
    states_abb_dict = {}
    states_abb_rev_dict = {}
    states_abb_ord_list = []
    with open(fname, "r") as f:
        for line in f:
            line = line.strip().split(",")
            name = line[0]
            abbr = line[1]
            states_abb_dict[name] = (abbr, len(states_abb_ord_list))
            states_abb_rev_dict[abbr] = name
            states_abb_ord_list.append(abbr)
    
    return states_abb_dict, states_abb_rev_dict, states_abb_ord_list

def read_travel_network(fname, states_abb_dict, states_abb_rev_dict, normalization=1000):
    num_states = 50

    adjacency_list = {}
    A = np.zeros((num_states, num_states))

    for abb in states_abb_rev_dict:
        adjacency_list[abb] = []

    with open(fname, "r") as f:
        for idx, line in enumerate(f):
            if idx == 0:
                continue
            line = line.strip().split(",")
            orig = line[0]
            dest = line[1]
            weight = float(line[2])*10 / normalization
            if orig == dest:
                continue
            try:
                orig_abb, orig_idx = states_abb_dict[orig]
                dest_abb, dest_idx = states_abb_dict[dest]
                adjacency_list[orig_abb].append((dest_abb, weight))
                A[orig_idx][dest_idx] = weight
            except KeyError:
                pass
    
    return adjacency_list, A

def read_deaths_data(fname):
    df = pd.read_csv(fname)
    sum_df_d = df.groupby(['State', 'Year', 'Quarter'])[['DeathsFromPneumoniaAndInfluenza']].sum().T.to_dict()
    deaths_dict = {}
    for key, deaths in sum_df_d.items():
        state, year, quarter = key
        if state not in deaths_dict:
            deaths_dict[state] = {}
        if year not in deaths_dict[state]:
            deaths_dict[state][year] = {}
        deaths_dict[state][year][quarter] = deaths["DeathsFromPneumoniaAndInfluenza"]
    return deaths_dict

def read_deaths_data_monthly(fname):
    df = pd.read_csv(fname)
    sum_df_d = df.groupby(['State', 'Year', 'Month'])[['DeathsFromPneumoniaAndInfluenza']].sum().T.to_dict()
    deaths_dict = {}
    for key, deaths in sum_df_d.items():
        state, year, quarter = key
        if state not in deaths_dict:
            deaths_dict[state] = {}
        if year not in deaths_dict[state]:
            deaths_dict[state][year] = {}
        deaths_dict[state][year][quarter] = deaths["DeathsFromPneumoniaAndInfluenza"]
    return deaths_dict

def read_population_dict(fname):
    population_dict = {}
    with open(fname, "r") as f:
        for idx, line in enumerate(f):
            if idx == 0:
                continue
            line = line.strip().split(",")
            state = line[1]
            population_dict[state] = {}
            year = 2009
            for idx, pop in enumerate(line[2:]):
                population_dict[state][year + idx] = int(pop)
    return population_dict

In [3]:
class State:
    def __init__(self, pop):
        self.num_total = pop
        self.num_infected = 0
        self.num_deceased = 0
    
    def infected_fraction(self):
        return self.num_infected / self.num_total

In [4]:
def travel_and_infect_kernel(A, states, p_transfer, verbose=False):
    N = A.shape[0]
    new_states = copy(states)
    
    for i in range(N):
        
        # Let X be number of successful infections.
        # X is Binomial(n, p_transfer) where n is number of travelers.
        # We want X >= 1 for each person at destination i.e.,
        # at least one successful infection for each person at destination.
        # So, calculate p' = P(X >= 1) = 1 - P(X = 0) = 1 - (1-p_transfer)^n.
        # p' is the probability a person at destination gets infected.
        # This reduces problem to calculate a new r.v. Y.
        # Y is Binomial(m, p'), where m is the uninfected population of destination
        
        infected_fraction = states[i].infected_fraction()
        travel_pop_inf = A[i, :] * infected_fraction
        ccdf = 1 - (1 - p_transfer)**travel_pop_inf
        
        for j in range(N):
            if i == j:
                continue
            dest = states[j]
            new_states[j].num_infected += np.random.binomial(dest.num_total - dest.num_infected, ccdf[j])
        
    return new_states

def recover_kernel(states, SIR_params):
    for i, state in enumerate(states):
        x = np.random.multinomial(states[i].num_infected, SIR_params)
        recovered = x[0]
        dead = x[2]
        states[i].num_infected -= (dead + recovered)
        states[i].num_deceased += dead
    return states

def remove_deceased_kernel(states):
    num_deceased = []
    for i, state in enumerate(states):
        num_deceased.append(state.num_deceased)
        state.num_total = max(state.num_total - state.num_deceased, 0)
        state.num_deceased = 0
        states[i] = state
    return states, num_deceased
    
def inject_population_kernel(states, new_population):
    # new_population is somehow ordered
    for i, state in enumerate(states):
        current_pop = state.num_total
        nextgen_pop = new_population[state_id]
        extra_peeps = nextgen_pop - current_pop
        if extra_peeps > 0:
            state.num_total += extra_peeps
        elif extra_peeps < 0:
            # population actually decreased... remove infected and susceptible at same rate
            extra_peeps = -extra_peeps
            if extra_peeps // 2 >= state.num_infected:
                state.num_total = state.num_total - (extra_peeps - state.num_infected)
                state.num_infected = 0
            else:
                state.num_infected -= (extra_peeps - extra_peeps//2)
                state.num_total -= extra_peeps
        states[i] = state
    return states

def random_infection_kernel(states, p_inf):
    for i, state in enumerate(states):
        if state.num_total < state.num_infected:
            continue
        infected = np.random.binomial(state.num_total - state.num_infected, p_inf)
        state.num_infected += infected
        states[i] = state
    return states

In [5]:
data_dir = "../Data/Clean/"
deaths_fname = "deaths_NCHS_processed.csv"
population_fname = "population.csv"

states_abb_dict, states_abb_rev_dict, stats_abb_ord_list = read_US_states(data_dir + "states_abb.csv")
deaths_dict = read_deaths_data_monthly(data_dir + deaths_fname)
population_dict = read_population_dict(data_dir + population_fname)

quarter_to_month = {1:[1,2,3],
                    2:[4,5,6],
                    3:[7,8,9],
                    4:[10,11,12]
                   }


adj_list = {}
A = {}
A[2009] = {}
A[2009][4] = {}
for i in range(10,13):
    A[2009][4][i] = read_travel_network(data_dir + "2009_Q4.csv", states_abb_dict, states_abb_rev_dict)[1]

for year in range(2010, 2019):
    A[year] = {}
    for quarter in range(1, 5):
        A[year][quarter] = {}
        for month in quarter_to_month[quarter]:
            network_fname = str(year) + "_Q" + str(quarter) + ".csv"
            A[year][quarter][month] = read_travel_network(data_dir + network_fname, states_abb_dict, states_abb_rev_dict)[1]
A[2019] = {}
A[2019][1] = {}
for i in range(1,4):
    A[2019][1][i] = read_travel_network(data_dir + "2019_Q1.csv", states_abb_dict, states_abb_rev_dict)[1]

all_population = {}
years = [2009,2010,2011,2012,2013,2014,2015,2016,2017,2018]
for year in years:
    all_population[year] = []


for state,data in population_dict.items():
    for year, pop in data.items():
        all_population[year].append(pop)
        
A_dict = A

In [6]:
deaths_dict

{'AK': {2009: {10: 17.0, 11: 22.0, 12: 20.0},
  2010: {1: 13.0,
   2: 20.0,
   3: 23.0,
   4: 22.0,
   5: 21.0,
   6: 18.0,
   7: 12.0,
   8: 15.0,
   9: 17.0,
   10: 26.0,
   11: 18.0,
   12: 21.0},
  2011: {1: 18.0,
   2: 27.0,
   3: 33.0,
   4: 17.0,
   5: 18.0,
   6: 22.0,
   7: 26.0,
   8: 19.0,
   9: 20.0,
   10: 22.0,
   11: 17.0,
   12: 21.0},
  2012: {1: 24.0,
   2: 24.0,
   3: 20.0,
   4: 15.0,
   5: 14.0,
   6: 12.0,
   7: 13.0,
   8: 16.0,
   9: 18.0,
   10: 23.0,
   11: 18.0,
   12: 25.0},
  2013: {1: 24.0,
   2: 23.0,
   3: 22.0,
   4: 7.0,
   5: 15.0,
   6: 23.0,
   7: 16.0,
   8: 23.0,
   9: 28.0,
   10: 17.0,
   11: 27.0,
   12: 23.0},
  2014: {1: 27.0,
   2: 17.0,
   3: 21.0,
   4: 16.0,
   5: 26.0,
   6: 25.0,
   7: 13.0,
   8: 19.0,
   9: 17.0,
   10: 24.0,
   11: 36.0,
   12: 24.0},
  2015: {1: 22.0,
   2: 21.0,
   3: 19.0,
   4: 17.0,
   5: 29.0,
   6: 10.0,
   7: 10.0,
   8: 24.0,
   9: 21.0,
   10: 21.0,
   11: 22.0,
   12: 18.0},
  2016: {1: 20.0,
   2: 23.0,
 

In [7]:
# Fix parameters of the model
p_inf = 0.1
p_transfer = 0.1
p_rec = 0.99
p_die = 7.540044190323758e-05
p_stay = 1 - p_rec - p_die
SIR = [p_rec, p_stay, p_die]

In [46]:
sim_time = time()

states = []
year_0 = 2009
for state_id in range(50):
    state = State(all_population[year_0][state_id])
    state.num_infected += np.random.binomial(state.num_total, p_inf)
    states.append(state)
    
# Propagate infection
num_deceased = {}

# For each year:
for year in years:
    iter_time = time()
    num_deceased[year] = {}
    # For each quarter:
    for quarter in range(1, 5):
        if year == 2009 and quarter != 4:
            continue
        if year == 2019 and quarter != 1:
            continue
        num_deceased[year][quarter] = {}
        for month in quarter_to_month[quarter]:
            # 1. travel and infect
            # 2. Recover
            # 3. Remove deceased and store it
            # 4. Randomly infect
            states = travel_and_infect_kernel(A_dict[year][quarter][month], states, p_transfer)
            states = recover_kernel(states, SIR)
            states, dead_peeps = remove_deceased_kernel(states)
            num_deceased[year][quarter][month] = dead_peeps
            states = random_infection_kernel(states, p_inf)
    
    # Inject population
    try:
        pop_vec = inject_population_kernel(states, all_population[year+1])
    except KeyError:
        # We are at the end of our data
        pass
    
    iter_time = time() - iter_time
    print("Elapsed time for {}: {:.3f} s".format(year, iter_time))

sim_time = time() - sim_time
print("Total elapsed time for simulation: {:.3f} s".format(sim_time))

Elapsed time for 2009: 0.032 s
Elapsed time for 2010: 0.129 s
Elapsed time for 2011: 0.128 s
Elapsed time for 2012: 0.124 s
Elapsed time for 2013: 0.130 s
Elapsed time for 2014: 0.128 s
Elapsed time for 2015: 0.128 s
Elapsed time for 2016: 0.123 s
Elapsed time for 2017: 0.131 s
Elapsed time for 2018: 0.130 s
Total elapsed time for simulation: 1.184 s


In [47]:
num_deceased = pd.DataFrame.from_records([[i, j, k] + num_deceased[i][j][k]
                                          for i in num_deceased
                                          for j in num_deceased[i]
                                          for k in num_deceased[i][j]])
num_deceased.to_csv('deceased_df_real_data_monthly.csv')


# Plot bar plots

In [48]:
state_abbr = ['AK', 'AL', 'AZ', 'AR', 'CA',
              'CO', 'CT', 'DE', 'FL', 'GA',
              'HI', 'ID', 'IL', 'IN', 'IA',
              'KS', 'KY', 'LA', 'ME', 'MD',
              'MA', 'MI', 'MN', 'MS', 'MO',
              'MT', 'NE', 'NV', 'NH', 'NJ',
              'NM', 'NY', 'NC', 'ND', 'OH',
              'OK', 'OR', 'PA', 'RI', 'SC',
              'SD', 'TN', 'TX', 'UT', 'VT',
              'VA', 'WA', 'WV', 'WI', 'WY'
             ]

In [58]:
deaths_true = pd.read_csv('../Data/Clean/deaths_NCHS_processed.csv')
deaths_monthly = pd.read_csv('deceased_df_real_data_monthly.csv', names=['unnamed', 'Year', 'Quarter', 'Month']+state_abbr)[1:]
deaths_monthly.drop('unnamed', axis=1, inplace=True)


In [59]:
grouped = deaths_true.groupby(['Year', 'Month']).sum()

In [60]:
grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,DeathsFromPneumoniaAndInfluenza,Week,Quarter
Year,Month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2009,10,3273295,19803.0,10920,1040
2009,11,2619608,15862.0,9672,832
2009,12,2620472,16414.0,10504,832
2010,1,3276805,21530.0,780,260
2010,2,2622416,17116.0,1560,208
2010,3,2623280,16792.0,2392,208
2010,4,2624144,14930.0,3224,416
2010,5,3281395,16929.0,5200,520
2010,6,2626088,12775.0,5096,416
2010,7,3283825,15423.0,7540,780


In [61]:
deaths_monthly['total_deaths'] = deaths_monthly.drop(['Year', 'Month'], axis=1).sum(axis=1)

In [62]:
deaths_monthly

Unnamed: 0,Year,Quarter,Month,AK,AL,AZ,AR,CA,CO,CT,...,TN,TX,UT,VT,VA,WA,WV,WI,WY,total_deaths
1,2009,4,10,1497,103,8465,649,59049,8004,1197,...,6377,49384,2362,69,9900,6503,123,3501,34,337383
2,2009,4,11,2499,123,9217,1162,52313,7818,2162,...,7699,40618,3041,123,10596,7511,284,5289,63,346958
3,2009,4,12,2466,142,9032,1105,55042,8174,2009,...,7764,43927,2976,133,10926,7601,262,5183,64,358584
4,2010,1,1,23,4,256,9,1591,280,49,...,180,1231,116,21,394,219,1,184,7,10496
5,2010,1,2,428,67,3119,203,16266,3279,392,...,2001,15776,1034,51,3247,1973,53,1144,49,106963
6,2010,1,3,632,111,3181,334,15043,3058,659,...,2449,13469,1282,88,3410,2226,85,1502,77,109177
7,2010,2,4,711,199,3228,443,16333,3397,771,...,2610,14963,1342,100,3707,2634,139,1673,69,120328
8,2010,2,5,714,148,3320,407,16027,3421,700,...,2632,14405,1392,130,3665,2479,153,1788,68,118823
9,2010,2,6,733,204,3305,400,16261,3335,715,...,2627,14701,1279,115,3780,2562,147,1703,75,119545
10,2010,3,7,720,234,3273,422,16351,3477,814,...,2677,14785,1394,121,3757,2722,146,1757,117,121284


In [None]:
plt.figure(figsize=[20,16])
ind = np.arange(9*12+3)
width = 0.5
# p1 = plt.bar(ind, deaths_monthly['total_deaths'], width,
#              bottom=deaths_monthly['total_deaths'], color='grey')
p2 = plt.bar(ind, deaths_monthly['total_deaths'], width, color='grey')
#              bottom=menMeans, yerr=womenStd)
p3 = plt.bar(ind, grouped['DeathsFromPneumoniaAndInfluenza'][:-3], width,
             bottom=deaths_monthly['total_deaths'], color='red')

plt.xticks(np.linspace(0,110, 37),
           ('2009 Oct', '2009 Nov', '2009 Dec',
            #'2010 Jan', '2010 Feb', '2010 Mar',
            '2010 Apr', '2010 May', '2010 Jun',
            #'2010 Jul', '2010 Aug', '2010 Sep',
            '2010 Oct', '2010 Nov', '2010 Dec',
            #'2011 Jan', '2011 Feb', '2011 Mar',
            '2011 Apr', '2011 May', '2011 Jun',
            #'2011 Jul', '2011 Aug', '2011 Sep',
            '2011 Oct', '2011 Nov', '2011 Dec',
            #'2012 Jan', '2012 Feb', '2012 Mar',
            '2012 Apr', '2012 May', '2012 Jun',
            #'2012 Jul', '2012 Aug', '2012 Sep',
            '2012 Oct', '2012 Nov', '2012 Dec',
            #'2013 Jan', '2013 Feb', '2013 Mar',
            '2013 Apr', '2013 May', '2013 Jun',
            #'2013 Jul', '2013 Aug', '2013 Sep',
            '2013 Oct', '2013 Nov', '2013 Dec',
            #'2014 Jan', '2014 Feb', '2014 Mar',
            '2014 Apr', '2014 May', '2014 Jun',
            #'2014 Jul', '2014 Aug', '2014 Sep',
            '2014 Oct', '2014 Nov', '2014 Dec',
            #'2015 Jan', '2015 Feb', '2015 Mar',
            '2015 Apr', '2015 May', '2015 Jun',
            #'2015 Jul', '2015 Aug', '2015 Sep',
            '2015 Oct', '2015 Nov', '2015 Dec',
            #'2016 Jan', '2016 Feb', '2016 Mar',
            '2016 Apr', '2016 May', '2016 Jun',
            #'2016 Jul', '2016 Aug', '2016 Sep',
            '2016 Oct', '2016 Nov', '2016 Dec',
            #'2017 Jan', '2017 Feb', '2017 Mar',
            '2017 Apr', '2017 May', '2017 Jun',
            #'2017 Jul', '2017 Aug', '2017 Sep',
            '2017 Oct', '2017 Nov', '2017 Dec',
            #'2018 Jan', '2018 Feb', '2018 Mar',
            '2018 Apr', '2018 May', '2018 Jun',
            #'2018 Jul', '2018 Aug', '2018 Sep',
            '2018 Oct', '2018 Nov', '2018 Dec'
            ), rotation=90)
plt.legend((p2[0], p3[0]), ('Model monthly with travel', 'True deaths'), fontsize='12')
plt.xticks(size=12)
plt.yticks(size=12)
plt.xlabel("Year, Month", fontsize=18)
plt.ylabel("Number of Deaths", fontsize=18)
plt.title("Total number of deaths per month", fontsize=22)

Text(0.5, 1.0, 'Total number of deaths per month')