### Runs an extended Kalman filter using IHME SEIIR predictions along with measurement data of confirmed Covid-19 case counts (from New York Times data) and CMU/Facebook symptom data (from Covid-19 Symptom Challenge) to generate updated 7-day predictions of case counts for counties in New York State and Florida.

Developed by the University of Washington team of Les Atlas, Abraham Flaxman and Michael Rhoads.

S - Susceptible
E - Exposed
I1 - Presymptomatic
I2 - Symptomatic
R - Recovered

In [23]:
#%load_ext autoreload
#%autoreload

In [24]:
# %reset

In [25]:
import math
import datetime

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

import data_sets
import seiir_compartmental

In [26]:
# functions to support the Kalman filtering
def get_predicts_prior(day, seiir):
    x_hat = np.array([[seiir['S'].loc[day]],
                      [seiir['E'].loc[day]],
                      [seiir['I1'].loc[day]],
                      [seiir['I2'].loc[day]],
                      [seiir['R'].loc[day]]])

    beta_k = seiir['beta'].loc[day]

    return x_hat, beta_k


def step_seiir(x_hat, constants, beta_k, days=7):
    s_dict = {'S': x_hat[0, 0],
              'E': x_hat[1, 0],
              'I1': x_hat[2, 0],
              'I2': x_hat[3, 0],
              'R': x_hat[4, 0]}

    s = pd.Series(s_dict)

    for i in range(days):
        infectious = s.loc['I1'] + s.loc['I2']
        s = seiir_compartmental.compartmental_covid_step(s, s.sum(),
                                                         infectious,
                                                         constants['alpha'],
                                                         beta_k,
                                                         constants['gamma1'],
                                                         constants['gamma2'],
                                                         constants['sigma'],
                                                         constants['theta'])
    x_hat_future_prior = np.array([[s.loc['S']],
                                   [s.loc['E']],
                                   [s.loc['I1']],
                                   [s.loc['I2']],
                                   [s.loc['R']]])

    return x_hat_future_prior


def predict_step(x_hat_k1_prior, P, Q, beta_k, constants):
    S = x_hat_k1_prior[0, 0]
    E = x_hat_k1_prior[1, 0]
    I1 = x_hat_k1_prior[2, 0]
    I2 = x_hat_k1_prior[3, 0]
    R = x_hat_k1_prior[4, 0]
    N = S + E + I1 + I2 + R
    alpha = constants['alpha']
    sigma = constants['sigma']
    gamma1 = constants['gamma1']
    gamma2 = constants['gamma2']

    part_f_S = np.array([[-beta_k * math.pow(I1 + I2, alpha) / N],
                         [beta_k * math.pow(I1 + I2, alpha) / N],
                         [0],
                         [0],
                         [0]])

    part_f_E = np.array([[0],
                         [-sigma],
                         [sigma],
                         [0],
                         [0]])

    part_f_I1 = np.array([[-alpha * beta_k * S * math.pow(I1+I2, alpha-1) / N],
                          [alpha * beta_k * S * math.pow(I1+I2, alpha-1) / N],
                          [-gamma1],
                          [gamma1],
                          [0]])

    part_f_I2 = np.array([[-alpha * beta_k * S * math.pow(I1+I2, alpha-1) / N],
                          [alpha * beta_k * S * math.pow(I1+I2, alpha-1) / N],
                          [0],
                          [-gamma2],
                          [gamma2]])
    
    part_f_R = np.array([[0],
                         [0],
                         [0],
                         [0],
                         [0]])

    # 5x5
    f_jacob = np.concatenate([part_f_S, part_f_E, part_f_I1, part_f_I2,
                              part_f_R], axis=1)


    # 5x5
    # P_prior = f_jacob * P * f_jacob^T + Q
    P_prior = np.matmul(np.matmul(f_jacob, P), f_jacob.T) + Q
    
    return P_prior


def update_step(x_hat, x_hat_k1, P_prior, Rn, rho1, rho2, z_k, measure):
    # 5x5
    ep = 10**-8
    
    # if there is no survey measurement used, then rho1 is not needed
    if measure is None:
        H = np.array([[ep, 0, 0, 0, 0],
                      [0, ep, 0, 0, 0],
                      [0, 0, ep, 0, 0],
                      [0, 0, 0, rho2, 0],
                      [0, 0, 0, 0, ep]])
    else:
                
        H = np.array([[ep, 0, 0, 0, 0],
                      [0, ep, 0, 0, 0],
                      [0, 0, ep, rho1, 0],
                      [0, 0, 0, rho2, 0],
                      [0, 0, 0, 0, ep]])
    
    
    # Si = H * P_prior * H^T + Rn
    Si = np.matmul(np.matmul(H, P_prior), H.T) + Rn

    # K = P_prior * H.T (H * P_prior * H.T + R)^-1
    # K_new = P_prior * H^T * Si^(-1)
    K_new = np.matmul(np.matmul(P_prior, H.T), np.linalg.inv(Si))
    
    y_new = np.matmul(H, x_hat)

    # 5x1
    diff = z_k - y_new

    x_hat_k1_post = x_hat_k1 + np.matmul(K_new, diff)

    P_post = P_prior - np.matmul(np.matmul(K_new, Si), K_new.T)


    return x_hat_k1_post, P_post, diff, K_new[3, 3]


def create_data_sets():
    seiir_fl = pd.read_csv(r'data/seiir_projections/florida_proj.csv', header=0,
                            index_col='date', parse_dates=True)

    seiir_ny = pd.read_csv(r'data/seiir_projections/new_york_proj.csv', header=0,
                            index_col='date', parse_dates=True)
    
    smell_data = data_sets.create_symptom_df()
    
    case_data = data_sets.create_case_df_county()
    
    return seiir_fl, seiir_ny, smell_data, case_data


def get_smell_data(fips, fb_data):

    if fips == 'New York City':
        nyc_fips = ['36005', '36061', '36047', '36085']
        fb_data_geo = fb_data.loc[(slice(None), '36081'), :].copy()
        fb_data_geo = fb_data_geo.mean(level='date')

        for borough in nyc_fips:
            fb_data_geo += fb_data.loc[(slice(None), borough), :].copy().mean(level='date')

    else:
        fb_data_geo = fb_data.loc[(slice(None), fips), :].copy()

        # collapse down to a single index column (date)
        fb_data_geo.index = fb_data_geo.index.droplevel([0, 1])

    return fb_data_geo
    


def calc_fb_ma7(fb_data):
    """
    Returns a Pandas series
    """
    # the fb_data is a DataFrame while the case_data is a Series
    fb_ma7 = fb_data.rolling(window=7).mean()
    fb_ma7 = fb_ma7.iloc[6:, :]
    prop_ma7 = fb_ma7['num_stl'].div(fb_ma7['n'])

    return prop_ma7, fb_ma7['n'].copy(), fb_ma7['num_stl'].copy()


def calc_mse(prediction, actual):
    """
    Inputs should be two Pandas Series of same length.
    Outputs a float.
    """
    err = prediction - actual
    sum_sq_err = (err**2).sum()
    mse = sum_sq_err / err.count()
    return mse


def create_hh_data():
    # bring in household covid symptom data
    full_data = pd.read_csv(r'data/from_challenge/overall-county.csv', header=0, dtype={'fips': 'str', 'pct_hh_cli': 'float64'},
                            parse_dates=[0])
    time_start = full_data['date'].min()
    time_end = full_data['date'].max()
    full_data.set_index(['fips', 'date'], inplace=True)
    full_data.sort_index(inplace=True)

    # derive count of survey respondents with household members having covid symptoms
    full_data['num_hh_cli'] = full_data['n'].mul(full_data['pct_hh_cli']/100.).round()
    full_data['num_hh_cli'] = full_data['num_hh_cli'].astype('int64')

    # group by county and date
    data_of_interest = full_data[['n', 'num_hh_cli']].copy().groupby(level=(0, 1)).sum()
    idx = pd.IndexSlice

    # create full date range
    date_rng = pd.date_range(time_start, time_end)
    iterables = [data_of_interest.index.levels[0], date_rng]
    new_index = pd.MultiIndex.from_product(iterables, names=['fips', 'date'])

    data_of_interest = data_of_interest.reindex(index=new_index)
    # this will have NaN values in the new index entries for which there was no
    # previous data -- fill them upon extracting a particular county
    
    return data_of_interest


def create_county_lists():
    case_data = data_sets.create_case_df_county()
    ny_counties = case_data[case_data['state'] == 'New York']['county'].unique()
    fl_counties = case_data[case_data['state'] == 'Florida']['county'].unique()
    return ny_counties, fl_counties

In [27]:
# create data sets

seiir_fl, seiir_ny, smell_data, case_data = create_data_sets()
hh_data = create_hh_data()

In [28]:
seiir_ny.index[-1]


Timestamp('2021-12-31 00:00:00')

In [29]:

def run_county(the_state, county_name, seiir, smell_data, hh_data, case_data, measure='smell', K0=datetime.date(2020, 4, 18), delay=6):
    """
    The main function that calculates the forecast for a given county over the entire time range
    Arguments:
        measure -- potential values are: 'smell', 'hh', or None
    """
    # set constants
    #K0 = datetime.date(2020, 4, 12)

    if county_name == 'New York City':
        the_county = county_name
    else:
        the_county = data_sets.get_fips(the_state, county_name)

    constants = {
        'alpha': 0.948786,
        'gamma1': 0.500000,
        'gamma2': 0.662215,
        'sigma': 0.266635,
        'theta': 6.000000
        }

    # set initial values for Kalman filter parameters
    P_mult = 1
    Q_mult = 1

    # Rn is the R noise covariance matrix; it remains constant thru the stepping of the
    # Kalman filter
    Rn_mult = 5*10**-8

    Rn_22 = 10000
    Rn_32 = 1000

    Rn_23 = 1000
    Rn_33 = 100

    Rn = Rn_mult * np.array([[0, 0, 0, 0, 0],
                             [0, 0, 0, 0, 0],
                             [0, 0, Rn_22, Rn_23, 0],
                             [0, 0, Rn_32, Rn_33, 0],
                             [0, 0, 0, 0, 0]])

    Q = Q_mult * np.eye(5)
    P = P_mult * np.eye(5)
    
    if the_county == 'New York City':
        county_pop = 0
        for each in ['36081', '36005', '36061', '36047', '36085']:
            this_count, state_pop = data_sets.get_pops(each)
            county_pop += this_count
    else:
        county_pop, state_pop = data_sets.get_pops(the_county)

    b = county_pop / state_pop

    # generate data
    case_data_geo = case_data.loc[the_county]['case_rate'].copy()
    smell_data_geo = get_smell_data(the_county, smell_data)
    

    if measure == 'hh':
        idx = pd.IndexSlice
        
        if the_county == 'New York City':
            nyc_fips = ['36005', '36061', '36047', '36085']
            hh_cli = hh_data.loc[idx['36081', :], :].loc['36081'].copy()
            hh_cli.fillna(method='pad', inplace=True)

            for borough in nyc_fips:
                bor_hh_cli = hh_data.loc[idx[borough, :], :].loc[borough].copy()
                bor_hh_cli.fillna(method='pad', inplace=True)
                hh_cli += bor_hh_cli

        else:
            hh_cli = hh_data.loc[idx[the_county, :], :].loc[the_county].copy()
            hh_cli.fillna(method='pad', inplace=True)

        # calculate moving averages on the fb and case data
        hh_cli_ma7 = hh_cli.rolling(window=7).mean()
        hh_cli_ma7 = hh_cli_ma7.iloc[6:, :]
        num_survey_ma7 = hh_cli_ma7['num_hh_cli']
        prop_cli_ma7 = num_survey_ma7.div(hh_cli_ma7['n'])
        
    elif measure == 'smell':
        prop_ma7, n_ma7, num_survey_ma7 = calc_fb_ma7(smell_data_geo)
        
    else:
        num_survey_ma7 = None


    case_ma7 = case_data_geo.rolling(window=7).mean()
    case_ma7_all = case_ma7.iloc[6:]
    

    # get starting compartment values for the state level
    x_hat_state_k0, beta_k0 = get_predicts_prior(K0, seiir)

    # convert to the county level
    x_hat_k0 = b * x_hat_state_k0

    I2_county = x_hat_k0[3, 0]


    rho1 = .05
    rho2 = case_ma7_all.loc['2020-04-12'] / I2_county


    # create empty dictionaries to hold the estimated values
    case_est = {}
    seiir_pred = {}

    diff_rat = {}

    K_val_dict = {}


    # Original data run ----------------
    start = K0
    k = start

    while k <= datetime.date(2020, 12, 8):    

    # each cycle of the while loop executes a step

        # get state level compartments
        x_hat_state_k, beta_k = get_predicts_prior(k, seiir)

        # step the state level compartments 1 day forward
        x_hat_state_k1 = step_seiir(x_hat_state_k, constants, beta_k, days=1)
        P = predict_step(x_hat_state_k, P, Q, beta_k, constants)
        
        # then 6 more
        P_est = P.copy()
        for i in range(6):
            x_hat_est = step_seiir(x_hat_state_k1, constants, beta_k, days=1)
            P_est = predict_step(x_hat_state_k1, P_est, Q, beta_k, constants)
            x_hat_state_k1 = x_hat_est.copy()

        # convert the state level compartments to county level values
        x_hat_k = b * x_hat_state_k
        x_hat_k1 = b * x_hat_state_k1

        
        indexDate = k + datetime.timedelta(days=7)
        # store seiir prediction before it's modified by Kalman filter
        # multiply by rho2 to get values equivalent to confirmed case count
        seiir_pred[indexDate] = rho2 * x_hat_k1[3, 0]

        # get measurements for current day
        if measure == 'smell':
            z_k = np.array([[0],
                            [0],
                            [prop_ma7.loc[k - datetime.timedelta(days=delay)]],
                            [case_ma7_all.loc[k]],
                            [0]])    

        elif measure == 'hh':
            z_k = np.array([[0],
                            [0],
                            [prop_cli_ma7.loc[k - datetime.timedelta(days=delay)]],
                            [case_ma7_all.loc[k]],
                            [0]])
            
        else:
            z_k = np.array([[0],
                            [0],
                            [0],
                            [case_ma7_all.loc[k]],
                            [0]])

        # predict step using the stepped fwd SEIIR compartment values 
        #P = predict_step(x_hat_k1, P, Q, beta_k, constants)

        # update step
        x_hat_post, P_post, the_diff, K_val = update_step(x_hat_k, x_hat_k1, P_est, Rn,
                                         rho1, rho2, z_k, measure)


        # store estimated values for proportion and case rate
        K_val_dict[indexDate] = K_val
        case_est[indexDate] = rho2 * x_hat_post[3, 0]


        diff_rat[indexDate] = the_diff[2, 0] / the_diff[3, 0]

        # update the P and k
        # comment out the P update because it has already been stepped once
        #P = P_post
        k += datetime.timedelta(days=1)

    # create pandas series of the estimated case rate
    predicted_case = pd.Series(case_est)
    predicted_seiir_prior = pd.Series(seiir_pred)
    K_val_series = pd.Series(K_val_dict)
    
    return predicted_case, predicted_seiir_prior, case_ma7_all, num_survey_ma7, K_val_series

In [36]:
# a single county run
predicted_case, predicted_seiir_prior, case_ma7_all, num_survey_ma7, K_vals = run_county('NY', 'Nassau', seiir_ny, smell_data, hh_data, case_data, measure='smell', K0=datetime.date(2020, 4, 18), delay=0)

In [41]:
K0 = datetime.date(2020, 4, 18)

ny_counties, fl_counties = create_county_lists()

## Forecast NY State counties

In [42]:

df_index = []
err_data = []

predictions_case_only = pd.DataFrame()
predictions_smell_0delay = pd.DataFrame()
predictions_hh_0delay = pd.DataFrame()


left = K0 + datetime.timedelta(days=7)
right = datetime.date(2020, 10, 23)
shifted_range = pd.date_range(start=left, end=right)
numLoop = 5

In [43]:
# conduct a forecast for all the counties in NY that there exists survey data for

for each in ny_counties:
    if each == 'New York City':
        fips = 'New York City'
    else:
        fips = data_sets.get_fips('NY', each)
        if (fips not in hh_data.index.levels[0]) or (fips not in smell_data.index.levels[1]):
            continue
    
    print('Starting', fips)
    
    county_data = {}
    
    conf_mses = np.zeros(numLoop)
    for i in range(numLoop):
        predicted_case, predicted_seiir_prior, case_ma7_all, num_survey_ma7, K_vals = run_county('NY', each, seiir_ny, smell_data, hh_data, case_data, measure=None, K0=K0, delay=0)
        conf_mses[i] = calc_mse(predicted_case.loc[left:right], case_ma7_all.loc[left:right])
    county_data['mse_confirmed-only'] = conf_mses.mean()
    predictions_case_only[fips] = predicted_case
    
    
    smell0_mses = np.zeros(numLoop)
    for i in range(numLoop):
        predicted_case, predicted_seiir_prior, case_ma7_all, num_survey_ma7, K_vals = run_county('NY', each, seiir_ny, smell_data, hh_data, case_data, measure='smell', K0=K0, delay=0)
        smell0_mses[i] = calc_mse(predicted_case.loc[left:right], case_ma7_all.loc[left:right])
    county_data['mse_smell-0delay'] = smell0_mses.mean()
    predictions_smell_0delay[fips] = predicted_case
    

    hh0_mses = np.zeros(numLoop)
    for i in range(numLoop):
        predicted_case, predicted_seiir_prior, case_ma7_all, num_survey_ma7, K_vals = run_county('NY', each, seiir_ny, smell_data, hh_data, case_data, measure='hh', K0=K0, delay=0)
        hh0_mses[i] = calc_mse(predicted_case.loc[left:right], case_ma7_all.loc[left:right])
    county_data['mse_hh-0delay'] = hh0_mses.mean()
    predictions_hh_0delay[fips] = predicted_case
    
    
    naive_pred = case_ma7_all.loc[left-datetime.timedelta(days=7):right-datetime.timedelta(days=7)].copy()
    naive_pred.index = shifted_range
    county_data['naive'] = calc_mse(naive_pred.loc[left:right], case_ma7_all.loc[left:right])
    
    county_data['ihme'] = calc_mse(predicted_seiir_prior.loc[left:right], case_ma7_all.loc[left:right])
    
    df_index.append(fips)
    err_data.append(county_data)
    print('Complete with', fips)

Starting 36001
Complete with 36001
Starting 36007
Complete with 36007
Starting 36011
Complete with 36011
Starting 36013
Complete with 36013
Starting 36015
Complete with 36015
Starting 36019
Complete with 36019
Starting 36027
Complete with 36027
Starting 36029
Complete with 36029
Starting 36045
Complete with 36045
Starting 36053
Complete with 36053
Starting 36055
Complete with 36055
Starting 36059
Complete with 36059
Starting 36063




Complete with 36063
Starting 36065
Complete with 36065
Starting 36067
Complete with 36067
Starting 36069
Complete with 36069
Starting 36071
Complete with 36071
Starting 36075
Complete with 36075
Starting 36083
Complete with 36083
Starting 36087
Complete with 36087
Starting 36089
Complete with 36089
Starting 36091
Complete with 36091
Starting 36093
Complete with 36093
Starting 36101
Complete with 36101
Starting 36103
Complete with 36103
Starting 36107
Complete with 36107
Starting 36109
Complete with 36109
Starting 36111
Complete with 36111
Starting 36117
Complete with 36117
Starting 36119
Complete with 36119
Starting New York City
Complete with New York City


In [44]:
err_ny_df = pd.DataFrame(err_data, index=df_index)

In [45]:
err_ny_df

Unnamed: 0,mse_confirmed-only,mse_smell-0delay,mse_hh-0delay,naive,ihme
36001,63.18119,140.064984,138.630141,58.118188,204.685166
36007,83.426388,957.571617,943.858539,82.950437,571.845814
36011,1.774223,5.642487,5.54309,1.698924,4.261771
36013,11.005737,97.318431,102.784913,10.96322,36.294082
36015,27.170071,455.333062,453.070886,27.059542,213.868281
36019,0.802063,12.485686,12.318862,0.778762,1.395856
36027,111.574655,102.871271,101.513815,74.615609,50.905489
36029,340.21161,1118.723716,1118.924772,278.86017,2671.435429
36045,0.469671,435.868828,432.047918,0.443709,0.748027
36053,13.830569,15010.506211,13200.424153,13.627719,11.807451


In [46]:
# put error metrics in dataframe and output to .csv

err_ny_df.to_csv(r'output/err_ny_20201023.csv')

# output the dataframe of each type of estimate to .csvs
predictions_case_only.to_csv(r'output/pred_ny_case_only.csv')
predictions_smell_0delay.to_csv(r'output/pred_ny_smell_0delay.csv')
predictions_hh_0delay.to_csv(r'output/pred_ny_hh_0delay.csv')

## Forecast Florida counties

In [49]:
df_index = []
err_data = []

predictions_case_only = pd.DataFrame()
predictions_smell_0delay = pd.DataFrame()
predictions_hh_0delay = pd.DataFrame()

left = K0 + datetime.timedelta(days=7)
right = datetime.date(2020, 10, 23)
shifted_range = pd.date_range(start=left, end=right)
numLoop = 5

In [50]:
state_2L = 'FL'

for each in fl_counties:
    fips = data_sets.get_fips(state_2L, each)
    if (fips not in hh_data.index.levels[0]) or (fips not in smell_data.index.levels[1]):
        continue
    
    print('Starting', fips)
    
    county_data = {}
    
    conf_mses = np.zeros(numLoop)
    for i in range(numLoop):
        predicted_case, predicted_seiir_prior, case_ma7_all, num_survey_ma7, K_vals = run_county(state_2L, each, seiir_fl, smell_data, hh_data, case_data, measure=None, K0=K0, delay=0)
        conf_mses[i] = calc_mse(predicted_case.loc[left:right], case_ma7_all.loc[left:right])
    
    county_data['mse_confirmed-only'] = conf_mses.mean()
    predictions_case_only[fips] = predicted_case
    
    
    smell0_mses = np.zeros(numLoop)
    for i in range(numLoop):
        predicted_case, predicted_seiir_prior, case_ma7_all, num_survey_ma7, K_vals = run_county(state_2L, each, seiir_fl, smell_data, hh_data, case_data, measure='smell', K0=K0, delay=0)
        smell0_mses[i] = calc_mse(predicted_case.loc[left:right], case_ma7_all.loc[left:right])
    county_data['mse_smell-0delay'] = smell0_mses.mean()
    predictions_smell_0delay[fips] = predicted_case
    

    hh0_mses = np.zeros(numLoop)
    for i in range(numLoop):
        predicted_case, predicted_seiir_prior, case_ma7_all, num_survey_ma7, K_vals = run_county(state_2L, each, seiir_fl, smell_data, hh_data, case_data, measure='hh', K0=K0, delay=0)
        hh0_mses[i] = calc_mse(predicted_case.loc[left:right], case_ma7_all.loc[left:right])
    county_data['mse_hh-0delay'] = hh0_mses.mean()
    predictions_hh_0delay[fips] = predicted_case
    
    
    naive_pred = case_ma7_all.loc[left-datetime.timedelta(days=7):right-datetime.timedelta(days=7)].copy()
    naive_pred.index = shifted_range
    county_data['naive'] = calc_mse(naive_pred.loc[left:right], case_ma7_all.loc[left:right])
    
    county_data['ihme'] = calc_mse(predicted_seiir_prior.loc[left:right], case_ma7_all.loc[left:right])
    
    
    df_index.append(fips)
    err_data.append(county_data)
    print('Complete with', fips)


Starting 12001
Complete with 12001
Starting 12005
Complete with 12005
Starting 12009
Complete with 12009
Starting 12011
Complete with 12011
Starting 12015
Complete with 12015
Starting 12017
Complete with 12017
Starting 12019
Complete with 12019
Starting 12021
Complete with 12021
Starting 12031
Complete with 12031
Starting 12033
Complete with 12033
Starting 12035
Complete with 12035
Starting 12053
Complete with 12053
Starting 12057
Complete with 12057
Starting 12061
Complete with 12061
Starting 12069
Complete with 12069
Starting 12071
Complete with 12071
Starting 12073
Complete with 12073
Starting 12081
Complete with 12081
Starting 12083
Complete with 12083
Starting 12085
Complete with 12085
Starting 12086
Complete with 12086
Starting 12091
Complete with 12091
Starting 12095
Complete with 12095
Starting 12097
Complete with 12097
Starting 12099
Complete with 12099
Starting 12101
Complete with 12101
Starting 12103
Complete with 12103
Starting 12105
Complete with 12105
Starting 12109
Compl

In [51]:
err_fl_df = pd.DataFrame(err_data, index=df_index)
err_fl_df.to_csv(r'output/err_fl_20201023.csv')

predictions_case_only.to_csv(r'output/pred_fl_case_only.csv')
predictions_smell_0delay.to_csv(r'output/pred_fl_smell_0delay.csv')
predictions_hh_0delay.to_csv(r'output/pred_fl_hh_0delay.csv')

# All plotting code below this

In [170]:

county = 'Westchester'
state_2l = 'NY'
if state_2l == 'NY':
    the_seiir = seiir_ny
else:
    the_seiir = seiir_fl


In [171]:
if county == 'New York City':
    county_pop = 0
    for each in ['36081', '36005', '36061', '36047', '36085']:
        this_count, state_pop = data_sets.get_pops(each)
        county_pop += this_count
else:
    county_pop, state_pop = data_sets.get_pops(data_sets.get_fips(state_2l, county))

In [172]:
county_pop

967506

In [173]:
state_pop

19453561

In [174]:


county_preds = {}
survey_data = {}
estimates = ['confirmed_only', 'hh_delay0', 'smell_delay0']
measure_type = [None, 'hh', 'smell']
#delay_list = [0, 0, 0]
for i in range(len(estimates)):
    predicted_case, predicted_seiir_prior, case_ma7_all, num_survey_ma7, K_vals = run_county(state_2l, county, the_seiir, smell_data, hh_data, case_data, measure=measure_type[i], K0=datetime.date(2020, 4, 18), delay=0)
    survey_data[estimates[i]] = num_survey_ma7
    county_preds[estimates[i]] = predicted_case


In [93]:
#estimate_type = ['hh_delay6', 'confirmed_only', 'hh_delay0', 'smell_delay0', 'smell_delay6']
labels = ['Forecast using Case Count Only', 'Forecast Case Count w/ Household Symptoms', 'Forecast Case Count w/ Loss of Smell/Taste']

In [None]:
K0 = datetime.date(2020, 4, 18)
d = datetime.date(2020, 10, 23)
gold_right = d + datetime.timedelta(days=7)
tick_end = predicted_seiir_prior.index[-1]

week_interval = pd.date_range(start=K0, end=tick_end, freq='W')
week_interval = [x.to_pydatetime().date() for x in week_interval]

colors = ['blue', 'red', 'green']

for i in range(len(estimates)):
    plt.plot(county_preds[estimates[i]].index, county_preds[estimates[i]], label=labels[i], c=colors[i])
plt.plot(case_ma7_all.loc[K0:gold_right].index, case_ma7_all.loc[K0:gold_right], label='Actual Case Count', c=gold,
         linewidth=width)
plt.plot(predicted_seiir_prior.index, predicted_seiir_prior, label='IHME Forecast', c=)


#naieve estimate
naive_start = K0 + datetime.timedelta(days=7)
naive_d = d + datetime.timedelta(days=7)
plt.plot(case_ma7_all.loc[naive_start:naive_d].index, case_ma7_all.loc[K0:d], label='Naive Estimate', c='orange')
plt.legend(loc='upper center')
plt.xticks(rotation=30, ha='right', rotation_mode='anchor')
#plt.ylim(2700, 3055)
#plt.xlim(datetime.date(2020, 7, 25), datetime.date(2020, 7, 30))


#plt.title(county)
plt.ylabel('Number of Cases per Day')

In [None]:
K_vals.head(50)

In [None]:
plt.plot(K_vals.index, K_vals)
plt.title('Miami-Dade hh6 K[4, 4] Val')

In [43]:
xlim_left = datetime.date(2020, 6, 1)
xlim_right = None

leftylim_low = None
leftylim_high = None

rightylim_low = None
rightylim_high = None

xtick_size = 14
xlabel_size = 14

In [45]:
# plot findings -- multiple plots

# Plotting constants and variables ----------------

plt.style.use('seaborn-whitegrid')
matplotlib.rcParams.update({'font.size': 20})
purple = '#33016F'
gold = '#9E7A27'
gray = '#797979'
width = 3
%matplotlib qt


In [40]:

d = datetime.date(2020, 12, 8)
K0 = datetime.date(2020, 4, 18)
tick_end = predicted_seiir_prior.index[-1]
end = d + datetime.timedelta(days=7)

week_interval = pd.date_range(start=K0, end=tick_end, freq='W')
week_interval = [x.to_pydatetime().date() for x in week_interval]



#start = datetime.date(2020, 6, 1)

In [85]:
day_interval = pd.date_range(start=K0, end=tick_end, freq='D')

In [47]:
# this was to support practice presentation - to show just our forecast for two regions in the same state

plt.plot(nyc_pred.loc[start:end].index, nyc_pred.loc[start:end], label='New York City 7-day forecast', linewidth=width, c=purple)
plt.plot(nassau_pred.loc[start:end].index, nassau_pred.loc[start:end], label='Nassau County 7-day forecast', linewidth=width, c=gold)

plt.xticks(week_interval, rotation=30, ha='right', rotation_mode='anchor', fontsize=xtick_size)
plt.ylabel('Number of Cases per Day')

plt.legend(loc='upper left')
plt.ylim(leftylim_low, leftylim_high)
plt.xlim(xlim_left, xlim_right)

(737577.0, 737783.85)

In [175]:
### This is the one plot

end = d + datetime.timedelta(days=7)
d = datetime.date(2020, 10, 23)
fig4, ax41 = plt.subplots(1)
plt.sca(ax41)
plt.plot(case_ma7_all.loc[K0:end].index, case_ma7_all.loc[K0:end], label='Actual Case Count', c=gold,
         linewidth=width)
plt.plot(predicted_seiir_prior.index, predicted_seiir_prior,
         label='IHME 7-day Forecast', c=gray, linewidth=width)

#naive estimate
naive_start = K0 + datetime.timedelta(days=7)
naive_d = d + datetime.timedelta(days=7)
plt.plot(case_ma7_all.loc[naive_start:naive_d].index, case_ma7_all.loc[K0:d], label='Naive Estimate', c='orange')

"""
ax42 = ax41.twinx()
plt.sca(ax42)
plt.plot(survey_data['hh_delay0'].loc[K0:end].index, survey_data['hh_delay0'].loc[K0:end], c='red',
         label='Count of Household Symptom Response', linewidth=width-1)
plt.plot(survey_data['smell_delay0'].loc[K0:end].index, survey_data['smell_delay0'].loc[K0:end], c='maroon',
         label='Count of Loss of Smell Response', linewidth=width-1)
plt.grid(axis='y', linestyle=':')

plt.ylabel('Number of Responses per Day')
plt.legend(loc='upper right')
plt.ylim(rightylim_low, rightylim_high)
"""

#plt.sca(ax41)
"""
plt.plot(predicted_case.index, predicted_case.loc[K0:], label='Our 7-Day Forecast',
         c=purple, linewidth=width)
"""
plt.plot(county_preds['confirmed_only'].index, county_preds['confirmed_only'], c=purple,
         label='7-Day Forecast using Case Count Only', linewidth=width-1)
plt.plot(county_preds['hh_delay0'].index, county_preds['hh_delay0'], c='cyan',
         label='7-Day Forecast Case Count w/ Household Symptoms', linewidth=width-1)
plt.plot(county_preds['smell_delay0'].index, county_preds['smell_delay0'], c='green',
         label='7-Day Forecast Case Count w/ Loss of Smell/Taste', linewidth=width-1)




plt.xticks(week_interval, rotation=30, ha='right', rotation_mode='anchor', fontsize=xtick_size)
plt.ylabel('Number of Cases per Day')

plt.legend(loc='upper right')
plt.ylim(leftylim_low, leftylim_high)
plt.xlim(xlim_left, xlim_right)

#plt.title('Miami-Dade, Household Symptoms, delay of 6 days')

(737523.25, 737737.75)

In [None]:
county_preds[estimates[0]]


In [None]:
K0

In [21]:
ihme_ny_state = pd.read_csv(r'data/seiir_projections/new_york_proj.csv', index_col='date', parse_dates=True)

In [22]:
ihme_ny_state

Unnamed: 0_level_0,S,E,I1,I2,R,beta
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-02-10,1.979023e+07,106.854381,25.532244,0.000000,0.000000e+00,5.000033
2020-02-11,1.979019e+07,135.370375,34.165081,5.735700,1.868136e+00,3.828568
2020-02-12,1.979014e+07,163.677532,44.895678,12.645587,6.414865e+00,3.065767
2020-02-13,1.979009e+07,185.941663,55.374704,20.009029,1.385817e+01,2.621240
2020-02-14,1.979004e+07,209.578200,66.865630,28.139395,2.479945e+01,2.268072
...,...,...,...,...,...,...
2021-12-27,1.258239e+07,6212.135951,3087.113438,2385.406004,7.196296e+06,0.872742
2021-12-28,1.258052e+07,6413.925937,3187.020352,2464.107692,7.197785e+06,0.874872
2021-12-29,1.257859e+07,6622.087015,3290.015767,2545.245365,7.199323e+06,0.876996
2021-12-30,1.257659e+07,6836.762379,3396.203136,2628.918289,7.200911e+06,0.879116


In [29]:
gray_start = predicted_seiir_prior.index[0]
gray_end = predicted_seiir_prior.index[-1]
plt.plot(predicted_seiir_prior.index, predicted_seiir_prior,
         label='IHME 7-day Forecast', c=gray, linewidth=width)
plt.plot(ihme_ny_state.loc[gray_start:gray_end].index, ihme_ny_state['I2'].loc[gray_start:gray_end],
         label='IHME State I2')
plt.title('Nassau, b = 0.06975')
plt.legend()

<matplotlib.legend.Legend at 0x1d00b276ac8>

In [143]:
# NYC plot settings

xlim_left = datetime.date(2020, 4, 12)
xlim_right = datetime.date(2020, 11, 3)

leftylim_low = -400
leftylim_high = 7500

rightylim_low = -3
rightylim_high = 57

xtick_size = 14
xlabel_size = 14

In [None]:
# Nassau plot settings
xlim_left = datetime.date(2020, 4, 12)
xlim_right = datetime.date(2020, 11, 3)

leftylim_low = -66
leftylim_high = 1000

rightylim_low = -.3
rightylim_high = 8

xtick_size = 14
xlabel_size = 14

In [None]:
# Westchester plot settings
xlim_left = datetime.date(2020, 4, 12)
xlim_right = datetime.date(2020, 11, 3)

leftylim_low = -28
leftylim_high = 700

rightylim_low = -.5
rightylim_high = 17

xtick_size = 14
xlabel_size = 14

In [None]:
# Albany plot settings
xlim_left = datetime.date(2020, 4, 12)
xlim_right = datetime.date(2020, 11, 3)

leftylim_low = -6
leftylim_high = 110

rightylim_low = -.125
rightylim_high = 3.8

xtick_size = 14
xlabel_size = 14

In [None]:
# Erie plot settings
xlim_left = datetime.date(2020, 4, 12)
xlim_right = datetime.date(2020, 11, 3)

leftylim_low = -10
leftylim_high = 280

rightylim_low = -.8
rightylim_high = 22

xtick_size = 14
xlabel_size = 14