### Runs an extended Kalman filter using IHME SEIIR predictions along with measurement data of confirmed Covid-19 case counts (from New York Times data) and Facebook symptom data (loss of smell/taste, from Covid-19 Symptom Challenge) to generate updated 7-day predictions of case counts for counties in New York State.

Developed by the University of Washington team of Les Atlas, Abraham Flaxman and Michael Rhoads.

SEIIR model compartments:

- S - Susceptible
- E - Exposed
- I1 - Presymptomatic
- I2 - Symptomatic
- R - Recovered

In [1]:
#%load_ext autoreload
#%autoreload

In [2]:
#%reset

In [3]:
import math
import datetime

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

import data_sets
import seiir_compartmental

In [11]:
# functions to support the Kalman filtering
def get_predicts_prior(day, seiir):
    """
    Look up the SEIIR projection for a single day.

    Arguments:
        day -- index label used with .loc on each column of seiir
        seiir -- table with 'S', 'E', 'I1', 'I2', 'R' and 'beta' columns

    Returns a 5x1 numpy array ordered S, E, I1, I2, R together with the
    'beta' value for that day.
    """
    compartments = ('S', 'E', 'I1', 'I2', 'R')

    # one row per compartment so the result is a column vector
    x_hat = np.array([[seiir[name].loc[day]] for name in compartments])

    return x_hat, seiir['beta'].loc[day]


def step_seiir(x_hat, constants, beta_k, days=7):
    """
    Advance the SEIIR compartments a number of days with a fixed beta.

    Arguments:
        x_hat -- 5x1 array ordered S, E, I1, I2, R
        constants -- dictionary with 'alpha', 'gamma1', 'gamma2', 'sigma'
            and 'theta' entries
        beta_k -- beta value passed unchanged to every daily step
        days -- number of calls made to
            seiir_compartmental.compartmental_covid_step

    Returns a 5x1 numpy array in the same compartment order.
    """
    names = ('S', 'E', 'I1', 'I2', 'R')
    state = pd.Series({name: x_hat[row, 0] for row, name in enumerate(names)})

    for _ in range(days):
        # total population and infectious count are recomputed from the
        # current state before every step
        state = seiir_compartmental.compartmental_covid_step(
            state,
            state.sum(),
            state.loc['I1'] + state.loc['I2'],
            constants['alpha'],
            beta_k,
            constants['gamma1'],
            constants['gamma2'],
            constants['sigma'],
            constants['theta'])

    return np.array([[state.loc[name]] for name in names])


def predict_step(x_hat_k1_prior, P, Q, beta_k, constants, verbose=True):
    """
    Kalman filter predict step for the error covariance.

    Builds the 5x5 matrix of partial derivatives used by the filter
    (rows and columns ordered S, E, I1, I2, R) at x_hat_k1_prior and
    returns f_jacob * P * f_jacob^T + Q.

    Arguments:
        x_hat_k1_prior -- 5x1 array ordered S, E, I1, I2, R
        P -- 5x5 covariance from the previous step
        Q -- 5x5 matrix added to the propagated covariance
        beta_k -- beta value used in the S and I1/I2 partial derivatives
        constants -- dictionary with 'alpha', 'sigma', 'gamma1' and
            'gamma2' entries
        verbose -- when True (the default) the Jacobian and the prior
            covariance are printed; pass False to silence the output

    Returns the 5x5 prior covariance.

    math.pow raises ValueError when I1 + I2 is zero and alpha < 1, and
    the division by N fails when all compartments sum to zero.
    """
    S = x_hat_k1_prior[0, 0]
    E = x_hat_k1_prior[1, 0]
    I1 = x_hat_k1_prior[2, 0]
    I2 = x_hat_k1_prior[3, 0]
    R = x_hat_k1_prior[4, 0]
    N = S + E + I1 + I2 + R
    alpha = constants['alpha']
    sigma = constants['sigma']
    gamma1 = constants['gamma1']
    gamma2 = constants['gamma2']

    # the I1 and I2 columns share the same infection term, so compute the
    # two distinct partial derivatives once
    d_S = beta_k * math.pow(I1 + I2, alpha) / N
    d_I = alpha * beta_k * S * math.pow(I1 + I2, alpha - 1) / N

    # 5x5, columns are the partials with respect to S, E, I1, I2, R
    f_jacob = np.array([[-d_S, 0, -d_I, -d_I, 0],
                        [d_S, -sigma, d_I, d_I, 0],
                        [0, sigma, -gamma1, 0, 0],
                        [0, 0, gamma1, -gamma2, 0],
                        [0, 0, 0, gamma2, 0]])

    # 5x5
    # P_k1_prior = f_jacob * P * f_jacob^T + Q
    P_k1_prior = np.matmul(np.matmul(f_jacob, P), np.transpose(f_jacob)) + Q

    if verbose:
        # the "[4,4]" labels are 1-based; the value printed is element
        # [3, 3] (the I2 diagonal entry)
        print('f_jacob =')
        print(f_jacob)
        print('f_jacob[4,4] =', f_jacob[3, 3])
        print()
        print('P prior - in predict step =')
        print(P_k1_prior)
        print('P_prior[4,4] =', P_k1_prior[3, 3])
        print()
        print()

    return P_k1_prior


def update_step(x_hat, x_hat_k1, P_k1, Rn, rho1, rho2, z_k, measure):
    """
    Kalman filter update step.

    Arguments:
        x_hat -- 5x1 state used to form the predicted measurement H * x_hat
        x_hat_k1 -- 5x1 state that receives the correction
        P_k1 -- 5x5 prior covariance
        Rn -- 5x5 measurement noise covariance
        rho1 -- H entry in row 2, column 3 (only used when measure is not
            None)
        rho2 -- H entry in row 3, column 3
        z_k -- 5x1 measurement vector
        measure -- None selects the measurement matrix without rho1

    Returns the corrected state, the corrected covariance, the 5x1
    difference z_k - H * x_hat, and element [3, 3] of the gain.
    """
    # 5x5; a tiny value on the remaining diagonal entries instead of zero
    tiny = 10**-8
    H = np.diag([tiny, tiny, tiny, rho2, tiny])
    if measure is not None:
        H[2, 3] = rho1

    H_t = H.T

    # Si = H * P_k1 * H^T + Rn
    Si = H @ P_k1 @ H_t + Rn

    # K_new = P_k1 * H^T * Si^(-1)
    K_new = P_k1 @ H_t @ np.linalg.inv(Si)

    # 5x1
    diff = z_k - H @ x_hat

    x_hat_k1_post = x_hat_k1 + K_new @ diff
    P_k1_post = P_k1 - K_new @ Si @ K_new.T

    return x_hat_k1_post, P_k1_post, diff, K_new[3, 3]


def create_data_sets():
    """
    Load every input data set used by the notebook.

    Returns the Florida SEIIR projections, the New York SEIIR projections,
    the symptom data from data_sets.create_symptom_df() and the county case
    data from data_sets.create_case_df_county(), in that order.
    """
    # both projection files are read the same way: indexed by parsed dates
    csv_options = {'header': 0, 'index_col': 'date', 'parse_dates': True}

    seiir_fl = pd.read_csv(r'data/seiir_projections/florida_proj.csv',
                           **csv_options)
    seiir_ny = pd.read_csv(r'data/seiir_projections/new_york_proj.csv',
                           **csv_options)

    smell_data = data_sets.create_symptom_df()
    case_data = data_sets.create_case_df_county()

    return seiir_fl, seiir_ny, smell_data, case_data


def get_smell_data(fips, fb_data):
    """
    Extract the symptom survey rows for one county.

    Arguments:
        fips -- county FIPS code matched against index level 1 of fb_data,
            or 'New York City' to combine the five borough codes
        fb_data -- DataFrame whose MultiIndex has the county code as its
            second level and a level named 'date'

    Returns a DataFrame indexed by date. For 'New York City' each borough
    is first averaged per date and the borough results are then summed.
    """
    if fips == 'New York City':
        boroughs = ['36081', '36005', '36061', '36047', '36085']
        fb_data_geo = None

        for borough in boroughs:
            # DataFrame.mean(level=...) was removed in pandas 2.0; the
            # groupby form is its documented replacement (sort=False keeps
            # the row order the old call produced)
            borough_daily = (fb_data.loc[(slice(None), borough), :]
                             .groupby(level='date', sort=False).mean())
            if fb_data_geo is None:
                fb_data_geo = borough_daily
            else:
                fb_data_geo = fb_data_geo + borough_daily

    else:
        fb_data_geo = fb_data.loc[(slice(None), fips), :].copy()

        # collapse down to a single index column (date)
        fb_data_geo.index = fb_data_geo.index.droplevel([0, 1])

    return fb_data_geo
    


def calc_fb_ma7(fb_data):
    """
    Compute 7-row moving averages of the symptom survey columns.

    fb_data is a DataFrame with 'n' and 'num_stl' columns. The first six
    rows, for which no full window exists, are dropped.

    Returns three Pandas Series: the ratio of the averaged 'num_stl' to the
    averaged 'n', the averaged 'n', and the averaged 'num_stl'.
    """
    smoothed = fb_data.rolling(window=7).mean().iloc[6:, :]

    respondents = smoothed['n']
    positives = smoothed['num_stl']

    return positives.div(respondents), respondents.copy(), positives.copy()


def calc_mse(prediction, actual):
    """
    Mean squared error between two Pandas Series of the same length.

    The squared differences are summed and divided by the number of
    non-missing differences. Outputs a float.
    """
    residual = prediction - actual
    return (residual ** 2).sum() / residual.count()


def create_hh_data():
    """
    Load the county level household symptom survey data.

    Reads data/from_challenge/overall-county.csv, derives the number of
    respondents reporting household members with covid symptoms, sums 'n'
    and 'num_hh_cli' per county and date, and reindexes to every date
    between the first and last date in the file for every county.

    Returns a DataFrame indexed by ('fips', 'date') with 'n' and
    'num_hh_cli' columns; index entries with no source data hold NaN.
    """
    # bring in new data
    raw = pd.read_csv(r'data/from_challenge/overall-county.csv', header=0,
                      dtype={'fips': 'str', 'pct_hh_cli': 'float64'},
                      parse_dates=[0])

    # remember the covered period before 'date' moves into the index
    first_day = raw['date'].min()
    last_day = raw['date'].max()

    raw.set_index(['fips', 'date'], inplace=True)
    raw.sort_index(inplace=True)

    # derive count of survey respondents with household members having
    # covid symptoms
    raw['num_hh_cli'] = (raw['n'].mul(raw['pct_hh_cli']/100.)
                         .round().astype('int64'))

    # group by county and date
    per_county_day = raw[['n', 'num_hh_cli']].copy().groupby(level=(0, 1)).sum()

    # create full date range for every county
    full_index = pd.MultiIndex.from_product(
        [per_county_day.index.levels[0], pd.date_range(first_day, last_day)],
        names=['fips', 'date'])

    # this will have NaN values in the new index entries for which there was
    # no previous data -- fill them upon extracting a particular county
    return per_county_day.reindex(index=full_index)


In [5]:
def create_county_lists():
    """
    List the county names present in the case data for each state.

    Returns the unique 'county' values for the rows whose 'state' is
    'New York', followed by those whose 'state' is 'Florida'.
    """
    case_data = data_sets.create_case_df_county()

    def counties_in(state_name):
        in_state = case_data['state'] == state_name
        return case_data[in_state]['county'].unique()

    return counties_in('New York'), counties_in('Florida')

In [6]:
# load the SEIIR projections for both states, the symptom data and the
# county case data once so that every run below shares them
seiir_fl, seiir_ny, smell_data, case_data = create_data_sets()

# per county and date survey counts for household covid symptoms
hh_data = create_hh_data()

In [7]:
def run_county(the_state, county_name, seiir, smell_data, hh_data, case_data, measure='smell', K0=datetime.date(2020, 4, 18), delay=6, end_date=datetime.date(2020, 10, 23)):
    """
    Run the Kalman filter day by day for one county.

    Arguments:
        the_state -- state identifier passed to data_sets.get_fips
        county_name -- county name, or 'New York City' to combine the five
            borough FIPS codes
        seiir -- state level SEIIR projections read by get_predicts_prior
        smell_data -- symptom survey data (only read when measure is
            'smell')
        hh_data -- household survey data (only read when measure is 'hh')
        case_data -- case data; case_data.loc[county]['case_rate'] is used
        measure -- potential values are: 'smell', 'hh', or None
        K0 -- first day on which a filter step is started
        delay -- number of days subtracted from the current day when
            looking up the symptom measurement
        end_date -- last day (inclusive) on which a filter step is started

    Returns five values:
        predicted_case -- Series of rho2 times the updated I2 compartment,
            indexed by the day seven days after each step
        predicted_seiir_prior -- Series of the county scaled I2 compartment
            before the update, with the same index
        case_ma7_all -- 7-day moving average of the county case rate
        num_survey_ma7 -- 7-day moving average of the survey count for the
            chosen measure, or None when neither survey is used
        K_val_series -- Series of element [3, 3] of the Kalman gain
    """
    nyc_boroughs = ['36081', '36005', '36061', '36047', '36085']

    if county_name == 'New York City':
        the_county = county_name
    else:
        the_county = data_sets.get_fips(the_state, county_name)

    constants = {
        'alpha': 0.948786,
        'gamma1': 0.500000,
        'gamma2': 0.662215,
        'sigma': 0.266635,
        'theta': 6.000000
        }

    # set initial values for Kalman filter parameters
    P_mult = 1
    Q_mult = 1

    # Rn is the R noise covariance matrix; it remains constant thru the
    # stepping of the Kalman filter
    Rn_mult = 5*10**-8

    Rn_22 = 10000
    Rn_32 = 1000

    Rn_23 = 1000
    Rn_33 = 100

    Rn = Rn_mult * np.array([[0, 0, 0, 0, 0],
                             [0, 0, 0, 0, 0],
                             [0, 0, Rn_22, Rn_23, 0],
                             [0, 0, Rn_32, Rn_33, 0],
                             [0, 0, 0, 0, 0]])

    Q = Q_mult * np.eye(5)
    P = P_mult * np.eye(5)

    # b is the county share of the state population; it scales the state
    # level compartments down to the county
    if the_county == 'New York City':
        county_pop = 0
        for each in nyc_boroughs:
            this_count, state_pop = data_sets.get_pops(each)
            county_pop += this_count
    else:
        county_pop, state_pop = data_sets.get_pops(the_county)

    b = county_pop / state_pop

    # generate data
    case_data_geo = case_data.loc[the_county]['case_rate'].copy()

    # symptom_ma7 holds the proportion series used as the symptom
    # measurement; it stays None when no survey measurement is used
    symptom_ma7 = None

    if measure == 'hh':
        idx = pd.IndexSlice

        if the_county == 'New York City':
            hh_cli = hh_data.loc[idx['36081', :], :].loc['36081'].copy()
            # forward fill the dates that had no survey rows
            hh_cli.ffill(inplace=True)

            for borough in nyc_boroughs[1:]:
                bor_hh_cli = hh_data.loc[idx[borough, :], :].loc[borough].copy()
                bor_hh_cli.ffill(inplace=True)
                hh_cli += bor_hh_cli

        else:
            hh_cli = hh_data.loc[idx[the_county, :], :].loc[the_county].copy()
            hh_cli.ffill(inplace=True)

        # calculate moving averages on the survey data
        hh_cli_ma7 = hh_cli.rolling(window=7).mean()
        hh_cli_ma7 = hh_cli_ma7.iloc[6:, :]
        num_survey_ma7 = hh_cli_ma7['num_hh_cli']
        symptom_ma7 = num_survey_ma7.div(hh_cli_ma7['n'])

    elif measure == 'smell':
        # only look the symptom data up when it is actually used, so that
        # counties without symptom rows can still run the other measures
        smell_data_geo = get_smell_data(the_county, smell_data)
        symptom_ma7, n_ma7, num_survey_ma7 = calc_fb_ma7(smell_data_geo)

    else:
        num_survey_ma7 = None

    case_ma7 = case_data_geo.rolling(window=7).mean()
    case_ma7_all = case_ma7.iloc[6:]

    # get starting compartment values for the state level
    x_hat_state_k0, beta_k0 = get_predicts_prior(K0, seiir)

    # convert to the county level
    x_hat_k0 = b * x_hat_state_k0

    I2_county = x_hat_k0[3, 0]

    rho1 = 0.0001
    # NOTE(review): the case rate is read at the fixed date 2020-04-12 while
    # I2_county is taken at K0 -- confirm this is intended when K0 changes
    rho2 = case_ma7_all.loc['2020-04-12'] / I2_county

    # create empty dictionaries to hold the estimated values
    case_est = {}
    seiir_pred = {}
    K_val_dict = {}

    d = K0

    # each cycle of the while loop executes a step
    while d <= end_date:

        # get state level compartments
        x_hat_state_k, beta_k = get_predicts_prior(d, seiir)

        # step the state level compartments 7 days forward
        x_hat_state_k1 = step_seiir(x_hat_state_k, constants, beta_k)

        # convert the state level compartments to county level values
        x_hat_k = b * x_hat_state_k
        x_hat_k1 = b * x_hat_state_k1

        indexDate = d + datetime.timedelta(days=7)
        # store seiir prediction before it's modified by Kalman filter
        seiir_pred[indexDate] = x_hat_k1[3, 0]

        # get measurements for current day; the symptom entry is zero when
        # no survey measurement is used
        if symptom_ma7 is None:
            symptom_k = 0
        else:
            symptom_k = symptom_ma7.loc[d - datetime.timedelta(days=delay)]

        z_k = np.array([[0],
                        [0],
                        [symptom_k],
                        [case_ma7_all.loc[d]],
                        [0]])

        # predict step using the stepped fwd SEIIR compartment values
        P = predict_step(x_hat_k1, P, Q, beta_k, constants)

        # update step
        x_hat_post, P_post, the_diff, K_val = update_step(x_hat_k, x_hat_k1, P, Rn,
                                                          rho1, rho2, z_k, measure)

        # store estimated values for the gain and case rate
        K_val_dict[indexDate] = K_val
        case_est[indexDate] = rho2 * x_hat_post[3, 0]

        # update the P and d
        P = P_post
        d += datetime.timedelta(days=1)

    # create pandas series of the estimated case rate
    predicted_case = pd.Series(case_est)
    predicted_seiir_prior = pd.Series(seiir_pred)
    K_val_series = pd.Series(K_val_dict)

    return predicted_case, predicted_seiir_prior, case_ma7_all, num_survey_ma7, K_val_series

In [12]:
# example run: Miami-Dade with the household survey measurement and a
# 6 day delay on the survey data
predicted_case, predicted_seiir_prior, case_ma7_all, num_survey_ma7, K_vals = run_county('FL', 'Miami-Dade', seiir_fl, smell_data, hh_data, case_data, measure='hh', K0=datetime.date(2020, 4, 18), delay=6)

f_jacob =
[[-2.79198224e-04  0.00000000e+00 -3.07281626e-01 -3.07281626e-01
   0.00000000e+00]
 [ 2.79198224e-04 -2.66635000e-01  3.07281626e-01  3.07281626e-01
   0.00000000e+00]
 [ 0.00000000e+00  2.66635000e-01 -5.00000000e-01  0.00000000e+00
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  5.00000000e-01 -6.62215000e-01
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  6.62215000e-01
   0.00000000e+00]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.18884407 -0.18884407  0.15364081  0.04984569 -0.2034865 ]
 [-0.18884407  1.2599383  -0.22473504 -0.04984569  0.2034865 ]
 [ 0.15364081 -0.22473504  1.32109422 -0.25        0.        ]
 [ 0.04984569 -0.04984569 -0.25        1.68852871 -0.43852871]
 [-0.2034865   0.2034865   0.         -0.43852871  1.43852871]]
P_prior[4,4] = 1.688528706225


f_jacob =
[[-2.73941423e-04  0.00000000e+00 -3.05100054e-01 -3.05100054e-01
   0.00000000e+00]
 [ 2.73941423e-04 -2.66635000e-01  3.05100054e-01  3.05100054e-01
 

f_jacob =
[[-2.26658879e-04  0.00000000e+00 -2.81823280e-01 -2.81823280e-01
   0.00000000e+00]
 [ 2.26658879e-04 -2.66635000e-01  2.81823280e-01  2.81823280e-01
   0.00000000e+00]
 [ 0.00000000e+00  2.66635000e-01 -5.00000000e-01  0.00000000e+00
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  5.00000000e-01 -6.62215000e-01
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  6.62215000e-01
   0.00000000e+00]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.09538064e+00 -9.53806442e-02  1.69220662e-01 -1.69220661e-01
  -5.44418397e-10]
 [-9.53806442e-02  1.09538064e+00 -1.69220662e-01  1.69220661e-01
   5.44418402e-10]
 [ 1.69220662e-01 -1.69220662e-01  1.30022477e+00 -3.00224773e-01
  -9.65886139e-10]
 [-1.69220661e-01  1.69220661e-01 -3.00224773e-01  1.30022477e+00
   9.65886134e-10]
 [-5.44418397e-10  5.44418407e-10 -9.65886143e-10  9.65886134e-10
   1.00000000e+00]]
P_prior[4,4] = 1.3002247722171716


f_jacob =
[[-2.21124336e-04  0.00000000e+00 -2.8

f_jacob =
[[-1.97605390e-04  0.00000000e+00 -2.83051188e-01 -2.83051188e-01
   0.00000000e+00]
 [ 1.97605390e-04 -2.66635000e-01  2.83051188e-01  2.83051188e-01
   0.00000000e+00]
 [ 0.00000000e+00  2.66635000e-01 -5.00000000e-01  0.00000000e+00
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  5.00000000e-01 -6.62215000e-01
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  6.62215000e-01
   0.00000000e+00]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.09627467e+00 -9.62746704e-02  1.70065830e-01 -1.70065829e-01
  -5.47137464e-10]
 [-9.62746704e-02  1.09627467e+00 -1.70065830e-01  1.70065829e-01
   5.47137474e-10]
 [ 1.70065830e-01 -1.70065830e-01  1.30041532e+00 -3.00415324e-01
  -9.66499158e-10]
 [-1.70065829e-01  1.70065829e-01 -3.00415324e-01  1.30041532e+00
   9.66499149e-10]
 [-5.47137464e-10  5.47137469e-10 -9.66499154e-10  9.66499149e-10
   1.00000000e+00]]
P_prior[4,4] = 1.3004153229929671


f_jacob =
[[-2.02705629e-04  0.00000000e+00 -2.8

f_jacob =
[[-2.21625996e-04  0.00000000e+00 -3.04201464e-01 -3.04201464e-01
   0.00000000e+00]
 [ 2.21625996e-04 -2.66635000e-01  3.04201464e-01  3.04201464e-01
   0.00000000e+00]
 [ 0.00000000e+00  2.66635000e-01 -5.00000000e-01  0.00000000e+00
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  5.00000000e-01 -6.62215000e-01
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  6.62215000e-01
   0.00000000e+00]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.11078239e+00 -1.10782387e-01  1.82087202e-01 -1.82087201e-01
  -5.85812807e-10]
 [-1.10782387e-01  1.11078239e+00 -1.82087202e-01  1.82087201e-01
   5.85812807e-10]
 [ 1.82087202e-01 -1.82087202e-01  1.29928719e+00 -2.99287187e-01
  -9.62869729e-10]
 [-1.82087201e-01  1.82087201e-01 -2.99287187e-01  1.29928719e+00
   9.62869632e-10]
 [-5.85812807e-10  5.85812807e-10 -9.62869729e-10  9.62869632e-10
   1.00000000e+00]]
P_prior[4,4] = 1.2992871859487534


f_jacob =
[[-2.22301422e-04  0.00000000e+00 -3.0

f_jacob =
[[-2.23975376e-04  0.00000000e+00 -3.06467488e-01 -3.06467488e-01
   0.00000000e+00]
 [ 2.23975376e-04 -2.66635000e-01  3.06467488e-01  3.06467488e-01
   0.00000000e+00]
 [ 0.00000000e+00  2.66635000e-01 -5.00000000e-01  0.00000000e+00
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  5.00000000e-01 -6.62215000e-01
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  6.62215000e-01
   0.00000000e+00]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.11235306e+00 -1.12353059e-01  1.83303390e-01 -1.83303390e-01
  -5.89725415e-10]
 [-1.12353059e-01  1.11235306e+00 -1.83303390e-01  1.83303390e-01
   5.89725401e-10]
 [ 1.83303390e-01 -1.83303390e-01  1.29905846e+00 -2.99058459e-01
  -9.62133718e-10]
 [-1.83303390e-01  1.83303390e-01 -2.99058459e-01  1.29905846e+00
   9.62133732e-10]
 [-5.89725427e-10  5.89725422e-10 -9.62133746e-10  9.62133751e-10
   1.00000000e+00]]
P_prior[4,4] = 1.299058458214581


f_jacob =
[[-2.21331183e-04  0.00000000e+00 -3.06

f_jacob =
[[-2.49514354e-04  0.00000000e+00 -3.20486489e-01 -3.20486489e-01
   0.00000000e+00]
 [ 2.49514354e-04 -2.66635000e-01  3.20486489e-01  3.20486489e-01
   0.00000000e+00]
 [ 0.00000000e+00  2.66635000e-01 -5.00000000e-01  0.00000000e+00
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  5.00000000e-01 -6.62215000e-01
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  6.62215000e-01
   0.00000000e+00]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.12265112e+00 -1.22651120e-01  1.91351466e-01 -1.91351466e-01
  -6.15617867e-10]
 [-1.22651120e-01  1.12265112e+00 -1.91351466e-01  1.91351466e-01
   6.15618009e-10]
 [ 1.91351466e-01 -1.91351466e-01  1.29853281e+00 -2.98532812e-01
  -9.60442845e-10]
 [-1.91351466e-01  1.91351466e-01 -2.98532812e-01  1.29853281e+00
   9.60442605e-10]
 [-6.15617890e-10  6.15618032e-10 -9.60442882e-10  9.60442642e-10
   1.00000000e+00]]
P_prior[4,4] = 1.298532810661639


f_jacob =
[[-2.54647226e-04  0.00000000e+00 -3.24

f_jacob =
[[-4.14048479e-04  0.00000000e+00 -3.78456306e-01 -3.78456306e-01
   0.00000000e+00]
 [ 4.14048479e-04 -2.66635000e-01  3.78456306e-01  3.78456306e-01
   0.00000000e+00]
 [ 0.00000000e+00  2.66635000e-01 -5.00000000e-01  0.00000000e+00
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  5.00000000e-01 -6.62215000e-01
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  6.62215000e-01
   0.00000000e+00]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.16939006e+00 -1.69390056e-01  2.23790769e-01 -2.23790768e-01
  -7.19981788e-10]
 [-1.69390056e-01  1.16939006e+00 -2.23790769e-01  2.23790768e-01
   7.19981783e-10]
 [ 2.23790769e-01 -2.23790769e-01  1.29566262e+00 -2.95662622e-01
  -9.51208598e-10]
 [-2.23790768e-01  2.23790768e-01 -2.95662622e-01  1.29566262e+00
   9.51208603e-10]
 [-7.19981788e-10  7.19981788e-10 -9.51208603e-10  9.51208603e-10
   1.00000000e+00]]
P_prior[4,4] = 1.2956626209877178


f_jacob =
[[-4.51552144e-04  0.00000000e+00 -3.8

f_jacob =
[[-0.00087549  0.         -0.42384474 -0.42384474  0.        ]
 [ 0.00087549 -0.266635    0.42384474  0.42384474  0.        ]
 [ 0.          0.266635   -0.5         0.          0.        ]
 [ 0.          0.          0.5        -0.662215    0.        ]
 [ 0.          0.          0.          0.662215    0.        ]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.21034422e+00 -2.10344216e-01  2.48138285e-01 -2.48138284e-01
  -7.98312827e-10]
 [-2.10344216e-01  1.21034422e+00 -2.48138285e-01  2.48138284e-01
   7.98312836e-10]
 [ 2.48138285e-01 -2.48138285e-01  1.29272309e+00 -2.92723087e-01
  -9.41751562e-10]
 [-2.48138284e-01  2.48138284e-01 -2.92723087e-01  1.29272309e+00
   9.41751649e-10]
 [-7.98312827e-10  7.98312841e-10 -9.41751567e-10  9.41751649e-10
   1.00000000e+00]]
P_prior[4,4] = 1.292723086443624


f_jacob =
[[-0.00091999  0.         -0.42323424 -0.42323424  0.        ]
 [ 0.00091999 -0.266635    0.42323424  0.42323424  0.        ]
 [ 0.          0.266635

f_jacob =
[[-0.00136326  0.         -0.38775912 -0.38775912  0.        ]
 [ 0.00136326 -0.266635    0.38775912  0.38775912  0.        ]
 [ 0.          0.266635   -0.5         0.          0.        ]
 [ 0.          0.          0.5        -0.662215    0.        ]
 [ 0.          0.          0.          0.662215    0.        ]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.17709243e+00 -1.77092426e-01  2.28353655e-01 -2.28353654e-01
  -7.34661650e-10]
 [-1.77092426e-01  1.17709243e+00 -2.28353655e-01  2.28353654e-01
   7.34661631e-10]
 [ 2.28353655e-01 -2.28353655e-01  1.29445298e+00 -2.94452975e-01
  -9.47317026e-10]
 [-2.28353654e-01  2.28353654e-01 -2.94452975e-01  1.29445297e+00
   9.47317045e-10]
 [-7.34661636e-10  7.34661612e-10 -9.47317003e-10  9.47317027e-10
   1.00000000e+00]]
P_prior[4,4] = 1.2944529745207891


f_jacob =
[[-0.00139262  0.         -0.38421653 -0.38421653  0.        ]
 [ 0.00139262 -0.266635    0.38421653  0.38421653  0.        ]
 [ 0.          0.26663

f_jacob =
[[-0.00157318  0.         -0.35129575 -0.35129575  0.        ]
 [ 0.00157318 -0.266635    0.35129575  0.35129575  0.        ]
 [ 0.          0.266635   -0.5         0.          0.        ]
 [ 0.          0.          0.5        -0.662215    0.        ]
 [ 0.          0.          0.          0.662215    0.        ]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.14631492e+00 -1.46314917e-01  2.08250334e-01 -2.08250334e-01
  -6.69985030e-10]
 [-1.46314917e-01  1.14631492e+00 -2.08250334e-01  2.08250334e-01
   6.69985030e-10]
 [ 2.08250334e-01 -2.08250334e-01  1.29640315e+00 -2.96403146e-01
  -9.53591057e-10]
 [-2.08250334e-01  2.08250334e-01 -2.96403146e-01  1.29640314e+00
   9.53590960e-10]
 [-6.69985043e-10  6.69985038e-10 -9.53591071e-10  9.53590978e-10
   1.00000000e+00]]
P_prior[4,4] = 1.2964031446677757


f_jacob =
[[-0.00157209  0.         -0.34674341 -0.34674341  0.        ]
 [ 0.00157209 -0.266635    0.34674341  0.34674341  0.        ]
 [ 0.          0.26663

f_jacob =
[[-0.00119846  0.         -0.28723705 -0.28723705  0.        ]
 [ 0.00119846 -0.266635    0.28723705  0.28723705  0.        ]
 [ 0.          0.266635   -0.5         0.          0.        ]
 [ 0.          0.          0.5        -0.662215    0.        ]
 [ 0.          0.          0.          0.662215    0.        ]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.09893251e+00 -9.89325066e-02  1.72214044e-01 -1.72214043e-01
  -5.54048687e-10]
 [-9.89325066e-02  1.09893251e+00 -1.72214044e-01  1.72214043e-01
   5.54048687e-10]
 [ 1.72214044e-01 -1.72214044e-01  1.29977687e+00 -2.99776865e-01
  -9.64445037e-10]
 [-1.72214043e-01  1.72214043e-01 -2.99776865e-01  1.29977686e+00
   9.64445037e-10]
 [-5.54048687e-10  5.54048687e-10 -9.64445037e-10  9.64445037e-10
   1.00000000e+00]]
P_prior[4,4] = 1.2997768642172378


f_jacob =
[[-0.00114861  0.         -0.28248358 -0.28248358  0.        ]
 [ 0.00114861 -0.266635    0.28248358  0.28248358  0.        ]
 [ 0.          0.26663

f_jacob =
[[-0.00080875  0.         -0.26634801 -0.26634801  0.        ]
 [ 0.00080875 -0.266635    0.26634801  0.26634801  0.        ]
 [ 0.          0.266635   -0.5         0.          0.        ]
 [ 0.          0.          0.5        -0.662215    0.        ]
 [ 0.          0.          0.          0.662215    0.        ]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.08548123e+00 -8.54812345e-02  1.60469070e-01 -1.60469070e-01
  -5.16262730e-10]
 [-8.54812345e-02  1.08548123e+00 -1.60469070e-01  1.60469070e-01
   5.16262720e-10]
 [ 1.60469070e-01 -1.60469070e-01  1.30123948e+00 -3.01239478e-01
  -9.69150642e-10]
 [-1.60469070e-01  1.60469070e-01 -3.01239478e-01  1.30123948e+00
   9.69150555e-10]
 [-5.16262730e-10  5.16262725e-10 -9.69150647e-10  9.69150555e-10
   1.00000000e+00]]
P_prior[4,4] = 1.3012394772009208


f_jacob =
[[-0.00080357  0.         -0.26859187 -0.26859187  0.        ]
 [ 0.00080357 -0.266635    0.26859187  0.26859187  0.        ]
 [ 0.          0.26663

f_jacob =
[[-0.00087302  0.         -0.30233622 -0.30233622  0.        ]
 [ 0.00087302 -0.266635    0.30233622  0.30233622  0.        ]
 [ 0.          0.266635   -0.5         0.          0.        ]
 [ 0.          0.          0.5        -0.662215    0.        ]
 [ 0.          0.          0.          0.662215    0.        ]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.10946764e+00 -1.09467641e-01  1.81036264e-01 -1.81036264e-01
  -5.82431636e-10]
 [-1.09467641e-01  1.10946764e+00 -1.81036264e-01  1.81036264e-01
   5.82431645e-10]
 [ 1.81036264e-01 -1.81036264e-01  1.29939559e+00 -2.99395590e-01
  -9.63218429e-10]
 [-1.81036264e-01  1.81036264e-01 -2.99395590e-01  1.29939559e+00
   9.63218419e-10]
 [-5.82431636e-10  5.82431641e-10 -9.63218424e-10  9.63218419e-10
   1.00000000e+00]]
P_prior[4,4] = 1.2993955894979525


f_jacob =
[[-0.00087551  0.         -0.30372786 -0.30372786  0.        ]
 [ 0.00087551 -0.266635    0.30372786  0.30372786  0.        ]
 [ 0.          0.26663

f_jacob =
[[-0.00083163  0.         -0.29702696 -0.29702696  0.        ]
 [ 0.00083163 -0.266635    0.29702696  0.29702696  0.        ]
 [ 0.          0.266635   -0.5         0.          0.        ]
 [ 0.          0.          0.5        -0.662215    0.        ]
 [ 0.          0.          0.          0.662215    0.        ]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.10569563e+00 -1.05695627e-01  1.77922612e-01 -1.77922611e-01
  -5.72414409e-10]
 [-1.05695627e-01  1.10569563e+00 -1.77922612e-01  1.77922611e-01
   5.72414409e-10]
 [ 1.77922612e-01 -1.77922612e-01  1.29950582e+00 -2.99505823e-01
  -9.63573064e-10]
 [-1.77922611e-01  1.77922611e-01 -2.99505823e-01  1.29950582e+00
   9.63572967e-10]
 [-5.72414409e-10  5.72414414e-10 -9.63573069e-10  9.63572967e-10
   1.00000000e+00]]
P_prior[4,4] = 1.299505821893427


f_jacob =
[[-0.00082973  0.         -0.29610203 -0.29610203  0.        ]
 [ 0.00082973 -0.266635    0.29610203  0.29610203  0.        ]
 [ 0.          0.266635

f_jacob =
[[-0.00082438  0.         -0.30159519 -0.30159519  0.        ]
 [ 0.00082438 -0.266635    0.30159519  0.30159519  0.        ]
 [ 0.          0.266635   -0.5         0.          0.        ]
 [ 0.          0.          0.5        -0.662215    0.        ]
 [ 0.          0.          0.          0.662215    0.        ]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.10893404e+00 -1.08934042e-01  1.80596450e-01 -1.80596449e-01
  -5.81016692e-10]
 [-1.08934042e-01  1.10893404e+00 -1.80596450e-01  1.80596449e-01
   5.81016682e-10]
 [ 1.80596450e-01 -1.80596450e-01  1.29940207e+00 -2.99402070e-01
  -9.63239307e-10]
 [-1.80596449e-01  1.80596449e-01 -2.99402070e-01  1.29940207e+00
   9.63239317e-10]
 [-5.81016681e-10  5.81016671e-10 -9.63239289e-10  9.63239299e-10
   1.00000000e+00]]
P_prior[4,4] = 1.2994020692741295


f_jacob =
[[-0.0008248   0.         -0.30315791 -0.30315791  0.        ]
 [ 0.0008248  -0.266635    0.30315791  0.30315791  0.        ]
 [ 0.          0.26663

f_jacob =
[[-0.0007066   0.         -0.28258384 -0.28258384  0.        ]
 [ 0.0007066  -0.266635    0.28258384  0.28258384  0.        ]
 [ 0.          0.266635   -0.5         0.          0.        ]
 [ 0.          0.          0.5        -0.662215    0.        ]
 [ 0.          0.          0.          0.662215    0.        ]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.09583247e+00 -9.58324748e-02  1.69564676e-01 -1.69564676e-01
  -5.45525119e-10]
 [-9.58324748e-02  1.09583247e+00 -1.69564676e-01  1.69564676e-01
   5.45525139e-10]
 [ 1.69564676e-01 -1.69564676e-01  1.30002543e+00 -3.00025428e-01
  -9.65244741e-10]
 [-1.69564676e-01  1.69564676e-01 -3.00025428e-01  1.30002543e+00
   9.65244721e-10]
 [-5.45525119e-10  5.45525134e-10 -9.65244736e-10  9.65244721e-10
   1.00000000e+00]]
P_prior[4,4] = 1.3000254272225065


f_jacob =
[[-0.00067089  0.         -0.27730918 -0.27730918  0.        ]
 [ 0.00067089 -0.266635    0.27730918  0.27730918  0.        ]
 [ 0.          0.26663

f_jacob =
[[-4.31290874e-04  0.00000000e+00 -2.42076190e-01 -2.42076190e-01
   0.00000000e+00]
 [ 4.31290874e-04 -2.66635000e-01  2.42076190e-01  2.42076190e-01
   0.00000000e+00]
 [ 0.00000000e+00  2.66635000e-01 -5.00000000e-01  0.00000000e+00
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  5.00000000e-01 -6.62215000e-01
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  6.62215000e-01
   0.00000000e+00]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.07083753e+00 -7.08375288e-02  1.46312466e-01 -1.46312466e-01
  -4.70717919e-10]
 [-7.08375288e-02  1.07083753e+00 -1.46312466e-01  1.46312466e-01
   4.70717924e-10]
 [ 1.46312466e-01 -1.46312466e-01  1.30220334e+00 -3.02203338e-01
  -9.72251518e-10]
 [-1.46312466e-01  1.46312466e-01 -3.02203338e-01  1.30220334e+00
   9.72251416e-10]
 [-4.70717919e-10  4.70717924e-10 -9.72251518e-10  9.72251416e-10
   1.00000000e+00]]
P_prior[4,4] = 1.3022033370324217


f_jacob =
[[-4.15357565e-04  0.00000000e+00 -2.4

P prior - in predict step =
[[ 1.08303810e+00 -8.30381004e-02  1.58270910e-01 -1.58270910e-01
  -5.09190786e-10]
 [-8.30381004e-02  1.08303810e+00 -1.58270910e-01  1.58270910e-01
   5.09190967e-10]
 [ 1.58270910e-01 -1.58270910e-01  1.30166491e+00 -3.01664908e-01
  -9.70519530e-10]
 [-1.58270910e-01  1.58270910e-01 -3.01664908e-01  1.30166491e+00
   9.70519251e-10]
 [-5.09190795e-10  5.09190977e-10 -9.70519549e-10  9.70519270e-10
   1.00000000e+00]]
P_prior[4,4] = 1.3016649069487214


f_jacob =
[[-3.77903924e-04  0.00000000e+00 -2.68666796e-01 -2.68666796e-01
   0.00000000e+00]
 [ 3.77903924e-04 -2.66635000e-01  2.68666796e-01  2.68666796e-01
   0.00000000e+00]
 [ 0.00000000e+00  2.66635000e-01 -5.00000000e-01  0.00000000e+00
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  5.00000000e-01 -6.62215000e-01
   0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  6.62215000e-01
   0.00000000e+00]]
f_jacob[4,4] = -0.662215

P prior - in predict step =
[[ 1.08701739e+00 -8

In [21]:
# Reference start date: passed to run_county as K0 and used to derive the
# scoring window (left = K0 + 7 days) in the cells below.
K0 = datetime.date(2020, 4, 18)

# create_county_lists is defined elsewhere in this notebook; its two results
# are the county collections iterated by the New York and Florida loops.
ny_counties, fl_counties = create_county_lists()

58

In [6]:

# Accumulators filled by the New York county loop: one row label and one
# dict of error values per processed county.
df_index, err_data = [], []

# One (initially empty) forecast table per filter configuration; the county
# loop adds one column per county to each of them.
(predictions_case_only,
 predictions_smell_0delay,
 predictions_smell_6delay,
 predictions_hh_0delay,
 predictions_hh_6delay) = (pd.DataFrame() for _ in range(5))

# Number of repeated runs averaged for every configuration.
numLoop = 5

# Scoring window: starts one week after K0 and ends on 2020-10-23.
right = datetime.date(2020, 10, 23)
left = K0 + datetime.timedelta(weeks=1)
# Daily index spanning the scoring window; used to re-label the naive forecast.
shifted_range = pd.date_range(start=left, end=right)

In [7]:
# One entry per filter configuration scored for every county:
# (error-table column, `measure` argument, `delay` argument, forecast table).
# The order is the order in which the configurations are run.
ny_run_configs = [
    ('mse_confirmed-only', None, 0, predictions_case_only),
    ('mse_smell-0delay', 'smell', 0, predictions_smell_0delay),
    ('mse_smell-6delay', 'smell', 6, predictions_smell_6delay),
    ('mse_hh-0delay', 'hh', 0, predictions_hh_0delay),
    ('mse_hh-6delay', 'hh', 6, predictions_hh_6delay),
]

for each in ny_counties:
    if each == 'New York City':
        # New York City is labelled by name instead of a FIPS code and does
        # not go through the survey-data membership check below.
        fips = 'New York City'
    else:
        fips = data_sets.get_fips('NY', each)
        # Skip counties that are absent from either survey data index.
        if (fips not in hh_data.index.levels[0]) or (fips not in smell_data.index.levels[1]):
            continue

    print('Starting', fips)

    county_data = {}

    for cfg_key, cfg_measure, cfg_delay, cfg_frame in ny_run_configs:
        # Run the same configuration numLoop times and average the MSE over
        # the [left, right] scoring window.
        cfg_mses = np.zeros(numLoop)
        for i in range(numLoop):
            predicted_case, predicted_seiir_prior, case_ma7_all, num_survey_ma7, K_vals = run_county('NY', each, seiir_ny, smell_data, hh_data, case_data, measure=cfg_measure, K0=K0, delay=cfg_delay)
            cfg_mses[i] = calc_mse(predicted_case.loc[left:right], case_ma7_all.loc[left:right])
        county_data[cfg_key] = cfg_mses.mean()
        # Only the forecast of the last repetition is stored for the county.
        cfg_frame[fips] = predicted_case

    # Naive baseline: the case series from 7 days earlier, re-labelled with
    # the dates of the scoring window, scored the same way as the forecasts.
    naive_pred = case_ma7_all.loc[left-datetime.timedelta(days=7):right-datetime.timedelta(days=7)].copy()
    naive_pred.index = shifted_range
    county_data['naive'] = calc_mse(naive_pred.loc[left:right], case_ma7_all.loc[left:right])

    df_index.append(fips)
    err_data.append(county_data)
    print('Complete with', fips)

Starting 36001
Complete with 36001
Starting 36007
Complete with 36007
Starting 36011
Complete with 36011
Starting 36013
Complete with 36013
Starting 36015
Complete with 36015
Starting 36019
Complete with 36019
Starting 36027
Complete with 36027
Starting 36029
Complete with 36029
Starting 36045
Complete with 36045
Starting 36053
Complete with 36053
Starting 36055
Complete with 36055
Starting 36059
Complete with 36059
Starting 36063




Complete with 36063
Starting 36065
Complete with 36065
Starting 36067
Complete with 36067
Starting 36069
Complete with 36069
Starting 36071
Complete with 36071
Starting 36075
Complete with 36075
Starting 36083
Complete with 36083
Starting 36087
Complete with 36087
Starting 36089
Complete with 36089
Starting 36091
Complete with 36091
Starting 36093
Complete with 36093
Starting 36101
Complete with 36101
Starting 36103
Complete with 36103
Starting 36107
Complete with 36107
Starting 36109
Complete with 36109
Starting 36111
Complete with 36111
Starting 36117
Complete with 36117
Starting 36119
Complete with 36119
Starting New York City
Complete with New York City


In [8]:
# Error table for New York: one row per processed county (labels from
# df_index), one column per key stored in that county's county_data dict.
err_ny_df = pd.DataFrame(err_data, index=df_index)

In [9]:
# Save the New York error table as CSV.
err_ny_df.to_csv(r'output/err_ny_20201023.csv')

In [10]:
# Write every New York forecast table to its own CSV file.
for csv_path, forecast_frame in (
    (r'output/pred_ny_case_only.csv', predictions_case_only),
    (r'output/pred_ny_smell_0delay.csv', predictions_smell_0delay),
    (r'output/pred_ny_smell_6delay.csv', predictions_smell_6delay),
    (r'output/pred_ny_hh_0delay.csv', predictions_hh_0delay),
    (r'output/pred_ny_hh_6delay.csv', predictions_hh_6delay),
):
    forecast_frame.to_csv(csv_path)

In [32]:
# Spot check: look up the FIPS code for Escambia, FL.
data_sets.get_fips('FL', 'Escambia')

'12033'

In [25]:
# Reset the accumulators so the Florida county loop starts from empty
# containers: one row label and one dict of error values per county.
df_index, err_data = [], []

# Fresh (empty) forecast tables, one per filter configuration.
(predictions_case_only,
 predictions_smell_0delay,
 predictions_smell_6delay,
 predictions_hh_0delay,
 predictions_hh_6delay) = (pd.DataFrame() for _ in range(5))

# Number of repeated runs averaged for every configuration.
numLoop = 5

# Scoring window: starts one week after K0 and ends on 2020-10-23.
right = datetime.date(2020, 10, 23)
left = K0 + datetime.timedelta(weeks=1)
# Daily index spanning the scoring window; used to re-label the naive forecast.
shifted_range = pd.date_range(start=left, end=right)

In [26]:
state_2L = 'FL'

# One entry per filter configuration scored for every county:
# (error-table column, `measure` argument, `delay` argument, forecast table).
# The order is the order in which the configurations are run.
fl_run_configs = [
    ('mse_confirmed-only', None, 0, predictions_case_only),
    ('mse_smell-0delay', 'smell', 0, predictions_smell_0delay),
    ('mse_smell-6delay', 'smell', 6, predictions_smell_6delay),
    ('mse_hh-0delay', 'hh', 0, predictions_hh_0delay),
    ('mse_hh-6delay', 'hh', 6, predictions_hh_6delay),
]

for each in fl_counties:
    fips = data_sets.get_fips(state_2L, each)
    # Skip counties that are absent from either survey data index.
    if (fips not in hh_data.index.levels[0]) or (fips not in smell_data.index.levels[1]):
        continue

    print('Starting', fips)

    county_data = {}

    for cfg_key, cfg_measure, cfg_delay, cfg_frame in fl_run_configs:
        # Run the same configuration numLoop times and average the MSE over
        # the [left, right] scoring window.
        cfg_mses = np.zeros(numLoop)
        for i in range(numLoop):
            predicted_case, predicted_seiir_prior, case_ma7_all, num_survey_ma7, K_vals = run_county(state_2L, each, seiir_fl, smell_data, hh_data, case_data, measure=cfg_measure, K0=K0, delay=cfg_delay)
            cfg_mses[i] = calc_mse(predicted_case.loc[left:right], case_ma7_all.loc[left:right])
        county_data[cfg_key] = cfg_mses.mean()
        # Only the forecast of the last repetition is stored for the county.
        cfg_frame[fips] = predicted_case

    # Naive baseline: the case series from 7 days earlier, re-labelled with
    # the dates of the scoring window, scored the same way as the forecasts.
    naive_pred = case_ma7_all.loc[left-datetime.timedelta(days=7):right-datetime.timedelta(days=7)].copy()
    naive_pred.index = shifted_range
    county_data['naive'] = calc_mse(naive_pred.loc[left:right], case_ma7_all.loc[left:right])

    df_index.append(fips)
    err_data.append(county_data)
    print('Complete with', fips)


Starting 12001
Complete with 12001
Starting 12005
Complete with 12005
Starting 12009
Complete with 12009
Starting 12011
Complete with 12011
Starting 12015
Complete with 12015
Starting 12017
Complete with 12017
Starting 12019
Complete with 12019
Starting 12021
Complete with 12021
Starting 12031
Complete with 12031
Starting 12033
Complete with 12033
Starting 12035
Complete with 12035
Starting 12053
Complete with 12053
Starting 12057
Complete with 12057
Starting 12061
Complete with 12061
Starting 12069
Complete with 12069
Starting 12071
Complete with 12071
Starting 12073
Complete with 12073
Starting 12081
Complete with 12081
Starting 12083
Complete with 12083
Starting 12085
Complete with 12085
Starting 12086
Complete with 12086
Starting 12091
Complete with 12091
Starting 12095
Complete with 12095
Starting 12097
Complete with 12097
Starting 12099
Complete with 12099
Starting 12101
Complete with 12101
Starting 12103
Complete with 12103
Starting 12105
Complete with 12105
Starting 12109
Compl

In [29]:
# Error table for Florida: one row per processed county (labels from
# df_index), one column per key stored in that county's county_data dict.
err_fl_df = pd.DataFrame(err_data, index=df_index)

In [30]:
# Save the Florida error table as CSV.
err_fl_df.to_csv(r'output/err_fl_20201023.csv')

In [31]:
# Write every Florida forecast table to its own CSV file.
for csv_path, forecast_frame in (
    (r'output/pred_fl_case_only.csv', predictions_case_only),
    (r'output/pred_fl_smell_0delay.csv', predictions_smell_0delay),
    (r'output/pred_fl_smell_6delay.csv', predictions_smell_6delay),
    (r'output/pred_fl_hh_0delay.csv', predictions_hh_0delay),
    (r'output/pred_fl_hh_6delay.csv', predictions_hh_6delay),
):
    forecast_frame.to_csv(csv_path)

In [65]:
# Run all five filter configurations for a single county and keep each
# forecast under a descriptive label for the comparison plot.
county = 'Erie'
state_2l = 'NY'
county_preds = {}

# (label in county_preds, `measure` argument, `delay` argument), in run order.
# The confirmed-only run is last, so the variables left in scope afterwards
# (predicted_case, K_vals, ...) belong to that run.
for pred_label, pred_measure, pred_delay in (
    ('hh_delay6', 'hh', 6),
    ('hh_delay0', 'hh', 0),
    ('smell_delay6', 'smell', 6),
    ('smell_delay0', 'smell', 0),
    ('confirmed_only', None, 0),
):
    predicted_case, predicted_seiir_prior, case_ma7_all, num_survey_ma7, K_vals = run_county(
        state_2l, county, seiir_ny, smell_data, hh_data, case_data,
        measure=pred_measure, K0=datetime.date(2020, 4, 18), delay=pred_delay)
    county_preds[pred_label] = predicted_case

In [66]:
# Compare the forecasts of every configuration in county_preds with the
# confirmed case series and a naive 7-day-lag baseline.
# Relies on `gold` and `width` from the plotting-constants cell.
K0 = datetime.date(2020, 4, 18)
d = datetime.date(2020, 10, 23)
tick_end = predicted_seiir_prior.index[-1]

# Weekly dates from K0 to the last forecast date, as datetime.date objects.
week_interval = pd.date_range(start=K0, end=tick_end, freq='W')
week_interval = [x.to_pydatetime().date() for x in week_interval]

# One line per filter configuration.
for each_key in county_preds.keys():
    plt.plot(county_preds[each_key].index, county_preds[each_key], label=each_key)
plt.plot(case_ma7_all.loc[K0:d].index, case_ma7_all.loc[K0:d], label='Confirmed Case Count', c=gold,
         linewidth=width)

# Naive estimate: the confirmed series drawn 7 days later. The x values are
# derived from the plotted slice itself so x and y always have the same
# length, even when the case series ends before d + 7 days.
naive_vals = case_ma7_all.loc[K0:d]
naive_dates = [day + datetime.timedelta(days=7) for day in naive_vals.index]
plt.plot(naive_dates, naive_vals, label='Naive Estimate', c='orange')
plt.legend(loc='upper right')

plt.title(county)
plt.ylabel('Number of Cases per Day')

Text(0, 0.5, 'Number of Cases per Day')

In [None]:
# Show the first 50 entries of K_vals from the most recent run_county call.
K_vals.head(50)

In [None]:
# Plot K_vals (as returned by the most recent run_county call) over its index.
plt.plot(K_vals.index, K_vals)
# NOTE(review): the title is hard-coded; confirm it matches the county and
# configuration of the run that produced K_vals.
plt.title('Miami-Dade hh6 K[4, 4] Val')

In [None]:
# error computations

In [None]:
# Scratch: inspect the type of case_ma7_all.
type(case_ma7_all)

In [None]:
# MSE of the most recent forecast against the case series over [left, right].
calc_mse(predicted_case.loc[left:right], case_ma7_all.loc[left:right])

In [None]:
# Scratch: empty frame used to try out column assignment.
test_df = pd.DataFrame()

In [None]:
# Scratch: store the most recent forecast as column 'test'.
test_df['test'] = predicted_case

In [None]:
# Scratch: display the frame.
test_df

In [None]:
# Single-column table holding the most recent forecast.
output_df = pd.DataFrame()
# NOTE(review): `the_county` is not assigned in the nearby cells (they use
# `county`); confirm it is defined before running this cell.
output_df[the_county] = predicted_case

In [None]:
# Save the example forecast table as CSV.
output_df.to_csv(r'output/predicted_case_example.csv')

# All plotting code below this

In [34]:
# Default plot settings: None for every axis limit, which the plotting cells
# pass straight to plt.xlim / plt.ylim.
xlim_left = xlim_right = None
leftylim_low = leftylim_high = None      # limits for the left (case count) axis
rightylim_low = rightylim_high = None    # limits for the right (survey) axis

# Font sizes for tick labels and axis labels.
xtick_size = xlabel_size = 14

In [35]:
# plot findings -- multiple plots

# Plotting constants and variables ----------------
# These names (purple, gold, gray, width) are shared by the plotting cells.

# NOTE(review): Matplotlib 3.6 deprecated the bundled 'seaborn-*' style names
# in favour of 'seaborn-v0_8-*'; confirm the installed version accepts this.
plt.style.use('seaborn-whitegrid')
matplotlib.rcParams.update({'font.size': 18})
purple = '#33016F'
gold = '#9E7A27'
gray = '#797979'
# Line width used for the main curves.
width = 3
# IPython magic: selects the Qt backend, so this cell only runs under IPython/Jupyter.
%matplotlib qt


In [36]:
# Tick positions for the x axis: weekly dates between K0 and the last date
# of the most recent IHME prior forecast, as datetime.date objects.
K0 = datetime.date(2020, 4, 18)
tick_end = predicted_seiir_prior.index[-1]

week_interval = [
    stamp.to_pydatetime().date()
    for stamp in pd.date_range(start=K0, end=tick_end, freq='W')
]



In [None]:
# multiple plots -----------
# Builds three figures that add one series at a time: confirmed cases only,
# then the IHME forecast, then the survey series on a secondary y axis.
# NOTE(review): `start`, `d` and `num_stl_ma7` are not assigned in the nearby
# cells (they use `K0` and `num_survey_ma7`); confirm they are defined before
# running this cell.

# Figure 1: confirmed case series only.
fig1, ax11 = plt.subplots(1)
plt.sca(ax11)
plt.plot(case_ma7_all.loc[start:d].index, case_ma7_all.loc[start:d], label='Confirmed Case Count', c=gold,
         linewidth=width)
plt.xticks(week_interval, rotation=30, ha='right', rotation_mode='anchor', fontsize=xtick_size)
plt.ylabel('Number of Cases per Day')
plt.legend(loc='upper left')
plt.ylim(leftylim_low, leftylim_high)
plt.xlim(xlim_left, xlim_right)


# Figure 2: confirmed cases plus the IHME 7-day forecast.
fig2, ax21 = plt.subplots(1)
plt.sca(ax21)
plt.plot(case_ma7_all.loc[start:d].index, case_ma7_all.loc[start:d], label='Confirmed Case Count', c=gold,
         linewidth=width)
plt.plot(predicted_seiir_prior.index, predicted_seiir_prior,
         label='IHME 7-day Forecast', c=gray, linewidth=width)
plt.xticks(week_interval, rotation=30, ha='right', rotation_mode='anchor', fontsize=xtick_size)
plt.ylabel('Number of Cases per Day')
plt.legend(loc='upper left')
plt.ylim(leftylim_low, leftylim_high)
plt.xlim(xlim_left, xlim_right)




# Figure 3: as figure 2, with the survey series on a twin (right-hand) axis.
fig3, ax31 = plt.subplots(1)
plt.sca(ax31)
plt.plot(case_ma7_all.loc[start:d].index, case_ma7_all.loc[start:d], label='Confirmed Case Count', c=gold,
         linewidth=width)
plt.plot(predicted_seiir_prior.index, predicted_seiir_prior,
         label='IHME 7-day Forecast', c=gray, linewidth=width)

# Right-hand axis; the pyplot calls below act on it until plt.sca(ax31).
ax32 = ax31.twinx()
plt.sca(ax32)
plt.plot(num_stl_ma7.loc[start:d].index, num_stl_ma7.loc[start:d], c='red', label='FB Positive Symptoms, Smell/Taste Loss',
         linewidth=width)
plt.grid(axis='y', linestyle=':')

plt.ylabel('Number of Positive Symptom Response per Day')
plt.legend(loc='upper right')
plt.ylim(rightylim_low, rightylim_high)


# Back to the left-hand axis for ticks, label, legend and limits.
plt.sca(ax31)
plt.xticks(week_interval, rotation=30, ha='right', rotation_mode='anchor', fontsize=xtick_size)
plt.ylabel('Number of Cases per Day')

plt.legend(loc='upper left')
plt.ylim(leftylim_low, leftylim_high)
plt.xlim(xlim_left, xlim_right)






In [37]:
### This is the one plot
# Single figure: confirmed cases, IHME forecast, naive 7-day-lag baseline and
# the filter forecast on the left axis; the survey series on the right axis.

d = datetime.date(2020, 10, 23)
fig4, ax41 = plt.subplots(1)
plt.sca(ax41)
plt.plot(case_ma7_all.loc[K0:d].index, case_ma7_all.loc[K0:d], label='Confirmed Case Count', c=gold,
         linewidth=width)
plt.plot(predicted_seiir_prior.index, predicted_seiir_prior,
         label='IHME 7-day Forecast', c=gray, linewidth=width)

# Naive estimate: the confirmed series drawn 7 days later. The x values are
# derived from the plotted slice itself so x and y always have the same
# length, even when the case series ends before d + 7 days.
naive_vals = case_ma7_all.loc[K0:d]
naive_dates = [day + datetime.timedelta(days=7) for day in naive_vals.index]
plt.plot(naive_dates, naive_vals, label='Naive Estimate', c='orange')

# Right-hand axis; the pyplot calls below act on it until plt.sca(ax41).
ax42 = ax41.twinx()
plt.sca(ax42)
plt.plot(num_survey_ma7.loc[K0:d].index, num_survey_ma7.loc[K0:d], c='red', label='FB Positive Household Symptoms',
         linewidth=width)
plt.grid(axis='y', linestyle=':')

plt.ylabel('Number of Positive Household Symptom Response per Day')
plt.legend(loc='upper right')
plt.ylim(rightylim_low, rightylim_high)


# Back to the left-hand axis for the filter forecast, ticks and limits.
plt.sca(ax41)
plt.plot(predicted_case.loc[K0:].index, predicted_case.loc[K0:], label='Our 7-Day Forecast',
         c=purple, linewidth=width)

plt.xticks(week_interval, rotation=30, ha='right', rotation_mode='anchor', fontsize=xtick_size)
plt.ylabel('Number of Cases per Day')

plt.legend(loc='upper left')
plt.ylim(leftylim_low, leftylim_high)
plt.xlim(xlim_left, xlim_right)

# NOTE(review): the title is hard-coded; confirm it matches the county,
# measure and delay of the run whose results are being plotted.
plt.title('Miami-Dade, Household Symptoms, delay of 6 days')

Text(0.5, 1.0, 'Miami-Dade, Household Symptoms, delay of 6 days')

In [None]:
# Scratch: value of the case series on date d.
case_ma7_all.loc[d]


In [None]:
# Scratch: call plt.grid() with no arguments on the current axes.
plt.grid()

In [None]:
## special end date
# Variant of the plotting cells that slices every series up to tick_end (the
# last date of the IHME prior forecast) and builds four incremental figures.
# NOTE(review): `start` and `num_stl_ma7` are not assigned in the nearby
# cells (they use `K0` and `num_survey_ma7`); confirm they are defined before
# running this cell.

# plot findings -- multiple plots

# Plotting constants and variables ----------------
matplotlib.rcParams.update({'font.size': 18})
# NOTE(review): Matplotlib 3.6 deprecated the bundled 'seaborn-*' style names
# in favour of 'seaborn-v0_8-*'; confirm the installed version accepts this.
plt.style.use('seaborn-whitegrid')
purple = '#33016F'
gold = '#9E7A27'
gray = '#797979'
width = 4
# IPython magic: selects the Qt backend, so this cell only runs under IPython/Jupyter.
%matplotlib qt

tick_end = predicted_seiir_prior.index[-1]

# Weekly tick dates from start to tick_end, as datetime.date objects.
week_interval = pd.date_range(start=start, end=tick_end, freq='W')
week_interval = [x.to_pydatetime().date() for x in week_interval]


# Figure 1: confirmed case series only.
fig1, ax11 = plt.subplots(1)
plt.sca(ax11)
plt.plot(case_ma7_all.loc[start:tick_end].index, case_ma7_all.loc[start:tick_end], label='Confirmed Case Count', c=gold,
         linewidth=width)
plt.xticks(week_interval, rotation=30, ha='right', rotation_mode='anchor', fontsize=xtick_size)
plt.ylabel('Number of Cases per Day')
plt.legend(loc='upper left')
plt.ylim(leftylim_low, leftylim_high)
plt.xlim(xlim_left, xlim_right)


# Figure 2: confirmed cases plus the IHME 7-day forecast.
fig2, ax21 = plt.subplots(1)
plt.sca(ax21)
plt.plot(case_ma7_all.loc[start:tick_end].index, case_ma7_all.loc[start:tick_end], label='Confirmed Case Count', c=gold,
         linewidth=width)
plt.plot(predicted_seiir_prior.index, predicted_seiir_prior,
         label='IHME 7-day Forecast', c=gray, linewidth=width)
plt.xticks(week_interval, rotation=30, ha='right', rotation_mode='anchor', fontsize=xtick_size)
plt.ylabel('Number of Cases per Day')
plt.legend(loc='upper left')
plt.ylim(leftylim_low, leftylim_high)
plt.xlim(xlim_left, xlim_right)




# Figure 3: as figure 2, with the survey series on a twin (right-hand) axis.
fig3, ax31 = plt.subplots(1)
plt.sca(ax31)
plt.plot(case_ma7_all.loc[start:tick_end].index, case_ma7_all.loc[start:tick_end], label='Confirmed Case Count', c=gold,
         linewidth=width)
plt.plot(predicted_seiir_prior.index, predicted_seiir_prior,
         label='IHME 7-day Forecast', c=gray, linewidth=width)

# Right-hand axis; the pyplot calls below act on it until plt.sca(ax31).
ax32 = ax31.twinx()
plt.sca(ax32)
plt.plot(num_stl_ma7.loc[start:tick_end].index, num_stl_ma7.loc[start:tick_end], c='red', label='FB Positive Symptoms, Smell/Taste Loss',
         linewidth=width)
plt.grid(axis='y', linestyle=':')

plt.ylabel('Number of Positive Symptom Response per Day')
plt.legend(loc='upper right')
plt.ylim(rightylim_low, rightylim_high)


# Back to the left-hand axis for ticks, label, legend and limits.
plt.sca(ax31)
plt.xticks(week_interval, rotation=30, ha='right', rotation_mode='anchor', fontsize=xtick_size)
plt.ylabel('Number of Cases per Day')

plt.legend(loc='upper left')
plt.ylim(leftylim_low, leftylim_high)
plt.xlim(xlim_left, xlim_right)



# Figure 4: as figure 3, plus the filter's own 7-day forecast.
fig4, ax41 = plt.subplots(1)
plt.sca(ax41)
plt.plot(case_ma7_all.loc[start:tick_end].index, case_ma7_all.loc[start:tick_end], label='Confirmed Case Count', c=gold,
         linewidth=width)
plt.plot(predicted_seiir_prior.index, predicted_seiir_prior,
         label='IHME 7-day Forecast', c=gray, linewidth=width)

# Right-hand axis; the pyplot calls below act on it until plt.sca(ax41).
ax42 = ax41.twinx()
plt.sca(ax42)
plt.plot(num_stl_ma7.loc[start:tick_end].index, num_stl_ma7.loc[start:tick_end], c='red', label='FB Positive Symptoms, Smell/Taste Loss',
         linewidth=width)
plt.grid(axis='y', linestyle=':')

plt.ylabel('Number of Positive Symptom Response per Day')
plt.legend(loc='upper right')
plt.ylim(rightylim_low, rightylim_high)


# Back to the left-hand axis for the filter forecast, ticks and limits.
plt.sca(ax41)
plt.plot(predicted_case.loc[start:].index, predicted_case.loc[start:], label='Our 7-Day Forecast',
         c=purple, linewidth=width)

plt.xticks(week_interval, rotation=30, ha='right', rotation_mode='anchor', fontsize=xtick_size)
plt.ylabel('Number of Cases per Day')

plt.legend(loc='upper left')
plt.ylim(leftylim_low, leftylim_high)
plt.xlim(xlim_left, xlim_right)


In [None]:
# NYC plot settings
# x-axis date range, then (low, high) limits for the left and right y axes.
xlim_left, xlim_right = datetime.date(2020, 4, 2), datetime.date(2020, 11, 3)
leftylim_low, leftylim_high = -700, 12000
rightylim_low, rightylim_high = -5, 85

# Font sizes for tick labels and axis labels.
xtick_size = xlabel_size = 14

In [None]:
# Nassau plot settings
# x-axis date range, then (low, high) limits for the left and right y axes.
xlim_left, xlim_right = datetime.date(2020, 4, 2), datetime.date(2020, 11, 3)
leftylim_low, leftylim_high = -40, 1650
rightylim_low, rightylim_high = -0.3, 12

# Font sizes for tick labels and axis labels.
xtick_size = xlabel_size = 14

In [None]:
# Westchester plot settings
# x-axis date range, then (low, high) limits for the left and right y axes.
xlim_left, xlim_right = datetime.date(2020, 4, 2), datetime.date(2020, 11, 3)
leftylim_low, leftylim_high = -28, 1000
rightylim_low, rightylim_high = -0.2, 7

# Font sizes for tick labels and axis labels.
xtick_size = xlabel_size = 14

In [None]:
# Albany plot settings
# x-axis date range, then (low, high) limits for the left and right y axes.
xlim_left, xlim_right = datetime.date(2020, 4, 2), datetime.date(2020, 11, 3)
leftylim_low, leftylim_high = -3, 75
rightylim_low, rightylim_high = -0.125, 3.1

# Font sizes for tick labels and axis labels.
xtick_size = xlabel_size = 14

In [None]:
# Erie plot settings
# x-axis date range, then (low, high) limits for the left and right y axes.
xlim_left, xlim_right = datetime.date(2020, 4, 2), datetime.date(2020, 10, 13)
leftylim_low, leftylim_high = -10, 250
rightylim_low, rightylim_high = -0.5, 12.5

# Font sizes for tick labels and axis labels.
xtick_size = xlabel_size = 14