In [1]:
import pandas as pd
import numpy as np

In [2]:
census_df = pd.read_csv('co-est2019-alldata.csv', encoding='latin-1')
census_df = census_df[census_df.SUMLEV == 50]
census_df

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,...,RDOMESTICMIG2019,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015,RNETMIG2016,RNETMIG2017,RNETMIG2018,RNETMIG2019
1,50,3,6,1,1,Alabama,Autauga County,54571,54597,54773,...,4.847310,6.018182,-6.226119,-3.902226,1.970443,-1.712875,4.777171,0.849656,0.540916,4.560062
2,50,3,6,1,3,Alabama,Baldwin County,182265,182265,183112,...,24.017829,16.641870,17.488579,22.751474,20.184334,17.725964,21.279291,22.398256,24.727215,24.380567
3,50,3,6,1,5,Alabama,Barbour County,27457,27455,27327,...,-5.690302,0.292676,-6.897817,-8.132185,-5.140431,-15.724575,-18.238016,-24.998528,-8.754922,-5.165664
4,50,3,6,1,7,Alabama,Bibb County,22915,22915,22870,...,1.385134,-4.998356,-3.787545,-5.797999,1.331144,1.329817,-0.708717,-3.234669,-6.857092,1.831952
5,50,3,6,1,9,Alabama,Blount County,57322,57322,57376,...,1.020788,0.208812,-1.650165,-0.347225,-2.049590,-1.338525,-1.391062,6.193562,-0.069229,1.124597
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3188,50,4,8,56,37,Wyoming,Sweetwater County,43806,43806,43574,...,-17.605427,0.502513,15.035735,-4.614071,-13.140225,-13.762031,-18.484164,-23.515727,-18.743773,-17.065527
3189,50,4,8,56,39,Wyoming,Teton County,21294,21298,21296,...,-1.583464,-2.575509,1.905293,23.259519,12.508872,4.670450,1.253214,-0.171608,-12.432212,1.797445
3190,50,4,8,56,41,Wyoming,Uinta County,21118,21121,21089,...,-9.181105,-18.196975,-4.487730,-10.870861,-15.033634,-10.484550,-11.485101,-18.923455,-13.554993,-9.181105
3191,50,4,8,56,43,Wyoming,Washakie County,8533,8528,8530,...,-7.652085,-12.839390,-3.084589,-1.307811,-18.938032,0.000000,-15.204038,-15.950541,-17.246806,-8.289759


In [3]:
combine_fips = lambda a, b: '{:0>2}{:0>3}'.format(a, b)
census_df['FIPS'] = census_df.STATE.combine(census_df.COUNTY, combine_fips).astype(int)
census_df = census_df[['FIPS', 'POPESTIMATE2019']].set_index('FIPS')
census_df

Unnamed: 0_level_0,POPESTIMATE2019
FIPS,Unnamed: 1_level_1
1001,55869
1003,223234
1005,24686
1007,22394
1009,57826
...,...
56037,42343
56039,23464
56041,20226
56043,7805


In [4]:
county_np = np.loadtxt('county_matrix.csv', delimiter=',', dtype=int)
map_fips, fips_sq_miles = np.unique(county_np, return_counts=True)
fip_sq_mile_dict = dict(zip(map_fips, fips_sq_miles))
county_np.shape

census_df = census_df[census_df.index.isin(map_fips)].copy()
census_df['sq_miles'] = [fip_sq_mile_dict[i] for i in census_df.index]
census_df['pop_density'] = census_df.POPESTIMATE2019/census_df.sq_miles
census_df.to_csv('census.csv')
census_df

Unnamed: 0_level_0,POPESTIMATE2019,sq_miles,pop_density
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1001,55869,633,88.260664
1003,223234,1742,128.148106
1005,24686,932,26.487124
1007,22394,643,34.827372
1009,57826,682,84.788856
...,...,...,...
56037,42343,10803,3.919559
56039,23464,4348,5.396504
56041,20226,2157,9.376912
56043,7805,2328,3.352663


In [5]:
# JHU Dataset 
# https://github.com/CSSEGISandData/COVID-19
# set dates based files in COVID-19\csse_covid_19_data\csse_covid_19_daily_reports_us
dates = [f'04-{12+i:02d}-2020' for i in range(19)]
dates += [f'05-{1+i:02d}-2020' for i in range(8)]

def get_jhu_table(date):
    df = pd.read_csv(f'git/COVID-19/csse_covid_19_data/csse_covid_19_daily_reports/{date}.csv')
    df['date'] = pd.to_datetime(date)
    
    df.dropna(subset=['FIPS'], inplace=True)
    df.FIPS = df.FIPS.astype(int)
    df = df[df.FIPS.isin(census_df.index)]
    
    return df
    
jhu_corona_df = pd.concat([get_jhu_table(i) for i in dates])
jhu_corona_df

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,date
0,45001,Abbeville,South Carolina,US,2020-04-12 23:18:00,34.223334,-82.461707,9,0,0,9,"Abbeville, South Carolina, US",2020-04-12
1,22001,Acadia,Louisiana,US,2020-04-12 23:18:00,30.295065,-92.414197,99,5,0,94,"Acadia, Louisiana, US",2020-04-12
2,51001,Accomack,Virginia,US,2020-04-12 23:18:00,37.767072,-75.632346,15,0,0,15,"Accomack, Virginia, US",2020-04-12
3,16001,Ada,Idaho,US,2020-04-12 23:18:00,43.452658,-116.241552,517,6,0,511,"Ada, Idaho, US",2020-04-12
4,19001,Adair,Iowa,US,2020-04-12 23:18:00,41.330756,-94.471059,1,0,0,1,"Adair, Iowa, US",2020-04-12
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2953,4027,Yuma,Arizona,US,2020-05-09 02:32:35,32.768957,-113.906667,159,1,0,158,"Yuma, Arizona, US",2020-05-08
2954,8125,Yuma,Colorado,US,2020-05-09 02:32:35,40.003468,-102.425867,10,0,0,10,"Yuma, Colorado, US",2020-05-08
2955,48505,Zapata,Texas,US,2020-05-09 02:32:35,27.001564,-99.169872,7,0,0,7,"Zapata, Texas, US",2020-05-08
2956,48507,Zavala,Texas,US,2020-05-09 02:32:35,28.866172,-99.760508,1,0,0,1,"Zavala, Texas, US",2020-05-08


In [6]:
jhu_pivot = jhu_corona_df.pivot('FIPS', 'date').Confirmed
jhu_pivot

date,2020-04-12,2020-04-13,2020-04-14,2020-04-15,2020-04-16,2020-04-17,2020-04-18,2020-04-19,2020-04-20,2020-04-21,...,2020-04-29,2020-04-30,2020-05-01,2020-05-02,2020-05-03,2020-05-04,2020-05-05,2020-05-06,2020-05-07,2020-05-08
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,19.0,19.0,23.0,24.0,26.0,26.0,25.0,26.0,28.0,30.0,...,43.0,44.0,42.0,45.0,48.0,53.0,53.0,58.0,61.0,67.0
1003,71.0,72.0,87.0,91.0,101.0,103.0,109.0,112.0,117.0,123.0,...,174.0,174.0,175.0,181.0,187.0,188.0,189.0,196.0,205.0,208.0
1005,10.0,10.0,11.0,12.0,14.0,15.0,18.0,20.0,22.0,28.0,...,37.0,39.0,42.0,43.0,45.0,45.0,47.0,47.0,51.0,53.0
1007,16.0,17.0,17.0,18.0,22.0,24.0,26.0,28.0,32.0,32.0,...,42.0,42.0,42.0,42.0,43.0,42.0,43.0,43.0,44.0,44.0
1009,13.0,14.0,16.0,17.0,18.0,20.0,20.0,21.0,22.0,26.0,...,36.0,37.0,39.0,40.0,40.0,40.0,40.0,42.0,44.0,44.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56035,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
56037,7.0,9.0,9.0,10.0,10.0,10.0,10.0,10.0,10.0,16.0,...,17.0,18.0,18.0,18.0,18.0,18.0,18.0,19.0,19.0,19.0
56039,56.0,56.0,57.0,58.0,59.0,61.0,62.0,62.0,62.0,92.0,...,96.0,96.0,96.0,97.0,98.0,98.0,98.0,98.0,98.0,98.0
56041,4.0,4.0,4.0,4.0,4.0,6.0,6.0,6.0,6.0,7.0,...,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,9.0


In [7]:
# NYTimes Data
# https://github.com/nytimes/covid-19-data
start_date_nyt = '2020-04-12' # Match the John Hopkins Data

nyt_corona_df = pd.read_csv('git/covid-19-data/us-counties.csv')
nyt_corona_df.date = pd.to_datetime(nyt_corona_df.date)

nyt_corona_df.rename(columns={'fips': 'FIPS'}, inplace=True)
nyt_corona_df.dropna(subset=['FIPS'], inplace=True)
nyt_corona_df.FIPS = nyt_corona_df.FIPS.astype(int)
nyt_corona_df = nyt_corona_df[nyt_corona_df.FIPS.isin(census_df.index)]

nyt_corona_df = nyt_corona_df[nyt_corona_df.date >= start_date_nyt]
nyt_corona_df

Unnamed: 0,date,county,state,FIPS,cases,deaths
51046,2020-04-12,Autauga,Alabama,1001,19,1
51047,2020-04-12,Baldwin,Alabama,1003,71,1
51048,2020-04-12,Barbour,Alabama,1005,10,0
51049,2020-04-12,Bibb,Alabama,1007,16,0
51050,2020-04-12,Blount,Alabama,1009,13,0
...,...,...,...,...,...,...
126830,2020-05-08,Sublette,Wyoming,56035,3,0
126831,2020-05-08,Sweetwater,Wyoming,56037,19,0
126832,2020-05-08,Teton,Wyoming,56039,98,1
126833,2020-05-08,Uinta,Wyoming,56041,9,0


In [8]:
nyt_pivot = nyt_corona_df.pivot('FIPS', 'date').cases
nyt_pivot

date,2020-04-12,2020-04-13,2020-04-14,2020-04-15,2020-04-16,2020-04-17,2020-04-18,2020-04-19,2020-04-20,2020-04-21,...,2020-04-29,2020-04-30,2020-05-01,2020-05-02,2020-05-03,2020-05-04,2020-05-05,2020-05-06,2020-05-07,2020-05-08
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,19.0,19.0,23.0,25.0,25.0,26.0,25.0,27.0,28.0,30.0,...,43.0,42.0,42.0,45.0,48.0,53.0,53.0,58.0,61.0,67.0
1003,71.0,78.0,87.0,98.0,102.0,103.0,109.0,114.0,117.0,123.0,...,173.0,174.0,175.0,181.0,187.0,188.0,189.0,196.0,205.0,208.0
1005,10.0,10.0,11.0,13.0,14.0,15.0,18.0,20.0,22.0,28.0,...,37.0,39.0,42.0,43.0,45.0,45.0,47.0,47.0,51.0,53.0
1007,16.0,17.0,17.0,19.0,24.0,23.0,26.0,28.0,32.0,32.0,...,42.0,42.0,42.0,42.0,43.0,42.0,43.0,43.0,44.0,44.0
1009,13.0,15.0,16.0,17.0,18.0,20.0,20.0,21.0,22.0,26.0,...,36.0,37.0,39.0,40.0,40.0,40.0,40.0,42.0,44.0,44.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56035,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
56037,7.0,9.0,9.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,...,17.0,18.0,18.0,18.0,18.0,18.0,18.0,19.0,19.0,19.0
56039,56.0,56.0,57.0,59.0,59.0,61.0,62.0,62.0,62.0,62.0,...,95.0,96.0,96.0,97.0,98.0,98.0,98.0,98.0,98.0,98.0
56041,4.0,4.0,4.0,4.0,4.0,6.0,6.0,6.0,6.0,6.0,...,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,9.0


In [9]:
#combine John Hopkins and NYT data, prioritizing John Hopkins
take_first = lambda s1, s2: s1 if s1 is not None else s2
corona_pivot = jhu_pivot.combine(nyt_pivot, take_first)
corona_pivot = corona_pivot.bfill(axis = 1).ffill(axis = 1).dropna()
corona_pivot

date,2020-04-12,2020-04-13,2020-04-14,2020-04-15,2020-04-16,2020-04-17,2020-04-18,2020-04-19,2020-04-20,2020-04-21,...,2020-04-29,2020-04-30,2020-05-01,2020-05-02,2020-05-03,2020-05-04,2020-05-05,2020-05-06,2020-05-07,2020-05-08
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,19.0,19.0,23.0,24.0,26.0,26.0,25.0,26.0,28.0,30.0,...,43.0,44.0,42.0,45.0,48.0,53.0,53.0,58.0,61.0,67.0
1003,71.0,72.0,87.0,91.0,101.0,103.0,109.0,112.0,117.0,123.0,...,174.0,174.0,175.0,181.0,187.0,188.0,189.0,196.0,205.0,208.0
1005,10.0,10.0,11.0,12.0,14.0,15.0,18.0,20.0,22.0,28.0,...,37.0,39.0,42.0,43.0,45.0,45.0,47.0,47.0,51.0,53.0
1007,16.0,17.0,17.0,18.0,22.0,24.0,26.0,28.0,32.0,32.0,...,42.0,42.0,42.0,42.0,43.0,42.0,43.0,43.0,44.0,44.0
1009,13.0,14.0,16.0,17.0,18.0,20.0,20.0,21.0,22.0,26.0,...,36.0,37.0,39.0,40.0,40.0,40.0,40.0,42.0,44.0,44.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56035,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
56037,7.0,9.0,9.0,10.0,10.0,10.0,10.0,10.0,10.0,16.0,...,17.0,18.0,18.0,18.0,18.0,18.0,18.0,19.0,19.0,19.0
56039,56.0,56.0,57.0,58.0,59.0,61.0,62.0,62.0,62.0,92.0,...,96.0,96.0,96.0,97.0,98.0,98.0,98.0,98.0,98.0,98.0
56041,4.0,4.0,4.0,4.0,4.0,6.0,6.0,6.0,6.0,7.0,...,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,9.0


In [10]:
# set how many days it takes for an average case to resolve
num_days = 14

In [11]:
sir_df = pd.DataFrame(corona_pivot.iloc[:, -1].rename("cases_now"))
sir_df['cases_old'] = corona_pivot.iloc[:, -num_days]
sir_df['POPULATION'] = [census_df.POPESTIMATE2019.loc[i] for i in sir_df.index]
sir_df.to_csv('corona.csv')
sir_df

Unnamed: 0_level_0,cases_now,cases_old,POPULATION
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1001,67.0,36.0,55869
1003,208.0,147.0,223234
1005,53.0,32.0,24686
1007,44.0,34.0,22394
1009,44.0,31.0,57826
...,...,...,...
56035,3.0,3.0,9831
56037,19.0,16.0,42343
56039,98.0,95.0,23464
56041,9.0,7.0,20226


In [12]:
sir_df['SUSCEPTIBLE'] = (sir_df.POPULATION - sir_df.cases_now)/sir_df.POPULATION
sir_df['INFECTED'] = (sir_df.cases_now - sir_df.cases_old)/sir_df.POPULATION
sir_df['REMOVED'] = (sir_df.cases_old)/sir_df.POPULATION
sir_df.SUSCEPTIBLE = [float('NaN') if i>1 or i<0 else i for i in sir_df.SUSCEPTIBLE]
sir_df.INFECTED = [float('NaN') if i>1 or i<0 else i for i in sir_df.INFECTED]
sir_df.REMOVED = [float('NaN') if i>1 or i<0 else i for i in sir_df.REMOVED]
sir_df.dropna(inplace=True)
sir_df

Unnamed: 0_level_0,cases_now,cases_old,POPULATION,SUSCEPTIBLE,INFECTED,REMOVED
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1001,67.0,36.0,55869,0.998801,0.000555,0.000644
1003,208.0,147.0,223234,0.999068,0.000273,0.000659
1005,53.0,32.0,24686,0.997853,0.000851,0.001296
1007,44.0,34.0,22394,0.998035,0.000447,0.001518
1009,44.0,31.0,57826,0.999239,0.000225,0.000536
...,...,...,...,...,...,...
56035,3.0,3.0,9831,0.999695,0.000000,0.000305
56037,19.0,16.0,42343,0.999551,0.000071,0.000378
56039,98.0,95.0,23464,0.995823,0.000128,0.004049
56041,9.0,7.0,20226,0.999555,0.000099,0.000346


In [13]:
# calculate number of people infected everyday
corona_pivot_diff = corona_pivot.diff(axis=1)
corona_pivot_diff.dropna(axis=1, how='all', inplace=True)
corona_pivot_diff

date,2020-04-13,2020-04-14,2020-04-15,2020-04-16,2020-04-17,2020-04-18,2020-04-19,2020-04-20,2020-04-21,2020-04-22,...,2020-04-29,2020-04-30,2020-05-01,2020-05-02,2020-05-03,2020-05-04,2020-05-05,2020-05-06,2020-05-07,2020-05-08
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,0.0,4.0,1.0,2.0,0.0,-1.0,1.0,2.0,2.0,2.0,...,3.0,1.0,-2.0,3.0,3.0,5.0,0.0,5.0,3.0,6.0
1003,1.0,15.0,4.0,10.0,2.0,6.0,3.0,5.0,6.0,9.0,...,3.0,0.0,1.0,6.0,6.0,1.0,1.0,7.0,9.0,3.0
1005,0.0,1.0,1.0,2.0,1.0,3.0,2.0,2.0,6.0,1.0,...,0.0,2.0,3.0,1.0,2.0,0.0,2.0,0.0,4.0,2.0
1007,1.0,0.0,1.0,4.0,2.0,2.0,2.0,4.0,0.0,2.0,...,0.0,0.0,0.0,0.0,1.0,-1.0,1.0,0.0,1.0,0.0
1009,1.0,2.0,1.0,1.0,2.0,0.0,1.0,1.0,4.0,3.0,...,2.0,1.0,2.0,1.0,0.0,0.0,0.0,2.0,2.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56035,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
56037,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
56039,0.0,1.0,1.0,1.0,2.0,1.0,0.0,0.0,30.0,1.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
56041,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [14]:
# normalize by dividing by population
corona_pivot_diff_pct = corona_pivot_diff.apply(lambda x: x/[census_df.POPESTIMATE2019.loc[i] for i in x.index])
corona_pivot_diff_pct

date,2020-04-13,2020-04-14,2020-04-15,2020-04-16,2020-04-17,2020-04-18,2020-04-19,2020-04-20,2020-04-21,2020-04-22,...,2020-04-29,2020-04-30,2020-05-01,2020-05-02,2020-05-03,2020-05-04,2020-05-05,2020-05-06,2020-05-07,2020-05-08
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,0.000000,0.000072,0.000018,0.000036,0.000000,-0.000018,0.000018,0.000036,0.000036,0.000036,...,0.000054,0.000018,-0.000036,0.000054,0.000054,0.000089,0.000000,0.000089,0.000054,0.000107
1003,0.000004,0.000067,0.000018,0.000045,0.000009,0.000027,0.000013,0.000022,0.000027,0.000040,...,0.000013,0.000000,0.000004,0.000027,0.000027,0.000004,0.000004,0.000031,0.000040,0.000013
1005,0.000000,0.000041,0.000041,0.000081,0.000041,0.000122,0.000081,0.000081,0.000243,0.000041,...,0.000000,0.000081,0.000122,0.000041,0.000081,0.000000,0.000081,0.000000,0.000162,0.000081
1007,0.000045,0.000000,0.000045,0.000179,0.000089,0.000089,0.000089,0.000179,0.000000,0.000089,...,0.000000,0.000000,0.000000,0.000000,0.000045,-0.000045,0.000045,0.000000,0.000045,0.000000
1009,0.000017,0.000035,0.000017,0.000017,0.000035,0.000000,0.000017,0.000017,0.000069,0.000052,...,0.000035,0.000017,0.000035,0.000017,0.000000,0.000000,0.000000,0.000035,0.000035,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56035,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000203,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
56037,0.000047,0.000000,0.000024,0.000000,0.000000,0.000000,0.000000,0.000000,0.000142,0.000000,...,0.000024,0.000024,0.000000,0.000000,0.000000,0.000000,0.000000,0.000024,0.000000,0.000000
56039,0.000000,0.000043,0.000043,0.000043,0.000085,0.000043,0.000000,0.000000,0.001279,0.000043,...,0.000000,0.000000,0.000000,0.000043,0.000043,0.000000,0.000000,0.000000,0.000000,0.000000
56041,0.000000,0.000000,0.000000,0.000000,0.000099,0.000000,0.000000,0.000000,0.000049,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000049


In [15]:
# Calculate average change 

def func(x):
    change_pcts = [i for i in x if i != 0]
    avg = np.sum(change_pcts)/len(x)
    
    # drop samples without too few changes or negative change
    if len(change_pcts) < 3 or avg <= 1e-9:
        return float("NaN")
    
    return avg

average_row = lambda x: func(x)

hyperparam_df = pd.DataFrame(corona_pivot_diff_pct.iloc[:, -num_days:].T.apply(average_row).rename("beta"))
hyperparam_df['gamma'] = corona_pivot_diff_pct.iloc[:, :-num_days].T.apply(average_row)
hyperparam_df = hyperparam_df.dropna(how='all')
display(hyperparam_df)

corona_quartiles = pd.concat([sir_df.describe(), hyperparam_df.describe()], axis=1)
corona_quartiles

Unnamed: 0_level_0,beta,gamma
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1
1001,0.000040,0.000025
1003,0.000020,0.000028
1005,0.000061,0.000074
1007,0.000032,0.000067
1009,0.000016,0.000026
...,...,...
56021,0.000031,0.000052
56025,,0.000017
56037,0.000005,0.000018
56039,0.000009,0.000139


Unnamed: 0,cases_now,cases_old,POPULATION,SUSCEPTIBLE,INFECTED,REMOVED,beta,gamma
count,2852.0,2852.0,2852.0,2852.0,2852.0,2852.0,1975.0,1838.0
mean,445.52244,325.243689,111316.6,0.997838,0.000796,0.001366,8.6e-05,7.423255e-05
std,3964.839621,3288.52771,341843.7,0.005102,0.003019,0.003295,0.000266,0.0001931977
min,0.0,0.0,463.0,0.879918,0.0,0.0,1e-06,7.54339e-07
25%,7.0,4.0,13685.75,0.9979,6.6e-05,0.000274,1.5e-05,1.451147e-05
50%,28.0,18.0,29337.0,0.999131,0.000244,0.00057,3.5e-05,3.027899e-05
75%,123.0,80.0,76598.5,0.999586,0.000724,0.001317,8.2e-05,7.232858e-05
max,181783.0,155113.0,10039110.0,1.0,0.115916,0.095237,0.008394,0.004453317


In [16]:
fip_pop_density_pct_dict = census_df.pop_density.rank(pct=True).to_dict()

fip_s_dict = sir_df.SUSCEPTIBLE.to_dict()
fip_i_dict = sir_df.INFECTED.to_dict()
fip_r_dict = sir_df.REMOVED.to_dict()
fip_beta_dict = hyperparam_df.beta.dropna().to_dict()
fip_gamma_dict = hyperparam_df.gamma.dropna().to_dict()

def generate_data(fip, mode, dict1, dict2):
    try:
        return dict1[fip]
    except KeyError:
        # Generate default values based on infection rate percentile
        return dict2[fip]*(corona_quartiles[mode]['75%']-corona_quartiles[mode]['25%']) + corona_quartiles[mode]['25%']
    
matrix_df = census_df.copy()
matrix_df['SUSCEPTIBLE'] = [generate_data(i, 'SUSCEPTIBLE', fip_s_dict, fip_pop_density_pct_dict) for i in matrix_df.index]
matrix_df['INFECTED'] = [generate_data(i, 'INFECTED', fip_i_dict, fip_pop_density_pct_dict) for i in matrix_df.index]
matrix_df['REMOVED'] = [generate_data(i, 'REMOVED', fip_r_dict, fip_pop_density_pct_dict) for i in matrix_df.index]

inf_dict = matrix_df.INFECTED.rank(pct=True).to_dict()
rem_dict = matrix_df.REMOVED.rank(pct=True).to_dict()

matrix_df['beta'] = [generate_data(i, 'beta', fip_beta_dict, inf_dict) for i in matrix_df.index]
matrix_df['gamma'] = [generate_data(i, 'gamma', fip_gamma_dict, rem_dict) for i in matrix_df.index]

matrix_df['US'] = 1
matrix_df.drop(columns=['POPESTIMATE2019'], inplace=True)

matrix_df = matrix_df[['beta', 'gamma', 'SUSCEPTIBLE', 'INFECTED', 'REMOVED', 'US']]
display(matrix_df.describe())
display(matrix_df)
matrix_df.to_csv('params.csv')

Unnamed: 0,beta,gamma,SUSCEPTIBLE,INFECTED,REMOVED,US
count,3108.0,3108.0,3108.0,3108.0,3108.0,3108.0
mean,6.6e-05,5.657647e-05,0.997866,0.000745,0.00129,1.0
std,0.000214,0.0001502648,0.00489,0.002897,0.003167,0.0
min,1e-06,7.54339e-07,0.879918,0.0,0.0,1.0
25%,2e-05,1.802508e-05,0.997943,7.2e-05,0.000287,1.0
50%,3.1e-05,2.950245e-05,0.999029,0.000218,0.000535,1.0
75%,5.5e-05,4.846908e-05,0.999566,0.000646,0.00121,1.0
max,0.008394,0.004453317,1.0,0.115916,0.095237,1.0


Unnamed: 0_level_0,beta,gamma,SUSCEPTIBLE,INFECTED,REMOVED,US
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1001,0.000040,0.000025,0.998801,0.000555,0.000644,1
1003,0.000020,0.000028,0.999068,0.000273,0.000659,1
1005,0.000061,0.000074,0.997853,0.000851,0.001296,1
1007,0.000032,0.000067,0.998035,0.000447,0.001518,1
1009,0.000016,0.000026,0.999239,0.000225,0.000536,1
...,...,...,...,...,...,...
56037,0.000005,0.000018,0.999551,0.000071,0.000378,1
56039,0.000009,0.000139,0.995823,0.000128,0.004049,1
56041,0.000036,0.000034,0.999555,0.000099,0.000346,1
56043,0.000020,0.000032,0.998975,0.000000,0.001025,1


### Combine param matrix with county matrix and export

In [17]:
matrix_df = pd.read_csv('params.csv').set_index('FIPS')
matrix_df

Unnamed: 0_level_0,beta,gamma,SUSCEPTIBLE,INFECTED,REMOVED,US
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1001,0.000040,0.000025,0.998801,0.000555,0.000644,1
1003,0.000020,0.000028,0.999068,0.000273,0.000659,1
1005,0.000061,0.000074,0.997853,0.000851,0.001296,1
1007,0.000032,0.000067,0.998035,0.000447,0.001518,1
1009,0.000016,0.000026,0.999239,0.000225,0.000536,1
...,...,...,...,...,...,...
56037,0.000005,0.000018,0.999551,0.000071,0.000378,1
56039,0.000009,0.000139,0.995823,0.000128,0.004049,1
56041,0.000036,0.000034,0.999555,0.000099,0.000346,1
56043,0.000020,0.000032,0.998975,0.000000,0.001025,1


In [18]:
matrix_dict = dict(zip(matrix_df.index, matrix_df.to_numpy()))
matrix_dict

{1001: array([3.96335305e-05, 2.53569362e-05, 9.98800766e-01, 5.54869427e-04,
        6.44364496e-04, 1.00000000e+00]),
 1003: array([1.95182761e-05, 2.83708276e-05, 9.99068242e-01, 2.73255866e-04,
        6.58501841e-04, 1.00000000e+00]),
 1005: array([6.07631856e-05, 7.42661157e-05, 9.97853034e-01, 8.50684599e-04,
        1.29628129e-03, 1.00000000e+00]),
 1007: array([3.18962988e-05, 6.69822274e-05, 9.98035188e-01, 4.46548183e-04,
        1.51826382e-03, 1.00000000e+00]),
 1009: array([1.60580263e-05, 2.59398886e-05, 9.99239097e-01, 2.24812368e-04,
        5.36091032e-04, 1.00000000e+00]),
 1011: array([6.36429208e-05, 6.60000660e-05, 9.97920998e-01, 8.91000891e-04,
        1.18800119e-03, 1.00000000e+00]),
 1013: array([5.17864488e-04, 5.99890306e-05, 9.91670095e-01, 7.25010284e-03,
        1.07980255e-03, 1.00000000e+00]),
 1015: array([2.13773287e-05, 2.20060737e-05, 9.98917301e-01, 2.99282602e-04,
        7.83416223e-04, 1.00000000e+00]),
 1017: array([6.22911100e-05, 2.10500992

In [19]:
county_np = np.loadtxt('county_matrix.csv', delimiter=',', dtype=int)
county_np.shape

(1792, 2944)

In [20]:
matrix = np.zeros([county_np.shape[0], county_np.shape[1], 6])

for i in range(county_np.shape[0]):
    for j in range(county_np.shape[1]):
        try:
            # set parameter values based on the county
            matrix[i, j] = matrix_dict[county_np[i, j]]
            # Add noise to the matrix
            matrix[i, j, :2] = matrix[i, j, :2] * np.random.uniform(0.9, 1.1, 2)
        except KeyError:
            pass
        
matrix.shape

(1792, 2944, 6)

In [21]:
np.save('matrix', matrix)