In [1]:
import os
import numpy as np
import pandas as pd
import addfips
from src.utils.paths import get_parent_dir
from linearmodels.panel import PooledOLS, PanelOLS
import statsmodels.api as sm

### 1) Prepare data

In [2]:
def read_csse(path):
    df = pd.read_csv(path)
    df = df.set_index("Unnamed: 0")
    df.index = pd.to_datetime(df.index)
    return df

pdir = get_parent_dir(2)

Read health data

In [3]:
health_dir = os.path.join(pdir, 'data', 'raw', 'health')
sahie = pd.read_csv(os.path.join(health_dir, "SAHIE_2017.csv"),
                    header=68,
                    sep=',')

print(sahie.head())
print(sahie.shape)

   year   version  statefips  countyfips  geocat  agecat  racecat  sexcat  \
0  2017                    1           0      40       0        0       0   
1  2017                    1           0      40       0        0       0   
2  2017                    1           0      40       0        0       0   
3  2017                    1           0      40       0        0       0   
4  2017                    1           0      40       0        0       0   

   iprcat     NIPR  ... pctui_moe PCTIC pctic_moe PCTELIG pctelig_moe PCTLIIC  \
0       0  3966117  ...       0.3    89       0.3      11         0.3      89   
1       1  1487986  ...       0.6  81.7       0.6     6.9         0.2    30.6   
2       2  1836277  ...       0.5  82.7       0.5       8         0.3    38.3   
3       3  1014380  ...       0.7  80.6       0.7       5         0.2    20.6   
4       4  2701984  ...       0.4  85.5       0.4     9.8         0.3    58.3   

  pctliic_moe                                     

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
#   prepare
sahie = sahie.drop(columns=['Unnamed: 25', 'year', 'version',
                            'statefips', 'countyfips', 'geocat'])
sahie.columns

Index(['agecat', 'racecat', 'sexcat', 'iprcat', 'NIPR', 'nipr_moe', 'NUI',
       'nui_moe', 'NIC', 'nic_moe', 'PCTUI', 'pctui_moe', 'PCTIC', 'pctic_moe',
       'PCTELIG', 'pctelig_moe', 'PCTLIIC', 'pctliic_moe', 'state_name',
       'county_name'],
      dtype='object')

In [5]:
# sahie.head()
# sahie_long = sahie.stack().reset_index()
# sahie_long.drop(columns=['level_0'], inplace=True)

In [6]:
# sahie_long.columns

Index(['level_1', 0], dtype='object')

In [7]:
# sahie_long.rename(columns={'level_1': 'variable',
#                            0: 'measurement'}, inplace=True)
# 

In [8]:
# sahie_pivot = sahie_long.pivot(columns='variable', values='measurement')

In [5]:
cols = sahie.columns.to_list()
cols = cols[-2:] + cols[:-2]
sahie = sahie[cols]

In [6]:
# remove whitespace
sahie = sahie.apply(lambda s : s.str.strip() if s.dtype == "object" else s)

In [12]:
# query for county data only
sahie_county_data_only = sahie.query("county_name != ''")
sahie_county_data_only.reset_index(drop=True, inplace=True)

In [91]:
sahie_cleaned = pd.get_dummies(sahie_county_data_only, 
               columns=['sexcat', 'iprcat', 'agecat', 'racecat'])

In [92]:
numeric_cols = ['NIPR', 'nipr_moe', 'NUI', 'nui_moe',
       'NIC', 'nic_moe', 'PCTUI', 'pctui_moe', 'PCTIC', 'pctic_moe', 'PCTELIG',
       'pctelig_moe', 'PCTLIIC', 'pctliic_moe']
for col in numeric_cols:
    sahie_cleaned[col] = sahie_cleaned[col].replace('.', np.nan).astype(float)
sahie_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301632 entries, 0 to 301631
Data columns (total 32 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   state_name   301632 non-null  object 
 1   county_name  301632 non-null  object 
 2   NIPR         30110 non-null   float64
 3   nipr_moe     30110 non-null   float64
 4   NUI          30110 non-null   float64
 5   nui_moe      30110 non-null   float64
 6   NIC          30110 non-null   float64
 7   nic_moe      30110 non-null   float64
 8   PCTUI        30110 non-null   float64
 9   pctui_moe    30110 non-null   float64
 10  PCTIC        30110 non-null   float64
 11  pctic_moe    30110 non-null   float64
 12  PCTELIG      30110 non-null   float64
 13  pctelig_moe  30110 non-null   float64
 14  PCTLIIC      30110 non-null   float64
 15  pctliic_moe  30110 non-null   float64
 16  sexcat_0     301632 non-null  uint8  
 17  sexcat_1     301632 non-null  uint8  
 18  sexcat_2     301632 non-

In [93]:
sahie_cleaned = sahie_cleaned.groupby(['county_name'], as_index=False).first()
sahie_cleaned.rename(columns={'county_name': 'county',
                              'state_name': 'state'},
                     inplace=True)


In [94]:
# add fips
af = addfips.AddFIPS()

sahie_county_fips_codes = []
for i, row in sahie_cleaned.iterrows():
    county_fips_code = af.get_county_fips(county=row.county, state=row.state)
    sahie_county_fips_codes.append(county_fips_code)
sahie_county_fips_codes

sahie_cleaned['FIPS'] = sahie_county_fips_codes
sahie_county_fips_codes

['45001',
 '22001',
 '51001',
 '16001',
 '19001',
 '08001',
 '50001',
 '45003',
 '27001',
 '12001',
 '37001',
 '06001',
 '08003',
 '36001',
 '51003',
 '26001',
 '28003',
 '02013',
 '02016',
 '17003',
 '51510',
 '40003',
 '26003',
 '19005',
 '26005',
 '24001',
 '37005',
 '42003',
 '18003',
 '22003',
 '45005',
 '26007',
 '06003',
 '06005',
 '51007',
 '51009',
 '28005',
 None,
 '20003',
 '29003',
 '48003',
 '23001',
 '48005',
 '24003',
 '27003',
 '37007',
 '31003',
 '26009',
 '04001',
 '19007',
 '13001',
 '51011',
 '48007',
 '08005',
 '48009',
 '08007',
 '26011',
 '05001',
 '51013',
 '42005',
 '23003',
 '31005',
 '22005',
 '37009',
 '39005',
 '05003',
 '39007',
 '53003',
 '22007',
 '48013',
 '20005',
 '39009',
 '13003',
 '34001',
 '40005',
 '28007',
 '29007',
 '19009',
 '39011',
 '51015',
 '46003',
 '48015',
 '01001',
 '37011',
 '22009',
 '08009',
 '13005',
 '48017',
 '12003',
 '01003',
 '21007',
 '24005',
 '24510',
 '45009',
 '48019',
 '13011',
 '31007',
 '16005',
 '26013',
 '20007',
 '0

In [95]:
#sahie_cleaned[sahie_cleaned['county'] == "Anchorage Borough"]['FIPS'] = '02020'
#print(sahie_cleaned['FIPS'].head(40))
sahie_cleaned.head(40)

Unnamed: 0,county,state,NIPR,nipr_moe,NUI,nui_moe,NIC,nic_moe,PCTUI,pctui_moe,...,iprcat_4,iprcat_5,agecat_0,agecat_1,agecat_2,agecat_3,agecat_4,agecat_5,racecat_0,FIPS
0,Abbeville County,South Carolina,,,,,,,,,...,0,0,1,0,0,0,0,0,1,45001.0
1,Acadia Parish,Louisiana,,,,,,,,,...,0,0,1,0,0,0,0,0,1,22001.0
2,Accomack County,Virginia,,,,,,,,,...,0,0,1,0,0,0,0,0,1,51001.0
3,Ada County,Idaho,388515.0,0.0,33969.0,2623.0,354546.0,2623.0,8.7,0.7,...,0,0,1,0,0,0,0,0,1,16001.0
4,Adair County,Iowa,,,,,,,,,...,0,0,1,0,0,0,0,0,1,19001.0
5,Adams County,Colorado,2934.0,0.0,422.0,53.0,2512.0,53.0,14.4,1.8,...,0,0,1,0,0,0,0,0,1,8001.0
6,Addison County,Vermont,,,,,,,,,...,0,0,1,0,0,0,0,0,1,50001.0
7,Aiken County,South Carolina,,,,,,,,,...,0,0,1,0,0,0,0,0,1,45003.0
8,Aitkin County,Minnesota,,,,,,,,,...,0,0,1,0,0,0,0,0,1,27001.0
9,Alachua County,Florida,217434.0,0.0,25800.0,2358.0,191634.0,2358.0,11.9,1.1,...,0,0,1,0,0,0,0,0,1,12001.0


In [98]:
# drop Anchorage Borough with missing FIPS, deal with that later
sahie_cleaned = sahie_cleaned.drop([37])

Unnamed: 0,county,state,NIPR,nipr_moe,NUI,nui_moe,NIC,nic_moe,PCTUI,pctui_moe,...,iprcat_4,iprcat_5,agecat_0,agecat_1,agecat_2,agecat_3,agecat_4,agecat_5,racecat_0,FIPS
0,Abbeville County,South Carolina,,,,,,,,,...,0,0,1,0,0,0,0,0,1,45001
1,Acadia Parish,Louisiana,,,,,,,,,...,0,0,1,0,0,0,0,0,1,22001
2,Accomack County,Virginia,,,,,,,,,...,0,0,1,0,0,0,0,0,1,51001
3,Ada County,Idaho,388515.0,0.0,33969.0,2623.0,354546.0,2623.0,8.7,0.7,...,0,0,1,0,0,0,0,0,1,16001
4,Adair County,Iowa,,,,,,,,,...,0,0,1,0,0,0,0,0,1,19001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1872,Yukon-Koyukuk Census Area,Alaska,,,,,,,,,...,0,0,1,0,0,0,0,0,1,02290
1873,Yuma County,Arizona,,,,,,,,,...,0,0,1,0,0,0,0,0,1,04027
1874,Zapata County,Texas,,,,,,,,,...,0,0,1,0,0,0,0,0,1,48505
1875,Zavala County,Texas,,,,,,,,,...,0,0,1,0,0,0,0,0,1,48507


In [99]:
sahie_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1877 entries, 0 to 1876
Data columns (total 33 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   county       1877 non-null   object 
 1   state        1877 non-null   object 
 2   NIPR         274 non-null    float64
 3   nipr_moe     274 non-null    float64
 4   NUI          274 non-null    float64
 5   nui_moe      274 non-null    float64
 6   NIC          274 non-null    float64
 7   nic_moe      274 non-null    float64
 8   PCTUI        274 non-null    float64
 9   pctui_moe    274 non-null    float64
 10  PCTIC        274 non-null    float64
 11  pctic_moe    274 non-null    float64
 12  PCTELIG      274 non-null    float64
 13  pctelig_moe  274 non-null    float64
 14  PCTLIIC      274 non-null    float64
 15  pctliic_moe  274 non-null    float64
 16  sexcat_0     1877 non-null   uint8  
 17  sexcat_1     1877 non-null   uint8  
 18  sexcat_2     1877 non-null   uint8  
 19  iprcat

In [None]:
csse_dir = os.path.join(pdir, 'data', 'processed', 'csse', 'US')

fname_confirmed = "time_series_covid19_confirmed_US_timeseries.csv" 
fname_deaths = "time_series_covid19_deaths_US_timeseries.csv"

path_confirmed = os.path.join(csse_dir, fname_confirmed)
path_deaths = os.path.join(csse_dir, fname_deaths)

In [25]:
ts_confirmed = read_csse(path_confirmed)
ts_deaths = read_csse(path_deaths)

ts_confirmed.index.name = 'time'
ts_deaths.index.name = 'time'

In [82]:
demographic_dir = os.path.join(pdir, 'data', 'raw', 'demography')
popdata = pd.read_csv(os.path.join(demographic_dir, 
                                   "POPEST_2019.csv"),
                      encoding = "ISO-8859-1")

# POPESTIMATE2019: 7/1/2019 resident total population estimate
df_pop = popdata[['STNAME', 'CTYNAME', 'POPESTIMATE2019']]
df_pop_counties = df_pop.query("STNAME != CTYNAME")
df_pop_counties = df_pop_counties.rename(columns={'STNAME': 'state',
                                                  'CTYNAME': 'county',
                                                  'POPESTIMATE2019': 'pop2019_county'})

df_pop_states = df_pop.query("STNAME == CTYNAME")
df_pop_states = df_pop_states.reset_index(drop=True)
df_pop_states = df_pop_states.rename(columns={'STNAME': 'state',
                                              'CTYNAME': 'county',
                                              'POPESTIMATE2019': 'pop2019_state'})
df_pop_states.drop(columns='county', inplace=True)

af = addfips.AddFIPS()

county_fips_codes = []
for i, row in df_pop_counties.iterrows():
    county_fips_code = af.get_county_fips(county=row.county, state=row.state)
    county_fips_codes.append(county_fips_code)
    
state_fips_codes = []
for i, row in df_pop_states.iterrows():
    state_fips_code = af.get_state_fips(state=row.state)
    state_fips_codes.append(state_fips_code)
    
df_pop_counties['FIPS'] = county_fips_codes
df_pop_states['FIPS_state'] = state_fips_codes
print(df_pop_states.head())
print(df_pop_counties.head())

        state  pop2019_state FIPS_state
0     Alabama        4903185         01
1      Alaska         731545         02
2     Arizona        7278717         04
3    Arkansas        3017804         05
4  California       39512223         06
     state          county  pop2019_county   FIPS
1  Alabama  Autauga County           55869  01001
2  Alabama  Baldwin County          223234  01003
3  Alabama  Barbour County           24686  01005
4  Alabama     Bibb County           22394  01007
5  Alabama   Blount County           57826  01009


In [81]:
df_pop_counties

Unnamed: 0,state,county,pop2019_county,FIPS
1,Alabama,Autauga County,55869,01001
2,Alabama,Baldwin County,223234,01003
3,Alabama,Barbour County,24686,01005
4,Alabama,Bibb County,22394,01007
5,Alabama,Blount County,57826,01009
...,...,...,...,...
3188,Wyoming,Sweetwater County,42343,56037
3189,Wyoming,Teton County,23464,56039
3190,Wyoming,Uinta County,20226,56041
3191,Wyoming,Washakie County,7805,56043


In [83]:
ts_confirmedT = ts_confirmed.transpose()
ts_confirmedT.index.name = "FIPS"
tsconfm = ts_confirmedT.stack()

In [84]:
tsconfm.name = "confirmed_cases" 
tsconfm = tsconfm.reset_index()

### 2) Merge independent and dependent variables 

In [85]:
df_merged = pd.merge(left=tsconfm.reset_index(),
                     right=df_pop_counties,
                     on='FIPS')
df_merged = pd.merge(left=df_merged,
                     right=df_pop_states,
                     on='state')

In [100]:
# merge sahie
df_merged = pd.merge(left=df_merged,
                     right=sahie_cleaned,
                     on='FIPS')

In [102]:
df_merged.drop(columns='index', inplace=True)


In [103]:
df_merged

Unnamed: 0,FIPS,time,confirmed_cases,state_x,county_x,pop2019_county,pop2019_state,FIPS_state,county_y,state_y,...,iprcat_3,iprcat_4,iprcat_5,agecat_0,agecat_1,agecat_2,agecat_3,agecat_4,agecat_5,racecat_0
0,10001,2020-01-22,0,Delaware,Kent County,180786,973764,10,Kent County,Delaware,...,0,0,0,1,0,0,0,0,0,1
1,10001,2020-01-23,0,Delaware,Kent County,180786,973764,10,Kent County,Delaware,...,0,0,0,1,0,0,0,0,0,1
2,10001,2020-01-24,0,Delaware,Kent County,180786,973764,10,Kent County,Delaware,...,0,0,0,1,0,0,0,0,0,1
3,10001,2020-01-25,0,Delaware,Kent County,180786,973764,10,Kent County,Delaware,...,0,0,0,1,0,0,0,0,0,1
4,10001,2020-01-26,0,Delaware,Kent County,180786,973764,10,Kent County,Delaware,...,0,0,0,1,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115554,56045,2020-03-30,0,Wyoming,Weston County,6927,578759,56,Weston County,Wyoming,...,0,0,0,1,0,0,0,0,0,1
115555,56045,2020-03-31,0,Wyoming,Weston County,6927,578759,56,Weston County,Wyoming,...,0,0,0,1,0,0,0,0,0,1
115556,56045,2020-04-01,0,Wyoming,Weston County,6927,578759,56,Weston County,Wyoming,...,0,0,0,1,0,0,0,0,0,1
115557,56045,2020-04-02,0,Wyoming,Weston County,6927,578759,56,Weston County,Wyoming,...,0,0,0,1,0,0,0,0,0,1


### 3) Construct panel using pandas Multi-index 

In [104]:
# fips => entity FE, time => time FE
panel = df_merged.set_index(['FIPS', 'time'])

In [105]:
# county pop share with respect to state pop
# TODO: meaningful? 
panel['county_pop_share_2019'] = \
    panel['pop2019_county'].divide(panel['pop2019_state'])

Unnamed: 0_level_0,Unnamed: 1_level_0,confirmed_cases,state_x,county_x,pop2019_county,pop2019_state,FIPS_state,county_y,state_y,NIPR,nipr_moe,...,iprcat_4,iprcat_5,agecat_0,agecat_1,agecat_2,agecat_3,agecat_4,agecat_5,racecat_0,county_pop_share_2019
FIPS,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
10001,2020-01-22,0,Delaware,Kent County,180786,973764,10,Kent County,Delaware,143160.0,0.0,...,0,0,1,0,0,0,0,0,1,0.185657
10001,2020-01-23,0,Delaware,Kent County,180786,973764,10,Kent County,Delaware,143160.0,0.0,...,0,0,1,0,0,0,0,0,1,0.185657
10001,2020-01-24,0,Delaware,Kent County,180786,973764,10,Kent County,Delaware,143160.0,0.0,...,0,0,1,0,0,0,0,0,1,0.185657
10001,2020-01-25,0,Delaware,Kent County,180786,973764,10,Kent County,Delaware,143160.0,0.0,...,0,0,1,0,0,0,0,0,1,0.185657
10001,2020-01-26,0,Delaware,Kent County,180786,973764,10,Kent County,Delaware,143160.0,0.0,...,0,0,1,0,0,0,0,0,1,0.185657
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56045,2020-03-30,0,Wyoming,Weston County,6927,578759,56,Weston County,Wyoming,,,...,0,0,1,0,0,0,0,0,1,0.011969
56045,2020-03-31,0,Wyoming,Weston County,6927,578759,56,Weston County,Wyoming,,,...,0,0,1,0,0,0,0,0,1,0.011969
56045,2020-04-01,0,Wyoming,Weston County,6927,578759,56,Weston County,Wyoming,,,...,0,0,1,0,0,0,0,0,1,0.011969
56045,2020-04-02,0,Wyoming,Weston County,6927,578759,56,Weston County,Wyoming,,,...,0,0,1,0,0,0,0,0,1,0.011969


In [108]:
panel

Unnamed: 0_level_0,Unnamed: 1_level_0,confirmed_cases,state_x,county_x,pop2019_county,pop2019_state,FIPS_state,county_y,state_y,NIPR,nipr_moe,...,iprcat_4,iprcat_5,agecat_0,agecat_1,agecat_2,agecat_3,agecat_4,agecat_5,racecat_0,county_pop_share_2019
FIPS,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
10001,2020-01-22,0,Delaware,Kent County,180786,973764,10,Kent County,Delaware,143160.0,0.0,...,0,0,1,0,0,0,0,0,1,0.185657
10001,2020-01-23,0,Delaware,Kent County,180786,973764,10,Kent County,Delaware,143160.0,0.0,...,0,0,1,0,0,0,0,0,1,0.185657
10001,2020-01-24,0,Delaware,Kent County,180786,973764,10,Kent County,Delaware,143160.0,0.0,...,0,0,1,0,0,0,0,0,1,0.185657
10001,2020-01-25,0,Delaware,Kent County,180786,973764,10,Kent County,Delaware,143160.0,0.0,...,0,0,1,0,0,0,0,0,1,0.185657
10001,2020-01-26,0,Delaware,Kent County,180786,973764,10,Kent County,Delaware,143160.0,0.0,...,0,0,1,0,0,0,0,0,1,0.185657
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56045,2020-03-30,0,Wyoming,Weston County,6927,578759,56,Weston County,Wyoming,,,...,0,0,1,0,0,0,0,0,1,0.011969
56045,2020-03-31,0,Wyoming,Weston County,6927,578759,56,Weston County,Wyoming,,,...,0,0,1,0,0,0,0,0,1,0.011969
56045,2020-04-01,0,Wyoming,Weston County,6927,578759,56,Weston County,Wyoming,,,...,0,0,1,0,0,0,0,0,1,0.011969
56045,2020-04-02,0,Wyoming,Weston County,6927,578759,56,Weston County,Wyoming,,,...,0,0,1,0,0,0,0,0,1,0.011969


In [111]:
# select vars
panel_subset = panel[
    ['confirmed_cases', 'pop2019_county', 'county_pop_share_2019', 'NIPR', 
     'nipr_moe', 'NUI', 'nui_moe', 'NIC', 'nic_moe', 'PCTUI', 'pctui_moe',
     'PCTIC', 'pctic_moe', 'PCTELIG', 'pctelig_moe', 'PCTLIIC',
     'pctliic_moe', 'sexcat_0', 'sexcat_1', 'sexcat_2', 'iprcat_0',
     'iprcat_1', 'iprcat_2', 'iprcat_3', 'iprcat_4', 'iprcat_5', 'agecat_0',
     'agecat_1', 'agecat_2', 'agecat_3', 'agecat_4', 'agecat_5', 'racecat_0']]

### 4) Run pooled and panel regression 
entity fixed effects don't work. this makes sense as the population
shares per county add up to 100% per county.
time fixed effects on the other hand yield the same results as 
the simple pooled regression above, which makes sense because there
is no variation over time in our current data. looking good!

A) Merged with SAHIE data (significantly less data!)

In [135]:
exog_vars = \
    ['pop2019_county', # County population
     'county_pop_share_2019', # Share of county population relative to state population
     'NIPR', # Number in demographic group for <income category>
     'NUI', # Number uninsured
     'PCTELIG'] # Percent uninsured in demographic group for all income levels
exog = sm.add_constant(panel_subset[exog_vars])

# pooled regression
mod_pooled = PooledOLS(dependent=panel_subset.confirmed_cases, 
                       exog=exog)
pooled_res = mod_pooled.fit()
print(pooled_res)

# panel regression
mod_panel_entity = PanelOLS(dependent=panel_subset.confirmed_cases, 
                            exog=exog,
                            time_effects=True)
panel_entity_res = mod_panel_entity.fit()
print(panel_entity_res)

Inputs contain missing values. Dropping rows with missing observations.


                          PooledOLS Estimation Summary                          
Dep. Variable:        confirmed_cases   R-squared:                        0.1067
Estimator:                  PooledOLS   R-squared (Between):              0.8861
No. Observations:               15695   R-squared (Within):               0.0000
Date:                Mon, Apr 06 2020   R-squared (Overall):              0.1067
Time:                        22:44:23   Log-likelihood                 -8.56e+04
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      374.82
Entities:                         215   P-value                           0.0000
Avg Obs:                       73.000   Distribution:                 F(5,15689)
Min Obs:                       73.000                                           
Max Obs:                       73.000   F-statistic (robust):             374.82
                            

A) Population data only

In [136]:
exog_vars = \
    ['pop2019_county', # County population
     'county_pop_share_2019'] # Percent uninsured in demographic group for all income levels
exog = sm.add_constant(panel_subset[exog_vars])

# pooled regression
mod_pooled = PooledOLS(dependent=panel_subset.confirmed_cases, 
                       exog=exog)
pooled_res = mod_pooled.fit()
print(pooled_res)

# panel regression
mod_panel_entity = PanelOLS(dependent=panel_subset.confirmed_cases, 
                            exog=exog,
                            time_effects=True)
panel_entity_res = mod_panel_entity.fit()
print(panel_entity_res)

                          PooledOLS Estimation Summary                          
Dep. Variable:        confirmed_cases   R-squared:                        0.0127
Estimator:                  PooledOLS   R-squared (Between):              0.0806
No. Observations:              115559   R-squared (Within):               0.0000
Date:                Mon, Apr 06 2020   R-squared (Overall):              0.0127
Time:                        22:45:35   Log-likelihood                -8.501e+05
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      743.41
Entities:                        1583   P-value                           0.0000
Avg Obs:                       73.000   Distribution:                F(2,115556)
Min Obs:                       73.000                                           
Max Obs:                       73.000   F-statistic (robust):             743.41
                            