In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np

In [2]:
pd.set_option('display.max_columns', None)

## Load data

In [3]:
sahie_2018 = pd.read_csv("../raw_data/sahie_2018.csv", header=68)

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
sahie_2017 = pd.read_csv("../raw_data/sahie_2017.csv", header=68)

In [5]:
sahie_2016 = pd.read_csv("../raw_data/sahie_2016.csv", header=68)

In [6]:
sahie_2015 = pd.read_csv("../raw_data/sahie_2015.csv", header=68)

In [7]:
sahie_2014 = pd.read_csv("../raw_data/sahie_2014.csv", header=68)

## Common methods

In [8]:
def filter_nc(df):
    df = df.loc[df["statefips"] == 37].reset_index(drop=True)
    return df

In [9]:
def drop_unnec(df, ver=False):
    if ver == True:
        df.drop(["version", "Unnamed: 25"], axis=1, inplace=True)
    else:
        df.drop(["Unnamed: 25"], axis=1, inplace=True)
    return df

In [10]:
def fix_data_types(df):
    df = df.infer_objects()
    df = df.astype({"statefips": str, "countyfips": str})
    df["countyfips"] = df["countyfips"].apply(lambda x: x.zfill(3))
    df["GEOID20"] = df["statefips"] + df["countyfips"]
    return df

## 2018 SAHIE NC data

Source for all SAHIE data: https://www.census.gov/data/datasets/time-series/demo/sahie/estimates-acs.html 

In [11]:
sahie_2018.head()

Unnamed: 0,year,version,statefips,countyfips,geocat,agecat,racecat,sexcat,iprcat,NIPR,nipr_moe,NUI,nui_moe,NIC,nic_moe,PCTUI,pctui_moe,PCTIC,pctic_moe,PCTELIG,pctelig_moe,PCTLIIC,pctliic_moe,state_name,county_name,Unnamed: 25
0,2018,,1,0,40,0,0,0,0,3955117,0,470052,13365,3485065,13365,11.9,0.3,88.1,0.3,11.9,0.3,88.1,0.3,Alabama ...,,
1,2018,,1,0,40,0,0,0,1,1460808,14401,286457,9710,1174351,14558,19.6,0.6,80.4,0.6,7.2,0.2,29.7,0.4,Alabama ...,,
2,2018,,1,0,40,0,0,0,2,1805111,14730,334174,10549,1470937,15318,18.5,0.6,81.5,0.6,8.4,0.3,37.2,0.4,Alabama ...,,
3,2018,,1,0,40,0,0,0,3,989540,13194,203801,8046,785739,12787,20.6,0.8,79.4,0.8,5.2,0.2,19.9,0.3,Alabama ...,,
4,2018,,1,0,40,0,0,0,4,2679733,14962,415673,12137,2264060,16839,15.5,0.4,84.5,0.4,10.5,0.3,57.2,0.4,Alabama ...,,


In [12]:
sahie_2018.columns

Index(['year', 'version', 'statefips', 'countyfips', 'geocat', 'agecat',
       'racecat', 'sexcat', 'iprcat', 'NIPR', 'nipr_moe', 'NUI', 'nui_moe',
       'NIC', 'nic_moe', 'PCTUI', 'pctui_moe', 'PCTIC', 'pctic_moe', 'PCTELIG',
       'pctelig_moe', 'PCTLIIC', 'pctliic_moe', 'state_name', 'county_name',
       'Unnamed: 25'],
      dtype='object')

In [13]:
sahie_2018 = filter_nc(sahie_2018)

In [14]:
sahie_2018["version"].unique()

array(['        '], dtype=object)

In [15]:
sahie_2018["Unnamed: 25"].unique()

array([nan])

In [16]:
sahie_2018 = drop_unnec(sahie_2018, True)

In [17]:
sahie_2018 = fix_data_types(sahie_2018)

In [18]:
sahie_2018.dtypes

year             int64
statefips       object
countyfips      object
geocat           int64
agecat           int64
racecat          int64
sexcat           int64
iprcat           int64
NIPR             int64
nipr_moe         int64
NUI              int64
nui_moe          int64
NIC              int64
nic_moe          int64
PCTUI          float64
pctui_moe      float64
PCTIC          float64
pctic_moe      float64
PCTELIG        float64
pctelig_moe    float64
PCTLIIC        float64
pctliic_moe    float64
state_name      object
county_name     object
GEOID20         object
dtype: object

In [19]:
sahie_2018.head()

Unnamed: 0,year,statefips,countyfips,geocat,agecat,racecat,sexcat,iprcat,NIPR,nipr_moe,NUI,nui_moe,NIC,nic_moe,PCTUI,pctui_moe,PCTIC,pctic_moe,PCTELIG,pctelig_moe,PCTLIIC,pctliic_moe,state_name,county_name,GEOID20
0,2018,37,0,40,0,0,0,0,8450311,0,1076670,21788,7373641,21788,12.7,0.3,87.3,0.3,12.7,0.3,87.3,0.3,North Carolina ...,,37000
1,2018,37,0,40,0,0,0,1,2902508,23848,610840,15236,2291668,23237,21.0,0.5,79.0,0.5,7.2,0.2,27.1,0.3,North Carolina ...,,37000
2,2018,37,0,40,0,0,0,2,3642271,24549,731164,16732,2911107,24536,20.1,0.4,79.9,0.4,8.7,0.2,34.4,0.3,North Carolina ...,,37000
3,2018,37,0,40,0,0,0,3,1886381,21259,406729,12291,1479652,20002,21.6,0.6,78.4,0.6,4.8,0.1,17.5,0.2,North Carolina ...,,37000
4,2018,37,0,40,0,0,0,4,5491779,25654,935302,19516,4556477,27446,17.0,0.3,83.0,0.3,11.1,0.2,53.9,0.3,North Carolina ...,,37000


## 2017 SAHIE NC data

In [20]:
sahie_2017.head()

Unnamed: 0,year,version,statefips,countyfips,geocat,agecat,racecat,sexcat,iprcat,NIPR,nipr_moe,NUI,nui_moe,NIC,nic_moe,PCTUI,pctui_moe,PCTIC,pctic_moe,PCTELIG,pctelig_moe,PCTLIIC,pctliic_moe,state_name,county_name,Unnamed: 25
0,2017,,1,0,40,0,0,0,0,3966117,0,438049,12783,3528068,12783,11.0,0.3,89.0,0.3,11.0,0.3,89.0,0.3,Alabama ...,,
1,2017,,1,0,40,0,0,0,1,1487986,14535,272778,9386,1215208,14688,18.3,0.6,81.7,0.6,6.9,0.2,30.6,0.4,Alabama ...,,
2,2017,,1,0,40,0,0,0,2,1836277,14820,316919,10266,1519358,15377,17.3,0.5,82.7,0.5,8.0,0.3,38.3,0.4,Alabama ...,,
3,2017,,1,0,40,0,0,0,3,1014380,13304,196526,7874,817854,13037,19.4,0.7,80.6,0.7,5.0,0.2,20.6,0.3,Alabama ...,,
4,2017,,1,0,40,0,0,0,4,2701984,15038,390599,11708,2311385,16820,14.5,0.4,85.5,0.4,9.8,0.3,58.3,0.4,Alabama ...,,


In [21]:
sahie_2017 = filter_nc(sahie_2017)

In [22]:
sahie_2017["version"].unique()

array(['        '], dtype=object)

In [23]:
sahie_2017 = drop_unnec(sahie_2017, True)

In [24]:
sahie_2017 = fix_data_types(sahie_2017)

In [25]:
sahie_2017.dtypes

year             int64
statefips       object
countyfips      object
geocat           int64
agecat           int64
racecat          int64
sexcat           int64
iprcat           int64
NIPR             int64
nipr_moe         int64
NUI              int64
nui_moe          int64
NIC              int64
nic_moe          int64
PCTUI          float64
pctui_moe      float64
PCTIC          float64
pctic_moe      float64
PCTELIG        float64
pctelig_moe    float64
PCTLIIC        float64
pctliic_moe    float64
state_name      object
county_name     object
GEOID20         object
dtype: object

In [26]:
sahie_2017.head()

Unnamed: 0,year,statefips,countyfips,geocat,agecat,racecat,sexcat,iprcat,NIPR,nipr_moe,NUI,nui_moe,NIC,nic_moe,PCTUI,pctui_moe,PCTIC,pctic_moe,PCTELIG,pctelig_moe,PCTLIIC,pctliic_moe,state_name,county_name,GEOID20
0,2017,37,0,40,0,0,0,0,8409430,0,1063335,21516,7346095,21516,12.6,0.3,87.4,0.3,12.6,0.3,87.4,0.3,North Carolina ...,,37000
1,2017,37,0,40,0,0,0,1,2948384,24072,632103,15643,2316281,23396,21.4,0.5,78.6,0.5,7.5,0.2,27.5,0.3,North Carolina ...,,37000
2,2017,37,0,40,0,0,0,2,3696054,24682,748935,17197,2947119,24819,20.3,0.4,79.7,0.4,8.9,0.2,35.0,0.3,North Carolina ...,,37000
3,2017,37,0,40,0,0,0,3,1952192,21387,435483,12765,1516709,20268,22.3,0.6,77.7,0.6,5.2,0.2,18.0,0.2,North Carolina ...,,37000
4,2017,37,0,40,0,0,0,4,5510194,25293,937605,19723,4572589,27312,17.0,0.3,83.0,0.3,11.1,0.2,54.4,0.3,North Carolina ...,,37000


## 2016 SAHIE data

In [27]:
sahie_2016.head()

Unnamed: 0,year,version,statefips,countyfips,geocat,agecat,racecat,sexcat,iprcat,NIPR,nipr_moe,NUI,nui_moe,NIC,nic_moe,PCTUI,pctui_moe,PCTIC,pctic_moe,PCTELIG,pctelig_moe,PCTLIIC,pctliic_moe,state_name,county_name,Unnamed: 25
0,2016,,1,0,40,0,0,0,0,3973078,0,427972,12298,3545106,12298,10.8,0.3,89.2,0.3,10.8,0.3,89.2,0.3,Alabama ...,,
1,2016,,1,0,40,0,0,0,1,1514292,13706,274697,9339,1239595,14056,18.1,0.6,81.9,0.6,6.9,0.2,31.2,0.4,Alabama ...,,
2,2016,,1,0,40,0,0,0,2,1876603,13894,319119,10099,1557484,14769,17.0,0.5,83.0,0.5,8.0,0.3,39.2,0.4,Alabama ...,,
3,2016,,1,0,40,0,0,0,3,1048013,12789,201733,7856,846280,12602,19.2,0.7,80.8,0.7,5.1,0.2,21.3,0.3,Alabama ...,,
4,2016,,1,0,40,0,0,0,4,2726753,13798,386721,11378,2340032,15930,14.2,0.4,85.8,0.4,9.7,0.3,58.9,0.4,Alabama ...,,


In [28]:
sahie_2016 = filter_nc(sahie_2016)

In [29]:
sahie_2016["version"].unique()

array(['        '], dtype=object)

In [30]:
sahie_2016 = drop_unnec(sahie_2016, True)

In [31]:
sahie_2016 = fix_data_types(sahie_2016)

In [32]:
sahie_2016.dtypes

year             int64
statefips       object
countyfips      object
geocat           int64
agecat           int64
racecat          int64
sexcat           int64
iprcat           int64
NIPR             int64
nipr_moe         int64
NUI              int64
nui_moe          int64
NIC              int64
nic_moe          int64
PCTUI          float64
pctui_moe      float64
PCTIC          float64
pctic_moe      float64
PCTELIG        float64
pctelig_moe    float64
PCTLIIC        float64
pctliic_moe    float64
state_name      object
county_name     object
GEOID20         object
dtype: object

In [33]:
sahie_2016.head()

Unnamed: 0,year,statefips,countyfips,geocat,agecat,racecat,sexcat,iprcat,NIPR,nipr_moe,NUI,nui_moe,NIC,nic_moe,PCTUI,pctui_moe,PCTIC,pctic_moe,PCTELIG,pctelig_moe,PCTLIIC,pctliic_moe,state_name,county_name,GEOID20
0,2016,37,0,40,0,0,0,0,8355457,0,1023107,20306,7332350,20306,12.2,0.2,87.8,0.2,12.2,0.2,87.8,0.2,North Carolina ...,,37000
1,2016,37,0,40,0,0,0,1,3033199,22440,627323,15104,2405876,22540,20.7,0.5,79.3,0.5,7.5,0.2,28.8,0.3,North Carolina ...,,37000
2,2016,37,0,40,0,0,0,2,3801574,22647,742427,16418,3059147,23612,19.5,0.4,80.5,0.4,8.9,0.2,36.6,0.3,North Carolina ...,,37000
3,2016,37,0,40,0,0,0,3,2049082,20568,443135,12509,1605947,19919,21.6,0.6,78.4,0.6,5.3,0.1,19.2,0.2,North Carolina ...,,37000
4,2016,37,0,40,0,0,0,4,5557876,22887,913434,18611,4644442,25702,16.4,0.3,83.6,0.3,10.9,0.2,55.6,0.3,North Carolina ...,,37000


## 2015 SAHIE data

In [34]:
sahie_2015.head()

Unnamed: 0,year,version,statefips,countyfips,geocat,agecat,racecat,sexcat,iprcat,NIPR,nipr_moe,NUI,nui_moe,NIC,nic_moe,PCTUI,pctui_moe,PCTIC,pctic_moe,PCTELIG,pctelig_moe,PCTLIIC,pctliic_moe,state_name,county_name,Unnamed: 25
0,2015,,1,0,40,0,0,0,0,3994181,0,475233,12979,3518948,12979,11.9,0.3,88.1,0.3,11.9,0.3,88.1,0.3,Alabama ...,,
1,2015,,1,0,40,0,0,0,1,1588535,13145,315278,10117,1273257,14077,19.8,0.6,80.2,0.6,7.9,0.3,31.9,0.4,Alabama ...,,
2,2015,,1,0,40,0,0,0,2,1948390,13248,362561,10890,1585829,14760,18.6,0.5,81.4,0.5,9.1,0.3,39.7,0.4,Alabama ...,,
3,2015,,1,0,40,0,0,0,3,1104165,12338,233672,8493,870493,12529,21.2,0.7,78.8,0.7,5.9,0.2,21.8,0.3,Alabama ...,,
4,2015,,1,0,40,0,0,0,4,2798291,12978,432968,12111,2365323,15807,15.5,0.4,84.5,0.4,10.8,0.3,59.2,0.4,Alabama ...,,


In [35]:
sahie_2015 = filter_nc(sahie_2015)

In [36]:
sahie_2015["version"].unique()

array(['        '], dtype=object)

In [37]:
sahie_2015 = drop_unnec(sahie_2015, True)

In [38]:
sahie_2015 = fix_data_types(sahie_2015)

In [39]:
sahie_2015.dtypes

year             int64
statefips       object
countyfips      object
geocat           int64
agecat           int64
racecat          int64
sexcat           int64
iprcat           int64
NIPR             int64
nipr_moe         int64
NUI              int64
nui_moe          int64
NIC              int64
nic_moe          int64
PCTUI          float64
pctui_moe      float64
PCTIC          float64
pctic_moe      float64
PCTELIG        float64
pctelig_moe    float64
PCTLIIC        float64
pctliic_moe    float64
state_name      object
county_name     object
GEOID20         object
dtype: object

In [40]:
sahie_2015.head()

Unnamed: 0,year,statefips,countyfips,geocat,agecat,racecat,sexcat,iprcat,NIPR,nipr_moe,NUI,nui_moe,NIC,nic_moe,PCTUI,pctui_moe,PCTIC,pctic_moe,PCTELIG,pctelig_moe,PCTLIIC,pctliic_moe,state_name,county_name,GEOID20
0,2015,37,0,40,0,0,0,0,8315067,0,1080102,20666,7234965,20666,13.0,0.2,87.0,0.2,13.0,0.2,87.0,0.2,North Carolina ...,,37000
1,2015,37,0,40,0,0,0,1,3159844,21468,692745,15899,2467099,22141,21.9,0.5,78.1,0.5,8.3,0.2,29.7,0.3,North Carolina ...,,37000
2,2015,37,0,40,0,0,0,2,3916279,21599,808388,17196,3107891,23189,20.6,0.4,79.4,0.4,9.7,0.2,37.4,0.3,North Carolina ...,,37000
3,2015,37,0,40,0,0,0,3,2143465,19650,496886,13209,1646579,19382,23.2,0.6,76.8,0.6,6.0,0.2,19.8,0.2,North Carolina ...,,37000
4,2015,37,0,40,0,0,0,4,5662919,21222,975211,19209,4687708,24859,17.2,0.3,82.8,0.3,11.7,0.2,56.4,0.3,North Carolina ...,,37000


## 2014 SAHIE data

In [41]:
sahie_2014.head()

Unnamed: 0,year,version,statefips,countyfips,geocat,agecat,racecat,sexcat,iprcat,NIPR,nipr_moe,NUI,nui_moe,NIC,nic_moe,PCTUI,pctui_moe,PCTIC,pctic_moe,PCTELIG,pctelig_moe,PCTLIIC,pctliic_moe,state_name,county_name,Unnamed: 25
0,2014,,1,0,40,0,0,0,0,4006946,0,567439,13761,3439507,13761,14.2,0.3,85.8,0.3,14.2,0.3,85.8,0.3,Alabama ...,,
1,2014,,1,0,40,0,0,0,1,1641542,12760,380759,10716,1260783,13943,23.2,0.6,76.8,0.6,9.5,0.3,31.5,0.3,Alabama ...,,
2,2014,,1,0,40,0,0,0,2,2008272,12812,436502,11506,1571770,14619,21.7,0.6,78.3,0.6,10.9,0.3,39.2,0.4,Alabama ...,,
3,2014,,1,0,40,0,0,0,3,1144885,12023,280059,9026,864826,12368,24.5,0.7,75.5,0.7,7.0,0.2,21.6,0.3,Alabama ...,,
4,2014,,1,0,40,0,0,0,4,2852930,12136,518676,12805,2334254,15625,18.2,0.4,81.8,0.4,12.9,0.3,58.3,0.4,Alabama ...,,


In [42]:
sahie_2014 = filter_nc(sahie_2014)

In [43]:
sahie_2014["version"].unique()

array(['        '], dtype=object)

In [44]:
sahie_2014 = drop_unnec(sahie_2014, True)
sahie_2014 = fix_data_types(sahie_2014)

In [45]:
sahie_2014.dtypes

year             int64
statefips       object
countyfips      object
geocat           int64
agecat           int64
racecat          int64
sexcat           int64
iprcat           int64
NIPR             int64
nipr_moe         int64
NUI              int64
nui_moe          int64
NIC              int64
nic_moe          int64
PCTUI          float64
pctui_moe      float64
PCTIC          float64
pctic_moe      float64
PCTELIG        float64
pctelig_moe    float64
PCTLIIC        float64
pctliic_moe    float64
state_name      object
county_name     object
GEOID20         object
dtype: object

In [46]:
sahie_2014

Unnamed: 0,year,statefips,countyfips,geocat,agecat,racecat,sexcat,iprcat,NIPR,nipr_moe,NUI,nui_moe,NIC,nic_moe,PCTUI,pctui_moe,PCTIC,pctic_moe,PCTELIG,pctelig_moe,PCTLIIC,pctliic_moe,state_name,county_name,GEOID20
0,2014,37,000,40,0,0,0,0,8269110,0,1254138,21758,7014972,21758,15.2,0.3,84.8,0.3,15.2,0.3,84.8,0.3,North Carolina ...,,37000
1,2014,37,000,40,0,0,0,1,3247517,20648,810758,16688,2436759,21498,25.0,0.5,75.0,0.5,9.8,0.2,29.5,0.3,North Carolina ...,,37000
2,2014,37,000,40,0,0,0,2,4004606,20766,943257,18015,3061349,22648,23.6,0.4,76.4,0.4,11.4,0.2,37.0,0.3,North Carolina ...,,37000
3,2014,37,000,40,0,0,0,3,2219241,19006,577619,13734,1641622,18892,26.0,0.6,74.0,0.6,7.0,0.2,19.9,0.2,North Carolina ...,,37000
4,2014,37,000,40,0,0,0,4,5738731,19786,1136153,20151,4602578,24399,19.8,0.3,80.2,0.3,13.7,0.2,55.7,0.3,North Carolina ...,,37000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9961,2014,37,199,50,5,0,2,1,2031,110,590,102,1441,123,29.0,4.7,71.0,4.7,12.3,2.1,30.2,2.6,North Carolina ...,Yancey County,37199
9962,2014,37,199,50,5,0,2,2,2552,112,674,113,1878,134,26.4,4.2,73.6,4.2,14.1,2.4,39.3,2.8,North Carolina ...,Yancey County,37199
9963,2014,37,199,50,5,0,2,3,1392,105,446,82,946,101,32.0,5.3,68.0,5.3,9.3,1.7,19.8,2.1,North Carolina ...,Yancey County,37199
9964,2014,37,199,50,5,0,2,4,3725,95,783,125,2942,146,21.0,3.3,79.0,3.3,16.4,2.6,61.6,3.1,North Carolina ...,Yancey County,37199


In [49]:
frames = [sahie_2014, sahie_2015, sahie_2016, sahie_2017, sahie_2018]
sahie = pd.concat(frames)

In [50]:
sahie

Unnamed: 0,year,statefips,countyfips,geocat,agecat,racecat,sexcat,iprcat,NIPR,nipr_moe,NUI,nui_moe,NIC,nic_moe,PCTUI,pctui_moe,PCTIC,pctic_moe,PCTELIG,pctelig_moe,PCTLIIC,pctliic_moe,state_name,county_name,GEOID20
0,2014,37,000,40,0,0,0,0,8269110,0,1254138,21758,7014972,21758,15.2,0.3,84.8,0.3,15.2,0.3,84.8,0.3,North Carolina ...,,37000
1,2014,37,000,40,0,0,0,1,3247517,20648,810758,16688,2436759,21498,25.0,0.5,75.0,0.5,9.8,0.2,29.5,0.3,North Carolina ...,,37000
2,2014,37,000,40,0,0,0,2,4004606,20766,943257,18015,3061349,22648,23.6,0.4,76.4,0.4,11.4,0.2,37.0,0.3,North Carolina ...,,37000
3,2014,37,000,40,0,0,0,3,2219241,19006,577619,13734,1641622,18892,26.0,0.6,74.0,0.6,7.0,0.2,19.9,0.2,North Carolina ...,,37000
4,2014,37,000,40,0,0,0,4,5738731,19786,1136153,20151,4602578,24399,19.8,0.3,80.2,0.3,13.7,0.2,55.7,0.3,North Carolina ...,,37000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9961,2018,37,199,50,5,0,2,1,1944,110,497,106,1447,127,25.6,5.2,74.4,5.2,10.4,2.2,30.3,2.7,North Carolina ...,Yancey County,37199
9962,2018,37,199,50,5,0,2,2,2378,112,567,118,1811,138,23.8,4.8,76.2,4.8,11.9,2.5,37.9,2.9,North Carolina ...,Yancey County,37199
9963,2018,37,199,50,5,0,2,3,1282,103,357,80,925,101,27.8,5.7,72.2,5.7,7.5,1.7,19.4,2.1,North Carolina ...,Yancey County,37199
9964,2018,37,199,50,5,0,2,4,3558,106,694,134,2864,157,19.5,3.7,80.5,3.7,14.5,2.8,59.9,3.3,North Carolina ...,Yancey County,37199


In [51]:
# sahie.to_csv("../data/health_insurance.csv")