# Census and CSSE Preprocessing

The goal of this notebook is to obtain and organize the following county-level data:

- nominal data: state, county, fips
- census data: 
    - total population
    - ethnic population(s)
    - voting statistics
    - median income
    - educational attainment
- geographic data (from the Census cartographic boundary shapefile): 
    - census area
    - latitude/longitude

The statistics gathered in this notebook will only need to be updated once the 2020 Census information is released to the public.

In [1]:
# standard EDA
import numpy as np
import pandas as pd

# processing geodata
import geopandas as gp
import pickle                           # saving to pickle instead of csv
from scipy import sparse
from shapely.geometry import asShape, Polygon

# opening external coordinates
import json

# opening urls
from urllib.request import urlopen

pd.options.display.max_rows = 150
pd.options.display.max_columns = 150

# 1. import census data from `census.gov`

2019 population estimates can be collected from [census.gov](https://www.census.gov/data/datasets/time-series/demo/popest/2010s-counties-total.html). For the most current estimates, we keep only the rows with `YEAR == 12` (the 2019 estimate). All age groups are kept at this stage: the per-age-group rows are needed further down to compute the age coefficient, after which only the `AGEGRP == 0` row of each county is retained ([data dictionary](https://www2.census.gov/programs-surveys/popest/technical-documentation/file-layouts/2010-2019/cc-est2019-alldata.pdf)).

In [65]:
# column stems in the raw census file; each stem comes as a _MALE / _FEMALE pair
raw_eth_cols = ['TOT', 'NHWA', 'NHBA', 'NHIA', 'NHAA', 'NHNA', 'NHTOM', 'H']
sex_cols = ['_MALE', '_FEMALE']
es_cols = [stem + sex for stem in raw_eth_cols for sex in sex_cols]

pop_cols = ['STATE', 'COUNTY', 'STNAME', 'CTYNAME', 'YEAR', 'AGEGRP', 'TOT_POP']
pop_cols += es_cols

# readable names for every stem after 'TOT'
# not sure if we need this level of granularity but we can keep it for now
eth_cols = ['tot_pop_white', 'tot_pop_black', 'tot_pop_native', 'tot_pop_asian', 
            'tot_pop_pacific', 'tot_pop_twoplus', 'tot_pop_hispanic']
es_cols_2 = [(name + sex).lower() for name in eth_cols for sex in sex_cols]

pop_df = (
    pd.read_csv(
        '../data/external/cc-est2019-alldata.csv',
        encoding='latin-1',        # to avoid unicode error
        usecols=pop_cols,          # it's a big file, only import certain columns
        dtype={'STATE':'str',      # these are FIPS codes
               'COUNTY':'str'},
    )
    # keep the 2019 estimates only (YEAR == 12); every age group is kept for now
    .loc[lambda raw: raw['YEAR'] == 12]
    .drop(columns=['YEAR'])
    # rename columns to better-match nytimes data (and personal preference)
    .rename(
        columns={
            'STATE':'state_fips',
            'COUNTY':'county_fips',
            'STNAME':'state',
            'CTYNAME':'county',
        }
    )
    # es_cols[2:] skips TOT_MALE / TOT_FEMALE, which keep their census names
    .rename(columns=dict(zip(es_cols[2:], es_cols_2)))
    .rename(columns=str.lower)
    # nytimes fips is 5-digit combo of state and county fips
    .assign(fips=lambda named: named['state_fips'] + named['county_fips'])
    .drop(columns=['county_fips'])
    .reset_index(drop=True)
)

pop_df.head()

Unnamed: 0,state_fips,state,county,agegrp,tot_pop,tot_male,tot_female,tot_pop_white_male,tot_pop_white_female,tot_pop_black_male,tot_pop_black_female,tot_pop_native_male,tot_pop_native_female,tot_pop_asian_male,tot_pop_asian_female,tot_pop_pacific_male,tot_pop_pacific_female,tot_pop_twoplus_male,tot_pop_twoplus_female,tot_pop_hispanic_male,tot_pop_hispanic_female,fips
0,1,Alabama,Autauga County,0,55869,27092,28777,20138,21077,5171,5927,105,138,282,364,20,20,492,464,884,787,1001
1,1,Alabama,Autauga County,1,3277,1713,1564,1180,1072,334,340,3,6,23,19,2,3,85,64,86,60,1001
2,1,Alabama,Autauga County,2,3465,1787,1678,1210,1134,388,359,7,8,16,25,0,1,78,81,88,70,1001
3,1,Alabama,Autauga County,3,3851,1977,1874,1362,1285,435,409,3,9,17,24,0,3,66,65,94,79,1001
4,1,Alabama,Autauga County,4,3659,1854,1805,1291,1272,429,397,4,0,21,13,3,3,43,46,63,74,1001


In [66]:
pop_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59698 entries, 0 to 59697
Data columns (total 22 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   state_fips               59698 non-null  object
 1   state                    59698 non-null  object
 2   county                   59698 non-null  object
 3   agegrp                   59698 non-null  int64 
 4   tot_pop                  59698 non-null  int64 
 5   tot_male                 59698 non-null  int64 
 6   tot_female               59698 non-null  int64 
 7   tot_pop_white_male       59698 non-null  int64 
 8   tot_pop_white_female     59698 non-null  int64 
 9   tot_pop_black_male       59698 non-null  int64 
 10  tot_pop_black_female     59698 non-null  int64 
 11  tot_pop_native_male      59698 non-null  int64 
 12  tot_pop_native_female    59698 non-null  int64 
 13  tot_pop_asian_male       59698 non-null  int64 
 14  tot_pop_asian_female     59698 non-nul

In [67]:
# remove descriptive terms from county names
# we'll use this again so it's nice to have a function
def remove_county_terms(s, county_terms=('County', 'Parish', 'Municipality')):
    """Delete county-type descriptors from a Series of county names.

    Parameters
    ----------
    s : pd.Series of str
        County names, e.g. ``'Autauga County'``.
    county_terms : iterable of str, optional
        Descriptors to delete. Each one is removed together with the single
        space that precedes it, wherever it occurs in the name.

    Returns
    -------
    pd.Series
        The names with every descriptor removed; other text is left untouched.
    """
    for term in county_terms:
        # regex=False: the terms are literal text. Being explicit keeps the
        # result independent of the pandas version (the default for `regex`
        # is not the same across releases) and of any special characters
        # in a caller-supplied term.
        s = s.str.replace(' ' + term, '', regex=False)
    return s

# strip 'County' / 'Parish' / 'Municipality' from the census county names
pop_df['county'] = remove_county_terms(pop_df['county'])

# personally like nominal (non-numeric) columns listed first
text_cols = pop_df.select_dtypes(exclude='number').columns.tolist()
numeric_cols = pop_df.select_dtypes(include='number').columns.tolist()
pop_cols = text_cols + numeric_cols
pop_df = pop_df.loc[:, pop_cols]
pop_df.head()

Unnamed: 0,state_fips,state,county,fips,agegrp,tot_pop,tot_male,tot_female,tot_pop_white_male,tot_pop_white_female,tot_pop_black_male,tot_pop_black_female,tot_pop_native_male,tot_pop_native_female,tot_pop_asian_male,tot_pop_asian_female,tot_pop_pacific_male,tot_pop_pacific_female,tot_pop_twoplus_male,tot_pop_twoplus_female,tot_pop_hispanic_male,tot_pop_hispanic_female
0,1,Alabama,Autauga,1001,0,55869,27092,28777,20138,21077,5171,5927,105,138,282,364,20,20,492,464,884,787
1,1,Alabama,Autauga,1001,1,3277,1713,1564,1180,1072,334,340,3,6,23,19,2,3,85,64,86,60
2,1,Alabama,Autauga,1001,2,3465,1787,1678,1210,1134,388,359,7,8,16,25,0,1,78,81,88,70
3,1,Alabama,Autauga,1001,3,3851,1977,1874,1362,1285,435,409,3,9,17,24,0,3,66,65,94,79
4,1,Alabama,Autauga,1001,4,3659,1854,1805,1291,1272,429,397,4,0,21,13,3,3,43,46,63,74


In [68]:
# check to see if we have all ethnic groups covered: the per-sex ethnic columns
# (column position 8 onward) should add up to `tot_pop`, i.e. a ratio of 1
eth_sex_total = pop_df.iloc[:, 8:].sum(axis=1)
eth_sex_total.div(pop_df['tot_pop']).describe()

count    59690.0
mean         1.0
std          0.0
min          1.0
25%          1.0
50%          1.0
75%          1.0
max          1.0
dtype: float64

In [69]:
# add sex-aggregated columns: one male + female total per ethnic group
eth_cols_3 = [name.lower() for name in eth_cols]

for group in eth_cols_3:
    male, female = group + '_male', group + '_female'
    pop_df[group] = pop_df[male] + pop_df[female]

pop_df.columns

Index(['state_fips', 'state', 'county', 'fips', 'agegrp', 'tot_pop',
       'tot_male', 'tot_female', 'tot_pop_white_male', 'tot_pop_white_female',
       'tot_pop_black_male', 'tot_pop_black_female', 'tot_pop_native_male',
       'tot_pop_native_female', 'tot_pop_asian_male', 'tot_pop_asian_female',
       'tot_pop_pacific_male', 'tot_pop_pacific_female',
       'tot_pop_twoplus_male', 'tot_pop_twoplus_female',
       'tot_pop_hispanic_male', 'tot_pop_hispanic_female', 'tot_pop_white',
       'tot_pop_black', 'tot_pop_native', 'tot_pop_asian', 'tot_pop_pacific',
       'tot_pop_twoplus', 'tot_pop_hispanic'],
      dtype='object')

### calculating age coefficient and adding percentages

In [70]:
# engineer an 'age' column from the age group bins
def age_coefficient(df):
    """Collapse per-age-group rows into one row per county plus `age_*` columns.

    For every numeric column other than 'agegrp', the age coefficient of a
    county is the mean age-group code weighted by that column's counts:
    ``sum(agegrp * count) / sum(count)`` over the rows with ``agegrp != 0``.

    Parameters
    ----------
    df : pd.DataFrame
        Needs a 'fips' column, an 'agegrp' column (0 marks the row that is
        kept as the county total) and numeric count columns whose names start
        with a 4-character prefix such as 'tot_'.

    Returns
    -------
    pd.DataFrame
        The ``agegrp == 0`` rows without 'agegrp', index reset, followed by one
        'age_<name>' column per count column. A coefficient is NaN when the
        county has a zero count in every age group for that column.
    """
    cols = df.select_dtypes(include='number').columns.tolist()
    cols.remove('agegrp')
    # 'tot_pop_white' -> 'age_pop_white'
    age_cols = ['age_' + c[4:] for c in cols]

    binned = df[df['agegrp'] != 0]
    weighted = binned[cols].mul(binned['agegrp'], axis=0)
    adf = weighted.groupby(binned['fips']).sum()\
          / binned[cols].groupby(binned['fips']).sum()
    adf.columns = age_cols

    totals = df[df['agegrp'] == 0].drop(columns='agegrp').reset_index(drop=True)

    # join on the fips key rather than by row position, so the coefficients
    # land on the right county whatever order the rows of `df` are in
    return totals.join(adf, on='fips')

In [71]:
# collapse the per-age-group rows into one row per county and append the `age_*` columns.
# NOTE: this cell cannot be re-run on its own output -- `age_coefficient` reads the
# 'agegrp' column and returns a frame without it, so re-run the cells above first.
pop_df = pop_df.pipe(age_coefficient)

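**FUTURE WORK:** some age coefficients are `NaN` because the county has no residents in that group (e.g. `age_pop_pacific` for Washakie, Wyoming, below); these values could be imputed from neighboring counties.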

In [72]:
pop_df.tail()

Unnamed: 0,state_fips,state,county,fips,tot_pop,tot_male,tot_female,tot_pop_white_male,tot_pop_white_female,tot_pop_black_male,tot_pop_black_female,tot_pop_native_male,tot_pop_native_female,tot_pop_asian_male,tot_pop_asian_female,tot_pop_pacific_male,tot_pop_pacific_female,tot_pop_twoplus_male,tot_pop_twoplus_female,tot_pop_hispanic_male,tot_pop_hispanic_female,tot_pop_white,tot_pop_black,tot_pop_native,tot_pop_asian,tot_pop_pacific,tot_pop_twoplus,tot_pop_hispanic,age_pop,age_male,age_female,age_pop_white_male,age_pop_white_female,age_pop_black_male,age_pop_black_female,age_pop_native_male,age_pop_native_female,age_pop_asian_male,age_pop_asian_female,age_pop_pacific_male,age_pop_pacific_female,age_pop_twoplus_male,age_pop_twoplus_female,age_pop_hispanic_male,age_pop_hispanic_female,age_pop_white,age_pop_black,age_pop_native,age_pop_asian,age_pop_pacific,age_pop_twoplus,age_pop_hispanic
3137,56,Wyoming,Sweetwater,56037,42343,21808,20535,17223,16338,298,204,177,158,183,227,26,29,350,358,3551,3221,33561,502,335,410,55,708,6772,7.942493,7.886051,8.002435,8.187366,8.316012,7.614094,7.990196,7.960452,8.310127,6.431694,8.026432,7.038462,7.689655,6.022857,5.368715,6.708533,6.6914,8.249993,7.766932,8.125373,7.314634,7.381818,5.69209,6.700384
3138,56,Wyoming,Teton,56039,23464,12142,11322,9832,9168,87,58,33,41,135,243,11,7,160,135,1884,1670,19000,145,74,378,18,295,3554,8.657688,8.632103,8.685126,9.090419,9.172339,9.16092,8.896552,9.424242,8.878049,7.711111,7.452675,10.090909,9.0,6.05625,6.362963,6.478238,6.364072,9.129947,9.055172,9.121622,7.544974,9.666667,6.19661,6.424592
3139,56,Wyoming,Uinta,56041,20226,10224,10002,8935,8722,64,62,69,75,36,56,11,13,182,130,927,944,17657,126,144,92,24,312,1871,8.025512,7.988165,8.063687,8.168103,8.313575,7.375,6.016129,8.014493,8.853333,7.0,8.607143,4.545455,7.076923,7.192308,5.892308,6.529666,6.106992,8.239961,6.706349,8.451389,7.978261,5.916667,6.650641,6.316408
3140,56,Wyoming,Washakie,56043,7805,3963,3842,3266,3151,23,15,19,34,22,33,0,0,68,66,565,543,6417,38,53,55,0,134,1108,9.065343,8.91673,9.218636,9.261788,9.638527,9.913043,8.466667,9.0,9.764706,8.409091,9.484848,,,7.029412,7.318182,7.125664,6.983425,9.446782,9.342105,9.490566,9.054545,,7.171642,7.055957
3141,56,Wyoming,Weston,56045,6927,3624,3303,3273,2963,30,15,62,45,30,83,1,0,73,67,155,130,6236,45,107,113,1,140,285,9.173668,9.06319,9.294883,9.175985,9.507256,8.433333,8.333333,8.145161,8.222222,11.666667,9.626506,9.0,,6.616438,5.791045,7.819355,6.530769,9.333387,8.4,8.17757,10.168142,9.0,6.221429,7.231579


In [19]:
def save_df(df, filename, csv_=False, pickle_=True, path='../data/processed/'):
    """Write `df` to `<path>/<filename>.csv` and/or `<path>/<filename>.p`.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to save.
    filename : str
        File name without extension.
    csv_ : bool, optional
        Write a CSV (without the index).
    pickle_ : bool, optional
        Write a pickle using the highest available protocol.
    path : str, optional
        Target directory, with or without a trailing separator.

    If both flags are false, a CSV is written so the call always saves something.
    """
    import os    # local import: only needed to build the output paths

    if not (csv_ or pickle_):
        csv_ = True
    if csv_:
        # os.path.join, so a `path` without a trailing slash still points
        # inside the directory instead of gluing the two names together
        df.to_csv(os.path.join(path, f'{filename}.csv'), index=False)
    if pickle_:
        with open(os.path.join(path, f'{filename}.p'), 'wb') as file:
            pickle.dump(df, file, protocol=pickle.HIGHEST_PROTOCOL)

In [74]:
# order the counties by their 5-digit fips code, then persist the frame
pop_df = pop_df.sort_values('fips')
save_df(df=pop_df, filename='pop_df')

# 2. add census region labels

In [75]:
REGION_URL = 'https://raw.githubusercontent.com/cphalpert/census-regions/master/us%20census%20bureau%20regions%20and%20divisions.csv'

# state -> census region / division lookup table
with urlopen(REGION_URL) as response:
    region_df = pd.read_csv(response)

region_df = region_df.rename(columns=str.lower)
region_df.head()

Unnamed: 0,state,state code,region,division
0,Alaska,AK,West,Pacific
1,Alabama,AL,South,East South Central
2,Arkansas,AR,South,West South Central
3,Arizona,AZ,West,Mountain
4,California,CA,West,Pacific


## merge with `pop_df` to begin building `info_df`

In [76]:
# inner join on the state name: attach `region` and `division` to every county row
region_labels = region_df[['state', 'region', 'division']]
info_df = pd.merge(pop_df, region_labels, on='state')
info_df.head()

Unnamed: 0,state_fips,state,county,fips,tot_pop,tot_male,tot_female,tot_pop_white_male,tot_pop_white_female,tot_pop_black_male,tot_pop_black_female,tot_pop_native_male,tot_pop_native_female,tot_pop_asian_male,tot_pop_asian_female,tot_pop_pacific_male,tot_pop_pacific_female,tot_pop_twoplus_male,tot_pop_twoplus_female,tot_pop_hispanic_male,tot_pop_hispanic_female,tot_pop_white,tot_pop_black,tot_pop_native,tot_pop_asian,tot_pop_pacific,tot_pop_twoplus,tot_pop_hispanic,age_pop,age_male,age_female,age_pop_white_male,age_pop_white_female,age_pop_black_male,age_pop_black_female,age_pop_native_male,age_pop_native_female,age_pop_asian_male,age_pop_asian_female,age_pop_pacific_male,age_pop_pacific_female,age_pop_twoplus_male,age_pop_twoplus_female,age_pop_hispanic_male,age_pop_hispanic_female,age_pop_white,age_pop_black,age_pop_native,age_pop_asian,age_pop_pacific,age_pop_twoplus,age_pop_hispanic,region,division
0,1,Alabama,Autauga,1001,55869,27092,28777,20138,21077,5171,5927,105,138,282,364,20,20,492,464,884,787,41215,11098,243,646,40,956,1671,8.422041,8.181973,8.648052,8.518373,8.928216,7.480758,8.104269,9.733333,9.826087,7.276596,8.381868,8.4,5.85,5.195122,5.519397,6.382353,7.072427,8.727963,7.81375,9.786008,7.899381,7.125,5.35251,6.707361,South,East South Central
1,1,Alabama,Baldwin,1003,223234,108247,114987,89845,95902,9308,9907,753,754,911,1435,53,70,1832,1930,5545,4989,185747,19215,1507,2346,123,3762,10534,8.987202,8.806627,9.157192,9.150993,9.481721,7.587129,8.095791,9.14741,8.851459,7.246981,7.887108,8.150943,8.628571,5.757642,5.864767,6.497565,6.719182,9.321749,7.849388,8.999336,7.638534,8.422764,5.8126,6.602525,South,East South Central
2,1,Alabama,Barbour,1005,24686,13064,11622,5894,5341,6260,5547,52,43,55,61,21,10,153,132,629,488,11235,11807,95,116,31,285,1117,8.784412,8.463564,9.14507,9.475568,10.196592,7.767732,8.491437,9.346154,10.465116,8.109091,10.065574,6.285714,7.7,6.418301,5.810606,6.434022,5.766393,9.818336,8.107733,9.852632,9.137931,6.741935,6.136842,6.142346,South,East South Central
3,1,Alabama,Bibb,1007,22394,11929,10465,8482,8181,2912,1807,50,41,21,25,5,1,116,130,343,280,16663,4719,91,46,6,246,623,8.606145,8.349484,8.89871,8.645838,9.105122,7.706387,8.455451,8.52,8.219512,7.52381,9.8,6.4,3.0,6.422414,6.3,7.186589,6.975,8.871332,7.993219,8.384615,8.76087,5.833333,6.357724,7.091493,South,East South Central
4,1,Alabama,Blount,1009,57826,28472,29354,24494,25682,453,419,143,139,73,90,14,7,345,385,2950,2632,50176,872,282,163,21,730,5582,8.651714,8.49136,8.807249,8.771209,9.131843,8.030905,7.947494,10.125874,9.446043,8.30137,9.066667,8.142857,10.571429,6.486957,6.535065,6.4,6.06193,8.955796,7.990826,9.79078,8.723926,8.952381,6.512329,6.240595,South,East South Central


# 2. import county shapefile for boundaries and census areas

In [77]:
# https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html

geo_df = (
    gp.read_file('../data/external/cb_2018_us_county_20m/cb_2018_us_county_20m.shp')
    .sort_values(by='GEOID')
    .reset_index(drop=True)
    .assign(ALAND=lambda shapes: shapes['ALAND'] / 1e6)     # convert m^2 to km^2
    .loc[:, ['STATEFP', 'GEOID', 'ALAND', 'geometry']]
    .rename(columns={
        'STATEFP': 'state_fips', 
        'GEOID': 'fips', 
        'ALAND': 'area'
    })
)
geo_df.head()

Unnamed: 0,state_fips,fips,area,geometry
0,1,1001,1539.602123,"POLYGON ((-86.91759 32.66417, -86.71339 32.661..."
1,1,1003,4117.546676,"POLYGON ((-88.02632 30.75336, -87.94455 30.827..."
2,1,1005,2292.144655,"POLYGON ((-85.73573 31.62449, -85.66565 31.786..."
3,1,1007,1612.167481,"POLYGON ((-87.42194 33.00338, -87.31854 33.006..."
4,1,1009,1670.103911,"POLYGON ((-86.96336 33.85822, -86.92439 33.909..."


## find neighbors (for clustering later)

In [78]:
# https://gis.stackexchange.com/a/281676

def county_neighbors(g):
    """List, for every county in `g`, the other counties in `g` it intersects.

    Parameters
    ----------
    g : frame with a 'fips' column and a 'geometry' column whose `.intersects`
        method accepts a single geometry (a GeoDataFrame in this notebook).

    Returns
    -------
    A copy of `g` with an added 'neighbors' column. Each entry is the list of
    'fips' values of the rows of `g` whose geometry intersects that row's
    geometry, with the row's own fips removed. Only rows of `g` are searched.
    """
    neighbor_matrix = []

    for fips, shape in zip(g['fips'], g['geometry']):
        touching = g.loc[g['geometry'].intersects(shape), 'fips'].tolist()
        touching.remove(fips)    # drop the county itself from its own matches
        neighbor_matrix.append(touching)

    # assign() returns a new frame instead of writing into the caller's group
    return g.assign(neighbors=neighbor_matrix)

# neighbors are searched one state at a time (one `county_neighbors` call per
# state_fips group), so counties across a state line are never listed.
# group_keys=False keeps the flat row index: with the pandas >= 2.0 default
# (group_keys=True) `state_fips` would be prepended as an extra index level.
geo_df = geo_df.groupby(by='state_fips', group_keys=False).apply(county_neighbors)
geo_df.head()

Unnamed: 0,state_fips,fips,area,geometry,neighbors
0,1,1001,1539.602123,"POLYGON ((-86.91759 32.66417, -86.71339 32.661...","[01021, 01047, 01051, 01085, 01101]"
1,1,1003,4117.546676,"POLYGON ((-88.02632 30.75336, -87.94455 30.827...","[01025, 01053, 01097, 01099, 01129]"
2,1,1005,2292.144655,"POLYGON ((-85.73573 31.62449, -85.66565 31.786...","[01011, 01045, 01067, 01109, 01113]"
3,1,1007,1612.167481,"POLYGON ((-87.42194 33.00338, -87.31854 33.006...","[01021, 01065, 01073, 01105, 01117, 01125]"
4,1,1009,1670.103911,"POLYGON ((-86.96336 33.85822, -86.92439 33.909...","[01043, 01055, 01073, 01095, 01115, 01127]"


In [79]:
# def centroid(df):
#     centroids = df['geometry'].centroid
#     return [c.coords[0] for c in centroids]

# geo_df['lon'], geo_df['lat'] = zip(*geo_df.pipe(centroid))
# geo_df.head()

In [80]:
save_df(geo_df, 'geo_df')

In [81]:
# attach land area and the in-state neighbor lists by 5-digit fips (inner join)
geo_features = geo_df[['fips', 'area', 'neighbors']]
info_df = pd.merge(info_df, geo_features, on='fips')
info_df.head()

Unnamed: 0,state_fips,state,county,fips,tot_pop,tot_male,tot_female,tot_pop_white_male,tot_pop_white_female,tot_pop_black_male,tot_pop_black_female,tot_pop_native_male,tot_pop_native_female,tot_pop_asian_male,tot_pop_asian_female,tot_pop_pacific_male,tot_pop_pacific_female,tot_pop_twoplus_male,tot_pop_twoplus_female,tot_pop_hispanic_male,tot_pop_hispanic_female,tot_pop_white,tot_pop_black,tot_pop_native,tot_pop_asian,tot_pop_pacific,tot_pop_twoplus,tot_pop_hispanic,age_pop,age_male,age_female,age_pop_white_male,age_pop_white_female,age_pop_black_male,age_pop_black_female,age_pop_native_male,age_pop_native_female,age_pop_asian_male,age_pop_asian_female,age_pop_pacific_male,age_pop_pacific_female,age_pop_twoplus_male,age_pop_twoplus_female,age_pop_hispanic_male,age_pop_hispanic_female,age_pop_white,age_pop_black,age_pop_native,age_pop_asian,age_pop_pacific,age_pop_twoplus,age_pop_hispanic,region,division,area,neighbors
0,1,Alabama,Autauga,1001,55869,27092,28777,20138,21077,5171,5927,105,138,282,364,20,20,492,464,884,787,41215,11098,243,646,40,956,1671,8.422041,8.181973,8.648052,8.518373,8.928216,7.480758,8.104269,9.733333,9.826087,7.276596,8.381868,8.4,5.85,5.195122,5.519397,6.382353,7.072427,8.727963,7.81375,9.786008,7.899381,7.125,5.35251,6.707361,South,East South Central,1539.602123,"[01021, 01047, 01051, 01085, 01101]"
1,1,Alabama,Baldwin,1003,223234,108247,114987,89845,95902,9308,9907,753,754,911,1435,53,70,1832,1930,5545,4989,185747,19215,1507,2346,123,3762,10534,8.987202,8.806627,9.157192,9.150993,9.481721,7.587129,8.095791,9.14741,8.851459,7.246981,7.887108,8.150943,8.628571,5.757642,5.864767,6.497565,6.719182,9.321749,7.849388,8.999336,7.638534,8.422764,5.8126,6.602525,South,East South Central,4117.546676,"[01025, 01053, 01097, 01099, 01129]"
2,1,Alabama,Barbour,1005,24686,13064,11622,5894,5341,6260,5547,52,43,55,61,21,10,153,132,629,488,11235,11807,95,116,31,285,1117,8.784412,8.463564,9.14507,9.475568,10.196592,7.767732,8.491437,9.346154,10.465116,8.109091,10.065574,6.285714,7.7,6.418301,5.810606,6.434022,5.766393,9.818336,8.107733,9.852632,9.137931,6.741935,6.136842,6.142346,South,East South Central,2292.144655,"[01011, 01045, 01067, 01109, 01113]"
3,1,Alabama,Bibb,1007,22394,11929,10465,8482,8181,2912,1807,50,41,21,25,5,1,116,130,343,280,16663,4719,91,46,6,246,623,8.606145,8.349484,8.89871,8.645838,9.105122,7.706387,8.455451,8.52,8.219512,7.52381,9.8,6.4,3.0,6.422414,6.3,7.186589,6.975,8.871332,7.993219,8.384615,8.76087,5.833333,6.357724,7.091493,South,East South Central,1612.167481,"[01021, 01065, 01073, 01105, 01117, 01125]"
4,1,Alabama,Blount,1009,57826,28472,29354,24494,25682,453,419,143,139,73,90,14,7,345,385,2950,2632,50176,872,282,163,21,730,5582,8.651714,8.49136,8.807249,8.771209,9.131843,8.030905,7.947494,10.125874,9.446043,8.30137,9.066667,8.142857,10.571429,6.486957,6.535065,6.4,6.06193,8.955796,7.990826,9.79078,8.723926,8.952381,6.512329,6.240595,South,East South Central,1670.103911,"[01043, 01055, 01073, 01095, 01115, 01127]"


We will merge latitude/longitude coordinates with `info_df` when we process CSSE data.

# 3. add 2016 general election data

Mask compliance has been very political, so it would be interesting to see how political differences vary by county. Data taken from [github.com/tonmcg](https://github.com/tonmcg). Alaska data taken from [RRH Elections](https://rrhelections.com/index.php/2018/02/02/alaska-results-by-county-equivalent-1960-2016/).

In [82]:
ELECTION_URL = 'https://raw.githubusercontent.com/tonmcg/US_County_Level_Election_Results_08-16/master/2016_US_County_Level_Presidential_Results.csv'

with urlopen(ELECTION_URL) as response:
    elect_raw = pd.read_csv(
        response,
        encoding='latin-1',        # to avoid unicode error
        dtype={
            'votes_dem':'int',
            'votes_gop':'int',
            'total_votes':'int',
            'combined_fips':'str'},
        index_col=0
    )

elect_cols = ['state_abbr', 'county', 'fips', 'tot_dem', 'tot_gop', 'tot_votes']

# rename to the conventions used by the other frames in this notebook
elect_df = elect_raw.rename(
    columns={
        'county_name':'county',
        'combined_fips':'fips',
        'votes_dem':'tot_dem',
        'votes_gop':'tot_gop',
        'total_votes':'tot_votes'
    }
)
elect_df['county'] = remove_county_terms(elect_df['county'])

# left-pad fips with zeros to 5 characters
# https://stackoverflow.com/a/23836353
elect_df['fips'] = elect_df['fips'].map('{0:0>5}'.format)

elect_df = elect_df[elect_cols].sort_values(by='fips')
elect_df.head()

Unnamed: 0,state_abbr,county,fips,tot_dem,tot_gop,tot_votes
29,AL,Autauga,1001,5908,18110,24661
30,AL,Baldwin,1003,18409,72780,94090
31,AL,Barbour,1005,4848,5431,10390
32,AL,Bibb,1007,1874,6733,8748
33,AL,Blount,1009,2150,22808,25384


## add alaska elections data

Data taken from [RRH Elections](https://rrhelections.com/index.php/2018/02/02/alaska-results-by-county-equivalent-1960-2016/).

In [83]:
ak_vote_cols = ['tot_gop', 'tot_dem', 'ED Total']

ak_elect_df = (
    pd.read_excel('../data/external/2016 AK Gen Official.xlsx', sheet_name='By CE')
    .iloc[0:29, 0:12]     # first 29 rows and first 12 columns of the sheet
    .rename(
        columns={
            'Trump, Donald J. ':'tot_gop',
            'Clinton, Hillary ':'tot_dem'
        }
    )
    .loc[:, ['ED/Muni'] + ak_vote_cols]
    .sort_values(by='ED/Muni')
    .astype({col: int for col in ak_vote_cols})
)
ak_elect_df.head()

Unnamed: 0,ED/Muni,tot_gop,tot_dem,ED Total
22,Aleutians East,198,121,369
24,Aleutians West,260,493,846
19,Anchorage,39942,32130,81678
12,Bethel,809,2178,3933
25,Bristol Bay,180,99,316


In [84]:
# just checking lengths: the assignment in the next cell needs exactly one
# Alaska row in `elect_df` per row of `ak_elect_df`
n_sheet_rows = ak_elect_df.shape[0]
n_ak_rows = (elect_df['state_abbr'] == 'AK').sum()
print(n_sheet_rows)
print(n_ak_rows)

29
29


In [85]:
# overwrite the vote counts of the Alaska rows in `elect_df` with the
# county-equivalent results loaded from the spreadsheet.
# NOTE(review): `.values` makes this a purely positional assignment -- row i of
# `ak_elect_df` (sorted by 'ED/Muni' name) lands on the i-th Alaska row of
# `elect_df` (sorted by fips). That is only correct if both orderings list the
# same county-equivalents in the same sequence -- TODO confirm, or join on a key.
elect_df.loc[
    elect_df['state_abbr'] == 'AK', ['tot_gop', 'tot_dem', 'tot_votes']
] = ak_elect_df[['tot_gop', 'tot_dem', 'ED Total']].values
elect_df.tail()

Unnamed: 0,state_abbr,county,fips,tot_dem,tot_gop,tot_votes
3136,WY,Sweetwater,56037,3233,12153,16661
3137,WY,Teton,56039,7313,3920,12176
3138,WY,Uinta,56041,1202,6154,8053
3139,WY,Washakie,56043,532,2911,3715
3140,WY,Weston,56045,294,2898,3334


In [86]:
# republican share of the two-party (gop + dem) vote; `tot_votes` is not used here
two_party_votes = elect_df['tot_gop'] + elect_df['tot_dem']
elect_df['per_gop'] = elect_df['tot_gop'].div(two_party_votes)
elect_df.head()

Unnamed: 0,state_abbr,county,fips,tot_dem,tot_gop,tot_votes,per_gop
29,AL,Autauga,1001,5908,18110,24661,0.754018
30,AL,Baldwin,1003,18409,72780,94090,0.798123
31,AL,Barbour,1005,4848,5431,10390,0.528359
32,AL,Bibb,1007,1874,6733,8748,0.78227
33,AL,Blount,1009,2150,22808,25384,0.913855


In [87]:
save_df(elect_df, 'elect_df')

In [88]:
# left join: every county in `info_df` is kept, even without a matching election row
vote_cols = ['fips', 'tot_dem', 'tot_gop', 'tot_votes', 'per_gop']
info_df = pd.merge(info_df, elect_df[vote_cols], on='fips', how='left')
info_df.tail()

Unnamed: 0,state_fips,state,county,fips,tot_pop,tot_male,tot_female,tot_pop_white_male,tot_pop_white_female,tot_pop_black_male,tot_pop_black_female,tot_pop_native_male,tot_pop_native_female,tot_pop_asian_male,tot_pop_asian_female,tot_pop_pacific_male,tot_pop_pacific_female,tot_pop_twoplus_male,tot_pop_twoplus_female,tot_pop_hispanic_male,tot_pop_hispanic_female,tot_pop_white,tot_pop_black,tot_pop_native,tot_pop_asian,tot_pop_pacific,tot_pop_twoplus,tot_pop_hispanic,age_pop,age_male,age_female,age_pop_white_male,age_pop_white_female,age_pop_black_male,age_pop_black_female,age_pop_native_male,age_pop_native_female,age_pop_asian_male,age_pop_asian_female,age_pop_pacific_male,age_pop_pacific_female,age_pop_twoplus_male,age_pop_twoplus_female,age_pop_hispanic_male,age_pop_hispanic_female,age_pop_white,age_pop_black,age_pop_native,age_pop_asian,age_pop_pacific,age_pop_twoplus,age_pop_hispanic,region,division,area,neighbors,tot_dem,tot_gop,tot_votes,per_gop
3137,56,Wyoming,Sweetwater,56037,42343,21808,20535,17223,16338,298,204,177,158,183,227,26,29,350,358,3551,3221,33561,502,335,410,55,708,6772,7.942493,7.886051,8.002435,8.187366,8.316012,7.614094,7.990196,7.960452,8.310127,6.431694,8.026432,7.038462,7.689655,6.022857,5.368715,6.708533,6.6914,8.249993,7.766932,8.125373,7.314634,7.381818,5.69209,6.700384,West,Mountain,27005.754244,"[56007, 56013, 56023, 56035, 56041]",3233.0,12153.0,16661.0,0.789874
3138,56,Wyoming,Teton,56039,23464,12142,11322,9832,9168,87,58,33,41,135,243,11,7,160,135,1884,1670,19000,145,74,378,18,295,3554,8.657688,8.632103,8.685126,9.090419,9.172339,9.16092,8.896552,9.424242,8.878049,7.711111,7.452675,10.090909,9.0,6.05625,6.362963,6.478238,6.364072,9.129947,9.055172,9.121622,7.544974,9.666667,6.19661,6.424592,West,Mountain,10351.784301,"[56013, 56023, 56029, 56035]",7313.0,3920.0,12176.0,0.348972
3139,56,Wyoming,Uinta,56041,20226,10224,10002,8935,8722,64,62,69,75,36,56,11,13,182,130,927,944,17657,126,144,92,24,312,1871,8.025512,7.988165,8.063687,8.168103,8.313575,7.375,6.016129,8.014493,8.853333,7.0,8.607143,4.545455,7.076923,7.192308,5.892308,6.529666,6.106992,8.239961,6.706349,8.451389,7.978261,5.916667,6.650641,6.316408,West,Mountain,5391.631764,"[56023, 56037]",1202.0,6154.0,8053.0,0.836596
3140,56,Wyoming,Washakie,56043,7805,3963,3842,3266,3151,23,15,19,34,22,33,0,0,68,66,565,543,6417,38,53,55,0,134,1108,9.065343,8.91673,9.218636,9.261788,9.638527,9.913043,8.466667,9.0,9.764706,8.409091,9.484848,,,7.029412,7.318182,7.125664,6.983425,9.446782,9.342105,9.490566,9.054545,,7.171642,7.055957,West,Mountain,5798.138762,"[56003, 56013, 56017, 56019, 56025, 56029]",532.0,2911.0,3715.0,0.845484
3141,56,Wyoming,Weston,56045,6927,3624,3303,3273,2963,30,15,62,45,30,83,1,0,73,67,155,130,6236,45,107,113,1,140,285,9.173668,9.06319,9.294883,9.175985,9.507256,8.433333,8.333333,8.145161,8.222222,11.666667,9.626506,9.0,,6.616438,5.791045,7.819355,6.530769,9.333387,8.4,8.17757,10.168142,9.0,6.221429,7.231579,West,Mountain,6210.804116,"[56005, 56009, 56011, 56027]",294.0,2898.0,3334.0,0.907895


# 4. add income data

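Median household income statistics come from table S1903 on [data.census.gov](https://data.census.gov/cedsci/table?q=s1901&tid=ACSST1Y2018.S1901). The file loaded below, `ACSST5Y2018.S1903_data_with_overlays.csv`, is named for the 2018 ACS 5-Year Estimates. The columns used are: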

- `S1903_C03_001E` -- all households
- `S1903_C03_003E` -- black
- `S1903_C03_004E` -- native
- `S1903_C03_005E` -- asian
- `S1903_C03_006E` -- pacific
- `S1903_C03_007E` -- other
- `S1903_C03_008E` -- two or more
- `S1903_C03_009E` -- hispanic
- `S1903_C03_010E` -- white only, not hispanic

In [89]:
# S1903_C03_001E and 003E..010E (002E is skipped), paired with readable names
inc_cols = [f'S1903_C03_{i:03d}E' for i in range(1,11) if i != 2]
inc_col_names = ['median_income'] + [f'median_income_{race}'
                                     for race in ['black', 'native', 'asian', 'pacific', 'other', 'twoplus', 'hispanic', 'white']]
inc_dict = dict(zip(inc_cols, inc_col_names))
inc_dict.update({'GEO_ID':'fips'})

# can't use dtype 'int' here because of entries like `250000+` and `-`
inc_df = pd.read_csv(
    '../data/external/ACSST5Y2018.S1903/ACSST5Y2018.S1903_data_with_overlays.csv',
    usecols=['GEO_ID', 'NAME'] + inc_cols,
)
# drop the first data row -- presumably the descriptive overlay row of the
# "data_with_overlays" export; verify against the file
inc_df = inc_df.drop(0, axis=0)
inc_df = inc_df.rename(columns=inc_dict)

# joplin and kansas city get placeholder codes so the 5-character slice below
# does not mangle their longer GEO_IDs.
# .loc (not .at) is the accessor for boolean-mask assignment; .at only
# addresses a single scalar row label and rejects a mask in recent pandas
inc_df.loc[inc_df['fips'] == '1600000US2937592', 'fips'] = '29JOP'
inc_df.loc[inc_df['fips'] == '1600000US2938000', 'fips'] = '29KAN'
inc_df['fips'] = inc_df['fips'].str[-5:]

# NAME is split into '<county>, <state>'
inc_df['county'], inc_df['state'] = zip(*inc_df['NAME'].str.split(', ').tolist())
inc_df.loc[inc_df['fips'] == '29JOP', 'county'] = 'Joplin'
inc_df.loc[inc_df['fips'] == '29KAN', 'county'] = 'Kansas City'
inc_df = inc_df.drop('NAME', axis=1)
inc_df['county'] = remove_county_terms(inc_df['county'])

# rio arriba taken from datausa.io
inc_df.loc[inc_df['fips'] == '35039', 'median_income'] = 33_422

# '-' marks a missing estimate; the other two are the open-ended bounds
inc_df = inc_df.replace({'-': np.nan, '2,500-':2500, '250,000+':250000})

# can't do int because of nan
inc_df[inc_col_names] = inc_df[inc_col_names].astype(float)

inc_df.tail()

Unnamed: 0,fips,median_income,median_income_black,median_income_native,median_income_asian,median_income_pacific,median_income_other,median_income_twoplus,median_income_hispanic,median_income_white,county,state
3218,72149,19855.0,25714.0,,,,19535.0,17871.0,19807.0,,Villalba Municipio,Puerto Rico
3219,72151,16013.0,14852.0,,,,29063.0,19213.0,15992.0,,Yabucoa Municipio,Puerto Rico
3220,72153,14954.0,13986.0,,,,12204.0,12650.0,14927.0,,Yauco Municipio,Puerto Rico
3221,29JOP,42782.0,32500.0,39663.0,,,,41033.0,47208.0,43473.0,Joplin,Missouri
3222,29KAN,52405.0,33899.0,48929.0,49367.0,33563.0,31768.0,50538.0,44003.0,65637.0,Kansas City,Missouri


In [90]:
inc_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3222 entries, 1 to 3222
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   fips                    3222 non-null   object 
 1   median_income           3222 non-null   float64
 2   median_income_black     2019 non-null   float64
 3   median_income_native    1423 non-null   float64
 4   median_income_asian     1405 non-null   float64
 5   median_income_pacific   281 non-null    float64
 6   median_income_other     1689 non-null   float64
 7   median_income_twoplus   2190 non-null   float64
 8   median_income_hispanic  2555 non-null   float64
 9   median_income_white     3161 non-null   float64
 10  county                  3222 non-null   object 
 11  state                   3222 non-null   object 
dtypes: float64(9), object(3)
memory usage: 327.2+ KB


In [91]:
# income_df = pd.read_csv('../data/income_df.csv')

In [92]:
# identifying columns first, followed by every numeric income column
id_cols = ['state', 'county', 'fips']
inc_cols = id_cols + inc_df.select_dtypes(include='number').columns.tolist()
inc_df = inc_df.loc[:, inc_cols]
inc_df.tail()

Unnamed: 0,state,county,fips,median_income,median_income_black,median_income_native,median_income_asian,median_income_pacific,median_income_other,median_income_twoplus,median_income_hispanic,median_income_white
3218,Puerto Rico,Villalba Municipio,72149,19855.0,25714.0,,,,19535.0,17871.0,19807.0,
3219,Puerto Rico,Yabucoa Municipio,72151,16013.0,14852.0,,,,29063.0,19213.0,15992.0,
3220,Puerto Rico,Yauco Municipio,72153,14954.0,13986.0,,,,12204.0,12650.0,14927.0,
3221,Missouri,Joplin,29JOP,42782.0,32500.0,39663.0,,,,41033.0,47208.0,43473.0
3222,Missouri,Kansas City,29KAN,52405.0,33899.0,48929.0,49367.0,33563.0,31768.0,50538.0,44003.0,65637.0


In [93]:
save_df(inc_df, 'inc_df')

In [94]:
# left join on fips; the state / county names are left out because `info_df`
# already carries them
income_features = inc_df.drop(columns=['state', 'county'])
info_df = pd.merge(info_df, income_features, on='fips', how='left')
info_df.tail()

Unnamed: 0,state_fips,state,county,fips,tot_pop,tot_male,tot_female,tot_pop_white_male,tot_pop_white_female,tot_pop_black_male,tot_pop_black_female,tot_pop_native_male,tot_pop_native_female,tot_pop_asian_male,tot_pop_asian_female,tot_pop_pacific_male,tot_pop_pacific_female,tot_pop_twoplus_male,tot_pop_twoplus_female,tot_pop_hispanic_male,tot_pop_hispanic_female,tot_pop_white,tot_pop_black,tot_pop_native,tot_pop_asian,tot_pop_pacific,tot_pop_twoplus,tot_pop_hispanic,age_pop,age_male,age_female,age_pop_white_male,age_pop_white_female,age_pop_black_male,age_pop_black_female,age_pop_native_male,age_pop_native_female,age_pop_asian_male,age_pop_asian_female,age_pop_pacific_male,age_pop_pacific_female,age_pop_twoplus_male,age_pop_twoplus_female,age_pop_hispanic_male,age_pop_hispanic_female,age_pop_white,age_pop_black,age_pop_native,age_pop_asian,age_pop_pacific,age_pop_twoplus,age_pop_hispanic,region,division,area,neighbors,tot_dem,tot_gop,tot_votes,per_gop,median_income,median_income_black,median_income_native,median_income_asian,median_income_pacific,median_income_other,median_income_twoplus,median_income_hispanic,median_income_white
3137,56,Wyoming,Sweetwater,56037,42343,21808,20535,17223,16338,298,204,177,158,183,227,26,29,350,358,3551,3221,33561,502,335,410,55,708,6772,7.942493,7.886051,8.002435,8.187366,8.316012,7.614094,7.990196,7.960452,8.310127,6.431694,8.026432,7.038462,7.689655,6.022857,5.368715,6.708533,6.6914,8.249993,7.766932,8.125373,7.314634,7.381818,5.69209,6.700384,West,Mountain,27005.754244,"[56007, 56013, 56023, 56035, 56041]",3233.0,12153.0,16661.0,0.789874,73008.0,,62188.0,72614.0,138053.0,74189.0,55284.0,61921.0,76469.0
3138,56,Wyoming,Teton,56039,23464,12142,11322,9832,9168,87,58,33,41,135,243,11,7,160,135,1884,1670,19000,145,74,378,18,295,3554,8.657688,8.632103,8.685126,9.090419,9.172339,9.16092,8.896552,9.424242,8.878049,7.711111,7.452675,10.090909,9.0,6.05625,6.362963,6.478238,6.364072,9.129947,9.055172,9.121622,7.544974,9.666667,6.19661,6.424592,West,Mountain,10351.784301,"[56013, 56023, 56029, 56035]",7313.0,3920.0,12176.0,0.348972,83831.0,,,98125.0,,36433.0,17188.0,45361.0,95222.0
3139,56,Wyoming,Uinta,56041,20226,10224,10002,8935,8722,64,62,69,75,36,56,11,13,182,130,927,944,17657,126,144,92,24,312,1871,8.025512,7.988165,8.063687,8.168103,8.313575,7.375,6.016129,8.014493,8.853333,7.0,8.607143,4.545455,7.076923,7.192308,5.892308,6.529666,6.106992,8.239961,6.706349,8.451389,7.978261,5.916667,6.650641,6.316408,West,Mountain,5391.631764,"[56023, 56037]",1202.0,6154.0,8053.0,0.836596,58235.0,,68125.0,,,55701.0,39205.0,39816.0,61330.0
3140,56,Wyoming,Washakie,56043,7805,3963,3842,3266,3151,23,15,19,34,22,33,0,0,68,66,565,543,6417,38,53,55,0,134,1108,9.065343,8.91673,9.218636,9.261788,9.638527,9.913043,8.466667,9.0,9.764706,8.409091,9.484848,,,7.029412,7.318182,7.125664,6.983425,9.446782,9.342105,9.490566,9.054545,,7.171642,7.055957,West,Mountain,5798.138762,"[56003, 56013, 56017, 56019, 56025, 56029]",532.0,2911.0,3715.0,0.845484,53426.0,,,,,62054.0,36118.0,50035.0,54815.0
3141,56,Wyoming,Weston,56045,6927,3624,3303,3273,2963,30,15,62,45,30,83,1,0,73,67,155,130,6236,45,107,113,1,140,285,9.173668,9.06319,9.294883,9.175985,9.507256,8.433333,8.333333,8.145161,8.222222,11.666667,9.626506,9.0,,6.616438,5.791045,7.819355,6.530769,9.333387,8.4,8.17757,10.168142,9.0,6.221429,7.231579,West,Mountain,6210.804116,"[56005, 56009, 56011, 56027]",294.0,2898.0,3334.0,0.907895,52867.0,,,,,,,37870.0,55032.0


# 5. add educational attainment data

Educational attainment statistics taken from [data.census.gov](https://data.census.gov/cedsci/table?tid=ACSST1Y2018.S1501&g=0400000US04) (2018 ACS 5-Year Estimates, table S1501).

- `S1501_C01_006E` -- population > 25yo
- `S1501_C01_007E` -- less than 9th grade
- `S1501_C01_008E` -- some high school
- `S1501_C01_009E` -- high school or GED
- `S1501_C01_010E` -- some college
- `S1501_C01_011E` -- associate's
- `S1501_C01_012E` -- bachelor's
- `S1501_C01_013E` -- graduate or professional

In addition, there are ethnic / sex breakdowns:
- general patterns:
    - `C01_XXXE` -- ethnic total
    - `C03_XXXE` -- ethnic male
    - `C05_XXXE` -- ethnic female
- `S1501_C01_031E` -- white alone
- `S1501_C01_032E` -- white alone, high school graduate or higher
- `S1501_C01_033E` -- white alone, bachelor's degree or higher
- etc.

In [95]:
# general educational attainment columns (S1501_C01_006E .. S1501_C01_013E)
edu_cols = [f'S1501_C01_{i:03d}E' for i in range(6,14)]
edu_col_names = ['pop_25p', 'no_hs', 'some_hs', 'hs', 'some_college',
                 'associates', 'bachelors', 'graduate']
edu_dict = dict(zip(edu_cols, edu_col_names))
edu_dict.update({'GEO_ID':'fips'})

# education/ethnicity/sex columns
# column groups C01 / C03 / C05 pair up with the '' / '_male' / '_female'
# suffixes; within each group, rows 031..054 are 8 races x 3 attainment levels
edu_eth_sex_cols = [f'S1501_C{i:02d}_{j:03d}E' for i in range(1,6,2) for j in range(31,55)]
edu_eth_sex_col_names = [f'tot_edu_{race}{sex}{edu}'
                         for sex in ['', '_male', '_female']
                         for race in ['white', 'black', 'native', 'asian', 'pacific', 'other', 'twoplus', 'hispanic']
                         for edu in ['', '_hsplus', '_4yplus']]
edu_eth_sex_dict = dict(zip(edu_eth_sex_cols, edu_eth_sex_col_names))
edu_dict.update(edu_eth_sex_dict)

# low_memory=False parses each column in one pass, which avoids the
# mixed-type DtypeWarning this file otherwise triggers
edu_df = pd.read_csv('../data/external/ACSST5Y2018.S1501/ACSST5Y2018.S1501_data_with_overlays.csv',
                     usecols=['GEO_ID', 'NAME']+edu_cols+edu_eth_sex_cols,
                     low_memory=False)
# drop the first data row (presumably the human-readable label row of the
# 'with_overlays' export -- confirm against the file)
edu_df = edu_df.drop(0, axis=0)
# every selected estimate column is a head count -> cast them all at once
edu_df = edu_df.astype({count_col: int for count_col in edu_cols + edu_eth_sex_cols})
edu_df = edu_df.rename(columns=edu_dict)

# joplin and kansas city
# these two rows carry different GEO_IDs and get custom 5-character codes;
# boolean-mask assignment has to go through .loc (.at is scalar-only)
edu_df.loc[edu_df['fips'] == '1600000US2937592', 'fips'] = '29JOP'
edu_df.loc[edu_df['fips'] == '1600000US2938000', 'fips'] = '29KAN'
# keep the last five characters of the id (custom codes above are already 5 long)
edu_df['fips'] = edu_df['fips'].str[-5:]

# NAME is split on ', ' into its county and state parts
edu_df['county'], edu_df['state'] = zip(*edu_df['NAME'].str.split(', ').tolist())
edu_df.loc[edu_df['fips'] == '29JOP', 'county'] = 'Joplin'
edu_df.loc[edu_df['fips'] == '29KAN', 'county'] = 'Kansas City'
edu_df = edu_df.drop('NAME', axis=1)
# NOTE(review): remove_county_terms is defined outside this section;
# presumably it strips suffixes such as 'County' -- confirm.
edu_df['county'] = edu_df[['county']].apply(remove_county_terms)

edu_df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,fips,tot_edu_white,tot_edu_white_male,tot_edu_white_female,tot_edu_white_hsplus,tot_edu_white_male_hsplus,tot_edu_white_female_hsplus,tot_edu_white_4yplus,tot_edu_white_male_4yplus,tot_edu_white_female_4yplus,tot_edu_black,tot_edu_black_male,tot_edu_black_female,tot_edu_black_hsplus,tot_edu_black_male_hsplus,tot_edu_black_female_hsplus,tot_edu_black_4yplus,tot_edu_black_male_4yplus,tot_edu_black_female_4yplus,tot_edu_native,tot_edu_native_male,tot_edu_native_female,tot_edu_native_hsplus,tot_edu_native_male_hsplus,tot_edu_native_female_hsplus,tot_edu_native_4yplus,tot_edu_native_male_4yplus,tot_edu_native_female_4yplus,tot_edu_asian,tot_edu_asian_male,tot_edu_asian_female,tot_edu_asian_hsplus,tot_edu_asian_male_hsplus,tot_edu_asian_female_hsplus,tot_edu_asian_4yplus,tot_edu_asian_male_4yplus,tot_edu_asian_female_4yplus,tot_edu_pacific,tot_edu_pacific_male,tot_edu_pacific_female,tot_edu_pacific_hsplus,tot_edu_pacific_male_hsplus,tot_edu_pacific_female_hsplus,tot_edu_pacific_4yplus,tot_edu_pacific_male_4yplus,tot_edu_pacific_female_4yplus,tot_edu_other,tot_edu_other_male,tot_edu_other_female,tot_edu_other_hsplus,tot_edu_other_male_hsplus,tot_edu_other_female_hsplus,tot_edu_other_4yplus,tot_edu_other_male_4yplus,tot_edu_other_female_4yplus,tot_edu_twoplus,tot_edu_twoplus_male,tot_edu_twoplus_female,tot_edu_twoplus_hsplus,tot_edu_twoplus_male_hsplus,tot_edu_twoplus_female_hsplus,tot_edu_twoplus_4yplus,tot_edu_twoplus_male_4yplus,tot_edu_twoplus_female_4yplus,tot_edu_hispanic,tot_edu_hispanic_male,tot_edu_hispanic_female,tot_edu_hispanic_hsplus,tot_edu_hispanic_male_hsplus,tot_edu_hispanic_female_hsplus,tot_edu_hispanic_4yplus,tot_edu_hispanic_male_4yplus,tot_edu_hispanic_female_4yplus,pop_25p,no_hs,some_hs,hs,some_college,associates,bachelors,graduate,county,state
1,1001,28726,13834,14892,26130,12588,13542,8440,4573,3867,6786,3042,3744,5459,2436,3023,1296,573,723,78,39,39,61,25,36,25,25,0,317,95,222,278,62,216,118,43,75,32,5,27,32,5,27,0,0,0,262,93,169,197,67,130,0,0,0,380,135,245,269,73,196,92,0,92,939,455,484,821,380,441,346,230,116,37166,956,3248,12119,7554,2998,5903,4388,Autauga,Alabama
2,1003,126316,60310,66006,116288,54788,61500,41648,19863,21785,12006,5593,6413,9565,4129,5436,2164,808,1356,1015,523,492,790,410,380,145,81,64,1180,426,754,980,271,709,243,147,96,9,0,9,9,0,9,0,0,0,938,469,469,695,394,301,262,119,143,1712,853,859,1529,744,785,559,199,360,5119,2749,2370,3823,1813,2010,1389,637,752,146989,3978,10332,40579,32266,13759,30431,15644,Baldwin,Alabama
3,1005,9171,4846,4325,7264,3657,3607,1578,814,764,8137,4304,3833,5551,2776,2775,552,240,312,72,72,0,42,42,0,0,0,0,88,40,48,72,27,45,5,0,5,1,0,1,0,0,0,0,0,0,345,230,115,100,76,24,44,44,0,183,80,103,153,50,103,9,0,9,573,395,178,251,192,59,76,60,16,18173,1490,3411,6486,3287,1279,1417,803,Barbour,Alabama
4,1007,12002,6037,5965,10483,5181,5302,1570,674,896,3316,2146,1170,2296,1377,919,200,83,117,8,8,0,8,8,0,0,0,0,37,16,21,37,16,21,37,16,21,0,0,0,0,0,0,0,0,0,9,9,0,9,9,0,0,0,0,149,108,41,108,89,19,6,6,0,313,171,142,206,95,111,0,0,0,15780,903,1747,7471,2938,908,1197,616,Bibb,Alabama
5,1009,35774,17200,18574,29814,14167,15647,4775,1900,2875,596,281,315,411,192,219,22,10,12,132,22,110,94,14,80,13,13,0,124,43,81,104,43,61,62,25,37,18,0,18,18,0,18,0,0,0,211,106,105,120,56,64,90,28,62,440,212,228,361,154,207,24,22,2,2610,1468,1142,1006,476,530,82,48,34,39627,2967,4894,13489,8492,4775,3217,1793,Blount,Alabama


In [96]:
# Overall attainment score: head-count-weighted mean of the attainment level
# (no_hs counts as 0, graduate as 6) over the 25+ population.
attainment_weights = {
    'some_hs': 1,
    'hs': 2,
    'some_college': 3,
    'associates': 4,
    'bachelors': 5,
    'graduate': 6,
}
weighted_head_count = sum(
    weight * edu_df[level] for level, weight in attainment_weights.items()
)
edu_df['edu'] = weighted_head_count / edu_df['pop_25p']

# Per race/sex group only three counts exist (total, hs-or-higher,
# bachelor's-or-higher), so the group score is built from those two tiers,
# alongside the share of the group without a high-school diploma.
for race in ['white', 'black', 'native', 'asian', 'pacific', 'other', 'twoplus', 'hispanic']:
    for sex in ['', '_male', '_female']:
        group_total = edu_df[f'tot_edu_{race}{sex}']
        group_hsplus = edu_df[f'tot_edu_{race}{sex}_hsplus']
        group_4yplus = edu_df[f'tot_edu_{race}{sex}_4yplus']

        edu_df[f'edu_{race}{sex}'] = (2*group_hsplus + 5*group_4yplus) / group_total
        edu_df[f'per_edu_{race}{sex}_nohs'] = (group_total - group_hsplus) / group_total

# groups with a zero total come out as NaN; the fill below stays disabled
# edu_df = edu_df.fillna(-1)
edu_df.tail()

Unnamed: 0,fips,tot_edu_white,tot_edu_white_male,tot_edu_white_female,tot_edu_white_hsplus,tot_edu_white_male_hsplus,tot_edu_white_female_hsplus,tot_edu_white_4yplus,tot_edu_white_male_4yplus,tot_edu_white_female_4yplus,tot_edu_black,tot_edu_black_male,tot_edu_black_female,tot_edu_black_hsplus,tot_edu_black_male_hsplus,tot_edu_black_female_hsplus,tot_edu_black_4yplus,tot_edu_black_male_4yplus,tot_edu_black_female_4yplus,tot_edu_native,tot_edu_native_male,tot_edu_native_female,tot_edu_native_hsplus,tot_edu_native_male_hsplus,tot_edu_native_female_hsplus,tot_edu_native_4yplus,tot_edu_native_male_4yplus,tot_edu_native_female_4yplus,tot_edu_asian,tot_edu_asian_male,tot_edu_asian_female,tot_edu_asian_hsplus,tot_edu_asian_male_hsplus,tot_edu_asian_female_hsplus,tot_edu_asian_4yplus,tot_edu_asian_male_4yplus,tot_edu_asian_female_4yplus,tot_edu_pacific,tot_edu_pacific_male,tot_edu_pacific_female,tot_edu_pacific_hsplus,tot_edu_pacific_male_hsplus,tot_edu_pacific_female_hsplus,tot_edu_pacific_4yplus,tot_edu_pacific_male_4yplus,tot_edu_pacific_female_4yplus,tot_edu_other,tot_edu_other_male,tot_edu_other_female,tot_edu_other_hsplus,tot_edu_other_male_hsplus,tot_edu_other_female_hsplus,tot_edu_other_4yplus,tot_edu_other_male_4yplus,tot_edu_other_female_4yplus,tot_edu_twoplus,tot_edu_twoplus_male,tot_edu_twoplus_female,tot_edu_twoplus_hsplus,tot_edu_twoplus_male_hsplus,tot_edu_twoplus_female_hsplus,tot_edu_twoplus_4yplus,tot_edu_twoplus_male_4yplus,tot_edu_twoplus_female_4yplus,tot_edu_hispanic,tot_edu_hispanic_male,tot_edu_hispanic_female,tot_edu_hispanic_hsplus,tot_edu_hispanic_male_hsplus,tot_edu_hispanic_female_hsplus,tot_edu_hispanic_4yplus,tot_edu_hispanic_male_4yplus,tot_edu_hispanic_female_4yplus,pop_25p,no_hs,some_hs,hs,some_college,associates,bachelors,graduate,county,state,edu,edu_white,per_edu_white_nohs,edu_white_male,per_edu_white_male_nohs,edu_white_female,per_edu_white_female_nohs,edu_black,per_edu_black_nohs,edu_black_male,per_edu_black_male_nohs,edu_black_female
,per_edu_black_female_nohs,edu_native,per_edu_native_nohs,edu_native_male,per_edu_native_male_nohs,edu_native_female,per_edu_native_female_nohs,edu_asian,per_edu_asian_nohs,edu_asian_male,per_edu_asian_male_nohs,edu_asian_female,per_edu_asian_female_nohs,edu_pacific,per_edu_pacific_nohs,edu_pacific_male,per_edu_pacific_male_nohs,edu_pacific_female,per_edu_pacific_female_nohs,edu_other,per_edu_other_nohs,edu_other_male,per_edu_other_male_nohs,edu_other_female,per_edu_other_female_nohs,edu_twoplus,per_edu_twoplus_nohs,edu_twoplus_male,per_edu_twoplus_male_nohs,edu_twoplus_female,per_edu_twoplus_female_nohs,edu_hispanic,per_edu_hispanic_nohs,edu_hispanic_male,per_edu_hispanic_male_nohs,edu_hispanic_female,per_edu_hispanic_female_nohs
3218,72149,13,1,12,13,1,12,1,1,0,537,340,197,489,313,176,129,74,55,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2456,1178,1278,1701,798,903,357,102,255,4351,2095,2256,3189,1460,1729,896,180,716,15255,7166,8089,11523,5256,6267,3072,949,2123,15288,2599,1134,5616,1648,1218,2545,528,Villalba Municipio,Puerto Rico,2.490515,2.384615,0.0,7.0,0.0,2.0,0.0,3.022346,0.089385,2.929412,0.079412,3.182741,0.106599,,,,,,,,,,,,,,,,,,,2.111971,0.30741,1.787776,0.322581,2.410798,0.293427,2.495518,0.267065,1.823389,0.303103,3.119681,0.233599,2.517601,0.244641,2.129082,0.266536,2.861788,0.225244
3219,72151,0,0,0,0,0,0,0,0,0,15706,7363,8343,10864,4831,6033,2677,865,1812,39,0,39,39,0,39,39,0,39,12,12,0,12,12,0,12,12,0,0,0,0,0,0,0,0,0,0,450,233,217,315,179,136,114,66,48,349,156,193,180,63,117,45,9,36,23904,11323,12581,16684,7409,9275,4431,1519,2912,23916,4975,2245,5972,3636,2645,3706,737,Yabucoa Municipio,Puerto Rico,2.451455,,,,,,,2.235642,0.30829,1.899633,0.343882,2.532183,0.276879,7.0,0.0,,,7.0,0.0,7.0,0.0,7.0,0.0,,,,,,,,,2.666667,0.3,2.95279,0.23176,2.359447,0.373272,1.676218,0.484241,1.096154,0.596154,2.145078,0.393782,2.322749,0.302041,1.979422,0.345668,2.631746,0.262777
3220,72153,29,16,13,19,6,13,5,0,5,925,471,454,652,329,323,235,100,135,75,35,40,56,16,40,18,8,10,5,5,0,5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,4430,2202,2228,3131,1513,1618,830,288,542,292,127,165,168,85,83,69,35,34,25926,12022,13904,18700,8291,10409,6365,2430,3935,25976,4977,2259,8182,2381,1791,4902,1484,Yauco Municipio,Puerto Rico,2.55405,2.172414,0.344828,0.75,0.625,3.923077,0.0,2.68,0.295135,2.458599,0.301486,2.909692,0.288546,2.693333,0.253333,2.057143,0.542857,3.25,0.0,2.0,0.0,2.0,0.0,,,,,,,,,2.350339,0.293228,2.028156,0.312897,2.668761,0.273788,2.332192,0.424658,2.716535,0.330709,2.036364,0.49697,2.6701,0.278716,2.389952,0.310348,2.912327,0.251367
3221,29JOP,29516,13887,15629,26830,12641,14189,7607,3766,3841,963,493,470,874,435,439,173,144,29,653,261,392,497,243,254,92,32,60,523,213,310,449,171,278,234,74,160,38,0,38,0,0,0,0,0,0,265,129,136,159,74,85,68,29,39,905,490,415,836,433,403,266,196,70,1136,500,636,788,384,404,180,78,102,33571,779,2580,10582,8462,2576,5759,2833,Joplin,Missouri,3.134461,3.10662,0.091001,3.176496,0.089724,3.044533,0.092136,2.713396,0.09242,3.225152,0.117647,2.176596,0.065957,2.226646,0.238897,2.475096,0.068966,2.061224,0.352041,3.954111,0.141491,3.342723,0.197183,4.374194,0.103226,0.0,1.0,,,0.0,1.0,2.483019,0.4,2.271318,0.426357,2.683824,0.375,3.317127,0.076243,3.767347,0.116327,2.785542,0.028916,2.179577,0.306338,2.316,0.232,2.072327,0.36478
3222,29KAN,196115,96625,99490,184949,90966,93983,86232,41929,44303,87359,37954,49405,75251,32052,43199,13994,5465,8529,1537,750,787,1318,654,664,376,134,242,8573,4180,4393,6870,3402,3468,4038,2030,2008,398,162,236,292,141,151,104,97,7,10735,5533,5202,6278,3387,2891,1062,603,459,6881,3436,3445,6173,3063,3110,2637,1119,1518,26509,13802,12707,18460,9584,8876,4559,2335,2224,325065,11373,22302,82996,73203,23673,69682,41836,Kansas City,Missouri,3.390156,4.084634,0.056936,4.052543,0.058567,4.115801,0.055352,2.523747,0.1386,2.408942,0.155504,2.611942,0.125615,2.938191,0.142485,2.637333,0.128,3.224905,0.15629,3.957774,0.198647,4.055981,0.186124,3.86433,0.210562,2.773869,0.266332,4.734568,0.12963,1.427966,0.360169,1.664276,0.415184,1.769203,0.387855,1.552672,0.444252,3.710362,0.102892,3.411234,0.108556,4.008708,0.097242,2.252631,0.303633,2.234676,0.305608,2.272133,0.301487


In [97]:
# NOTE(review): save_df is defined outside this section; presumably it
# persists the education table under the name 'edu_df' -- confirm.
save_df(edu_df, 'edu_df')

In [98]:
# edu_df = pd.read_csv('../data/processed/edu_df.csv')

In [99]:
# Left-join the numeric education columns (plus the 'fips' key) onto the
# county table; the text columns of edu_df are left behind.
info_df = pd.merge(
    info_df,
    edu_df[['fips', *edu_df.select_dtypes(include='number').columns.tolist()]],
    how='left',
    on='fips',
)
info_df.tail()

Unnamed: 0,state_fips,state,county,fips,tot_pop,tot_male,tot_female,tot_pop_white_male,tot_pop_white_female,tot_pop_black_male,tot_pop_black_female,tot_pop_native_male,tot_pop_native_female,tot_pop_asian_male,tot_pop_asian_female,tot_pop_pacific_male,tot_pop_pacific_female,tot_pop_twoplus_male,tot_pop_twoplus_female,tot_pop_hispanic_male,tot_pop_hispanic_female,tot_pop_white,tot_pop_black,tot_pop_native,tot_pop_asian,tot_pop_pacific,tot_pop_twoplus,tot_pop_hispanic,age_pop,age_male,age_female,age_pop_white_male,age_pop_white_female,age_pop_black_male,age_pop_black_female,age_pop_native_male,age_pop_native_female,age_pop_asian_male,age_pop_asian_female,age_pop_pacific_male,age_pop_pacific_female,age_pop_twoplus_male,age_pop_twoplus_female,age_pop_hispanic_male,age_pop_hispanic_female,age_pop_white,age_pop_black,age_pop_native,age_pop_asian,age_pop_pacific,age_pop_twoplus,age_pop_hispanic,region,division,area,neighbors,tot_dem,tot_gop,tot_votes,per_gop,median_income,median_income_black,median_income_native,median_income_asian,median_income_pacific,median_income_other,median_income_twoplus,median_income_hispanic,median_income_white,tot_edu_white,tot_edu_white_male,tot_edu_white_female,tot_edu_white_hsplus,tot_edu_white_male_hsplus,tot_edu_white_female_hsplus,...,tot_edu_twoplus,tot_edu_twoplus_male,tot_edu_twoplus_female,tot_edu_twoplus_hsplus,tot_edu_twoplus_male_hsplus,tot_edu_twoplus_female_hsplus,tot_edu_twoplus_4yplus,tot_edu_twoplus_male_4yplus,tot_edu_twoplus_female_4yplus,tot_edu_hispanic,tot_edu_hispanic_male,tot_edu_hispanic_female,tot_edu_hispanic_hsplus,tot_edu_hispanic_male_hsplus,tot_edu_hispanic_female_hsplus,tot_edu_hispanic_4yplus,tot_edu_hispanic_male_4yplus,tot_edu_hispanic_female_4yplus,pop_25p,no_hs,some_hs,hs,some_college,associates,bachelors,graduate,edu,edu_white,per_edu_white_nohs,edu_white_male,per_edu_white_male_nohs,edu_white_female,per_edu_white_female_nohs,edu_black,per_edu_black_nohs,edu_black_male,per_edu_black_male_nohs,edu_black_female
,per_edu_black_female_nohs,edu_native,per_edu_native_nohs,edu_native_male,per_edu_native_male_nohs,edu_native_female,per_edu_native_female_nohs,edu_asian,per_edu_asian_nohs,edu_asian_male,per_edu_asian_male_nohs,edu_asian_female,per_edu_asian_female_nohs,edu_pacific,per_edu_pacific_nohs,edu_pacific_male,per_edu_pacific_male_nohs,edu_pacific_female,per_edu_pacific_female_nohs,edu_other,per_edu_other_nohs,edu_other_male,per_edu_other_male_nohs,edu_other_female,per_edu_other_female_nohs,edu_twoplus,per_edu_twoplus_nohs,edu_twoplus_male,per_edu_twoplus_male_nohs,edu_twoplus_female,per_edu_twoplus_female_nohs,edu_hispanic,per_edu_hispanic_nohs,edu_hispanic_male,per_edu_hispanic_male_nohs,edu_hispanic_female,per_edu_hispanic_female_nohs
3137,56,Wyoming,Sweetwater,56037,42343,21808,20535,17223,16338,298,204,177,158,183,227,26,29,350,358,3551,3221,33561,502,335,410,55,708,6772,7.942493,7.886051,8.002435,8.187366,8.316012,7.614094,7.990196,7.960452,8.310127,6.431694,8.026432,7.038462,7.689655,6.022857,5.368715,6.708533,6.6914,8.249993,7.766932,8.125373,7.314634,7.381818,5.69209,6.700384,West,Mountain,27005.754244,"[56007, 56013, 56023, 56035, 56041]",3233.0,12153.0,16661.0,0.789874,73008.0,,62188.0,72614.0,138053.0,74189.0,55284.0,61921.0,76469.0,23640,12157,11483,22176,11404,10772,...,257,121,136,257,121,136,69,19,50,3774,2017,1757,2735,1389,1346,524,140,384,28333,633,1916,9433,6994,3114,4298,1945,3.084036,3.037521,0.061929,2.939706,0.06194,3.141078,0.061918,4.245714,0.005714,4.10084,0.0,4.553571,0.017857,1.651543,0.174229,1.724138,0.137931,1.551724,0.224138,3.966019,0.169903,4.583333,0.0,3.712329,0.239726,2.0,0.0,2.0,0.0,,,2.09816,0.134969,1.590698,0.204651,3.081081,0.0,3.342412,0.0,2.785124,0.0,3.838235,0.0,2.143614,0.275305,1.724343,0.311353,2.624929,0.233921
3138,56,Wyoming,Teton,56039,23464,12142,11322,9832,9168,87,58,33,41,135,243,11,7,160,135,1884,1670,19000,145,74,378,18,295,3554,8.657688,8.632103,8.685126,9.090419,9.172339,9.16092,8.896552,9.424242,8.878049,7.711111,7.452675,10.090909,9.0,6.05625,6.362963,6.478238,6.364072,9.129947,9.055172,9.121622,7.544974,9.666667,6.19661,6.424592,West,Mountain,10351.784301,"[56013, 56023, 56029, 56035]",7313.0,3920.0,12176.0,0.348972,83831.0,,,98125.0,,36433.0,17188.0,45361.0,95222.0,14776,7716,7060,14565,7565,7000,...,52,52,0,52,52,0,51,51,0,2053,978,1075,1367,579,788,269,0,269,17164,457,501,2272,3219,868,6488,3359,4.123048,5.146521,0.01428,5.023328,0.01957,5.281161,0.008499,4.470588,0.235294,2.0,0.0,5.0,0.285714,2.0,0.0,2.0,0.0,2.0,0.0,4.839286,0.232143,7.0,0.0,3.441176,0.382353,0.0,1.0,0.0,1.0,,,2.201691,0.264493,1.311258,0.344371,2.712928,0.218631,6.903846,0.0,6.903846,0.0,,,1.986849,0.334145,1.184049,0.407975,2.717209,0.266977
3139,56,Wyoming,Uinta,56041,20226,10224,10002,8935,8722,64,62,69,75,36,56,11,13,182,130,927,944,17657,126,144,92,24,312,1871,8.025512,7.988165,8.063687,8.168103,8.313575,7.375,6.016129,8.014493,8.853333,7.0,8.607143,4.545455,7.076923,7.192308,5.892308,6.529666,6.106992,8.239961,6.706349,8.451389,7.978261,5.916667,6.650641,6.316408,West,Mountain,5391.631764,"[56023, 56037]",1202.0,6154.0,8053.0,0.836596,58235.0,,68125.0,,,55701.0,39205.0,39816.0,61330.0,11678,5807,5871,10942,5317,5625,...,350,180,170,336,179,157,14,14,0,829,430,399,651,361,290,37,12,25,12915,288,646,5176,3420,1390,1356,639,2.898335,2.688303,0.063024,2.638023,0.084381,2.738034,0.041901,4.7,0.15,7.0,0.0,1.25,0.375,2.435644,0.079208,2.8,0.145455,2.0,0.0,7.0,0.0,,,7.0,0.0,,,,,,,1.451777,0.274112,1.6875,0.15625,1.014493,0.492754,2.12,0.04,2.377778,0.005556,1.847059,0.076471,1.793727,0.214717,1.818605,0.160465,1.766917,0.273183
3140,56,Wyoming,Washakie,56043,7805,3963,3842,3266,3151,23,15,19,34,22,33,0,0,68,66,565,543,6417,38,53,55,0,134,1108,9.065343,8.91673,9.218636,9.261788,9.638527,9.913043,8.466667,9.0,9.764706,8.409091,9.484848,,,7.029412,7.318182,7.125664,6.983425,9.446782,9.342105,9.490566,9.054545,,7.171642,7.055957,West,Mountain,5798.138762,"[56003, 56013, 56017, 56019, 56025, 56029]",532.0,2911.0,3715.0,0.845484,53426.0,,,,,62054.0,36118.0,50035.0,54815.0,4848,2425,2423,4418,2096,2322,...,210,72,138,202,64,138,9,0,9,632,324,308,480,218,262,59,24,35,5662,181,409,1717,1434,701,849,371,3.076651,2.98288,0.088696,3.027629,0.13567,2.938093,0.041684,,,,,,,5.857143,0.0,5.857143,0.0,,,,,,,,,,,,,,,0.767442,0.616279,0.639175,0.680412,0.933333,0.533333,2.138095,0.038095,1.777778,0.111111,2.326087,0.0,1.985759,0.240506,1.716049,0.32716,2.269481,0.149351
3141,56,Wyoming,Weston,56045,6927,3624,3303,3273,2963,30,15,62,45,30,83,1,0,73,67,155,130,6236,45,107,113,1,140,285,9.173668,9.06319,9.294883,9.175985,9.507256,8.433333,8.333333,8.145161,8.222222,11.666667,9.626506,9.0,,6.616438,5.791045,7.819355,6.530769,9.333387,8.4,8.17757,10.168142,9.0,6.221429,7.231579,West,Mountain,6210.804116,"[56005, 56009, 56011, 56027]",294.0,2898.0,3334.0,0.907895,52867.0,,,,,,,37870.0,55032.0,4689,2450,2239,4381,2286,2095,...,52,12,40,52,12,40,17,0,17,95,80,15,72,57,15,3,3,0,5014,129,260,1796,1334,534,676,285,3.007579,2.85498,0.065686,2.747755,0.066939,2.972309,0.064314,,,,,,,1.2,0.4,2.0,0.0,0.0,1.0,1.815029,0.323699,1.26087,0.369565,2.015748,0.307087,2.0,0.0,2.0,0.0,,,,,,,,,3.634615,0.0,2.0,0.0,4.125,0.0,1.673684,0.242105,1.6125,0.2875,2.0,0.0


# 6. add mask usage statistics

In [100]:
# download the county-level mask-use table
with urlopen('https://raw.githubusercontent.com/nytimes/covid-19-data/master/mask-use/mask-use-by-county.csv') as response:
    mask_df = pd.read_csv(response)

mask_df = (
    mask_df
    .rename(columns={'COUNTYFP': 'fips'})
    # left-pad the county code with zeros to a 5-character string
    .assign(fips=lambda frame: frame['fips'].map('{0:0>5}'.format))
    # lower-case every column label
    .rename(columns=str.lower)
)

mask_df.head()

Unnamed: 0,fips,never,rarely,sometimes,frequently,always
0,1001,0.053,0.074,0.134,0.295,0.444
1,1003,0.083,0.059,0.098,0.323,0.436
2,1005,0.067,0.121,0.12,0.201,0.491
3,1007,0.02,0.034,0.096,0.278,0.572
4,1009,0.053,0.114,0.18,0.194,0.459


In [101]:
# Single mask-usage score per county: each response share is weighted by its
# frequency level ('never' counts as 0, 'always' as 4).
usage_weights = {'rarely': 1, 'sometimes': 2, 'frequently': 3, 'always': 4}
mask_df['mask'] = sum(
    weight * mask_df[level] for level, weight in usage_weights.items()
)
mask_df.tail()

Unnamed: 0,fips,never,rarely,sometimes,frequently,always,mask
3137,56037,0.061,0.295,0.23,0.146,0.268,2.265
3138,56039,0.095,0.157,0.16,0.247,0.34,2.578
3139,56041,0.098,0.278,0.154,0.207,0.264,2.263
3140,56043,0.204,0.155,0.069,0.285,0.287,2.296
3141,56045,0.142,0.129,0.148,0.207,0.374,2.542


In [102]:
# NOTE(review): save_df is defined outside this section; presumably it
# persists the mask-use table under the name 'mask_df' -- confirm.
save_df(mask_df, 'mask_df')

In [103]:
# Left-join every mask-use column onto the county table by FIPS code.
info_df = pd.merge(
    info_df,
    mask_df,
    how='left',
    on='fips',
)
info_df.tail()

Unnamed: 0,state_fips,state,county,fips,tot_pop,tot_male,tot_female,tot_pop_white_male,tot_pop_white_female,tot_pop_black_male,tot_pop_black_female,tot_pop_native_male,tot_pop_native_female,tot_pop_asian_male,tot_pop_asian_female,tot_pop_pacific_male,tot_pop_pacific_female,tot_pop_twoplus_male,tot_pop_twoplus_female,tot_pop_hispanic_male,tot_pop_hispanic_female,tot_pop_white,tot_pop_black,tot_pop_native,tot_pop_asian,tot_pop_pacific,tot_pop_twoplus,tot_pop_hispanic,age_pop,age_male,age_female,age_pop_white_male,age_pop_white_female,age_pop_black_male,age_pop_black_female,age_pop_native_male,age_pop_native_female,age_pop_asian_male,age_pop_asian_female,age_pop_pacific_male,age_pop_pacific_female,age_pop_twoplus_male,age_pop_twoplus_female,age_pop_hispanic_male,age_pop_hispanic_female,age_pop_white,age_pop_black,age_pop_native,age_pop_asian,age_pop_pacific,age_pop_twoplus,age_pop_hispanic,region,division,area,neighbors,tot_dem,tot_gop,tot_votes,per_gop,median_income,median_income_black,median_income_native,median_income_asian,median_income_pacific,median_income_other,median_income_twoplus,median_income_hispanic,median_income_white,tot_edu_white,tot_edu_white_male,tot_edu_white_female,tot_edu_white_hsplus,tot_edu_white_male_hsplus,tot_edu_white_female_hsplus,...,tot_edu_twoplus_4yplus,tot_edu_twoplus_male_4yplus,tot_edu_twoplus_female_4yplus,tot_edu_hispanic,tot_edu_hispanic_male,tot_edu_hispanic_female,tot_edu_hispanic_hsplus,tot_edu_hispanic_male_hsplus,tot_edu_hispanic_female_hsplus,tot_edu_hispanic_4yplus,tot_edu_hispanic_male_4yplus,tot_edu_hispanic_female_4yplus,pop_25p,no_hs,some_hs,hs,some_college,associates,bachelors,graduate,edu,edu_white,per_edu_white_nohs,edu_white_male,per_edu_white_male_nohs,edu_white_female,per_edu_white_female_nohs,edu_black,per_edu_black_nohs,edu_black_male,per_edu_black_male_nohs,edu_black_female,per_edu_black_female_nohs,edu_native,per_edu_native_nohs,edu_native_male,per_edu_native_male_nohs,edu_native_female,per_edu_native_female_no
hs,edu_asian,per_edu_asian_nohs,edu_asian_male,per_edu_asian_male_nohs,edu_asian_female,per_edu_asian_female_nohs,edu_pacific,per_edu_pacific_nohs,edu_pacific_male,per_edu_pacific_male_nohs,edu_pacific_female,per_edu_pacific_female_nohs,edu_other,per_edu_other_nohs,edu_other_male,per_edu_other_male_nohs,edu_other_female,per_edu_other_female_nohs,edu_twoplus,per_edu_twoplus_nohs,edu_twoplus_male,per_edu_twoplus_male_nohs,edu_twoplus_female,per_edu_twoplus_female_nohs,edu_hispanic,per_edu_hispanic_nohs,edu_hispanic_male,per_edu_hispanic_male_nohs,edu_hispanic_female,per_edu_hispanic_female_nohs,never,rarely,sometimes,frequently,always,mask
3137,56,Wyoming,Sweetwater,56037,42343,21808,20535,17223,16338,298,204,177,158,183,227,26,29,350,358,3551,3221,33561,502,335,410,55,708,6772,7.942493,7.886051,8.002435,8.187366,8.316012,7.614094,7.990196,7.960452,8.310127,6.431694,8.026432,7.038462,7.689655,6.022857,5.368715,6.708533,6.6914,8.249993,7.766932,8.125373,7.314634,7.381818,5.69209,6.700384,West,Mountain,27005.754244,"[56007, 56013, 56023, 56035, 56041]",3233.0,12153.0,16661.0,0.789874,73008.0,,62188.0,72614.0,138053.0,74189.0,55284.0,61921.0,76469.0,23640,12157,11483,22176,11404,10772,...,69,19,50,3774,2017,1757,2735,1389,1346,524,140,384,28333,633,1916,9433,6994,3114,4298,1945,3.084036,3.037521,0.061929,2.939706,0.06194,3.141078,0.061918,4.245714,0.005714,4.10084,0.0,4.553571,0.017857,1.651543,0.174229,1.724138,0.137931,1.551724,0.224138,3.966019,0.169903,4.583333,0.0,3.712329,0.239726,2.0,0.0,2.0,0.0,,,2.09816,0.134969,1.590698,0.204651,3.081081,0.0,3.342412,0.0,2.785124,0.0,3.838235,0.0,2.143614,0.275305,1.724343,0.311353,2.624929,0.233921,0.061,0.295,0.23,0.146,0.268,2.265
3138,56,Wyoming,Teton,56039,23464,12142,11322,9832,9168,87,58,33,41,135,243,11,7,160,135,1884,1670,19000,145,74,378,18,295,3554,8.657688,8.632103,8.685126,9.090419,9.172339,9.16092,8.896552,9.424242,8.878049,7.711111,7.452675,10.090909,9.0,6.05625,6.362963,6.478238,6.364072,9.129947,9.055172,9.121622,7.544974,9.666667,6.19661,6.424592,West,Mountain,10351.784301,"[56013, 56023, 56029, 56035]",7313.0,3920.0,12176.0,0.348972,83831.0,,,98125.0,,36433.0,17188.0,45361.0,95222.0,14776,7716,7060,14565,7565,7000,...,51,51,0,2053,978,1075,1367,579,788,269,0,269,17164,457,501,2272,3219,868,6488,3359,4.123048,5.146521,0.01428,5.023328,0.01957,5.281161,0.008499,4.470588,0.235294,2.0,0.0,5.0,0.285714,2.0,0.0,2.0,0.0,2.0,0.0,4.839286,0.232143,7.0,0.0,3.441176,0.382353,0.0,1.0,0.0,1.0,,,2.201691,0.264493,1.311258,0.344371,2.712928,0.218631,6.903846,0.0,6.903846,0.0,,,1.986849,0.334145,1.184049,0.407975,2.717209,0.266977,0.095,0.157,0.16,0.247,0.34,2.578
3139,56,Wyoming,Uinta,56041,20226,10224,10002,8935,8722,64,62,69,75,36,56,11,13,182,130,927,944,17657,126,144,92,24,312,1871,8.025512,7.988165,8.063687,8.168103,8.313575,7.375,6.016129,8.014493,8.853333,7.0,8.607143,4.545455,7.076923,7.192308,5.892308,6.529666,6.106992,8.239961,6.706349,8.451389,7.978261,5.916667,6.650641,6.316408,West,Mountain,5391.631764,"[56023, 56037]",1202.0,6154.0,8053.0,0.836596,58235.0,,68125.0,,,55701.0,39205.0,39816.0,61330.0,11678,5807,5871,10942,5317,5625,...,14,14,0,829,430,399,651,361,290,37,12,25,12915,288,646,5176,3420,1390,1356,639,2.898335,2.688303,0.063024,2.638023,0.084381,2.738034,0.041901,4.7,0.15,7.0,0.0,1.25,0.375,2.435644,0.079208,2.8,0.145455,2.0,0.0,7.0,0.0,,,7.0,0.0,,,,,,,1.451777,0.274112,1.6875,0.15625,1.014493,0.492754,2.12,0.04,2.377778,0.005556,1.847059,0.076471,1.793727,0.214717,1.818605,0.160465,1.766917,0.273183,0.098,0.278,0.154,0.207,0.264,2.263
3140,56,Wyoming,Washakie,56043,7805,3963,3842,3266,3151,23,15,19,34,22,33,0,0,68,66,565,543,6417,38,53,55,0,134,1108,9.065343,8.91673,9.218636,9.261788,9.638527,9.913043,8.466667,9.0,9.764706,8.409091,9.484848,,,7.029412,7.318182,7.125664,6.983425,9.446782,9.342105,9.490566,9.054545,,7.171642,7.055957,West,Mountain,5798.138762,"[56003, 56013, 56017, 56019, 56025, 56029]",532.0,2911.0,3715.0,0.845484,53426.0,,,,,62054.0,36118.0,50035.0,54815.0,4848,2425,2423,4418,2096,2322,...,9,0,9,632,324,308,480,218,262,59,24,35,5662,181,409,1717,1434,701,849,371,3.076651,2.98288,0.088696,3.027629,0.13567,2.938093,0.041684,,,,,,,5.857143,0.0,5.857143,0.0,,,,,,,,,,,,,,,0.767442,0.616279,0.639175,0.680412,0.933333,0.533333,2.138095,0.038095,1.777778,0.111111,2.326087,0.0,1.985759,0.240506,1.716049,0.32716,2.269481,0.149351,0.204,0.155,0.069,0.285,0.287,2.296
3141,56,Wyoming,Weston,56045,6927,3624,3303,3273,2963,30,15,62,45,30,83,1,0,73,67,155,130,6236,45,107,113,1,140,285,9.173668,9.06319,9.294883,9.175985,9.507256,8.433333,8.333333,8.145161,8.222222,11.666667,9.626506,9.0,,6.616438,5.791045,7.819355,6.530769,9.333387,8.4,8.17757,10.168142,9.0,6.221429,7.231579,West,Mountain,6210.804116,"[56005, 56009, 56011, 56027]",294.0,2898.0,3334.0,0.907895,52867.0,,,,,,,37870.0,55032.0,4689,2450,2239,4381,2286,2095,...,17,0,17,95,80,15,72,57,15,3,3,0,5014,129,260,1796,1334,534,676,285,3.007579,2.85498,0.065686,2.747755,0.066939,2.972309,0.064314,,,,,,,1.2,0.4,2.0,0.0,0.0,1.0,1.815029,0.323699,1.26087,0.369565,2.015748,0.307087,2.0,0.0,2.0,0.0,,,,,,,,,3.634615,0.0,2.0,0.0,4.125,0.0,1.673684,0.242105,1.6125,0.2875,2.0,0.0,0.142,0.129,0.148,0.207,0.374,2.542


In [104]:
# show the column labels of the merged county table
info_df.columns

Index(['state_fips', 'state', 'county', 'fips', 'tot_pop', 'tot_male',
       'tot_female', 'tot_pop_white_male', 'tot_pop_white_female',
       'tot_pop_black_male',
       ...
       'edu_hispanic_male', 'per_edu_hispanic_male_nohs',
       'edu_hispanic_female', 'per_edu_hispanic_female_nohs', 'never',
       'rarely', 'sometimes', 'frequently', 'always', 'mask'],
      dtype='object', length=204)

In [105]:
def per_population(df, divisor='tot_pop', ignore=('tot_pop',)):
    """Add a ``per_*`` ratio column for every ``tot_*`` column of ``df``.

    Each column whose label starts with ``'tot_'`` and is not listed in
    ``ignore`` is divided by ``df[divisor]``; the result is stored under the
    same label with the leading ``'tot_'`` swapped for ``'per_'``
    (``'tot_male'`` -> ``'per_male'``).  Only the prefix is rewritten, so a
    later ``'tot_'`` inside the label is left alone.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the ``tot_*`` columns.  It is modified in place.
    divisor : str, default 'tot_pop'
        Label of the column used as the denominator.
    ignore : collection of str, default ('tot_pop',)
        Labels that must not be converted.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned so the function can be used with
        ``DataFrame.pipe``.
    """
    prefix = 'tot_'

    # Snapshot the labels first: columns are added to ``df`` inside the loop.
    for col in df.columns.tolist():
        # Non-string labels cannot carry the prefix; skip them instead of failing.
        if not (isinstance(col, str) and col.startswith(prefix)):
            continue
        if col in ignore:
            continue
        df['per_' + col[len(prefix):]] = df[col] / df[divisor]

    return df

In [107]:
# Population divided by the county's `area` column.
info_df['pop_density'] = info_df['tot_pop'].div(info_df['area'])

# Every column whose label mentions 'edu' is excluded from the per-capita
# conversion, together with tot_pop itself and the raw tot_dem / tot_gop counts
# (info_df already has a per_gop column -- presumably skipped so it is not
# overwritten; confirm).
edu_cols_to_ignore = [col for col in info_df.columns if 'edu' in col]
info_df = per_population(
    info_df,
    ignore=['tot_pop', 'tot_dem', 'tot_gop', *edu_cols_to_ignore],
)
info_df.head()

Unnamed: 0,state_fips,state,county,fips,tot_pop,tot_male,tot_female,tot_pop_white_male,tot_pop_white_female,tot_pop_black_male,tot_pop_black_female,tot_pop_native_male,tot_pop_native_female,tot_pop_asian_male,tot_pop_asian_female,tot_pop_pacific_male,tot_pop_pacific_female,tot_pop_twoplus_male,tot_pop_twoplus_female,tot_pop_hispanic_male,tot_pop_hispanic_female,tot_pop_white,tot_pop_black,tot_pop_native,tot_pop_asian,tot_pop_pacific,tot_pop_twoplus,tot_pop_hispanic,age_pop,age_male,age_female,age_pop_white_male,age_pop_white_female,age_pop_black_male,age_pop_black_female,age_pop_native_male,age_pop_native_female,age_pop_asian_male,age_pop_asian_female,age_pop_pacific_male,age_pop_pacific_female,age_pop_twoplus_male,age_pop_twoplus_female,age_pop_hispanic_male,age_pop_hispanic_female,age_pop_white,age_pop_black,age_pop_native,age_pop_asian,age_pop_pacific,age_pop_twoplus,age_pop_hispanic,region,division,area,neighbors,tot_dem,tot_gop,tot_votes,per_gop,median_income,median_income_black,median_income_native,median_income_asian,median_income_pacific,median_income_other,median_income_twoplus,median_income_hispanic,median_income_white,tot_edu_white,tot_edu_white_male,tot_edu_white_female,tot_edu_white_hsplus,tot_edu_white_male_hsplus,tot_edu_white_female_hsplus,...,edu_white_female,per_edu_white_female_nohs,edu_black,per_edu_black_nohs,edu_black_male,per_edu_black_male_nohs,edu_black_female,per_edu_black_female_nohs,edu_native,per_edu_native_nohs,edu_native_male,per_edu_native_male_nohs,edu_native_female,per_edu_native_female_nohs,edu_asian,per_edu_asian_nohs,edu_asian_male,per_edu_asian_male_nohs,edu_asian_female,per_edu_asian_female_nohs,edu_pacific,per_edu_pacific_nohs,edu_pacific_male,per_edu_pacific_male_nohs,edu_pacific_female,per_edu_pacific_female_nohs,edu_other,per_edu_other_nohs,edu_other_male,per_edu_other_male_nohs,edu_other_female,per_edu_other_female_nohs,edu_twoplus,per_edu_twoplus_nohs,edu_twoplus_male,per_edu_twoplus_male_nohs,edu_twoplus_female,per_edu_t
woplus_female_nohs,edu_hispanic,per_edu_hispanic_nohs,edu_hispanic_male,per_edu_hispanic_male_nohs,edu_hispanic_female,per_edu_hispanic_female_nohs,never,rarely,sometimes,frequently,always,mask,pop_density,per_male,per_female,per_pop_white_male,per_pop_white_female,per_pop_black_male,per_pop_black_female,per_pop_native_male,per_pop_native_female,per_pop_asian_male,per_pop_asian_female,per_pop_pacific_male,per_pop_pacific_female,per_pop_twoplus_male,per_pop_twoplus_female,per_pop_hispanic_male,per_pop_hispanic_female,per_pop_white,per_pop_black,per_pop_native,per_pop_asian,per_pop_pacific,per_pop_twoplus,per_pop_hispanic,per_votes
0,1,Alabama,Autauga,1001,55869,27092,28777,20138,21077,5171,5927,105,138,282,364,20,20,492,464,884,787,41215,11098,243,646,40,956,1671,8.422041,8.181973,8.648052,8.518373,8.928216,7.480758,8.104269,9.733333,9.826087,7.276596,8.381868,8.4,5.85,5.195122,5.519397,6.382353,7.072427,8.727963,7.81375,9.786008,7.899381,7.125,5.35251,6.707361,South,East South Central,1539.602123,"[01021, 01047, 01051, 01085, 01101]",5908.0,18110.0,24661.0,0.754018,58786.0,27643.0,,,,,,83423.0,65047.0,28726,13834,14892,26130,12588,13542,...,3.117043,0.090653,2.563808,0.19555,2.543393,0.199211,2.580395,0.192575,3.166667,0.217949,4.487179,0.358974,1.846154,0.076923,3.615142,0.123028,3.568421,0.347368,3.635135,0.027027,2.0,0.0,2.0,0.0,2.0,0.0,1.503817,0.248092,1.44086,0.27957,1.538462,0.230769,2.626316,0.292105,1.081481,0.459259,3.477551,0.2,3.591054,0.125666,4.197802,0.164835,3.020661,0.088843,0.053,0.074,0.134,0.295,0.444,3.003,36.287947,0.48492,0.51508,0.36045,0.377258,0.092556,0.106087,0.001879,0.00247,0.005048,0.006515,0.000358,0.000358,0.008806,0.008305,0.015823,0.014087,0.737708,0.198643,0.004349,0.011563,0.000716,0.017111,0.029909,0.441408
1,1,Alabama,Baldwin,1003,223234,108247,114987,89845,95902,9308,9907,753,754,911,1435,53,70,1832,1930,5545,4989,185747,19215,1507,2346,123,3762,10534,8.987202,8.806627,9.157192,9.150993,9.481721,7.587129,8.095791,9.14741,8.851459,7.246981,7.887108,8.150943,8.628571,5.757642,5.864767,6.497565,6.719182,9.321749,7.849388,8.999336,7.638534,8.422764,5.8126,6.602525,South,East South Central,4117.546676,"[01025, 01053, 01097, 01099, 01129]",18409.0,72780.0,94090.0,0.798123,55962.0,31112.0,53289.0,34763.0,,45634.0,53456.0,43279.0,59418.0,126316,60310,66006,116288,54788,61500,...,3.513696,0.068267,2.494586,0.203315,2.19882,0.261756,2.752534,0.152347,2.270936,0.221675,2.342256,0.216061,2.195122,0.227642,2.690678,0.169492,2.997653,0.36385,2.517241,0.059682,2.0,0.0,,,2.0,0.0,2.878465,0.259062,2.948827,0.159915,2.808102,0.358209,3.418808,0.106893,2.910903,0.127784,3.923166,0.086147,2.850361,0.253174,2.477628,0.340487,3.2827,0.151899,0.083,0.059,0.098,0.323,0.436,2.968,54.215293,0.484904,0.515096,0.40247,0.429603,0.041696,0.044379,0.003373,0.003378,0.004081,0.006428,0.000237,0.000314,0.008207,0.008646,0.024839,0.022349,0.832073,0.086076,0.006751,0.010509,0.000551,0.016852,0.047188,0.421486
2,1,Alabama,Barbour,1005,24686,13064,11622,5894,5341,6260,5547,52,43,55,61,21,10,153,132,629,488,11235,11807,95,116,31,285,1117,8.784412,8.463564,9.14507,9.475568,10.196592,7.767732,8.491437,9.346154,10.465116,8.109091,10.065574,6.285714,7.7,6.418301,5.810606,6.434022,5.766393,9.818336,8.107733,9.852632,9.137931,6.741935,6.136842,6.142346,South,East South Central,2292.144655,"[01011, 01045, 01067, 01109, 01113]",4848.0,5431.0,10390.0,0.528359,34186.0,23013.0,,50417.0,,26793.0,19760.0,30417.0,47031.0,9171,4846,4325,7264,3657,3607,...,2.551214,0.166012,1.703576,0.317808,1.568773,0.355019,1.854944,0.276024,1.166667,0.416667,1.166667,0.416667,,,1.920455,0.181818,1.35,0.325,2.395833,0.0625,0.0,1.0,,,0.0,1.0,1.217391,0.710145,1.617391,0.669565,0.417391,0.791304,1.918033,0.163934,1.25,0.375,2.436893,0.0,1.539267,0.561955,1.731646,0.513924,1.11236,0.668539,0.067,0.121,0.12,0.201,0.491,2.928,10.769826,0.529207,0.470793,0.238759,0.216357,0.253585,0.224702,0.002106,0.001742,0.002228,0.002471,0.000851,0.000405,0.006198,0.005347,0.02548,0.019768,0.455116,0.478287,0.003848,0.004699,0.001256,0.011545,0.045248,0.420886
3,1,Alabama,Bibb,1007,22394,11929,10465,8482,8181,2912,1807,50,41,21,25,5,1,116,130,343,280,16663,4719,91,46,6,246,623,8.606145,8.349484,8.89871,8.645838,9.105122,7.706387,8.455451,8.52,8.219512,7.52381,9.8,6.4,3.0,6.422414,6.3,7.186589,6.975,8.871332,7.993219,8.384615,8.76087,5.833333,6.357724,7.091493,South,East South Central,1612.167481,"[01021, 01065, 01073, 01105, 01117, 01125]",1874.0,6733.0,8748.0,0.78227,45340.0,34000.0,,,,,20329.0,42708.0,50769.0,12002,6037,5965,10483,5181,5302,...,2.528751,0.111148,1.686369,0.3076,1.476701,0.358341,2.07094,0.21453,2.0,0.0,2.0,0.0,,,7.0,0.0,7.0,0.0,7.0,0.0,,,,,,,2.0,0.0,2.0,0.0,,,1.651007,0.275168,1.925926,0.175926,0.926829,0.536585,1.316294,0.341853,1.111111,0.444444,1.56338,0.21831,0.02,0.034,0.096,0.278,0.572,3.348,13.890616,0.532687,0.467313,0.378762,0.365321,0.130035,0.080691,0.002233,0.001831,0.000938,0.001116,0.000223,4.5e-05,0.00518,0.005805,0.015317,0.012503,0.744083,0.210726,0.004064,0.002054,0.000268,0.010985,0.02782,0.39064
4,1,Alabama,Blount,1009,57826,28472,29354,24494,25682,453,419,143,139,73,90,14,7,345,385,2950,2632,50176,872,282,163,21,730,5582,8.651714,8.49136,8.807249,8.771209,9.131843,8.030905,7.947494,10.125874,9.446043,8.30137,9.066667,8.142857,10.571429,6.486957,6.535065,6.4,6.06193,8.955796,7.990826,9.79078,8.723926,8.952381,6.512329,6.240595,South,East South Central,1670.103911,"[01043, 01055, 01073, 01095, 01115, 01127]",2150.0,22808.0,25384.0,0.913855,48695.0,,65385.0,99219.0,,,44934.0,35495.0,49872.0,35774,17200,18574,29814,14167,15647,...,2.45876,0.157586,1.563758,0.310403,1.544484,0.316726,1.580952,0.304762,1.916667,0.287879,4.227273,0.363636,1.454545,0.272727,4.177419,0.16129,4.906977,0.0,3.790123,0.246914,2.0,0.0,,,2.0,0.0,3.270142,0.43128,2.377358,0.471698,4.171429,0.390476,1.913636,0.179545,1.971698,0.273585,1.859649,0.092105,0.927969,0.614559,0.811989,0.675749,1.077058,0.535902,0.053,0.114,0.18,0.194,0.459,2.892,34.624193,0.492374,0.507626,0.423581,0.444125,0.007834,0.007246,0.002473,0.002404,0.001262,0.001556,0.000242,0.000121,0.005966,0.006658,0.051015,0.045516,0.867707,0.01508,0.004877,0.002819,0.000363,0.012624,0.096531,0.438972


# import CSSE data

>Note: New York Times data has a few caveats, including treating New York City, Kansas City, and Joplin as single entities rather than including them in their respective counties. Read their [README](https://github.com/nytimes/covid-19-data/blob/master/README.md) for more information.

In [2]:
# Reload the pickled county table from the processed-data folder.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
info_df = pd.read_pickle('../data/processed/info_df.p')

info_df.head()

Unnamed: 0,state_fips,state,county,fips,tot_pop,tot_male,tot_female,tot_pop_white_male,tot_pop_white_female,tot_pop_black_male,tot_pop_black_female,tot_pop_native_male,tot_pop_native_female,tot_pop_asian_male,tot_pop_asian_female,tot_pop_pacific_male,tot_pop_pacific_female,tot_pop_twoplus_male,tot_pop_twoplus_female,tot_pop_hispanic_male,tot_pop_hispanic_female,tot_pop_white,tot_pop_black,tot_pop_native,tot_pop_asian,tot_pop_pacific,tot_pop_twoplus,tot_pop_hispanic,age_pop,age_male,age_female,age_pop_white_male,age_pop_white_female,age_pop_black_male,age_pop_black_female,age_pop_native_male,age_pop_native_female,age_pop_asian_male,age_pop_asian_female,age_pop_pacific_male,age_pop_pacific_female,age_pop_twoplus_male,age_pop_twoplus_female,age_pop_hispanic_male,age_pop_hispanic_female,age_pop_white,age_pop_black,age_pop_native,age_pop_asian,age_pop_pacific,age_pop_twoplus,age_pop_hispanic,region,division,area,neighbors,tot_dem,tot_gop,tot_votes,per_gop,median_income,median_income_black,median_income_native,median_income_asian,median_income_pacific,median_income_other,median_income_twoplus,median_income_hispanic,median_income_white,tot_edu_white,tot_edu_white_male,tot_edu_white_female,tot_edu_white_hsplus,tot_edu_white_male_hsplus,tot_edu_white_female_hsplus,...,edu_black,per_edu_black_nohs,edu_black_male,per_edu_black_male_nohs,edu_black_female,per_edu_black_female_nohs,edu_native,per_edu_native_nohs,edu_native_male,per_edu_native_male_nohs,edu_native_female,per_edu_native_female_nohs,edu_asian,per_edu_asian_nohs,edu_asian_male,per_edu_asian_male_nohs,edu_asian_female,per_edu_asian_female_nohs,edu_pacific,per_edu_pacific_nohs,edu_pacific_male,per_edu_pacific_male_nohs,edu_pacific_female,per_edu_pacific_female_nohs,edu_other,per_edu_other_nohs,edu_other_male,per_edu_other_male_nohs,edu_other_female,per_edu_other_female_nohs,edu_twoplus,per_edu_twoplus_nohs,edu_twoplus_male,per_edu_twoplus_male_nohs,edu_twoplus_female,per_edu_twoplus_female_nohs,edu_hispanic,per_edu_his
panic_nohs,edu_hispanic_male,per_edu_hispanic_male_nohs,edu_hispanic_female,per_edu_hispanic_female_nohs,never,rarely,sometimes,frequently,always,mask,pop_density,per_male,per_female,per_pop_white_male,per_pop_white_female,per_pop_black_male,per_pop_black_female,per_pop_native_male,per_pop_native_female,per_pop_asian_male,per_pop_asian_female,per_pop_pacific_male,per_pop_pacific_female,per_pop_twoplus_male,per_pop_twoplus_female,per_pop_hispanic_male,per_pop_hispanic_female,per_pop_white,per_pop_black,per_pop_native,per_pop_asian,per_pop_pacific,per_pop_twoplus,per_pop_hispanic,per_votes,lat,lon
0,1,Alabama,Autauga,1001,55869,27092,28777,20138,21077,5171,5927,105,138,282,364,20,20,492,464,884,787,41215,11098,243,646,40,956,1671,8.422041,8.181973,8.648052,8.518373,8.928216,7.480758,8.104269,9.733333,9.826087,7.276596,8.381868,8.4,5.85,5.195122,5.519397,6.382353,7.072427,8.727963,7.81375,9.786008,7.899381,7.125,5.35251,6.707361,South,East South Central,1539.602123,"[01021, 01047, 01051, 01085, 01101]",5908.0,18110.0,24661.0,0.754018,58786.0,27643.0,,,,,,83423.0,65047.0,28726,13834,14892,26130,12588,13542,...,2.563808,0.19555,2.543393,0.199211,2.580395,0.192575,3.166667,0.217949,4.487179,0.358974,1.846154,0.076923,3.615142,0.123028,3.568421,0.347368,3.635135,0.027027,2.0,0.0,2.0,0.0,2.0,0.0,1.503817,0.248092,1.44086,0.27957,1.538462,0.230769,2.626316,0.292105,1.081481,0.459259,3.477551,0.2,3.591054,0.125666,4.197802,0.164835,3.020661,0.088843,0.053,0.074,0.134,0.295,0.444,3.003,36.287947,0.48492,0.51508,0.36045,0.377258,0.092556,0.106087,0.001879,0.00247,0.005048,0.006515,0.000358,0.000358,0.008806,0.008305,0.015823,0.014087,0.737708,0.198643,0.004349,0.011563,0.000716,0.017111,0.029909,0.441408,32.539527,-86.644082
1,1,Alabama,Baldwin,1003,223234,108247,114987,89845,95902,9308,9907,753,754,911,1435,53,70,1832,1930,5545,4989,185747,19215,1507,2346,123,3762,10534,8.987202,8.806627,9.157192,9.150993,9.481721,7.587129,8.095791,9.14741,8.851459,7.246981,7.887108,8.150943,8.628571,5.757642,5.864767,6.497565,6.719182,9.321749,7.849388,8.999336,7.638534,8.422764,5.8126,6.602525,South,East South Central,4117.546676,"[01025, 01053, 01097, 01099, 01129]",18409.0,72780.0,94090.0,0.798123,55962.0,31112.0,53289.0,34763.0,,45634.0,53456.0,43279.0,59418.0,126316,60310,66006,116288,54788,61500,...,2.494586,0.203315,2.19882,0.261756,2.752534,0.152347,2.270936,0.221675,2.342256,0.216061,2.195122,0.227642,2.690678,0.169492,2.997653,0.36385,2.517241,0.059682,2.0,0.0,,,2.0,0.0,2.878465,0.259062,2.948827,0.159915,2.808102,0.358209,3.418808,0.106893,2.910903,0.127784,3.923166,0.086147,2.850361,0.253174,2.477628,0.340487,3.2827,0.151899,0.083,0.059,0.098,0.323,0.436,2.968,54.215293,0.484904,0.515096,0.40247,0.429603,0.041696,0.044379,0.003373,0.003378,0.004081,0.006428,0.000237,0.000314,0.008207,0.008646,0.024839,0.022349,0.832073,0.086076,0.006751,0.010509,0.000551,0.016852,0.047188,0.421486,30.72775,-87.722071
2,1,Alabama,Barbour,1005,24686,13064,11622,5894,5341,6260,5547,52,43,55,61,21,10,153,132,629,488,11235,11807,95,116,31,285,1117,8.784412,8.463564,9.14507,9.475568,10.196592,7.767732,8.491437,9.346154,10.465116,8.109091,10.065574,6.285714,7.7,6.418301,5.810606,6.434022,5.766393,9.818336,8.107733,9.852632,9.137931,6.741935,6.136842,6.142346,South,East South Central,2292.144655,"[01011, 01045, 01067, 01109, 01113]",4848.0,5431.0,10390.0,0.528359,34186.0,23013.0,,50417.0,,26793.0,19760.0,30417.0,47031.0,9171,4846,4325,7264,3657,3607,...,1.703576,0.317808,1.568773,0.355019,1.854944,0.276024,1.166667,0.416667,1.166667,0.416667,,,1.920455,0.181818,1.35,0.325,2.395833,0.0625,0.0,1.0,,,0.0,1.0,1.217391,0.710145,1.617391,0.669565,0.417391,0.791304,1.918033,0.163934,1.25,0.375,2.436893,0.0,1.539267,0.561955,1.731646,0.513924,1.11236,0.668539,0.067,0.121,0.12,0.201,0.491,2.928,10.769826,0.529207,0.470793,0.238759,0.216357,0.253585,0.224702,0.002106,0.001742,0.002228,0.002471,0.000851,0.000405,0.006198,0.005347,0.02548,0.019768,0.455116,0.478287,0.003848,0.004699,0.001256,0.011545,0.045248,0.420886,31.868263,-85.387129
3,1,Alabama,Bibb,1007,22394,11929,10465,8482,8181,2912,1807,50,41,21,25,5,1,116,130,343,280,16663,4719,91,46,6,246,623,8.606145,8.349484,8.89871,8.645838,9.105122,7.706387,8.455451,8.52,8.219512,7.52381,9.8,6.4,3.0,6.422414,6.3,7.186589,6.975,8.871332,7.993219,8.384615,8.76087,5.833333,6.357724,7.091493,South,East South Central,1612.167481,"[01021, 01065, 01073, 01105, 01117, 01125]",1874.0,6733.0,8748.0,0.78227,45340.0,34000.0,,,,,20329.0,42708.0,50769.0,12002,6037,5965,10483,5181,5302,...,1.686369,0.3076,1.476701,0.358341,2.07094,0.21453,2.0,0.0,2.0,0.0,,,7.0,0.0,7.0,0.0,7.0,0.0,,,,,,,2.0,0.0,2.0,0.0,,,1.651007,0.275168,1.925926,0.175926,0.926829,0.536585,1.316294,0.341853,1.111111,0.444444,1.56338,0.21831,0.02,0.034,0.096,0.278,0.572,3.348,13.890616,0.532687,0.467313,0.378762,0.365321,0.130035,0.080691,0.002233,0.001831,0.000938,0.001116,0.000223,4.5e-05,0.00518,0.005805,0.015317,0.012503,0.744083,0.210726,0.004064,0.002054,0.000268,0.010985,0.02782,0.39064,32.996421,-87.125115
4,1,Alabama,Blount,1009,57826,28472,29354,24494,25682,453,419,143,139,73,90,14,7,345,385,2950,2632,50176,872,282,163,21,730,5582,8.651714,8.49136,8.807249,8.771209,9.131843,8.030905,7.947494,10.125874,9.446043,8.30137,9.066667,8.142857,10.571429,6.486957,6.535065,6.4,6.06193,8.955796,7.990826,9.79078,8.723926,8.952381,6.512329,6.240595,South,East South Central,1670.103911,"[01043, 01055, 01073, 01095, 01115, 01127]",2150.0,22808.0,25384.0,0.913855,48695.0,,65385.0,99219.0,,,44934.0,35495.0,49872.0,35774,17200,18574,29814,14167,15647,...,1.563758,0.310403,1.544484,0.316726,1.580952,0.304762,1.916667,0.287879,4.227273,0.363636,1.454545,0.272727,4.177419,0.16129,4.906977,0.0,3.790123,0.246914,2.0,0.0,,,2.0,0.0,3.270142,0.43128,2.377358,0.471698,4.171429,0.390476,1.913636,0.179545,1.971698,0.273585,1.859649,0.092105,0.927969,0.614559,0.811989,0.675749,1.077058,0.535902,0.053,0.114,0.18,0.194,0.459,2.892,34.624193,0.492374,0.507626,0.423581,0.444125,0.007834,0.007246,0.002473,0.002404,0.001262,0.001556,0.000242,0.000121,0.005966,0.006658,0.051015,0.045516,0.867707,0.01508,0.004877,0.002819,0.000363,0.012624,0.096531,0.438972,33.982109,-86.567906


In [2]:
# Lower-case the header, discard the label/identifier columns that are not
# used, and rename the remaining keys to the names info_df uses.
csse_cases = (
  csse_cases
  .rename(columns=str.lower)
  .drop(columns=['iso2', 'iso3', 'code3', 'fips', 'admin2',
                 'province_state', 'country_region', 'combined_key'])
  .rename(columns={'uid': 'fips', 'long_': 'lon'})
)
# Keep only the last five characters of each UID as the FIPS code.
csse_cases['fips'] = csse_cases['fips'].map(lambda uid: uid[-5:])
csse_cases.head()

Unnamed: 0,fips,lat,lon,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20,...,8/19/20,8/20/20,8/21/20,8/22/20,8/23/20,8/24/20,8/25/20,8/26/20,8/27/20,8/28/20,8/29/20,8/30/20,8/31/20,9/1/20,9/2/20,9/3/20,9/4/20,9/5/20,9/6/20,9/7/20,9/8/20,9/9/20,9/10/20,9/11/20,9/12/20,9/13/20,9/14/20,9/15/20,9/16/20,9/17/20,9/18/20,9/19/20,9/20/20,9/21/20,9/22/20,9/23/20,9/24/20,9/25/20,9/26/20,9/27/20,9/28/20,9/29/20,9/30/20,10/1/20,10/2/20,10/3/20,10/4/20,10/5/20,10/6/20,10/7/20,10/8/20,10/9/20,10/10/20,10/11/20,10/12/20,10/13/20,10/14/20,10/15/20,10/16/20,10/17/20,10/18/20,10/19/20,10/20/20,10/21/20,10/22/20,10/23/20,10/24/20,10/25/20,10/26/20,10/27/20,10/28/20,10/29/20,10/30/20,10/31/20,11/1/20
0,1001,32.539527,-86.644082,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,4,6,6,6,6,6,7,8,10,...,1241,1240,1255,1264,1266,1286,1286,1281,1284,1296,1309,1345,1348,1354,1345,1349,1355,1371,1377,1383,1385,1398,1413,1420,1432,1442,1447,1463,1619,1624,1664,1673,1690,1691,1714,1715,1738,1757,1764,1773,1785,1787,1791,1798,1805,1818,1828,1831,1839,1852,1863,1882,1898,1905,1911,1924,1928,1949,1966,1983,1989,1999,2010,2021,2023,2030,2048,2059,2074,2082,2103,2126,2141,2159,2173
1,1003,30.72775,-87.722071,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,2,2,2,3,4,4,5,5,10,15,18,19,20,24,...,3931,3957,3997,4037,4069,4083,4100,4166,4199,4230,4307,4402,4422,4445,4458,4495,4513,4542,4569,4586,4609,4639,4678,4722,4752,4781,4800,4812,5003,5021,5033,5047,5061,5087,5124,5141,5165,5456,5477,5526,5588,5606,5640,5997,6024,6048,6073,6085,6116,6134,6141,6172,6190,6203,6220,6248,6270,6285,6333,6350,6369,6375,6405,6443,6475,6615,6637,6658,6694,6712,6743,6768,6888,6940,6966
2,1005,31.868263,-85.387129,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,613,619,622,626,629,633,628,616,614,620,624,628,628,629,617,614,616,616,616,617,617,618,618,618,620,622,626,629,809,809,824,830,835,838,848,851,857,873,882,885,886,886,896,898,902,921,921,921,923,927,927,939,942,942,944,951,950,965,968,977,981,981,988,996,997,1012,1031,1033,1033,1042,1045,1055,1056,1060,1061
3,1007,32.996421,-87.125115,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,3,4,...,490,494,501,503,510,510,511,513,515,517,523,533,535,538,541,542,545,550,555,557,562,564,566,574,576,578,581,580,612,617,619,628,632,636,635,638,642,652,654,656,657,658,664,672,675,678,686,687,691,703,708,719,726,736,738,744,744,761,771,775,785,789,791,801,811,825,828,840,843,850,856,861,866,873,878
4,1009,33.982109,-86.567906,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,4,5,5,5,5,5,6,...,901,912,930,931,934,946,967,968,979,983,1004,1025,1034,1045,1036,1037,1049,1062,1065,1070,1071,1082,1099,1109,1114,1121,1128,1139,1487,1504,1527,1542,1551,1560,1573,1580,1594,1608,1611,1617,1618,1621,1629,1634,1642,1655,1656,1662,1665,1673,1681,1689,1704,1713,1722,1742,1750,1768,1783,1807,1827,1838,1848,1873,1893,1911,1925,1932,1942,1972,1988,2009,2039,2074,2095


In [3]:


# Lower-case the header, discard everything except the UID and the per-date
# columns, then expose the UID under the name 'fips'.
csse_deaths = (
  csse_deaths
  .rename(columns=str.lower)
  .drop(columns=['lat', 'long_', 'population', 'iso2', 'iso3', 'code3',
                 'fips', 'admin2', 'province_state', 'country_region',
                 'combined_key'])
  .rename(columns={'uid': 'fips'})
)
# Keep only the last five characters of each UID as the FIPS code.
csse_deaths['fips'] = csse_deaths['fips'].map(lambda uid: uid[-5:])
csse_deaths.head()

Unnamed: 0,fips,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20,4/3/20,4/4/20,...,8/19/20,8/20/20,8/21/20,8/22/20,8/23/20,8/24/20,8/25/20,8/26/20,8/27/20,8/28/20,8/29/20,8/30/20,8/31/20,9/1/20,9/2/20,9/3/20,9/4/20,9/5/20,9/6/20,9/7/20,9/8/20,9/9/20,9/10/20,9/11/20,9/12/20,9/13/20,9/14/20,9/15/20,9/16/20,9/17/20,9/18/20,9/19/20,9/20/20,9/21/20,9/22/20,9/23/20,9/24/20,9/25/20,9/26/20,9/27/20,9/28/20,9/29/20,9/30/20,10/1/20,10/2/20,10/3/20,10/4/20,10/5/20,10/6/20,10/7/20,10/8/20,10/9/20,10/10/20,10/11/20,10/12/20,10/13/20,10/14/20,10/15/20,10/16/20,10/17/20,10/18/20,10/19/20,10/20/20,10/21/20,10/22/20,10/23/20,10/24/20,10/25/20,10/26/20,10/27/20,10/28/20,10/29/20,10/30/20,10/31/20,11/1/20
0,1001,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,22,22,22,22,22,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,24,24,24,24,24,24,25,25,25,25,25,25,25,27,27,28,27,27,27,27,27,27,28,28,28,28,28,28,28,28,28,28,28,28,29,30,30,30,31,31,31,31,31,31,31,31,31
1,1003,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,...,30,31,32,32,32,32,32,33,34,35,36,36,38,38,38,40,42,42,42,42,42,42,42,43,43,43,43,43,47,48,48,49,49,49,49,49,50,50,50,50,50,50,52,53,53,53,53,53,55,56,64,64,65,65,65,65,66,66,67,67,67,67,69,69,69,69,69,69,69,69,69,69,71,71,71
2,1005,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9
3,1007,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,5,6,6,6,6,6,6,5,5,5,6,6,7,7,7,6,6,6,6,6,6,6,6,6,6,6,6,6,9,9,10,10,10,10,10,10,10,10,10,10,10,10,11,10,10,10,10,10,10,10,12,12,12,12,12,12,13,13,13,13,13,13,14,14,14,14,14,14,14,15,15,15,15,15,15
4,1009,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,5,5,6,6,6,6,6,7,7,9,9,10,11,11,11,11,12,12,12,12,12,12,12,12,13,13,13,13,13,13,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,23,23,23,23,23,23,24,25,25,25,25,25,25,25,25,25,25,25,25


In [110]:
# Attach the CSSE latitude/longitude to each county row; the left join keeps
# every info_df row even when no CSSE row shares its fips.
info_df = pd.merge(
  info_df,
  csse_cases.loc[:, ['fips', 'lat', 'lon']],
  how='left',
  on='fips',
)
info_df.tail()

Unnamed: 0,state_fips,state,county,fips,tot_pop,tot_male,tot_female,tot_pop_white_male,tot_pop_white_female,tot_pop_black_male,tot_pop_black_female,tot_pop_native_male,tot_pop_native_female,tot_pop_asian_male,tot_pop_asian_female,tot_pop_pacific_male,tot_pop_pacific_female,tot_pop_twoplus_male,tot_pop_twoplus_female,tot_pop_hispanic_male,tot_pop_hispanic_female,tot_pop_white,tot_pop_black,tot_pop_native,tot_pop_asian,tot_pop_pacific,tot_pop_twoplus,tot_pop_hispanic,age_pop,age_male,age_female,age_pop_white_male,age_pop_white_female,age_pop_black_male,age_pop_black_female,age_pop_native_male,age_pop_native_female,age_pop_asian_male,age_pop_asian_female,age_pop_pacific_male,age_pop_pacific_female,age_pop_twoplus_male,age_pop_twoplus_female,age_pop_hispanic_male,age_pop_hispanic_female,age_pop_white,age_pop_black,age_pop_native,age_pop_asian,age_pop_pacific,age_pop_twoplus,age_pop_hispanic,region,division,area,neighbors,tot_dem,tot_gop,tot_votes,per_gop,median_income,median_income_black,median_income_native,median_income_asian,median_income_pacific,median_income_other,median_income_twoplus,median_income_hispanic,median_income_white,tot_edu_white,tot_edu_white_male,tot_edu_white_female,tot_edu_white_hsplus,tot_edu_white_male_hsplus,tot_edu_white_female_hsplus,...,edu_black,per_edu_black_nohs,edu_black_male,per_edu_black_male_nohs,edu_black_female,per_edu_black_female_nohs,edu_native,per_edu_native_nohs,edu_native_male,per_edu_native_male_nohs,edu_native_female,per_edu_native_female_nohs,edu_asian,per_edu_asian_nohs,edu_asian_male,per_edu_asian_male_nohs,edu_asian_female,per_edu_asian_female_nohs,edu_pacific,per_edu_pacific_nohs,edu_pacific_male,per_edu_pacific_male_nohs,edu_pacific_female,per_edu_pacific_female_nohs,edu_other,per_edu_other_nohs,edu_other_male,per_edu_other_male_nohs,edu_other_female,per_edu_other_female_nohs,edu_twoplus,per_edu_twoplus_nohs,edu_twoplus_male,per_edu_twoplus_male_nohs,edu_twoplus_female,per_edu_twoplus_female_nohs,edu_hispanic,per_edu_his
panic_nohs,edu_hispanic_male,per_edu_hispanic_male_nohs,edu_hispanic_female,per_edu_hispanic_female_nohs,never,rarely,sometimes,frequently,always,mask,pop_density,per_male,per_female,per_pop_white_male,per_pop_white_female,per_pop_black_male,per_pop_black_female,per_pop_native_male,per_pop_native_female,per_pop_asian_male,per_pop_asian_female,per_pop_pacific_male,per_pop_pacific_female,per_pop_twoplus_male,per_pop_twoplus_female,per_pop_hispanic_male,per_pop_hispanic_female,per_pop_white,per_pop_black,per_pop_native,per_pop_asian,per_pop_pacific,per_pop_twoplus,per_pop_hispanic,per_votes,lat,lon
3137,56,Wyoming,Sweetwater,56037,42343,21808,20535,17223,16338,298,204,177,158,183,227,26,29,350,358,3551,3221,33561,502,335,410,55,708,6772,7.942493,7.886051,8.002435,8.187366,8.316012,7.614094,7.990196,7.960452,8.310127,6.431694,8.026432,7.038462,7.689655,6.022857,5.368715,6.708533,6.6914,8.249993,7.766932,8.125373,7.314634,7.381818,5.69209,6.700384,West,Mountain,27005.754244,"[56007, 56013, 56023, 56035, 56041]",3233.0,12153.0,16661.0,0.789874,73008.0,,62188.0,72614.0,138053.0,74189.0,55284.0,61921.0,76469.0,23640,12157,11483,22176,11404,10772,...,4.245714,0.005714,4.10084,0.0,4.553571,0.017857,1.651543,0.174229,1.724138,0.137931,1.551724,0.224138,3.966019,0.169903,4.583333,0.0,3.712329,0.239726,2.0,0.0,2.0,0.0,,,2.09816,0.134969,1.590698,0.204651,3.081081,0.0,3.342412,0.0,2.785124,0.0,3.838235,0.0,2.143614,0.275305,1.724343,0.311353,2.624929,0.233921,0.061,0.295,0.23,0.146,0.268,2.265,1.567925,0.515032,0.484968,0.40675,0.385849,0.007038,0.004818,0.00418,0.003731,0.004322,0.005361,0.000614,0.000685,0.008266,0.008455,0.083863,0.076069,0.792599,0.011856,0.007912,0.009683,0.001299,0.016721,0.159932,0.393477,41.659439,-108.882788
3138,56,Wyoming,Teton,56039,23464,12142,11322,9832,9168,87,58,33,41,135,243,11,7,160,135,1884,1670,19000,145,74,378,18,295,3554,8.657688,8.632103,8.685126,9.090419,9.172339,9.16092,8.896552,9.424242,8.878049,7.711111,7.452675,10.090909,9.0,6.05625,6.362963,6.478238,6.364072,9.129947,9.055172,9.121622,7.544974,9.666667,6.19661,6.424592,West,Mountain,10351.784301,"[56013, 56023, 56029, 56035]",7313.0,3920.0,12176.0,0.348972,83831.0,,,98125.0,,36433.0,17188.0,45361.0,95222.0,14776,7716,7060,14565,7565,7000,...,4.470588,0.235294,2.0,0.0,5.0,0.285714,2.0,0.0,2.0,0.0,2.0,0.0,4.839286,0.232143,7.0,0.0,3.441176,0.382353,0.0,1.0,0.0,1.0,,,2.201691,0.264493,1.311258,0.344371,2.712928,0.218631,6.903846,0.0,6.903846,0.0,,,1.986849,0.334145,1.184049,0.407975,2.717209,0.266977,0.095,0.157,0.16,0.247,0.34,2.578,2.266662,0.517474,0.482526,0.419025,0.390726,0.003708,0.002472,0.001406,0.001747,0.005753,0.010356,0.000469,0.000298,0.006819,0.005753,0.080293,0.071173,0.809751,0.00618,0.003154,0.01611,0.000767,0.012572,0.151466,0.518923,43.935225,-110.58908
3139,56,Wyoming,Uinta,56041,20226,10224,10002,8935,8722,64,62,69,75,36,56,11,13,182,130,927,944,17657,126,144,92,24,312,1871,8.025512,7.988165,8.063687,8.168103,8.313575,7.375,6.016129,8.014493,8.853333,7.0,8.607143,4.545455,7.076923,7.192308,5.892308,6.529666,6.106992,8.239961,6.706349,8.451389,7.978261,5.916667,6.650641,6.316408,West,Mountain,5391.631764,"[56023, 56037]",1202.0,6154.0,8053.0,0.836596,58235.0,,68125.0,,,55701.0,39205.0,39816.0,61330.0,11678,5807,5871,10942,5317,5625,...,4.7,0.15,7.0,0.0,1.25,0.375,2.435644,0.079208,2.8,0.145455,2.0,0.0,7.0,0.0,,,7.0,0.0,,,,,,,1.451777,0.274112,1.6875,0.15625,1.014493,0.492754,2.12,0.04,2.377778,0.005556,1.847059,0.076471,1.793727,0.214717,1.818605,0.160465,1.766917,0.273183,0.098,0.278,0.154,0.207,0.264,2.263,3.751369,0.505488,0.494512,0.441758,0.431227,0.003164,0.003065,0.003411,0.003708,0.00178,0.002769,0.000544,0.000643,0.008998,0.006427,0.045832,0.046673,0.872985,0.00623,0.00712,0.004549,0.001187,0.015426,0.092505,0.398151,41.287818,-110.547578
3140,56,Wyoming,Washakie,56043,7805,3963,3842,3266,3151,23,15,19,34,22,33,0,0,68,66,565,543,6417,38,53,55,0,134,1108,9.065343,8.91673,9.218636,9.261788,9.638527,9.913043,8.466667,9.0,9.764706,8.409091,9.484848,,,7.029412,7.318182,7.125664,6.983425,9.446782,9.342105,9.490566,9.054545,,7.171642,7.055957,West,Mountain,5798.138762,"[56003, 56013, 56017, 56019, 56025, 56029]",532.0,2911.0,3715.0,0.845484,53426.0,,,,,62054.0,36118.0,50035.0,54815.0,4848,2425,2423,4418,2096,2322,...,,,,,,,5.857143,0.0,5.857143,0.0,,,,,,,,,,,,,,,0.767442,0.616279,0.639175,0.680412,0.933333,0.533333,2.138095,0.038095,1.777778,0.111111,2.326087,0.0,1.985759,0.240506,1.716049,0.32716,2.269481,0.149351,0.204,0.155,0.069,0.285,0.287,2.296,1.346122,0.507751,0.492249,0.41845,0.403716,0.002947,0.001922,0.002434,0.004356,0.002819,0.004228,0.0,0.0,0.008712,0.008456,0.072389,0.069571,0.822165,0.004869,0.006791,0.007047,0.0,0.017168,0.14196,0.475977,43.904516,-107.680187
3141,56,Wyoming,Weston,56045,6927,3624,3303,3273,2963,30,15,62,45,30,83,1,0,73,67,155,130,6236,45,107,113,1,140,285,9.173668,9.06319,9.294883,9.175985,9.507256,8.433333,8.333333,8.145161,8.222222,11.666667,9.626506,9.0,,6.616438,5.791045,7.819355,6.530769,9.333387,8.4,8.17757,10.168142,9.0,6.221429,7.231579,West,Mountain,6210.804116,"[56005, 56009, 56011, 56027]",294.0,2898.0,3334.0,0.907895,52867.0,,,,,,,37870.0,55032.0,4689,2450,2239,4381,2286,2095,...,,,,,,,1.2,0.4,2.0,0.0,0.0,1.0,1.815029,0.323699,1.26087,0.369565,2.015748,0.307087,2.0,0.0,2.0,0.0,,,,,,,,,3.634615,0.0,2.0,0.0,4.125,0.0,1.673684,0.242105,1.6125,0.2875,2.0,0.0,0.142,0.129,0.148,0.207,0.374,2.542,1.115315,0.52317,0.47683,0.472499,0.427746,0.004331,0.002165,0.00895,0.006496,0.004331,0.011982,0.000144,0.0,0.010538,0.009672,0.022376,0.018767,0.900245,0.006496,0.015447,0.016313,0.000144,0.020211,0.041143,0.481305,43.839612,-104.567488


## save info_df

In [111]:
# Persist info_df through the notebook's save_df helper (defined outside this
# section); csv_=True presumably also writes a CSV copy -- confirm against the helper.
save_df(info_df, 'info_df', csv_=True)

## convert to long-form data

This section repeats some of the code above, but it provides the basis for automatically fetching CSSE data and updating our site.

In [13]:
# Unique FIPS codes present in info_df; make_csse_df uses this set to filter rows.
fips = {code for code in info_df['fips']}

def preprocess_csse(df, name):
  """Reshape a wide CSSE US time-series table into a long-form frame.

  The caller's frame is not modified: all work happens on a copy.

  Parameters
  ----------
  df : pd.DataFrame
      Wide table with one row per location. It must contain (in any letter
      case) a `UID` column plus the metadata columns listed in
      `columns_to_drop`; every remaining column is treated as a value column.
      A `population` column is dropped when present.
  name : str
      Name given to the single value column of the result.

  Returns
  -------
  pd.DataFrame
      One column called `name`, indexed by a two-level MultiIndex of
      (original column label, fips). The first level is unnamed.
  """
  # Copy before touching the headers so the lower-casing does not leak back
  # into the frame the caller passed in.
  df = df.copy()
  df.columns = df.columns.str.lower()

  columns_to_drop = ['lat', 'long_', 'iso2', 'iso3', 'code3', 'fips',
                     'admin2', 'province_state', 'country_region',
                     'combined_key']
  # 'population' is optional: drop it only when the table carries it.
  if 'population' in df.columns:
    columns_to_drop.append('population')

  # The original 'fips' column is discarded; the code is rebuilt from 'uid'.
  df = df.drop(columns=columns_to_drop).rename(columns={'uid': 'fips'})

  # Keep the last five characters of the UID as the FIPS code. Casting to str
  # first keeps this working when the UID column was parsed as integers.
  df['fips'] = df['fips'].astype(str).str[-5:]

  # convert to long-form: one row per (value column, fips) pair
  return df.set_index('fips').unstack().to_frame(name)


def make_csse_df(valid_fips=None):
  """Download the CSSE US case and death time series as one long-form table.

  Both CSV files are fetched from the CSSEGISandData/COVID-19 GitHub
  repository, reshaped with `preprocess_csse`, joined on (date, fips) and
  extended with day-over-day differences.

  Parameters
  ----------
  valid_fips : collection of str, optional
      FIPS codes to keep. Defaults to the notebook-level `fips` set, which is
      what this function always used before the parameter existed.

  Returns
  -------
  pd.DataFrame
      Columns: `date` (datetime), `fips` (str) and the integer columns
      `cases`, `deaths`, `new_cases`, `new_deaths`, `new_cases_c`,
      `new_deaths_c`. The `new_*` columns are the change from the previous
      row of the same fips (0 where there is no previous row); the `*_c`
      variants have negative changes clipped to 0.
  """
  if valid_fips is None:
    valid_fips = fips         # notebook-level set built from info_df

  base_url = ('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
              'csse_covid_19_data/csse_covid_19_time_series/')
  sources = (('cases', 'time_series_covid19_confirmed_US.csv'),
             ('deaths', 'time_series_covid19_deaths_US.csv'))

  long_frames = []
  for name, filename in sources:
    with urlopen(base_url + filename) as response:
      # UID must stay a string: preprocess_csse slices the FIPS code out of it
      wide = pd.read_csv(response, dtype={'UID': str})
    long_frames.append(preprocess_csse(wide, name))

  df = pd.concat(long_frames, axis=1)           # merge along multi-index

  df.index = df.index.set_levels(               # change date to pd.datetime
    [pd.to_datetime(df.index.levels[0]),
     df.index.levels[1]]
  )

  df = df.reset_index().rename(columns={'level_0': 'date'})

  # .copy() so the column assignments below act on an independent frame
  # instead of a slice of the unfiltered table (avoids SettingWithCopyWarning).
  df = df[df['fips'].isin(valid_fips)].copy()

  # The per-county difference below depends on rows being in chronological
  # order. A stable sort makes that explicit and leaves the within-date row
  # order (and the index labels) untouched when the data is already ordered.
  df = df.sort_values('date', kind='mergesort')

  daily = df.groupby(by='fips')[['cases', 'deaths']].diff()
  df['new_cases'] = daily['cases']
  df['new_deaths'] = daily['deaths']
  # clipped variants: negative changes are replaced by 0
  df['new_cases_c'] = df['new_cases'].clip(lower=0)
  df['new_deaths_c'] = df['new_deaths'].clip(lower=0)

  # the first row of every fips has no predecessor -> NaN -> 0
  df = df.fillna(0)
  num_cols = df.select_dtypes(include='number').columns
  df[num_cols] = df[num_cols].astype(int)

  return df

In [14]:
# Build the long-form CSSE table. make_csse_df() downloads both CSV files
# over the network each time this cell is run.
csse_df = make_csse_df()

In [12]:
# Sanity check: list the numeric columns of csse_df.
# NOTE(review): this cell's execution count is lower than that of the cell
# creating csse_df above, so it was run out of order -- re-run top to bottom.
csse_df.select_dtypes(include='number').columns

Index(['cases', 'deaths', 'new_cases', 'new_deaths', 'new_cases_c',
       'new_deaths_c'],
      dtype='object')

In [15]:
# Summary statistics of the count columns. In the recorded output the minima
# of new_cases / new_deaths are negative, while the clipped *_c columns
# bottom out at 0.
csse_df.describe()

Unnamed: 0,cases,deaths,new_cases,new_deaths,new_cases_c,new_deaths_c
count,898612.0,898612.0,898612.0,898612.0,898612.0,898612.0
mean,960.684341,32.944827,10.15188,0.254911,10.199202,0.259708
std,5594.133569,235.884242,62.887945,3.408457,62.775972,3.403056
min,0.0,0.0,-2545.0,-54.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0
50%,40.0,0.0,0.0,0.0,0.0,0.0
75%,334.0,7.0,4.0,0.0,4.0,0.0
max,310595.0,7406.0,14129.0,1553.0,14129.0,1553.0


In [16]:
# 1553 is the maximum of new_deaths in the describe() output recorded in this
# notebook; look up the row behind that value.
csse_df[csse_df['new_deaths'] == 1553]

Unnamed: 0,date,fips,cases,deaths,new_cases,new_deaths,new_cases_c,new_deaths_c
392703,2020-05-18,36047,52485,6663,187,1553,187,1553


In [17]:
# All rows with a May date for FIPS 36047, the county behind the 1553 value.
csse_df.loc[csse_df['date'].dt.month.eq(5) & csse_df['fips'].eq('36047')]

Unnamed: 0,date,fips,cases,deaths,new_cases,new_deaths,new_cases_c,new_deaths_c
335923,2020-05-01,36047,45519,4576,647,55,647,55
339263,2020-05-02,36047,46275,4622,756,46,756,46
342603,2020-05-03,36047,46839,4668,564,46,564,46
345943,2020-05-04,36047,47183,4710,344,42,344,42
349283,2020-05-05,36047,47579,4759,396,49,396,49
352623,2020-05-06,36047,47974,4797,395,38,395,38
355963,2020-05-07,36047,48550,4842,576,45,576,45
359303,2020-05-08,36047,48998,4885,448,43,448,43
362643,2020-05-09,36047,49461,4913,463,28,463,28
365983,2020-05-10,36047,49817,4939,356,26,356,26
