In [1]:
import numpy as np
import pandas as pd

In [2]:
# County FIPS codes for every US state (one row per county)
fips_url = 'https://raw.githubusercontent.com/kjhealy/us-county/master/data/census/fips-by-state.csv'
fips = pd.read_csv(fips_url, encoding='unicode_escape')

# Census Bureau region / division assigned to each state
url = 'https://raw.githubusercontent.com/cphalpert/census-regions/master/us%20census%20bureau%20regions%20and%20divisions.csv'
r = pd.read_csv(url)

In [3]:
# assign each fips a region and division (left join keeps every county row),
# then drop the lowercase `state` join key, which is redundant with
# `State Code` for every matched row
rf = (
    fips
    .merge(r, how='left', left_on='state', right_on='State Code')
    .drop(columns='state')
)

# fix fips: zero-pad to the 5-character string form
rf['fips'] = rf['fips'].astype(str).str.zfill(5)

# fixed seed so the preview shows the same rows on every Restart & Run All
rf.sample(5, random_state=0)

Unnamed: 0,fips,name,State,State Code,Region,Division
1259,26057,Gratiot County,Michigan,MI,Midwest,East North Central
2185,40109,Oklahoma County,Oklahoma,OK,South,West South Central
501,13233,Polk County,Georgia,GA,South,South Atlantic
2625,48205,Hartley County,Texas,TX,South,West South Central
2796,49039,Sanpete County,Utah,UT,West,Mountain


In [4]:
# load Metro/Micro delineations file
#
# Alternative delineation sources, currently disabled:
#omb03 = pd.read_csv('omb-cbsa-csa_2003.csv')
#bls13 = pd.read_csv('qcew-county-msa-csa-crosswalk.csv', encoding = 'ISO-8859-1') # BLS13 is the "master" sheet, with null values for non Metro/Micro counties
#cen20 = pd.read_csv('census_2020.csv')
#met20 = pd.read_csv('METROS_2020.csv')
#mic20 = pd.read_csv('MICROS_2020.csv')

# BEA 2020 delineations are the source used by the rest of the notebook
bea20_path = 'typology/BEA_2020.csv'
bea20 = pd.read_csv(bea20_path)

In [5]:
# Split each raw MSA label on "(": piece 0 is the area name, piece 1 is the
# parenthesised area type.
msa_parts = bea20['msa'].str.split('(')

# Build the processed frame in a single expression. `msa_code` is dropped
# first, so re-running this cell raises KeyError *before* `bea20` is rebound,
# instead of silently overwriting `type_bea20` with NaN from already-stripped
# names.
bea20 = (
    bea20
    .drop(columns='msa_code')
    .assign(
        # create normal fips: zero-padded 5-character strings
        fips=bea20['fips'].astype(str).str.zfill(5),
        # strip extraneous text from MSA name, including the whitespace
        # that preceded the "("
        msa=msa_parts.str[0].str.strip(),
        # process BEA delineations: first five characters of the
        # parenthesised type (NaN when the label has no "(")
        type_bea20=msa_parts.str[1].str[:5],
    )
)

In [6]:
# Combine the master fips/regions/divisions table with the Metro/Micro
# delineations. The outer join keeps rows from both sides, so BEA rows whose
# fips is absent from `rf` survive with nulls in the `rf` columns.
df = (
    rf
    .merge(bea20, how='outer', on='fips')
    .drop(columns='county_name')
)
df

Unnamed: 0,fips,name,State,State Code,Region,Division,msa,type_bea20
0,01001,Autauga County,Alabama,AL,South,East South Central,"Montgomery, AL",Metro
1,01003,Baldwin County,Alabama,AL,South,East South Central,"Daphne-Fairhope-Foley, AL",Metro
2,01005,Barbour County,Alabama,AL,South,East South Central,"Eufaula, AL-GA",Micro
3,01007,Bibb County,Alabama,AL,South,East South Central,"Birmingham-Hoover, AL",Metro
4,01009,Blount County,Alabama,AL,South,East South Central,"Birmingham-Hoover, AL",Metro
...,...,...,...,...,...,...,...,...
3159,51921,,,,,,"Winchester, VA-WV",Metro
3160,51955,,,,,,"Big Stone Gap, VA",Micro
3161,51939,,,,,,"Danville, VA",Micro
3162,51929,,,,,,"Martinsville, VA",Micro


In [7]:
# Rows that carry a fips but no county name did not match the master fips
# list; all unmatched counties are special BEA combo areas.
has_fips = df['fips'].notna()
missing_name = df['name'].isna()
df[has_fips & missing_name]

Unnamed: 0,fips,name,State,State Code,Region,Division,msa,type_bea20
3143,51933,,,,,,"Blacksburg-Christiansburg, VA",Metro
3144,51901,,,,,,"Charlottesville, VA",Metro
3145,51947,,,,,,"Harrisonburg, VA",Metro
3146,15901,,,,,,"Kahului-Wailuku-Lahaina, HI",Metro
3147,51953,,,,,,"Kingsport-Bristol, TN-VA",Metro
3148,51911,,,,,,"Lynchburg, VA",Metro
3149,51918,,,,,,"Richmond, VA",Metro
3150,51941,,,,,,"Richmond, VA",Metro
3151,51944,,,,,,"Roanoke, VA",Metro
3152,51907,,,,,,"Staunton, VA",Metro


In [8]:
# count which states are most affected
# The state code is the last two characters of the MSA name. Strip first so
# the labels do not depend on stray surrounding whitespace in `msa`; for
# multi-state areas (e.g. "TN-VA") this counts the last-listed state.
unmatched = df['fips'].notna() & df['name'].isna()
df.loc[unmatched, 'msa'].str.strip().str[-2:].value_counts()

VA     12
WV      4
NC      3
HI      1
WI      1
Name: msa, dtype: int64

In [9]:
# drop these in accordance w ECH methodology
unmatched_idx = df.index[df['fips'].notna() & df['name'].isna()]
df = df.drop(index=unmatched_idx)
df.shape

(3143, 8)

#### Rurals
According to HHS: _The Census does not actually define "rural." "Rural" encompasses all population, housing, and territory not included within an urban area.  Whatever is not urban is considered rural._

Therefore, we label every county whose typology is _null_ (i.e. it was not matched to a Metro or Micro area) as _Rural_:

In [10]:
# Counties with no typology after the merge are labelled Rural
type_col = 'type_bea20'
df[type_col] = df[type_col].fillna('Rural')
df[type_col].value_counts()

Rural    1345
Metro    1143
Micro     655
Name: type_bea20, dtype: int64

In [11]:
# export df (index_label=False writes the index column without a header label)
out_path = 'typology/region_delineation.csv'
df.to_csv(out_path, index_label=False)

## OR: add an alternative regional classification before exporting

In [12]:
# Alternative regional classification, keyed by full state name
reg = pd.read_csv('typology/regions.csv')

# Attach the alternative region to every county row
data = df.merge(reg, how='left', left_on='State', right_on='STATE')

# DC gets its alternative region assigned by hand
is_dc = data['State'] == 'District of Columbia'
data.loc[is_dc, 'REGION'] = 'Mid-Atlantic'

# Rename, then place Region_alt between the first six columns and the two
# columns that follow them
data = data.rename(columns={'REGION': 'Region_alt'})
leading_cols = list(data.columns[:6])
trailing_cols = list(data.columns[6:8])
cols = [*leading_cols, 'Region_alt', *trailing_cols]

# export
data[cols].to_csv('typology/typology.csv', index_label=False)

In [13]:
# preview the merged frame, including the new Region_alt column
data

Unnamed: 0,fips,name,State,State Code,Region,Division,msa,type_bea20,STATE,Region_alt
0,01001,Autauga County,Alabama,AL,South,East South Central,"Montgomery, AL",Metro,Alabama,South
1,01003,Baldwin County,Alabama,AL,South,East South Central,"Daphne-Fairhope-Foley, AL",Metro,Alabama,South
2,01005,Barbour County,Alabama,AL,South,East South Central,"Eufaula, AL-GA",Micro,Alabama,South
3,01007,Bibb County,Alabama,AL,South,East South Central,"Birmingham-Hoover, AL",Metro,Alabama,South
4,01009,Blount County,Alabama,AL,South,East South Central,"Birmingham-Hoover, AL",Metro,Alabama,South
...,...,...,...,...,...,...,...,...,...,...
3138,56037,Sweetwater County,Wyoming,WY,West,Mountain,"Rock Springs, WY",Micro,Wyoming,Interior Northwest
3139,56039,Teton County,Wyoming,WY,West,Mountain,"Jackson, WY-ID",Micro,Wyoming,Interior Northwest
3140,56041,Uinta County,Wyoming,WY,West,Mountain,"Evanston, WY",Micro,Wyoming,Interior Northwest
3141,56043,Washakie County,Wyoming,WY,West,Mountain,,Rural,Wyoming,Interior Northwest
