In [1]:
import numpy as np
import pandas as pd
#import os
#import warnings
#warnings.filterwarnings('ignore')
#import matplotlib.pyplot as plt

In [2]:
# Source tables, named by provider and vintage.

# 2003 OMB CBSA/CSA definitions and the BLS QCEW county crosswalk
# (the crosswalk is read with an explicit ISO-8859-1 encoding).
omb03 = pd.read_csv('omb-cbsa-csa_2003.csv')
bls13 = pd.read_csv('qcew-county-msa-csa-crosswalk.csv', encoding='ISO-8859-1')

# 2020 vintages: Census, BEA, and the separate metro / micro county lists.
cen20 = pd.read_csv('census_2020.csv')
bea20 = pd.read_csv('BEA_2020.csv')
met20 = pd.read_csv('METROS_2020.csv')
mic20 = pd.read_csv('MICROS_2020.csv')

In [3]:
# Combine Kurt's metro and micro county lists into one frame (kwm20).
# A county must not appear in both lists. If one does, stop with a clear error
# rather than printing a message and then failing later on an undefined (or
# stale, from a previous run) `kwm20`.
overlap = met20.loc[met20['COUNTY_FIPS'].isin(mic20['COUNTY_FIPS']), 'COUNTY_FIPS']
if not overlap.empty:
    raise ValueError(
        'metro/micro county overlap in Kurt\'s doc: '
        + ', '.join(overlap.astype(str).unique())
    )
kwm20 = pd.concat([met20, mic20])

# The area type is the first five characters after the opening parenthesis in
# MSA_NAME, e.g. '... (Micropolitan Statistical Area)' -> 'Micro'.
kwm20['type_kurt20'] = kwm20['MSA_NAME'].str.split('(').str[1].str[:5]

# Use a 5-character, zero-padded string 'fips' key so it joins with the other tables.
kwm20 = kwm20.rename(columns={'COUNTY_FIPS':'fips'})
kwm20['fips'] = kwm20['fips'].astype(str).str.zfill(5)

In [4]:
# How closely do Kurt's and BEA's typologies agree?
# Give BEA the same zero-padded string key and a 5-character area type taken
# from the text after the opening parenthesis in 'msa'.
bea20['fips'] = bea20['fips'].astype(str).str.zfill(5)
bea20['type_bea20'] = bea20['msa'].str.split('(').str[1].str[:5]

# Left join keeps every BEA county; rows whose two types differ (including a
# missing value on either side) are displayed.
# Finding: Kurt's sheet is missing Brown, SD, a micropolitan county.
temp = pd.merge(bea20, kwm20, how='left', on='fips')
temp.loc[temp['type_bea20'].ne(temp['type_kurt20'])]

Unnamed: 0,msa_code,msa,fips,county_name,type_bea20,MSA_CODE,MSA_NAME,COUNTY_NAME,type_kurt20
1160,10100,"Aberdeen, SD (Micropolitan Statistical Area)",46013,"Brown, SD",Micro,,,,


In [5]:
# Drop rows with no metropolitan/micropolitan designation.
# `subset` is passed as a list, which every pandas version accepts (a bare
# string is only supported by newer releases). The explicit .copy() makes the
# filtered frame independent, so the column assignments below are not flagged
# with SettingWithCopyWarning on pandas versions that track filtered frames.
cen20 = cen20.dropna(subset=['Metropolitan/Micropolitan Statistical Area']).copy()

# Build the 5-character county key: 2-digit state code + 3-digit county code.
# The int cast strips any float formatting (e.g. 1.0) before zero-padding.
cen20['FIPS State Code'] = cen20['FIPS State Code'].astype(int).astype(str).str.zfill(2)
cen20['FIPS County Code'] = cen20['FIPS County Code'].astype(int).astype(str).str.zfill(3)
cen20['fips'] = cen20['FIPS State Code'] + cen20['FIPS County Code']

# First five characters of the designation give the area type label.
cen20['type_census20'] = cen20['Metropolitan/Micropolitan Statistical Area'].str[:5]

In [6]:
# County codes become 5-character, zero-padded strings.
bls13['County Code'] = bls13['County Code'].astype(str).str.zfill(5)

# Exclude codes that begin with '7' (presumably non-state areas -- TODO confirm),
# then keep only the columns needed downstream under their shared names.
cols = ['County Code', 'County Title', 'MSA Type']
mask = ~bls13['County Code'].str.startswith('7')
bls13 = bls13.loc[mask, cols].rename(
    columns={'County Code':'fips', 'MSA Type':'type_bls13'}
)

In [7]:
# Order by county code while it still has its original dtype, then store it
# as a 5-character, zero-padded string.
omb03 = omb03.sort_values(by='FIPS').reset_index(drop=True)
omb03['FIPS'] = omb03['FIPS'].astype(str).str.zfill(5)

# Status 1 is labelled 'Metro'; every other value is labelled 'Micro'.
is_metro = omb03['Status, 1=metro 2=micro'].eq(1)
omb03['MSA Type_03'] = np.where(is_metro, 'Metro', 'Micro')

# Exclude codes that begin with '7' (same filter as applied to the BLS table)
# and keep only the columns needed downstream under their shared names.
cols2 = ['FIPS', 'Component Name','State', 'MSA Type_03']
mask = ~omb03['FIPS'].str.startswith('7')
omb03 = omb03.loc[mask, cols2].rename(
    columns={'FIPS':'fips', 'MSA Type_03':'type_census03'}
)

In [8]:
# Start from the BLS county list and left-join each other source on 'fips',
# so every BLS county is kept even when a source has no row for it.
df = (
    bls13
    .merge(omb03, how='left', on='fips')
    .merge(kwm20, how='left', on='fips')
    .merge(cen20, how='left', on='fips')
)

# The county key plus the four classification columns, oldest to newest.
cols3 = ['fips', 'type_census03', 'type_bls13', 'type_census20', 'type_kurt20']
df[cols3]

Unnamed: 0,fips,type_census03,type_bls13,type_census20,type_kurt20
0,01001,Metro,Metro,Metro,Metro
1,01003,Micro,Metro,Metro,Metro
2,01005,,,Micro,Micro
3,01007,Metro,Metro,Metro,Metro
4,01009,Metro,Metro,Metro,Metro
...,...,...,...,...,...
3165,56037,Micro,Micro,Micro,Micro
3166,56039,Micro,Micro,Micro,Micro
3167,56041,Micro,Micro,Micro,Micro
3168,56043,,,,


In [13]:
# Sanity-check the join: compare the first word of the BLS county title with
# the first word of the 2003 component name.
# Rows with a null in any displayed column (e.g. counties absent from the 03
# typology) are dropped; the remaining discrepancies are spelling differences only.
bls_first_word = df['County Title'].str.split(' ').str[0]
omb_first_word = df['Component Name'].str.split(' ').str[0]
df.loc[bls_first_word != omb_first_word, ['County Title', 'Component Name'] + cols3].dropna()

Unnamed: 0,County Title,Component Name,fips,type_census03,type_bls13,type_census20,type_kurt20
664,"LaSalle County, Illinois",La Salle County,17099,Micro,Micro,Micro,Micro
1824,"Doña Ana County, New Mexico",Dona Ana County,35013,Metro,Metro,Metro,Metro


In [14]:
# State name = the text after the last ', ' in the BLS county title.
# This overwrites the 'State' column that came in with the omb03 merge.
df['State'] = df['County Title'].str.rsplit(', ', n=1).str[-1]

We've got Metro, Micro, and null values. According to HHS: _The Census does not actually define "rural." "Rural" encompasses all population, housing, and territory not included within an urban area. Whatever is not urban is considered rural._

In [15]:
# A county with no metro/micro label in a given typology is treated as Rural.
# cols3[0] is the 'fips' key, so only the classification columns are filled.
for col in cols3[1:]:
    df[col] = df[col].fillna('Rural')

All of these counties are counted as Metro in our BLS data, but in classifying them as urban, are we taking away from rural job growth?

To rectify this properly, we would have to consider what led to this status change: was it population growth, migration patterns, political boundary shifting? With this info, we could better track rural county development.

In [None]:
# Export the county key, the four typologies, and the state / county names.
# NOTE(review): index_label=False still writes the row index but leaves its
# header field out; confirm the downstream reader expects that layout.
export_cols = cols3 + ['State', 'County Title']
df[export_cols].to_csv('typology.csv', index_label=False)
# open in BLS3

#### explore data

In [17]:
# Brown County, SD (fips 46013) is present in the merged frame; only Kurt's
# sheet lacks it, which is why its type_kurt20 reads Rural.
df.loc[df['fips'].eq('46013')]

Unnamed: 0,fips,County Title,type_bls13,Component Name,State,type_census03,MSA_CODE,MSA_NAME,COUNTY_NAME,type_kurt20,...,CBSA Title,Metropolitan/Micropolitan Statistical Area,Metropolitan Division Title,CSA Title,County/County Equivalent,State Name,FIPS State Code,FIPS County Code,Central/Outlying County,type_census20
2389,46013,"Brown County, South Dakota",Micro,Brown County,South Dakota,Micro,,,,Rural,...,"Aberdeen, SD",Micropolitan Statistical Area,,,Brown County,South Dakota,46,13,Central,Micro


In [103]:
# Collect every county whose four classifications do not all agree.
# 'align' strings together the first two letters of each typology in
# chronological order, e.g. 'MeMeMeMe' for a county that is Metro throughout.
prefixes = [
    df[name].str[0:2]
    for name in ('type_census03', 'type_bls13', 'type_census20', 'type_kurt20')
]
df['align'] = prefixes[0] + prefixes[1] + prefixes[2] + prefixes[3]

# The three signatures that mean "same label in all four sources".
searchfor = ['RuRuRuRu', 'MiMiMiMi', 'MeMeMeMe']
is_uniform = df['align'].str.contains('|'.join(searchfor))
het = df[~is_uniform]

print('Total Counties:', len(df))
print('Mixed Classifications:', len(het))

Total Counties: 3170
Mixed Classifications: 351


In [102]:
# Among the mixed counties, count label changes between consecutive vintages.
changed_03_13 = het['type_census03'].ne(het['type_bls13'])
changed_13_20 = het['type_bls13'].ne(het['type_census20'])
print('2003-13:', int(changed_03_13.sum()))
print('2013-20:', int(changed_13_20.sum()))

2003-13: 234
2013-20: 140


In [98]:
# How many mixed counties disagree between the two 2020 sources (Census vs.
# Kurt's sheet)? These point to errors within same-year data, not real change.
int(het['type_census20'].ne(het['type_kurt20']).sum())

45

Classification scheme selection is hugely important and needs to be addressed. It won't change the analysis much, but it will alter the exact numbers.