In [None]:
#default_exp OMB_counties

In [1]:
# The OMB urban concept has two categories: Metropolitan Areas and Micropolitan Areas, both created from counties
# according to criteria described here: 
# https://www.govinfo.gov/content/pkg/FR-2010-06-28/pdf/2010-15605.pdf.

# Building on the file improvements made in the first notebook, this notebook defines and maps counties that
# are metropolitan, micropolitan, or neither ("rural").

In [2]:
import pandas as pd

In [3]:
# Year of the data file to process; it is also used to name the output file.
year = 2017
# Path of the per-year input CSV.
data_file = 'data/df_%d.csv' % (year,)
# dtype=object stops pandas from inferring numeric types, so code columns
# (compared later against strings such as '00000') keep their leading zeros.
df = pd.read_csv(data_file, dtype=object)

In [4]:
# List each column's dtype; the file was read with dtype=object, so every column should report object.
df.dtypes

Company                         object
Address Line 1                  object
City                            object
State                           object
ZipCode                         object
County Code                     object
Primary SIC Code                object
SIC6_Descriptions               object
Primary NAICS Code              object
NAICS8 Descriptions             object
Employee Size (5) - Location    object
Sales Volume (9) - Location     object
Business Status Code            object
Industry Specific First Byte    object
Year Established                object
ABI                             object
Subsidiary Number               object
Parent Number                   object
Parent Actual Employee Size     object
Parent Actual Sales Volume      object
Census Tract                    object
Census Block                    object
Latitude                        object
Longitude                       object
CBSA Code                       object
CBSA Level               

In [5]:
# Number of records whose CBSA Level is missing.
int(df['CBSA Level'].isnull().sum())

0

In [6]:
# Number of records whose CBSA Code is the all-zero code '00000'.
int(df['CBSA Code'].eq('00000').sum())

883453

In [7]:
# Number of records that have both CBSA Code '00000' and CBSA Level '0'.
int((df['CBSA Code'].eq('00000') & df['CBSA Level'].eq('0')).sum())

883453

In [8]:
# Every record with CBSA Code '00000' also has CBSA Level '0', so the two are not independent criteria of
# rurality. Testing CBSA Level == '0' alone will do.

In [9]:
# Human-readable label for each CBSA Level code (codes are strings).
words = {
    '0': 'rural',
    '1': 'micropolitan',
    '2': 'metropolitan',
}


def make_word(code):
    """Return the descriptive label for a CBSA Level code.

    Parameters
    ----------
    code : str
        One of the keys of ``words`` ('0', '1' or '2').

    Returns
    -------
    str
        'rural', 'micropolitan' or 'metropolitan'.

    Raises
    ------
    KeyError
        If ``code`` is not a key of ``words``.
    """
    label = words[code]
    return label

In [10]:
# Add a descriptive label column by translating each CBSA Level code with make_word.
df['CBSA Level desc'] = df['CBSA Level'].map(make_word)

In [11]:
def is_it_rural(code):
    """Return the string flag '1' when the CBSA Level code is '0', otherwise '0'.

    The comparison is against the string '0', so any other value
    (including the integer 0) yields '0'.
    """
    return '1' if code == '0' else '0'

In [12]:
# Flag each record: '1' when its CBSA Level is '0' (rural), otherwise '0'.
df['rural_OMB'] = df['CBSA Level'].map(is_it_rural)

In [13]:
# Tally records per rural_OMB flag ('1' = CBSA Level '0', i.e. rural; '0' = micropolitan or metropolitan).
# NOTE(review): the recorded count for '1' (883457) is 4 higher than the 883453 records counted earlier
# with CBSA Code '00000' -- confirm whether a few CBSA Level '0' records carry a non-zero CBSA Code.
df['rural_OMB'].value_counts()

0    13849980
1      883457
Name: rural_OMB, dtype: int64

In [14]:
# Share of records per rural_OMB flag, expressed as a percentage.
df['rural_OMB'].value_counts(normalize=True).mul(100)

0    94.003728
1     5.996272
Name: rural_OMB, dtype: float64

In [15]:
# Write a new file
# The frame now carries the added 'CBSA Level desc' and 'rural_OMB' columns.
outfile = 'data/df_%d_OMB.csv' % year
# index=False is the documented way to omit the row index; the earlier
# index=None only worked because None is falsy.
df.to_csv(outfile, index=False)