In [1]:
import pandas as pd

Data source: [RUCA Codes](https://www.ers.usda.gov/webdocs/DataFiles/53241/ruca2010revised.xlsx?v=3913.8)

Methods used: RUCA codes classify the census tracts within counties, so the average RUCA code for a county is calculated by taking the average of the RUCA codes of all the census tracts within that county. The RUCA codes are then mapped to the county FIPS codes.

errata: On July 3, 2019, the Rural-Urban Commuting Area (RUCA) Codes data product was revised to correct a programming error affecting the 2010 secondary RUCA codes. The revision corrects the secondary codes of 10,909 of 74,002 census tracts. Secondary RUCA codes may be used to classify census tracts into rural and urban categories. The revised secondary codes result in an increase in the number of census tracts classified as rural. The 2010 primary RUCA codes are not affected by this revision.

In [2]:
#rows begin on 2nd row
df = pd.read_excel('./ruca2010revised.xlsx', skiprows=1)

In [3]:
df.head()

Unnamed: 0,State-County FIPS Code,Select State,Select County,State-County-Tract FIPS Code (lookup by address at http://www.ffiec.gov/Geocode/),Primary RUCA Code 2010,"Secondary RUCA Code, 2010 (see errata)","Tract Population, 2010","Land Area (square miles), 2010","Population Density (per square mile), 2010"
0,1001,AL,Autauga County,1001020100,1,1.0,1912,3.787641,504.799727
1,1001,AL,Autauga County,1001020200,1,1.0,2170,1.289776,1682.46237
2,1001,AL,Autauga County,1001020300,1,1.0,3373,2.065366,1633.124331
3,1001,AL,Autauga County,1001020400,1,1.0,4386,2.464376,1779.760676
4,1001,AL,Autauga County,1001020500,1,1.0,10766,4.400686,2446.436531


In [19]:
cols_keep = [
    'Select State',
    'Select County',
    'Primary RUCA Code 2010'
]

df = df[cols_keep].copy()

In [20]:
df.head()

Unnamed: 0,Select State,Select County,Primary RUCA Code 2010
0,AL,Autauga County,1
1,AL,Autauga County,1
2,AL,Autauga County,1
3,AL,Autauga County,1
4,AL,Autauga County,1


Primary RUCA Codes, 2010
* 1    Metropolitan area core: primary flow within an urbanized area (UA)
* 2    Metropolitan area high commuting: primary flow 30% or more to a UA
* 3    Metropolitan area low commuting: primary flow 10% to 30% to a UA
* 4    Micropolitan area core: primary flow within an Urban Cluster of 10,000 to 49,999 (large UC)
* 5    Micropolitan high commuting: primary flow 30% or more to a large UC
* 6    Micropolitan low commuting: primary flow 10% to 30% to a large UC
* 7    Small town core: primary flow within an Urban Cluster of 2,500 to 9,999 (small UC)
* 8    Small town high commuting: primary flow 30% or more to a small UC
* 9    Small town low commuting: primary flow 10% to 30% to a small UC
* 10  Rural areas: primary flow to a tract outside a UA or UC
* 99  Not coded: Census tract has zero population and no rural-urban identifier information


In [27]:
#drop rows with 99 in Primary RUCA Code 2010
df = df[df['Primary RUCA Code 2010'] != 99].copy()

# groupby select state and select county, mean of primary ruca code 2010
df = df.groupby(['Select State', 'Select County'])['Primary RUCA Code 2010'].mean().reset_index()
df.head()

Unnamed: 0,Select State,Select County,Primary RUCA Code 2010
0,AK,Aleutians East Borough,10.0
1,AK,Aleutians West Census Area,10.0
2,AK,Anchorage Municipality,1.0
3,AK,Bethel Census Area,9.0
4,AK,Bristol Bay Borough,10.0


In [28]:
# round to nearest integer
df['Primary RUCA Code 2010'] = df['Primary RUCA Code 2010'].round(0).astype(int)
df.head()

Unnamed: 0,Select State,Select County,Primary RUCA Code 2010
0,AK,Aleutians East Borough,10
1,AK,Aleutians West Census Area,10
2,AK,Anchorage Municipality,1
3,AK,Bethel Census Area,9
4,AK,Bristol Bay Borough,10


In [29]:
df.describe()

Unnamed: 0,Primary RUCA Code 2010
count,3221.0
mean,5.162372
std,3.094078
min,1.0
25%,2.0
50%,5.0
75%,8.0
max,10.0


In [30]:
#export as csv
df.to_csv('./ruca_codes.csv', index=False)