In [1]:
# Dependencies and Setup
import pandas as pd
import os

In [2]:
# Point at the USDA rural/urban codes CSV (2013) and load it into the first dataframe
usda_data = "../resources/usda/usda_rural_urban_codes_2013.csv"
usda_df = pd.read_csv(filepath_or_buffer=usda_data)
usda_df.head()

Unnamed: 0,FIPS,State,County_Name,Population_2010,RUCC_2013,Description
0,1001.0,AL,Autauga County,54571,2.0,"Metro - Counties in metro areas of 250,000 to ..."
1,1003.0,AL,Baldwin County,182265,3.0,Metro - Counties in metro areas of fewer than ...
2,1005.0,AL,Barbour County,27457,6.0,"Nonmetro - Urban population of 2,500 to 19,999..."
3,1007.0,AL,Bibb County,22915,1.0,Metro - Counties in metro areas of 1 million p...
4,1009.0,AL,Blount County,57322,1.0,Metro - Counties in metro areas of 1 million p...


In [3]:
# Two-letter state abbreviation used to filter the USDA data.
# To prompt for it interactively instead, swap in:
#   user_input_state = input('Enter State Abbreviation (use two letter abbreviation):')
user_input_state = "CA"

In [4]:
# Keep only the rows for the selected state, then discard the columns that
# are not needed downstream.
usda_df_one = usda_df.loc[usda_df["State"].eq(user_input_state)]
usda_df_two = usda_df_one.drop(["State", "County_Name"], axis="columns")
usda_df_two.head()

Unnamed: 0,FIPS,Population_2010,RUCC_2013,Description
186,6001.0,1510271,1.0,Metro - Counties in metro areas of 1 million p...
187,6003.0,1175,8.0,"Nonmetro - Completely rural or less than 2,500..."
188,6005.0,38091,6.0,"Nonmetro - Urban population of 2,500 to 19,999..."
189,6007.0,220000,3.0,Metro - Counties in metro areas of fewer than ...
190,6009.0,45578,6.0,"Nonmetro - Urban population of 2,500 to 19,999..."


In [5]:
# Inspect the column dtypes of the filtered frame (FIPS and RUCC_2013 are
# converted in the following cell).
usda_df_two.dtypes

FIPS               float64
Population_2010     object
RUCC_2013          float64
Description         object
dtype: object

In [6]:
# FIPS is read as a float (e.g. 6001.0): cast through int first so the
# trailing ".0" is dropped, then to string. RUCC_2013 becomes a plain integer.
usda_df_two["FIPS"] = usda_df_two["FIPS"].astype("int").astype("str")
usda_df_two["RUCC_2013"] = usda_df_two["RUCC_2013"].astype("int")
usda_df_two.dtypes

FIPS               object
Population_2010    object
RUCC_2013           int64
Description        object
dtype: object

In [7]:
# Preview the first rows of the frame after the dtype conversions
usda_df_two.head()

Unnamed: 0,FIPS,Population_2010,RUCC_2013,Description
186,6001,1510271,1,Metro - Counties in metro areas of 1 million p...
187,6003,1175,8,"Nonmetro - Completely rural or less than 2,500..."
188,6005,38091,6,"Nonmetro - Urban population of 2,500 to 19,999..."
189,6007,220000,3,Metro - Counties in metro areas of fewer than ...
190,6009,45578,6,"Nonmetro - Urban population of 2,500 to 19,999..."


In [8]:
# Add the full geo_id used to join with the census tables in SQL.
# The id is the "0500000US" prefix followed by the county FIPS code padded to
# 5 digits. Zero-padding (instead of always prepending a single "0") keeps the
# id correct for states whose FIPS string is already 5 digits long, while
# giving the same result as before for 4-digit codes such as California's.
# NOTE: this overwrites FIPS in place, so running the cell twice would add the
# prefix twice; re-run from the dtype-conversion cell if needed.
usda_df_two["FIPS"] = "0500000US" + usda_df_two["FIPS"].str.zfill(5)
usda_df_two.head()

Unnamed: 0,FIPS,Population_2010,RUCC_2013,Description
186,0500000US06001,1510271,1,Metro - Counties in metro areas of 1 million p...
187,0500000US06003,1175,8,"Nonmetro - Completely rural or less than 2,500..."
188,0500000US06005,38091,6,"Nonmetro - Urban population of 2,500 to 19,999..."
189,0500000US06007,220000,3,Metro - Counties in metro areas of fewer than ...
190,0500000US06009,45578,6,"Nonmetro - Urban population of 2,500 to 19,999..."


In [9]:
# Rename the columns (in place) to the names used in the exported table
usda_df_two.rename(
    columns={
        "FIPS": "geo_id",
        "Population_2010": "total_pop_2010",
        "RUCC_2013": "usda_id",
        "Description": "description",
    },
    inplace=True,
)
usda_df_two.head()

Unnamed: 0,geo_id,total_pop_2010,usda_id,description
186,0500000US06001,1510271,1,Metro - Counties in metro areas of 1 million p...
187,0500000US06003,1175,8,"Nonmetro - Completely rural or less than 2,500..."
188,0500000US06005,38091,6,"Nonmetro - Urban population of 2,500 to 19,999..."
189,0500000US06007,220000,3,Metro - Counties in metro areas of fewer than ...
190,0500000US06009,45578,6,"Nonmetro - Urban population of 2,500 to 19,999..."


In [10]:
# Write the cleaned dataframe to CSV, leaving out the pandas index column
usda_df_two.to_csv(
    path_or_buf="../resources/usda/usda_rural_urban_density.csv",
    index=False,
)