In [11]:
from census import Census
from config_file import (census_key)
import pandas as pd
import us
import requests

# Census API client, built from the key in config_file with year=2018
c = Census(census_key, year=2018)

# Lookup from full state name to abbreviation.
# us.states.mapping('abbr', 'name') is keyed by abbreviation, so invert it.
states = {
    full_name: abbr
    for abbr, full_name in us.states.mapping('abbr', 'name').items()
}

In [8]:
# Census search (acs5) for name, population and poverty count, one row per county.
# NOTE: the geography dict used to be {'for': 'state:*', 'for': 'county:*'}.
# A dict literal keeps only the last value of a repeated key, so the request
# has always been just 'county:*'; it is now written out explicitly.
census_data_fips = c.acs5.get(
    ("NAME", "B01003_001E", "B17001_002E"),
    {'for': 'county:*'},
)

# Build the DataFrame as one pipeline so no intermediate frame is mutated
# in place and re-running the cell always yields the same result.
census_fips_pd = (
    pd.DataFrame(census_data_fips)
    # Column renaming: API variable codes -> readable labels
    .rename(columns={
        "B01003_001E": "Population",
        "B17001_002E": "Poverty Count",
        "NAME": "Name",
        "state": "State ID",
        "county": "County ID",
    })
    # Drop any rows with NaN
    .dropna(how='any')
    # Add in Poverty Rate as a percentage: 100 * Poverty Count / Population
    .assign(**{
        "Poverty Rate": lambda df: 100 * (
            df["Poverty Count"].astype(int) / df["Population"].astype(int)
        ),
    })
    # Final column selection; .copy() makes this an independent frame so
    # later cells can add columns without a SettingWithCopyWarning.
    .loc[:, ["Name", "State ID", "County ID", "Population", "Poverty Rate"]]
    .copy()
)

census_fips_pd

Unnamed: 0,Name,State ID,County ID,Population,Poverty Rate
0,"Washington County, Mississippi",28,151,47086.0,32.909994
1,"Perry County, Mississippi",28,111,12028.0,18.548387
2,"Choctaw County, Mississippi",28,019,8321.0,22.689581
3,"Itawamba County, Mississippi",28,057,23480.0,14.599659
4,"Carroll County, Mississippi",28,015,10129.0,14.581894
...,...,...,...,...,...
3215,"Clayton County, Iowa",19,043,17672.0,8.850158
3216,"Buena Vista County, Iowa",19,021,20260.0,12.374136
3217,"Guthrie County, Iowa",19,077,10674.0,11.008057
3218,"Humboldt County, Iowa",19,091,9566.0,11.530420


In [14]:
# Data Cleanup
def get_county(address):
    """Return the county part of a name string.

    Everything from the first occurrence of "County" onward is discarded,
    surrounding spaces are trimmed, and the result is passed through
    strip_state() so that any ", <state>" remainder is removed as well
    (relevant when the name does not contain the word "County").
    """
    before_marker = address.split("County")[0]
    return strip_state(before_marker.strip(" "))

def strip_state(address):
    """Return the text before the first comma, with surrounding spaces removed.

    If there is no comma the whole string is returned (space-trimmed).
    """
    before_comma, _, _ = address.partition(",")
    return before_comma.strip(" ")

def get_state(address):
    """Return the state part of a "<place>, <state>" name string.

    The piece between the first and second comma is taken and its first
    character (the space that follows the comma) is dropped.

    Raises:
        IndexError: if ``address`` contains no comma.
    """
    pieces = address.split(",")
    after_comma = pieces[1]
    # Skip the single leading character, i.e. the space after the comma.
    return after_comma[1:]

# Enrich census_fips_pd with County / State Name / State columns and drop
# Puerto Rico. Each step returns a new frame (.loc + .assign) instead of
# assigning columns onto a column-subset frame, which avoids pandas'
# SettingWithCopyWarning and keeps the cell safe to re-run.
census_fips_pd = (
    census_fips_pd
    # Drop PR (state ID '72')
    .loc[census_fips_pd['State ID'] != '72']
    # Split "Name" into its county and state-name parts
    .assign(**{
        'County': lambda df: df['Name'].apply(get_county),
        'State Name': lambda df: df['Name'].apply(get_state),
    })
    # Get state abbreviations from the name -> abbreviation lookup
    .assign(State=lambda df: df['State Name'].map(states))
)

# Clean frame: FIPS is the state ID and county ID strings concatenated.
# census_fips_pd itself is left without a FIPS column, as before.
census_clean_pd = (
    census_fips_pd
    .assign(FIPS=lambda df: df['State ID'] + df['County ID'])
    .loc[:, ["State", "County", "FIPS", "Population", "Poverty Rate"]]
    .copy()
)

census_clean_pd

Unnamed: 0,State,County,FIPS,Population,Poverty Rate
0,MS,Washington,28151,47086.0,32.909994
1,MS,Perry,28111,12028.0,18.548387
2,MS,Choctaw,28019,8321.0,22.689581
3,MS,Itawamba,28057,23480.0,14.599659
4,MS,Carroll,28015,10129.0,14.581894
...,...,...,...,...,...
3215,IA,Clayton,19043,17672.0,8.850158
3216,IA,Buena Vista,19021,20260.0,12.374136
3217,IA,Guthrie,19077,10674.0,11.008057
3218,IA,Humboldt,19091,9566.0,11.530420
