In [1]:
import pandas as pd
import configparser
from census import Census

In [2]:
# Load the Census API key from the local `.env` file, which is parsed as an INI file
# with a [GENERAL] section holding a CENSUS_KEY entry.
parser = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list of files
# it actually parsed, so fail loudly here rather than with a bare KeyError on the lookup.
if not parser.read('.env'):
    raise FileNotFoundError("Could not read '.env'; expected a [GENERAL] section with a CENSUS_KEY entry")
CENSUS_API_KEY = parser['GENERAL']['CENSUS_KEY']

In [3]:
# Maps ACS table B19001 variable codes to readable column names.
# Codes ending in "E" become "*_estimate" columns; codes ending in "M" become "*_me" columns.
CENSUS_COLUMNS = {
    'B19001_001E': 'hh_income_total_estimate',
    'B19001_001M': 'hh_income_total_me',  # previously 'hh_income__total_me' (stray double underscore)
    'B19001_002E': 'hh_income_less_than_10000_estimate',
    'B19001_002M': 'hh_income_less_than_10000_me',
    'B19001_003E': 'hh_income_10000_14999_estimate',
    'B19001_003M': 'hh_income_10000_14999_me',
    'B19001_004E': 'hh_income_15000_19999_estimate',
    'B19001_004M': 'hh_income_15000_19999_me',
    'B19001_005E': 'hh_income_20000_24999_estimate',
    'B19001_005M': 'hh_income_20000_24999_me',
    'B19001_006E': 'hh_income_25000_29999_estimate',
    'B19001_006M': 'hh_income_25000_29999_me',
    'B19001_007E': 'hh_income_30000_34999_estimate',
    'B19001_007M': 'hh_income_30000_34999_me',
    'B19001_008E': 'hh_income_35000_39999_estimate',
    'B19001_008M': 'hh_income_35000_39999_me',
    'B19001_009E': 'hh_income_40000_44999_estimate',
    'B19001_009M': 'hh_income_40000_44999_me',
    'B19001_010E': 'hh_income_45000_49999_estimate',
    'B19001_010M': 'hh_income_45000_49999_me',
    'B19001_011E': 'hh_income_50000_59999_estimate',
    'B19001_011M': 'hh_income_50000_59999_me',
}

### Importing 2019 ACS dataset

We'll use the Census API to pull table B19001 (household income in the past 12 months) from the ACS 5-year estimates for every county.

In [4]:
# Pin the ACS vintage explicitly. Without `year`, the census client falls back to its own
# default year, which depends on the installed package version, so a re-run could
# silently pull a different dataset than the 2019 one this notebook describes.
ACS_YEAR = 2019

census = Census(CENSUS_API_KEY)
# One record per county, holding every requested B19001 variable plus the geography identifiers.
acs_records = census.acs5.get(list(CENSUS_COLUMNS), geo={'for': 'county:*'}, year=ACS_YEAR)
acs_df = pd.DataFrame.from_records(acs_records)

In [5]:
# Swap the raw Census variable codes for readable names, then relabel the geography
# identifier columns returned alongside them.
geo_column_names = {'state': 'acs_state_fips', 'county': 'acs_county_fips'}
acs_df = acs_df.rename(columns=CENSUS_COLUMNS).rename(columns=geo_column_names)

In [6]:
# Estimate columns for every income bracket below $60,000, lowest bracket first.
under_60k_columns = [
    'hh_income_less_than_10000_estimate',
    'hh_income_10000_14999_estimate',
    'hh_income_15000_19999_estimate',
    'hh_income_20000_24999_estimate',
    'hh_income_25000_29999_estimate',
    'hh_income_30000_34999_estimate',
    'hh_income_35000_39999_estimate',
    'hh_income_40000_44999_estimate',
    'hh_income_45000_49999_estimate',
    'hh_income_50000_59999_estimate',
]
# Add the bracket columns together left to right with plain Series addition,
# starting from the lowest bracket.
bracket_series = [acs_df[column] for column in under_60k_columns]
acs_df['hh_income_less_than_60000'] = sum(bracket_series[1:], bracket_series[0])

In [7]:
# Ratio of the under-$60,000 bracket sum to the table's total estimate.
# NOTE(review): despite the "per_capita" name, this looks like a share of households
# rather than a per-person figure — confirm before relying on the column name.
under_60k_total = acs_df['hh_income_less_than_60000']
all_brackets_total = acs_df['hh_income_total_estimate']
acs_df['less_than_60000_per_capita'] = under_60k_total / all_brackets_total

In [8]:
# Move the state and county FIPS columns to the front and keep every other column in its
# existing order. Selecting only the CENSUS_COLUMNS names here would discard the derived
# columns computed earlier in the notebook (hh_income_less_than_60000 and
# less_than_60000_per_capita) before the frame is saved.
leading_columns = ['acs_state_fips', 'acs_county_fips']
remaining_columns = [column for column in acs_df.columns if column not in leading_columns]
acs_df = acs_df[leading_columns + remaining_columns].copy()

In [9]:
# Persist the county-level table; the DataFrame index is written as the first CSV column.
output_csv_path = "./data/acs_household_income_county.csv"
acs_df.to_csv(output_csv_path, index=True)