In [18]:
import pandas as pd
import configparser
from census import Census

In [14]:
# Getting the API key
# The key is read from a local INI-style file named '.env', which must contain a
# [GENERAL] section with a CENSUS_KEY entry.
parser = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list of
# files it actually parsed, so check that list to report a missing file clearly
# instead of failing later with an opaque KeyError on 'GENERAL'.
if not parser.read('.env'):
    raise FileNotFoundError(
        "Could not read '.env'; it must define CENSUS_KEY under a [GENERAL] section"
    )
CENSUS_API_KEY = parser['GENERAL']['CENSUS_KEY']

In [20]:
# Readable column names for the B19001 variable codes requested from the Census API.
# Every variable number comes as a pair of codes: the "E" code is renamed to a
# *_estimate column and the "M" code to the matching *_me column.
CENSUS_COLUMNS = {
    'B19001_001E': 'hh_income_total_estimate',
    # The doubled underscore is kept exactly as in the existing column name.
    'B19001_001M': 'hh_income__total_me',
}

# Income brackets in variable-number order; the first entry belongs to B19001_002.
_INCOME_BRACKETS = (
    'less_than_10000',
    '10000_14999',
    '15000_19999',
    '20000_24999',
    '25000_29999',
    '30000_34999',
    '35000_39999',
    '40000_44999',
    '45000_49999',
    '50000_59999',
)

for _number, _bracket in enumerate(_INCOME_BRACKETS, start=2):
    CENSUS_COLUMNS['B19001_{:03d}E'.format(_number)] = 'hh_income_{}_estimate'.format(_bracket)
    CENSUS_COLUMNS['B19001_{:03d}M'.format(_number)] = 'hh_income_{}_me'.format(_bracket)

### Importing 2019 ACS dataset

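We'll use the Census API to pull the B19001 group from the ACS 5-year estimates for every county. This group reports how many households fall into each household-income bracket (income over the past 12 months), with an estimate and a margin of error for each bracket.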

In [25]:
# Query the ACS 5-year endpoint for every county and load the records into a DataFrame.
# The year is pinned to 2019, the release named in the section heading, so the result
# does not depend on whichever default year the installed census package ships with.
census = Census(CENSUS_API_KEY)
acs_records = census.acs5.get(
    list(CENSUS_COLUMNS),       # the raw B19001 variable codes to request
    geo={'for': 'county:*'},    # all counties
    year=2019,
)
acs_df = pd.DataFrame.from_records(acs_records)

In [43]:
# Replace the raw Census variable codes with the readable names from CENSUS_COLUMNS.
# This call mutates acs_df in place.
acs_df.rename(CENSUS_COLUMNS, axis='columns', inplace=True)

# NOTE(review): the rename below is not assigned back, so it only changes the frame
# displayed as this cell's output; acs_df itself keeps its 'state' and 'county'
# columns, which the later column-reordering cell selects by those names.
fips_preview_names = {'state': 'acs_state_fips', 'county': 'acs_county_fips'}
acs_df.rename(columns=fips_preview_names)

Unnamed: 0,acs_state_fips,acs_county_fips,hh_income_total_estimate,hh_income__total_me,hh_income_less_than_10000_estimate,hh_income_less_than_10000_me,hh_income_10000_14999_estimate,hh_income_10000_14999_me,hh_income_15000_19999_estimate,hh_income_15000_19999_me,...,hh_income_30000_34999_estimate,hh_income_30000_34999_me,hh_income_35000_39999_estimate,hh_income_35000_39999_me,hh_income_40000_44999_estimate,hh_income_40000_44999_me,hh_income_45000_49999_estimate,hh_income_45000_49999_me,hh_income_50000_59999_estimate,hh_income_50000_59999_me
0,17,051,7737.0,207.0,502.0,97.0,485.0,101.0,502.0,106.0,...,440.0,94.0,443.0,90.0,357.0,78.0,369.0,94.0,585.0,128.0
1,17,107,10797.0,230.0,580.0,145.0,435.0,107.0,473.0,107.0,...,558.0,114.0,621.0,125.0,409.0,99.0,499.0,122.0,1028.0,160.0
2,17,165,9972.0,288.0,939.0,213.0,955.0,168.0,618.0,134.0,...,634.0,149.0,444.0,106.0,512.0,146.0,519.0,132.0,666.0,145.0
3,17,097,246122.0,1089.0,9352.0,694.0,5974.0,520.0,6750.0,591.0,...,8387.0,587.0,8273.0,604.0,8869.0,601.0,6591.0,606.0,14329.0,756.0
4,17,127,5822.0,211.0,397.0,102.0,390.0,120.0,456.0,106.0,...,315.0,94.0,284.0,84.0,226.0,72.0,270.0,85.0,423.0,110.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3215,47,033,5491.0,164.0,411.0,106.0,378.0,109.0,282.0,74.0,...,322.0,84.0,344.0,81.0,280.0,81.0,167.0,60.0,439.0,92.0
3216,47,095,2243.0,134.0,356.0,86.0,216.0,79.0,157.0,60.0,...,100.0,50.0,88.0,49.0,114.0,49.0,54.0,41.0,171.0,73.0
3217,47,093,187319.0,1133.0,13252.0,723.0,8367.0,684.0,8930.0,600.0,...,9099.0,609.0,8556.0,592.0,8408.0,681.0,7300.0,575.0,14716.0,824.0
3218,53,005,72121.0,607.0,2999.0,442.0,2689.0,380.0,2626.0,310.0,...,2710.0,406.0,2878.0,387.0,2812.0,387.0,2532.0,369.0,6088.0,601.0


In [31]:
# Household-count estimate columns for every income bracket below $60,000.
under_60000_columns = [
    'hh_income_less_than_10000_estimate',
    'hh_income_10000_14999_estimate',
    'hh_income_15000_19999_estimate',
    'hh_income_20000_24999_estimate',
    'hh_income_25000_29999_estimate',
    'hh_income_30000_34999_estimate',
    'hh_income_35000_39999_estimate',
    'hh_income_40000_44999_estimate',
    'hh_income_45000_49999_estimate',
    'hh_income_50000_59999_estimate',
]

# Add the columns left to right with `+` so that a missing value in any bracket
# propagates into the total.
under_60000_total = acs_df[under_60000_columns[0]]
for bracket_column in under_60000_columns[1:]:
    under_60000_total = under_60000_total + acs_df[bracket_column]

acs_df['hh_income_less_than_60000'] = under_60000_total

In [32]:
# Ratio of the under-$60,000 household count to the total household estimate.
# Despite the "per_capita" name, both sides are household counts, so this is a
# share of households rather than a per-person figure.
acs_df['less_than_60000_per_capita'] = (
    acs_df['hh_income_less_than_60000'].div(acs_df['hh_income_total_estimate'])
)

In [42]:
# Rearrange acs_df so that state and county come first, followed by the renamed
# Census columns in CENSUS_COLUMNS order, followed by every remaining column.
# Selecting only the state/county/CENSUS_COLUMNS names would silently drop the derived
# hh_income_less_than_60000 and less_than_60000_per_capita columns before the frame
# is written out, so any column not listed explicitly is appended at the end.
leading_columns = ['state', 'county', *CENSUS_COLUMNS.values()]
remaining_columns = [col for col in acs_df.columns if col not in leading_columns]
acs_df = acs_df[leading_columns + remaining_columns].copy()

In [None]:
# Write the county-level frame to CSV; index=True also writes the row index as the
# first column of the file.
output_csv_path = "./data/acs_household_income_county.csv"
acs_df.to_csv(output_csv_path, index=True)