In [1]:
#import necessary stuff
import pandas as pd
import requests
import os
import numpy as np

## Predefined Variables

In [2]:
#predefined variables

years = range(2011, 2022)
geo_hierarchy = 'county'

one_year_estimate_table_name = 'S1901'
one_year_supplement_estimate_table_name = 'K201901'
five_year_estimate_table_name = 'S1901'

## Useful Methods

In [3]:
#make a request to the census api
def make_request(url):
    response = requests.get(url)
    return response.json()

In [4]:
#make a data frame from the response
def get_df_from(url):
    data = make_request(url)
    df = pd.DataFrame(data[1:], columns=data[0])
    return df

## One Year Estimate Tables

In [5]:
if not os.path.exists('data/one-year-estimates'):
    os.makedirs('data/one-year-estimates')

for year in years:
    url = f'https://api.census.gov/data/{year}/acs/acs1/subject?get=group({one_year_estimate_table_name})&for={geo_hierarchy}:*'
    try:
        one_year_estimate_table = get_df_from(url)
    except Exception as e:
        print(e)
        continue

    critical_columns = ['NAME', 'GEO_ID', 'S1901_C01_002E', 'S1901_C01_003E', 'S1901_C01_004E', 'S1901_C01_005E', 'S1901_C01_006E', 'S1901_C01_007E', 'S1901_C01_008E', 'S1901_C01_009E', 'S1901_C01_010E', 'S1901_C01_011E']
    corresponding_labels = ['Name', 'Geo_id', '0 - 9999', '10000 - 14999', '15000 - 24999', '25000 - 34999', '35000 - 49999', '50000 - 74999', '75000 - 99999', '100000 - 149999', '150000 - 199999', '200000 or more']
    total_households = one_year_estimate_table['S1901_C01_001E']

    total_households = total_households.to_numpy().astype(float)

    one_year_estimate_table = one_year_estimate_table[critical_columns]
    one_year_estimate_table.columns = corresponding_labels

    one_year_estimate_table.iloc[:, 2:] = one_year_estimate_table.iloc[:, 2:].astype(float)
    one_year_estimate_table.iloc[:, 2:] = one_year_estimate_table.iloc[:, 2:] / 100 * total_households[:, None]

    one_year_estimate_table.to_csv(f'data/one-year-estimates/{year}.csv', index=False)

    print(f'saved, {year}!')


saved, 2011!
saved, 2012!
saved, 2013!
saved, 2014!
saved, 2015!
saved, 2016!
saved, 2017!
saved, 2018!
saved, 2019!
Expecting value: line 1 column 1 (char 0)
saved, 2021!


## One Year Supplement Estimate Tables

In [8]:
if not os.path.exists('data/one-year-supplement-estimates'):
    os.makedirs('data/one-year-supplement-estimates')

for year in years:
    url = f'https://api.census.gov/data/{year}/acs/acsse?get=group({one_year_supplement_estimate_table_name})&for={geo_hierarchy}:*'
    try:
        one_year_supplement_estimate_table = get_df_from(url)
    except Exception as e:
        print(e)
        continue

    # adjust columns 
    critical_columns = ['NAME', 'GEO_ID', 'K201901_002E', 'K201901_003E', 'K201901_004E', 'K201901_005E', 'K201901_006E', 'K201901_007E', 'K201901_008E']
    corresponding_labels = ['Name', 'Geo_id', '0 - 19999', '20000 - 39999', '40000 - 59999', '60000 - 99999', '100000 - 149999', '150000 - 199999', '200000 or more']

    one_year_supplement_estimate_table = one_year_supplement_estimate_table[critical_columns]
    one_year_supplement_estimate_table.columns = corresponding_labels
    one_year_supplement_estimate_table = one_year_supplement_estimate_table.dropna()
    # one_year_supplement_estimate_table.to_csv('data/temp.csv', index=False)
    one_year_supplement_estimate_table.iloc[:, 2:] = one_year_supplement_estimate_table.iloc[:, 2:].astype(int)

    one_year_supplement_estimate_table.to_csv(f'data/one-year-supplement-estimates/{year}.csv', index=False)

    print(f'saved, {year}!')



Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
saved, 2014!
saved, 2015!
saved, 2016!
saved, 2017!
saved, 2018!
saved, 2019!
Expecting value: line 1 column 1 (char 0)
saved, 2021!


## Five Year Estimate Tables

In [10]:
if not os.path.exists('data/five-year-estimates'):
    os.makedirs('data/five-year-estimates')

for year in years:
    url = f'https://api.census.gov/data/{year}/acs/acs5/subject?get=group({five_year_estimate_table_name})&for={geo_hierarchy}:*'
    try:
        five_year_estimate_table = get_df_from(url)
    except Exception as e:
        print(e)
        continue
    
    # adjust columns 
    critical_columns = ['NAME', 'GEO_ID', 'S1901_C01_002E', 'S1901_C01_003E', 'S1901_C01_004E', 'S1901_C01_005E', 'S1901_C01_006E', 'S1901_C01_007E', 'S1901_C01_008E', 'S1901_C01_009E', 'S1901_C01_010E', 'S1901_C01_011E']
    corresponding_labels = ['Name', 'Geo_id', '0 - 9999', '10000 - 14999', '15000 - 24999', '25000 - 34999', '35000 - 49999', '50000 - 74999', '75000 - 99999', '100000 - 149999', '150000 - 199999', '200000 or more']
    total_households = five_year_estimate_table['S1901_C01_001E']

    total_households = total_households.to_numpy().astype(float)

    five_year_estimate_table = five_year_estimate_table[critical_columns]
    five_year_estimate_table.columns = corresponding_labels

    five_year_estimate_table.iloc[:, 2:] = five_year_estimate_table.iloc[:, 2:].astype(float)
    five_year_estimate_table.iloc[:, 2:] = five_year_estimate_table.iloc[:, 2:] / 100 * total_households[:, None]

    five_year_estimate_table.to_csv(f'data/five-year-estimates/{year}.csv', index=False)    

    print(f'saved, {year}!')

saved, 2011!
saved, 2012!
saved, 2013!
saved, 2014!
saved, 2015!
saved, 2016!
saved, 2017!
saved, 2018!
saved, 2019!
saved, 2020!
Expecting value: line 1 column 1 (char 0)
