In [1]:
# Dependencies
import numpy as np
import pandas as pd
import requests
import json

from config import census_key

In [2]:
# The year-dependent API URLs built in this notebook are only valid for 2009-2018.
# Keep this list unique and in chronological order: later cells pair each year
# with its API response by position, so a repeated or misplaced year ends up
# duplicating that year's rows in the consolidated table.
years = [2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018]

# Comma-separated ACS variable codes passed to the API's `get` parameter.
census_variables = "B17001H_020E,B02001_001E"  # DO NOT INCLUDE SPACES

In [3]:
# Download one ACS 5-year response per year.
#
# `output` receives the parsed JSON body of every successful request, in the
# order the years were requested. A year whose request fails is skipped (nothing
# is appended for it) and recorded in `failed_years` so the gap is visible.

REQUEST_TIMEOUT = 30  # seconds; prevents a stalled connection from hanging the notebook


def _redact(text):
    """Return `text` with the Census API key masked so it never reaches cell output."""
    return text.replace(census_key, '<census_key>') if census_key else text


# This list will hold the yearly API results
output = []
# Years that could not be retrieved
failed_years = []

for year in years:

    # API base url is dependent on year
    if year == 2009:  # 2009 Detailed Tables API Call
        url = f'https://api.census.gov/data/{year}/acs5?key={census_key}&get={census_variables},NAME&for=state:*'

    elif year == 2010:  # 2010 Detailed Tables API Call
        url = f'https://api.census.gov/data/{year}/acs/acs5?key={census_key}&get={census_variables},NAME&for=state:*'

    else:  # 2011-2018 Detailed Tables API Call
        url = f'https://api.census.gov/data/{year}/acs/acs5?get=NAME,{census_variables}&for=state:*&key={census_key}'

    print(f'Retrieving {year} data...')
    # Show the request for traceability, but with the secret key masked.
    print(_redact(url))

    # Only network/HTTP failures and unparsable bodies are treated as
    # "no data for this year"; anything else (e.g. KeyboardInterrupt or a
    # programming error) is allowed to surface.
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()  # turn 4xx/5xx answers into an exception
        payload = response.json()
    except (requests.exceptions.RequestException, ValueError) as err:
        failed_years.append(year)
        # requests error messages can embed the full URL, so mask the key here too.
        print(f'Unable to retrieve {year} data...skipping ({_redact(str(err))})')
        continue

    output.append(payload)
    print(f'{year} data successfully retrieved!')
    print('-------------------')

if failed_years:
    # `output` is now shorter than `years`, so pairing the two by position
    # would attach responses to the wrong year.
    print(f'WARNING: no data retrieved for {failed_years}; '
          'remove these years from `years` (or re-run) before building the dataframe.')

Retrieving 2009 data...
https://api.census.gov/data/2009/acs5?key=56fd45ad99936050c5b62b7ea0591503e40b8364&get=B17001H_020E,B02001_001E,NAME&for=state:*
2009 data successfully retrieved!
-------------------
Retrieving 2010 data...
https://api.census.gov/data/2010/acs/acs5?key=56fd45ad99936050c5b62b7ea0591503e40b8364&get=B17001H_020E,B02001_001E,NAME&for=state:*
2010 data successfully retrieved!
-------------------
Retrieving 2011 data...
https://api.census.gov/data/2011/acs/acs5?get=NAME,B17001H_020E,B02001_001E&for=state:*&key=56fd45ad99936050c5b62b7ea0591503e40b8364
2011 data successfully retrieved!
-------------------
Retrieving 2012 data...
https://api.census.gov/data/2012/acs/acs5?get=NAME,B17001H_020E,B02001_001E&for=state:*&key=56fd45ad99936050c5b62b7ea0591503e40b8364
2012 data successfully retrieved!
-------------------
Retrieving 2013 data...
https://api.census.gov/data/2013/acs/acs5?get=NAME,B17001H_020E,B02001_001E&for=state:*&key=56fd45ad99936050c5b62b7ea0591503e40b8364
201

In [4]:
# Convert each yearly API response into a dataframe and consolidate them into
# one long table with a `year` column.
#
# Each response is a list of rows whose first row holds the column headers.
# Responses are paired with `years` by position, so both lists must have the
# same length; a mismatch means at least one download was skipped and every
# later year would be mislabelled.
if len(output) != len(years):
    raise ValueError(
        f'Expected one API response per year ({len(years)}) but found {len(output)}; '
        're-run the retrieval cell or remove the failed years from `years`.'
    )

yearly_frames = []
seen_years = set()

for year, response in zip(years, output):
    # A year listed more than once would otherwise contribute duplicate rows.
    if year in seen_years:
        continue
    seen_years.add(year)

    # Headers are read per response because column order is not consistent across years.
    headers, data = response[0], response[1:]
    year_frame = pd.DataFrame(data, columns=headers)
    year_frame['year'] = year
    # Sorting the column labels gives every yearly frame the same layout.
    yearly_frames.append(year_frame[sorted(year_frame.columns)])

if not yearly_frames:
    raise ValueError('No yearly responses available to build the census dataframe.')

# Concatenate once instead of growing the dataframe inside the loop.
census_df = pd.concat(yearly_frames, ignore_index=True)

census_df.head()

Unnamed: 0,B02001_001E,B17001H_020E,NAME,state,year
0,683142,1137,Alaska,2,2009
1,4633360,14235,Alabama,1,2009
2,2838143,13179,Arkansas,5,2009
3,6324865,10486,Arizona,4,2009
4,36308527,34197,California,6,2009


In [5]:
# Map the raw ACS variable codes onto the column labels used in the export.
# NOTE(review): per the Census ACS variable list, B02001_001E appears to be a
# total-population estimate and B17001H_020E a poverty-status head count, so
# the 'Poverty(Pop)' / 'Income ($)' labels may be mismatched - confirm before
# relying on them. Labels are kept as-is here because the exported CSV uses them.
column_labels = {
    'B02001_001E': 'Poverty(Pop)',
    'B17001H_020E': 'Income ($)',
    'NAME': 'State',
}

# Final column order; the `state` code column is intentionally left out.
export_columns = ['State', 'year', 'Poverty(Pop)', 'Income ($)']

# Relabel, then keep only the export columns in their final order.
census_df = census_df.rename(columns=column_labels)[export_columns]

census_df.head()

Unnamed: 0,State,year,Poverty(Pop),Income ($)
0,Alaska,2009,683142,1137
1,Alabama,2009,4633360,14235
2,Arkansas,2009,2838143,13179
3,Arizona,2009,6324865,10486
4,California,2009,36308527,34197


In [6]:
# Write the consolidated table to disk; the positional row index is not exported.
csv_path = '../Resources/inc_data.csv'
census_df.to_csv(csv_path, index=False)