In [None]:
# Dependencies
import numpy as np
import pandas as pd
import requests
import json

from config import census_key

In [None]:
#Due to Census' API constraints this code is only valid for 2009-2018.
#The list must stay in chronological order with each year appearing exactly once:
#later cells pair each API response with a year by position, so a repeated or
#out-of-order year would duplicate or mislabel rows.
years = list(range(2009, 2019))

#Comma-separated variable codes passed into the API call, DO NOT INCLUDE SPACES
census_variables = "B01003_001E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_008E,B03001_003E"

In [None]:
#This list holds one entry per element of `years`, in the same order:
#the parsed JSON response on success, or None when that year could not be retrieved.
#The None placeholder keeps output[i] aligned with years[i]; consumers must skip None entries.
output = []

#String form of the key, used only to redact it from anything printed below
key_text = str(census_key)

for year in years:

    #API base url is dependent on year
    if year == 2009: #2009 Detailed Tables API Call
        url = f'https://api.census.gov/data/{year}/acs5?key={census_key}&get={census_variables},NAME&for=state:*'

    elif year == 2010: #2010 Detailed Tables API Call
        url = f'https://api.census.gov/data/{year}/acs/acs5?key={census_key}&get={census_variables},NAME&for=state:*'

    else: #2011-2018 Detailed Tables API Call
        url = f'https://api.census.gov/data/{year}/acs/acs5?get=NAME,{census_variables}&for=state:*&key={census_key}'

    #try/except catches any year w/o data. Only network/HTTP failures and
    #unparseable bodies are handled; anything else (e.g. KeyboardInterrupt) propagates.
    try:
        print(f'Retrieving {year} data...')
        #Show the request URL with the API key redacted so it never lands in saved notebook output
        print(url.replace(key_text, '<census_key>') if key_text else url)
        response = requests.get(url, timeout=30)
        #Treat 4xx/5xx answers as failures instead of trying to parse an error page
        response.raise_for_status()
        output.append(response.json())
        print(f'{year} data successfully retrieved!')
        print('-------------------')

    except (requests.exceptions.RequestException, ValueError) as error:
        #Keep positional alignment with `years` even though this year has no data
        output.append(None)
        #Only the exception type is printed: requests error messages can embed the full URL (and the key)
        print(f'Unable to retrieve {year} data...skipping ({type(error).__name__})')

In [None]:
#Converting yearly lists into dataframes and consolidating into one large df.
#Each response is paired with its year by position, so the two lists must line up.
if len(output) != len(years):
    raise ValueError(
        f'Expected one API response per year ({len(years)}) but found {len(output)}; '
        'responses can no longer be matched to years. Re-run the retrieval cell.'
    )

yearly_frames = []
seen_years = set()

for year, response in zip(years, output):
    #Skip a year that was already consolidated (guards against repeated entries in `years`)
    #and any year whose response is missing or empty
    if year in seen_years or not response:
        continue
    seen_years.add(year)

    #First row of the response is the header row, the remaining rows are the data.
    #Headers are read per year because column order is not consistent across years.
    headers, data = response[0], response[1:]

    year_df = pd.DataFrame(data, columns=headers)
    year_df['year'] = year
    #Sorting the columns gives every yearly frame the same column order before stacking
    yearly_frames.append(year_df[sorted(year_df.columns)])

#Single concat at the end instead of growing the dataframe inside the loop
census_df = pd.concat(yearly_frames, ignore_index=True)

census_df.head()

In [None]:
#Readable labels for the census variable codes requested from the API.
#NOTE(review): "Pacfic" is misspelled; the header is kept as-is in case downstream
#consumers of the CSV rely on it -- confirm and correct if safe.
population_labels = {"B01003_001E" : "Total Population",
                     "B02001_002E" : "White (pop)",
                     "B02001_003E" : "Black (pop)",
                     "B02001_004E" : "Native/Indigenous (pop)",
                     "B02001_005E" : "Asian (pop)",
                     "B02001_006E" : "Pacfic Islander (pop)",
                     "B02001_008E" : "Mixed Race (pop)",
                     "B03001_003E" : "Hispanic (pop)"}

#rename variables
census_df = census_df.rename(columns={**population_labels, 'NAME': 'State'})

#reorganize columns, dropping state code.
#Only columns that exist in this dataframe are selected: the state name, the year,
#and every renamed population column.
census_df = census_df[['State', 'year', *population_labels.values()]]

census_df.head()

In [None]:
#Export the consolidated dataframe to CSV, leaving out the dataframe index
census_df.to_csv(path_or_buf='../Resources/sadias_variables.csv', index=False)