In [1]:
# Dependencies
import numpy as np
import pandas as pd
import requests
import json

from config import census_key

In [2]:
# Due to Census' API restraints this code is only valid for 2009-2018. 
# Below list must remain in chronological order
years = [2009, 2010, 2011, 2012, 2013, 2014, 2015, 2017, 2016, 2017, 2018]

# Params to be passed into API call, *DO NOT INCLUCE SPACES
census_variables = "B01003_001E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_008E,B03001_003E" 

'''
CENSUS VARIABLE DESCRIPTIONS:
B01003_001E,	Total population,
B02001_002E,	Population (White Alone),
B02001_003E,	Population (Black or African American Alone),
B02001_004E,	Population (American Indian or Alaskan Native Alone),
B02001_005E,	Population (Asian Alone),
B02001_006E,	Population (Native Hawaiian and Other Pacific Islander Alone),
B02001_008E,	Population (Two or more races),
B03001_003E,	Population (Hispanic Origin)
'''

'\nCENSUS VARIABLE DESCRIPTIONS:\nB01003_001E,\tTotal population,\nB02001_002E,\tPopulation (White Alone),\nB02001_003E,\tPopulation (Black or African American Alone),\nB02001_004E,\tPopulation (American Indian or Alaskan Native Alone),\nB02001_005E,\tPopulation (Asian Alone),\nB02001_006E,\tPopulation (Native Hawaiian and Other Pacific Islander Alone),\nB02001_008E,\tPopulation (Two or more races),\nB03001_003E,\tPopulation (Hispanic Origin)\n'

In [3]:
#This blank list will hold the yearly API results
results = []

for year in years: 
    
    #API base url is dependent on year
    if year == 2009: #2009 Detailed Tables API Call
        url = f'https://api.census.gov/data/{year}/acs5?key={census_key}&get={census_variables},NAME&for=state:*'      

    elif year == 2010: #2010 Detailed Tables API Call     
        url = f'https://api.census.gov/data/{year}/acs/acs5?key={census_key}&get={census_variables},NAME&for=state:*'
        
    else: #2011-2018 Detailed Tables API Call
        url = f'https://api.census.gov/data/{year}/acs/acs5?get=NAME,{census_variables}&for=state:*&key={census_key}'
        
        
    #Incorporating try/except to catch any year w/o data        
    try:
        print(f'Retrieving {year} data...')
        print(url)
        response = requests.get(url).json()
        results.append(response)
        print(f'{year} data successfully retrieved!')
        print('-------------------')
    
    except:
        print(f'Unable to retrieve {year} data...skipping')
        print('-------------------')
        
print('Data retrieval complete')

Retrieving 2009 data...
https://api.census.gov/data/2009/acs5?key=56fd45ad99936050c5b62b7ea0591503e40b8364&get=B01003_001E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_008E,B03001_003E,NAME&for=state:*
2009 data successfully retrieved!
-------------------
Retrieving 2010 data...
https://api.census.gov/data/2010/acs/acs5?key=56fd45ad99936050c5b62b7ea0591503e40b8364&get=B01003_001E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_008E,B03001_003E,NAME&for=state:*
2010 data successfully retrieved!
-------------------
Retrieving 2011 data...
https://api.census.gov/data/2011/acs/acs5?get=NAME,B01003_001E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_008E,B03001_003E&for=state:*&key=56fd45ad99936050c5b62b7ea0591503e40b8364
2011 data successfully retrieved!
-------------------
Retrieving 2012 data...
https://api.census.gov/data/2012/acs/acs5?get=NAME,B01003_001E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_008E

In [12]:
#Converting api results into yearly dataframes and consolidating into one large df
for year in years:
    year_index = years.index(year)
    data = results[year_index][1:]
    headers = results[year_index][0]
    
    if year_index == 0:
        first_year = pd.DataFrame(data, columns = headers)
        first_year['Year'] = year
        first_year = first_year[sorted(first_year.columns)]
        census_df = first_year
        
    else:
        headers = results[year_index][0] #setting headers again bc column order is in consistent across year (ie '09 vs '16)
        next_year = pd.DataFrame(data, columns = headers)
        next_year['Year'] = year
        next_year = next_year[sorted(next_year.columns)]
        census_df = pd.concat([census_df, next_year], ignore_index=True)
        
census_df.head()

Unnamed: 0,B01003_001E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_008E,B03001_003E,NAME,Year,state
0,683142,467650,25161,91939,31878,4269,50632,39661,Alaska,2009,2
1,4633360,3256941,1209938,22969,46655,2025,56490,130220,Alabama,2009,1
2,2838143,2228798,439355,19233,31120,2505,49997,153630,Arkansas,2009,5
3,6324865,4906936,227282,284265,153301,11045,164255,1881878,Arizona,2009,4
4,36308527,22258042,2249404,283031,4473292,132535,1272989,13102161,California,2009,6


In [13]:
#convert api results to interger
census_df = census_df.astype({"B01003_001E" : 'int32',
                              "B02001_002E" : 'int32',
                              "B02001_003E" : 'int32',
                              "B02001_004E" : 'int32',
                              "B02001_005E" : 'int32',
                              "B02001_006E" : 'int32',
                              "B02001_008E" : 'int32',
                              "B03001_003E" : 'int32'})

In [16]:
#rename variables
census_df = census_df.rename(columns={"B01003_001E" : "Total Population",
                                      "B02001_002E" : "White (pop)",
                                      "B02001_003E" : "Black (pop)",
                                      "B02001_004E" : "Indigenous (pop)",
                                      "B02001_005E" : "Asian (pop)",
                                      "B02001_006E" : "Pacfic Islander (pop)",
                                      "B02001_008E" : "Mixed Race (pop)",
                                      "B03001_003E" : "Hispanic (pop)",
                                      'NAME':'State'})

#reorganize columns, dropping state code
census_df = census_df[['State','Year',"Total Population",
                                      "White (pop)",
                                      "Black (pop)",
                                      "Indigenous (pop)",
                                      "Asian (pop)",
                                      "Pacfic Islander (pop)",
                                      "Mixed Race (pop)",
                                      "Hispanic (pop)"]]

In [20]:
#Calculate % of population
census_df["White (%)"] = round(census_df["White (pop)"]/census_df['Total Population'],3)*100
census_df["Black (%)"] = round(census_df["Black (pop)"]/census_df['Total Population'],3)*100
census_df["Indigenous (%)"] = round(census_df["Indigenous (pop)"]/census_df['Total Population'],3)*100
census_df["Asian (%)"] = round(census_df["Asian (pop)"]/census_df['Total Population'],3)*100
census_df["Pacfic Islander (%)"] = round(census_df["Pacfic Islander (pop)"]/census_df['Total Population'],3)*100
census_df["Mixed Race (%)"] = round(census_df["Mixed Race (pop)"]/census_df['Total Population'],3)*100
census_df["Hispanic (%)"] = round(census_df["Hispanic (pop)"]/census_df['Total Population'],3)*100

In [21]:
census_df

Unnamed: 0,State,Year,Total Population,White (pop),Black (pop),Indigenous (pop),Asian (pop),Pacfic Islander (pop),Mixed Race (pop),Hispanic (pop),White (%),Black (%),Indigenous (%),Asian (%),Pacfic Islander (%),Mixed Race (%),Hispanic (%)
0,Alaska,2009,683142,467650,25161,91939,31878,4269,50632,39661,68.5,3.7,13.5,4.7,0.6,7.4,5.8
1,Alabama,2009,4633360,3256941,1209938,22969,46655,2025,56490,130220,70.3,26.1,0.5,1.0,0.0,1.2,2.8
2,Arkansas,2009,2838143,2228798,439355,19233,31120,2505,49997,153630,78.5,15.5,0.7,1.1,0.1,1.8,5.4
3,Arizona,2009,6324865,4906936,227282,284265,153301,11045,164255,1881878,77.6,3.6,4.5,2.4,0.2,2.6,29.8
4,California,2009,36308527,22258042,2249404,283031,4473292,132535,1272989,13102161,61.3,6.2,0.8,12.3,0.4,3.5,36.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
567,Louisiana,2018,4663616,2901106,1502916,26272,79872,1468,91563,234920,62.2,32.2,0.6,1.7,0.0,2.0,5.0
568,Maine,2018,1332813,1259217,17881,8302,14917,318,29208,21421,94.5,1.3,0.6,1.1,0.0,2.2,1.6
569,Maryland,2018,6003435,3373181,1788090,15644,374277,3059,199369,588912,56.2,29.8,0.3,6.2,0.1,3.3,9.8
570,Massachusetts,2018,6830193,5360006,510558,14493,442574,2329,215493,789127,78.5,7.5,0.2,6.5,0.0,3.2,11.6


In [22]:
census_df.to_csv('../Resources/race_data.csv', index=False)