# getacsdata
### First version:  June 18, 2023
### Lastest update:  June 18, 2023
### Matthew Beattie
### University of Oklahoma

This notebook uses the US Census API to download data from the American Community Survey dataset.  The features selected are intended for study of homeless trends.  The method works by calling the API for each year of interest in the study.  It pulls down variables from ACS and then normalizes them into per capita values.

The variables included here have not changed from 2011-2021.  Note that there were no ACS data released for the year 2020 due to COVID.  Also, the API didn't work for 2009 or 2010 despite the variables being the same.

In [1]:
# Initialize libraries
import requests
import pandas as pd
import numpy as np
import os

In [2]:
# Set working directory
os.chdir('/Users/mjbeattie/OneDrive/GitHub/homelessness_data')
os.listdir()

['acsdatafeatures.txt',
 '.DS_Store',
 'state_abbreviations.txt',
 'ACS Feature Dictionary.xlsx',
 'README.md',
 'getacsdata.ipynb',
 '.ipynb_checkpoints',
 'Political Landscape Study.xlsx',
 'acs_feature_dictionary.txt',
 '.git']

In [3]:
# Read in baseline data dictionary
basedict = pd.read_csv('acs_feature_dictionary.txt',sep='\t')
basedict

Unnamed: 0,Category,Feature,Alias,ACS Field Name,Per Capita Value,Per Capita Divisor
0,Population,Total,pop_tot,B01001_001E,pop_tot_pcp_remove,pop_tot
1,Population,Male,pop_male,B01001_002E,pop_male_pcp,pop_tot
2,Population,Female,pop_female,B01001_026E,pop_female_pcp,pop_tot
3,Population,White Not Hisp,pop_white,B01001H_001E,pop_white_pcp,pop_tot
4,Population,Black Alone,pop_black,B01001B_001E,pop_black_pcp,pop_tot
5,Population,Amer Indian/AK,pop_aiak,B01001C_001E,pop_aiak_pcp,pop_tot
6,Population,Asian Alone,pop_asian,B01001D_001E,pop_asian_pcp,pop_tot
7,Population,Native HI/PI,pop_hipi,B01001E_001E,pop_hipi_pcp,pop_tot
8,Population,Other,pop_other,B01001F_001E,pop_other_pcp,pop_tot
9,Population,Two or more,pop_mult,B01001G_001E,pop_mult_pcp,pop_tot


In [4]:
# Save categories in a list to iterate through
categorylist = basedict['Category'].unique().tolist()
allfieldlist = basedict['ACS Field Name'].unique().tolist()
percaplist = basedict['Per Capita Value'].unique().tolist()
percaplist = [item for item in percaplist if "remove" not in item]

# Create tuples of category and field lists
catfields = []
for category in categorylist:
    fieldlist = basedict[basedict['Category']==category]['ACS Field Name'].tolist()
    fieldstr = ','.join(fieldlist)
    catfields.append((category,fieldstr))
    
# Create list of years for surveys to include
yearlist = [2011,2012,2013,2014,2015,2016,2017,2018,2019,2021]


In [5]:
# Iterate through the years of the survey
firstyear = True
for year in yearlist:
    print("Getting data for year", year)
    # Iterate through catfields and call ACS API
    firstcategory = True
    for item in catfields:
        # Construct the API call
        baseurl = "https://api.census.gov/data/" + str(year) + "/acs/acs1?get=NAME,"
        geostr = "&for=state:*"
        url = baseurl + item[1] + geostr

        payload = {}
        headers = {
            'Cookie': 'TS010383f0=01283c52a4bcbb7610e59eeaa15d8413e26e6fe3964ed3d6366923cf15872dc497c7b99977c06f32b40443b07f2820836b21943cf2'
        }
        
        # Call the API
        response = requests.request("GET", url, headers=headers, data=payload)

        # Save the response as json and create a pandas dataframe from it
        data = response.json()
        df = pd.DataFrame(data[1:], columns=data[0])
        
        if firstcategory:
            df.drop(['state'],axis=1,inplace=True)
            allcolsdf = df
            firstcategory = False
        else:
            df.drop(['NAME', 'state'],axis=1,inplace=True)
            allcolsdf = pd.concat([allcolsdf, df], axis=1)
            
    # Add year to dataframe and change NAME to state
    allcolsdf['year'] = year
    allcolsdf = allcolsdf.rename(columns={'NAME': 'state'})
    
    # Merge dataframe to prior years' dataframe
    if firstyear:
        allyearsdf = allcolsdf
        firstyear = False
    else:
        allyearsdf = pd.concat([allyearsdf, allcolsdf], axis=0)
        

Getting data for year 2011
Getting data for year 2012
Getting data for year 2013
Getting data for year 2014
Getting data for year 2015
Getting data for year 2016
Getting data for year 2017
Getting data for year 2018
Getting data for year 2019
Getting data for year 2021


In [6]:
# Rename the columns from the ACS field name to the alias for the study
for acsname in allfieldlist:
    newname = basedict.loc[basedict['ACS Field Name'] == acsname, 'Alias'].values[0]
    allyearsdf.rename(columns={acsname : newname}, inplace=True)


In [7]:
# Convert data types in df from object to float
dontconvert = ['state', 'year']
convertlist = [item for item in allyearsdf.columns.tolist() if item not in dontconvert]

for item in convertlist:
    allyearsdf[item] = allyearsdf[item].astype(float)


In [8]:
# Generate the per capita features

# Setup a list to iterate through
calclist = []
for percapfield in percaplist:
    alias = basedict.loc[basedict['Per Capita Value'] == percapfield, 'Alias'].values[0]
    if basedict.loc[basedict['Per Capita Value'] == percapfield, 'Per Capita Divisor'].isnull().values[0]:
        divisor = 1
    else:
        divisor = basedict.loc[basedict['Per Capita Value'] == percapfield, 'Per Capita Divisor'].values[0]
    calclist.append((alias, percapfield, divisor)) 
    
# Create the calculated fields
for item in calclist:
    if item[2] == 1:
        allyearsdf[item[1]] = allyearsdf[item[0]]
    else:
        allyearsdf[item[1]] = allyearsdf[item[0]]/allyearsdf[item[2]]


  allyearsdf[item[1]] = allyearsdf[item[0]]/allyearsdf[item[2]]
  allyearsdf[item[1]] = allyearsdf[item[0]]/allyearsdf[item[2]]
  allyearsdf[item[1]] = allyearsdf[item[0]]/allyearsdf[item[2]]
  allyearsdf[item[1]] = allyearsdf[item[0]]/allyearsdf[item[2]]
  allyearsdf[item[1]] = allyearsdf[item[0]]/allyearsdf[item[2]]
  allyearsdf[item[1]] = allyearsdf[item[0]]/allyearsdf[item[2]]
  allyearsdf[item[1]] = allyearsdf[item[0]]/allyearsdf[item[2]]
  allyearsdf[item[1]] = allyearsdf[item[0]]


In [9]:
# Defragment the dataframe
allyearsdf = allyearsdf.copy()

# Add in two character statecode
stateabbvdf = pd.read_csv('state_abbreviations.txt', sep='\t')
allyearsdf = pd.merge(allyearsdf, stateabbvdf, on='state')

# Save dataframe to a tab delimited file
allyearsdf.to_csv('acsdatafeatures.txt', sep='\t', index=False)