# getacsdata
### First version:  June 18, 2023
### Latest update:  June 18, 2023
### Matthew Beattie
### University of Oklahoma

This notebook uses the US Census API to download data from the American Community Survey (ACS) dataset.  The features selected are intended for the study of homelessness trends.  The method works by calling the API for each year of interest in the study.  It downloads variables from the ACS and then normalizes them into per capita values.

In [4]:
# Initialize libraries
import requests
import pandas as pd
import numpy as np
import os

In [12]:
# Set working directory.
# The location defaults to the original hard-coded checkout path, but it can be
# overridden with the HOMELESSNESS_DATA_DIR environment variable so the
# notebook can run on another machine without editing this cell.
os.chdir(os.environ.get('HOMELESSNESS_DATA_DIR',
                        '/Users/mjbeattie/OneDrive/GitHub/homelessness_data'))
os.listdir()

['.DS_Store',
 'ACS Feature Dictionary.txt',
 'ACS Feature Dictionary.xlsx',
 'README.md',
 'getacsdata.ipynb',
 '.ipynb_checkpoints',
 'Political Landscape Study.xlsx',
 'acs_feature_dictionary.txt',
 '.git']

In [16]:
# Load the tab-separated ACS feature dictionary into a dataframe and display it
dictionary_path = 'acs_feature_dictionary.txt'
basedict = pd.read_csv(dictionary_path, sep='\t')
basedict

Unnamed: 0,Category,Feature,Alias,ACS Field Name,Per Capita Value
0,Population,Total,pop_tot,B01001_001E,pop_tot_pcp
1,Population,Male,pop_male,B01001_002E,pop_male_pcp
2,Population,Female,pop_female,B01001_003E,pop_female_pcp
3,Population,White Not Hisp,pop_white,B01001H_001E,pop_white_pcp
4,Population,Black Alone,pop_black,B01001B_001E,pop_black_pcp
5,Population,Amer Indian/AK,pop_aiak,B01001C_001E,pop_aiak_pcp
6,Population,Asian Alone,pop_asian,B01001D_001E,pop_asian_pcp
7,Population,Native HI/PI,pop_hipi,B01001E_001E,pop_hipi_pcp
8,Population,Other,pop_other,B01001F_001E,pop_other_pcp
9,Population,Two or more,pop_mult,B01001G_001E,pop_mult_pcp


In [15]:
# Look up the alias of a single ACS field in the feature dictionary
acsfield = 'B01001H_001E'
matches_field = basedict['ACS Field Name'] == acsfield
alias = basedict.loc[matches_field, 'Alias'].iloc[0]  # first matching row
alias

'pop_white'

In [38]:
# Distinct categories, in order of first appearance in the dictionary
categorylist = basedict['Category'].unique().tolist()

# Pair each category with a comma-separated string of its ACS field names
catfields = [
    (
        category,
        ','.join(
            basedict.loc[basedict['Category'] == category, 'ACS Field Name'].tolist()
        ),
    )
    for category in categorylist
]


[('Population',
  'B01001_001E,B01001_002E,B01001_003E,B01001H_001E,B01001B_001E,B01001C_001E,B01001D_001E,B01001E_001E,B01001F_001E,B01001G_001E,B01001I_001E,B01002_001E'),
 ('Mobility',
  'B07001_001E,B07001_017E,B07001_033E,B07001_049E,B07001_065E,B07001_081E'),
 ('Poverty Level',
  'B17002_001E,B17002_002E,B17002_003E,B17002_004E,B17002_005E,B17002_006E,B17002_007E,B17002_008E,B17002_009E'),
 ('Median Earnings', 'B18140_001E'),
 ('Assistance', 'B22001_001E,B22001_002E'),
 ('Employment', 'B23025_001E,B23025_003E'),
 ('Housing-Occupancy', 'B25002_001E,B25002_003E'),
 ('Tenure', 'B25003_001E,B25003_003E'),
 ('Vacancy', 'B25004_001E,B25004_002E,B25004_004E'),
 ('Household size', 'B25010_001E,B25010_002E,B25010_003E'),
 ('Number of rooms', 'B25021_001E,B25021_002E,B25021_003E'),
 ('Rent',
  'B25070_001E,B25070_002E,B25070_003E,B25070_004E,B25070_005E,B25070_006E,B25070_007E,B25070_008E,B25070_009E,B25070_010E,B25071_001E')]

In [79]:
# Iterate through catfields and call the ACS API once per category, then
# combine the per-category results column-wise into a single dataframe.
#
# Fixes relative to the earlier version of this cell:
#   * the accumulated dataframe is no longer overwritten at the top of every
#     iteration (previously only the last category survived, duplicated);
#   * the hard-coded session cookie header is gone;
#   * HTTP errors raise immediately instead of failing later in JSON parsing;
#   * the request has a timeout so a stalled connection cannot hang the kernel;
#   * frames are aligned on the state name rather than on row position.
acs_year = 2019  # survey year; drives both the endpoint URL and the 'year' column
baseurl = "https://api.census.gov/data/{}/acs/acs1?get=NAME,".format(acs_year)
geostr = "&for=state:17,18"

category_frames = []
for item in catfields:
    # Construct the API call; item[1] is the comma-separated field list
    url = baseurl + item[1] + geostr

    # Call the API and fail loudly on a non-2xx response
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # The JSON payload is a list of rows whose first row holds the column names
    data = response.json()
    dfnew = pd.DataFrame(data[1:], columns=data[0])

    # Drop the 'state' code column and index by NAME so that the per-category
    # frames line up by state even if the API returns rows in a different order
    category_frames.append(dfnew.drop(columns=['state']).set_index('NAME'))

if not category_frames:
    raise ValueError('catfields is empty; no ACS data was requested')

# Concatenate once, outside the loop, and restore NAME as a regular column
df = pd.concat(category_frames, axis=1).rename_axis('NAME').reset_index()

# Add year to dataframe and change NAME to state
df['year'] = acs_year
df = df.rename(columns={'NAME': 'state'})