## Getting ACS Data

This notebook goes through the steps for getting data from the American Community Survey (ACS) and joining it with our data from the Eviction Lab.

Steps:
1. Read Eviction Lab data and drop the years and counties we don't need
2. Obtain data from ACS API
3. Clean up ACS data - give columns meaningful names, etc.
4. Merge eviction data and ACS data to get our full dataset

In [1]:
import pandas as pd
import requests

In [2]:
evic_df = pd.read_csv("../data/raw/block-groups.csv")
print(evic_df.shape)

# keep rows for Cook County between 2012 and 2016
evic_df = evic_df.loc[evic_df["parent-location"] == "Cook County, Illinois"]
evic_df = evic_df.loc[(evic_df["year"] >= 2012) & (evic_df["year"] <= 2016)]
print(evic_df.shape)

(164747, 27)
(19965, 27)


In [3]:
evic_df.columns

Index(['GEOID', 'year', 'name', 'parent-location', 'population',
       'poverty-rate', 'renter-occupied-households', 'pct-renter-occupied',
       'median-gross-rent', 'median-household-income', 'median-property-value',
       'rent-burden', 'pct-white', 'pct-af-am', 'pct-hispanic', 'pct-am-ind',
       'pct-asian', 'pct-nh-pi', 'pct-multiple', 'pct-other',
       'eviction-filings', 'evictions', 'eviction-rate',
       'eviction-filing-rate', 'low-flag', 'imputed', 'subbed'],
      dtype='object')

In [4]:
# check distribution of GEOID values to see how many unique GEOIDs there are and confirm that
# each GEOID (block group) has five rows (years 2012-2016)
evic_df["GEOID"].value_counts().describe()

count    3993.0
mean        5.0
std         0.0
min         5.0
25%         5.0
50%         5.0
75%         5.0
max         5.0
Name: GEOID, dtype: float64

In [5]:
evic_df.head()

Unnamed: 0,GEOID,year,name,parent-location,population,poverty-rate,renter-occupied-households,pct-renter-occupied,median-gross-rent,median-household-income,...,pct-nh-pi,pct-multiple,pct-other,eviction-filings,evictions,eviction-rate,eviction-filing-rate,low-flag,imputed,subbed
8376,170310101001,2012,101.1,"Cook County, Illinois",435.0,18.92,156.0,67.23,785.0,,...,0.0,3.22,0.0,18.0,7.0,4.5,11.56,0,0,0
8377,170310101001,2013,101.1,"Cook County, Illinois",435.0,18.92,159.0,67.23,785.0,,...,0.0,3.22,0.0,14.0,4.0,2.52,8.83,0,0,0
8378,170310101001,2014,101.1,"Cook County, Illinois",435.0,18.92,161.0,67.23,785.0,,...,0.0,3.22,0.0,19.0,4.0,2.48,11.78,0,0,0
8379,170310101001,2015,101.1,"Cook County, Illinois",435.0,18.92,164.0,67.23,785.0,,...,0.0,3.22,0.0,13.0,3.0,1.83,7.92,0,0,0
8380,170310101001,2016,101.1,"Cook County, Illinois",435.0,18.92,167.0,67.23,785.0,,...,0.0,3.22,0.0,12.0,2.0,1.2,7.19,0,0,0


In [6]:
# define components of URL for ACS API request
ACS_URL = "https://api.census.gov/data/2016/acs/acs5"
ACS_KEY = "bda4a7c6f7629b79fb63adf9b726149c173467bf" # TODO - make env variable

acs_vars = {
    "B23025_002E": "estimate_total_in_labor_force",
    "B23025_005E": "estimate_civilian_unemployed",
    "B19057_001E": "total_for_public_assistance_income",
    "B19057_002E": "with_public_assistance_income",
    "B25038_001E": "total_for_householder_tenure",
    "B25038_009E": "renter_occupied",
    "B25038_010E": "renter_moved_2015_later",
    "B25038_011E": "renter_moved_2010_2014",
    "B25038_012E": "renter_moved_2000_2009",
    "B25038_013E": "renter_moved_1990_1999",
    "B25038_014E": "renter_moved_1980_1989",
    "B25038_015E": "renter_moved_1979_earlier"
}

vars_list = [code for code, var in acs_vars.items()]
get_str = ",".join(sorted(vars_list))

params = {
    "get": get_str,
    "for": "block group:*",
    "in": ["state:17", "county:031"],
    "key": ACS_KEY
}

years = [2012, 2013, 2014, 2015, 2016]

In [7]:
# make the request
results = requests.get(ACS_URL, params=params)

In [8]:
# look at the headers and first row of the JSON response
json_res = results.json()
json_res[:2]

[['B19057_001E',
  'B19057_002E',
  'B23025_002E',
  'B23025_005E',
  'B25038_001E',
  'B25038_009E',
  'B25038_010E',
  'B25038_011E',
  'B25038_012E',
  'B25038_013E',
  'B25038_014E',
  'B25038_015E',
  'state',
  'county',
  'tract',
  'block group'],
 ['312',
  '24',
  '394',
  '44',
  '312',
  '213',
  '0',
  '100',
  '113',
  '0',
  '0',
  '0',
  '17',
  '031',
  '010100',
  '1']]

In [9]:
# read_json did not work with results.json() so I'm passing it the URL constructed by requests
acs_df = pd.read_json(path_or_buf=results.url, orient="records")

# use the first row as the column names
acs_df.columns = acs_df.iloc[0]
acs_df = acs_df.reindex(acs_df.index.drop(0))

print(len(acs_df)) # confirm we have the same number of block groups here as in evic_df

3993


In [10]:
# Concatenate "state", "county", "tract", and "block group" to form a GEOID column
# in the ACS dataframe for joining with the evictions dataframe
acs_df["GEOID"] = acs_df["state"] + acs_df["county"] + acs_df["tract"] + acs_df["block group"]
acs_df["GEOID"] = acs_df["GEOID"].astype(int)
acs_df.head()

Unnamed: 0,B19057_001E,B19057_002E,B23025_002E,B23025_005E,B25038_001E,B25038_009E,B25038_010E,B25038_011E,B25038_012E,B25038_013E,B25038_014E,B25038_015E,state,county,tract,block group,GEOID
1,312,24,394,44,312,213,0,100,113,0,0,0,17,31,10100,1,170310101001
2,833,75,871,144,833,794,60,421,262,38,0,13,17,31,10100,2,170310101002
3,1061,14,1492,82,1061,883,34,594,204,51,0,0,17,31,10100,3,170310101003
4,619,0,675,101,619,396,36,221,117,22,0,0,17,31,10201,1,170310102011
5,1673,157,2279,248,1673,1278,118,738,323,68,31,0,17,31,10201,2,170310102012


In [11]:
# Rename columns to give them a more useful name than the ACS variable code
acs_df.rename(columns=acs_vars, inplace=True)
acs_df.head()

Unnamed: 0,total_for_public_assistance_income,with_public_assistance_income,estimate_total_in_labor_force,estimate_civilian_unemployed,total_for_householder_tenure,renter_occupied,renter_moved_2015_later,renter_moved_2010_2014,renter_moved_2000_2009,renter_moved_1990_1999,renter_moved_1980_1989,renter_moved_1979_earlier,state,county,tract,block group,GEOID
1,312,24,394,44,312,213,0,100,113,0,0,0,17,31,10100,1,170310101001
2,833,75,871,144,833,794,60,421,262,38,0,13,17,31,10100,2,170310101002
3,1061,14,1492,82,1061,883,34,594,204,51,0,0,17,31,10100,3,170310101003
4,619,0,675,101,619,396,36,221,117,22,0,0,17,31,10201,1,170310102011
5,1673,157,2279,248,1673,1278,118,738,323,68,31,0,17,31,10201,2,170310102012


In [12]:
# Merge the ACS and evictions dataframes
merged_df = evic_df.merge(acs_df)

# Make sure the merged dataframe has the same number of rows as the evictions dataframe
print(len(merged_df))
merged_df.head()

19965


Unnamed: 0,GEOID,year,name,parent-location,population,poverty-rate,renter-occupied-households,pct-renter-occupied,median-gross-rent,median-household-income,...,renter_moved_2015_later,renter_moved_2010_2014,renter_moved_2000_2009,renter_moved_1990_1999,renter_moved_1980_1989,renter_moved_1979_earlier,state,county,tract,block group
0,170310101001,2012,101.1,"Cook County, Illinois",435.0,18.92,156.0,67.23,785.0,,...,0,100,113,0,0,0,17,31,10100,1
1,170310101001,2013,101.1,"Cook County, Illinois",435.0,18.92,159.0,67.23,785.0,,...,0,100,113,0,0,0,17,31,10100,1
2,170310101001,2014,101.1,"Cook County, Illinois",435.0,18.92,161.0,67.23,785.0,,...,0,100,113,0,0,0,17,31,10100,1
3,170310101001,2015,101.1,"Cook County, Illinois",435.0,18.92,164.0,67.23,785.0,,...,0,100,113,0,0,0,17,31,10100,1
4,170310101001,2016,101.1,"Cook County, Illinois",435.0,18.92,167.0,67.23,785.0,,...,0,100,113,0,0,0,17,31,10100,1
