## Getting ACS Data

This notebook goes through the steps for getting data from the American Community Survey (ACS) and joining it with our data from the Eviction Lab.

Steps:
1. Read Eviction Lab data and drop the years and counties we don't need
2. Obtain data from ACS API

In [6]:
import pandas as pd
import requests

In [59]:
evic_df = pd.read_csv("../data/raw/block-groups.csv")
print(evic_df.shape)

# keep rows for Cook County between 2012 and 2016
evic_df = evic_df.loc[evic_df["parent-location"] == "Cook County, Illinois"]
evic_df = evic_df.loc[(evic_df["year"] >= 2012) & (evic_df["year"] <= 2016)]
print(evic_df.shape)

(164747, 27)
(19965, 27)


In [68]:
# check distribution of GEOID values to see how many unique GEOIDs there are and confirm that
# each GEOID (block group) has five rows (years 2012-2016)
evic_df["GEOID"].value_counts().describe()

count    3993.0
mean        5.0
std         0.0
min         5.0
25%         5.0
50%         5.0
75%         5.0
max         5.0
Name: GEOID, dtype: float64

In [16]:
# define components of URL for ACS API request
ACS_URL = "https://api.census.gov/data/2016/acs/acs5"
ACS_KEY = "bda4a7c6f7629b79fb63adf9b726149c173467bf" # TODO - make env variable

params = {
    "get": "NAME,group(B23025)",
    "for": "block group:*",
    "in": ["state:17", "county:031"],
    "key": ACS_KEY
}

# make the request
results = requests.get(ACS_URL, params=params)

In [17]:
results.url

'https://api.census.gov/data/2016/acs/acs5?for=block+group%3A%2A&in=state%3A17&in=county%3A031&key=bda4a7c6f7629b79fb63adf9b726149c173467bf&get=NAME%2Cgroup%28B23025%29'

In [70]:
# look at the headers and first row of the JSON response
json_res = results.json()
json_res[:2]

[['NAME',
  'B23025_001E',
  'B23025_001M',
  'B23025_002E',
  'B23025_002M',
  'B23025_003E',
  'B23025_003M',
  'B23025_004E',
  'B23025_004M',
  'B23025_005E',
  'B23025_005M',
  'B23025_006E',
  'B23025_006M',
  'B23025_007E',
  'B23025_007M',
  'B23025_001M',
  'B23025_001EA',
  'B23025_001MA',
  'B23025_002M',
  'B23025_002EA',
  'B23025_002MA',
  'B23025_003M',
  'B23025_003EA',
  'B23025_003MA',
  'B23025_004M',
  'B23025_004EA',
  'B23025_004MA',
  'B23025_005M',
  'B23025_005EA',
  'B23025_005MA',
  'B23025_006M',
  'B23025_006EA',
  'B23025_006MA',
  'B23025_007M',
  'B23025_007EA',
  'B23025_007MA',
  'state',
  'county',
  'tract',
  'block group'],
 ['Block Group 1, Census Tract 101, Cook County, Illinois',
  '555',
  '196',
  '394',
  '161',
  '394',
  '161',
  '350',
  '168',
  '44',
  '52',
  '0',
  '11',
  '161',
  '97',
  '196',
  None,
  None,
  '161',
  None,
  None,
  '161',
  None,
  None,
  '168',
  None,
  None,
  '52',
  None,
  None,
  '11',
  None,
  None,
 

In [74]:
# read_json did not work with results.json() so I'm passing it the URL constructed by requests
acs_df = pd.read_json(path_or_buf=results.url, orient="records")

# use the first row as the column names
acs_df.columns = acs_df.iloc[0]
acs_df = acs_df.reindex(acs_df.index.drop(0))

print(len(acs_df)) # confirm we have the same number of block groups here as in evic_df
acs_df.head()

3993


Unnamed: 0,NAME,B23025_001E,B23025_001M,B23025_002E,B23025_002M,B23025_003E,B23025_003M,B23025_004E,B23025_004M,B23025_005E,...,B23025_006M,B23025_006EA,B23025_006MA,B23025_007M,B23025_007EA,B23025_007MA,state,county,tract,block group
1,"Block Group 1, Census Tract 101, Cook County, ...",555,196,394,161,394,161,350,168,44,...,11,,,97,,,17,31,10100,1
2,"Block Group 2, Census Tract 101, Cook County, ...",1311,294,871,226,871,226,727,224,144,...,11,,,182,,,17,31,10100,2
3,"Block Group 3, Census Tract 101, Cook County, ...",1787,382,1492,334,1492,334,1410,340,82,...,11,,,165,,,17,31,10100,3
4,"Block Group 1, Census Tract 102.01, Cook Count...",1209,235,675,197,675,197,574,166,101,...,11,,,189,,,17,31,10201,1
5,"Block Group 2, Census Tract 102.01, Cook Count...",3620,552,2279,360,2279,360,2031,308,248,...,11,,,374,,,17,31,10201,2
