In [38]:
import geopandas as gpd
import pandas as pd
import censusdata

In [39]:
# Pull 2015 ACS 5-year estimates from the Census API for every block group
# in Cook County (FIPS 031), Illinois (FIPS 17).
# Requested variables:
#   B02001_001E - Total Population
#   B02001_002E - Total White
#   B02001_003E - Black or African American
#   B19013_001E - Median Household Income
#   GEO_ID      - geographic identifier (used later to build the join key)
cook_block_groups = censusdata.censusgeo(
    [("state", "17"), ("county", "031"), ("block group", "*")]
)
acs_variables = ["B02001_001E", "B02001_002E", "B02001_003E", "B19013_001E", "GEO_ID"]
acs = censusdata.download("acs5", 2015, cook_block_groups, acs_variables)

# Load the Census boundary shapefile for Chicago
census_gdf = gpd.read_file("data/census_block_groups/chi_blockgroups.shp")

In [40]:
# Build a shared 12-character join key on both datasets:
#   - census_gdf["geoid10"] is a 15-digit id (e.g. 170314607002005); keep the first 12
#   - acs["GEO_ID"] looks like "1500000US170314607002"; keep the last 12
census_gdf["geo_12"] = census_gdf["geoid10"].map(lambda geoid: str(geoid)[:12])
acs["geo_12"] = acs["GEO_ID"].map(lambda geo_id: str(geo_id)[-12:])

# Dissolve the boundary geometries so there is one row per 12-character key
census_group = census_gdf.dissolve(by="geo_12").reset_index()

# Merge the ACS table downloaded above with the dissolved boundaries.
# NOTE: this previously merged `acs_example`, a name that is never defined in
# this notebook, so the cell failed on a fresh kernel; `acs` is the frame that
# carries the `geo_12` key created above.
acs_merged = acs.merge(census_group, on="geo_12", how="inner").rename(
    columns={
        "B02001_001E": "tot_pop",
        "B02001_002E": "tot_white",
        "B02001_003E": "tot_black",
        "B19013_001E": "med_income",
    }
)
acs_gdf = gpd.GeoDataFrame(acs_merged, crs="epsg:4326")
acs_gdf.head(3)

Unnamed: 0,tot_pop,tot_white,tot_black,med_income,GEO_ID,geo_12,geometry,blockce10,countyfp10,geoid10,name10,statefp10,tract_bloc,tractce10
0,808,449,211,37571.0,1500000US170314607002,170314607002,"POLYGON ((-87.55480 41.73369, -87.55522 41.733...",2005,31,170314607002005,Block 2005,17,4607002005,460700
1,472,212,218,22292.0,1500000US170314607003,170314607003,"POLYGON ((-87.55485 41.73551, -87.55526 41.735...",3000,31,170314607003000,Block 3000,17,4607003000,460700
2,1531,3,1514,45250.0,1500000US170314910004,170314910004,"POLYGON ((-87.62326 41.69805, -87.62325 41.697...",4014,31,170314910004014,Block 4014,17,4910004014,491000


In [41]:
# Generating and cleaning variables

# Sentinel the Census API returns in place of a median-income estimate that
# is not available
MISSING_INCOME = -666666666.0

# Racial composition as a percentage of total population
acs_gdf["perc_black"] = acs_gdf["tot_black"] / acs_gdf["tot_pop"] * 100
acs_gdf["perc_white"] = acs_gdf["tot_white"] / acs_gdf["tot_pop"] * 100

# Impute unavailable incomes with the median of the *valid* estimates.
# Two fixes over the previous version:
#   1. the result is assigned back to the column; the old in-place replace on
#      a column selection is not guaranteed to update the frame (it does not
#      under pandas Copy-on-Write);
#   2. the sentinel is masked out before taking the median, so the large
#      negative placeholder no longer drags the imputed value down.
valid_income = acs_gdf["med_income"].mask(acs_gdf["med_income"] == MISSING_INCOME)
acs_gdf["med_income"] = valid_income.fillna(valid_income.median())

# Keep only the analysis columns; copy so final_acs is independent of acs_gdf
final_acs = acs_gdf[["GEO_ID", "name10", "med_income", "perc_black", "perc_white",
                     "tot_pop", "tot_black", "tot_white", "geometry"]].copy()
final_acs.head(3)

Unnamed: 0,GEO_ID,name10,med_income,perc_black,perc_white,tot_pop,tot_black,tot_white,geometry
0,1500000US170314607002,Block 2005,37571.0,26.113861,55.569307,808,211,449,"POLYGON ((-87.55480 41.73369, -87.55522 41.733..."
1,1500000US170314607003,Block 3000,22292.0,46.186441,44.915254,472,218,212,"POLYGON ((-87.55485 41.73551, -87.55526 41.735..."
2,1500000US170314910004,Block 4014,45250.0,98.889615,0.19595,1531,1514,3,"POLYGON ((-87.62326 41.69805, -87.62325 41.697..."


In [42]:
# Sanity check: display the dimensions of the cleaned table as (rows, columns)
final_acs.shape

(2194, 9)

In [43]:
# Persist the cleaned ACS GeoDataFrame to disk as a shapefile
final_acs.to_file(filename="data/acs/final_acs.shp")