In [1]:
import pandas as pd
import shapely.wkt
from shapely import to_geojson
import json
from census_area import Census
import os

# Census variable codes are listed here: https://api.census.gov/data/2019/acs/acs1/variables.html
# You can request a Census API key here: https://api.census.gov/data/key_signup.html

# Read the key from the environment rather than committing it to the notebook.
# Set it before starting Jupyter, e.g. `export CENSUS_API_KEY=<your key>`.
# NOTE(review): a key was previously hardcoded on this line; treat it as exposed and rotate it.
CENSUS_KEY = os.environ.get('CENSUS_API_KEY')
if not CENSUS_KEY:
    raise RuntimeError(
        'Set the CENSUS_API_KEY environment variable to your Census API key '
        '(sign up at https://api.census.gov/data/key_signup.html).'
    )

# shared client used by request_with_ward_geometry
c = Census(CENSUS_KEY)

In [2]:
# These are the variables pulled from the census API, keyed by variable code,
# with a human-readable description as the value (only the keys are sent to the API).
# Variable codes can be found here: https://api.census.gov/data/2019/acs/acs1/variables.html
race_and_income_variables = {
    # population totals by race / ethnicity
    'B01001_001E': 'Total Population',
    'B01001A_001E': 'White',
    'B01001B_001E': 'Black or African American',
    'B01001C_001E': 'American Indian or Alaska Native',
    'B01001D_001E': 'Asian',
    'B01001E_001E': 'Native Hawaiian or Pacific Islander',
    'B01001F_001E': 'Other',
    'B01001G_001E': 'Multiracial',
    'B01001H_001E': 'White Not Hispanic or Latino',
    'B01001I_001E': 'Hispanic or Latino',
    # income brackets
    'B19101_002E': 'Less than $10,000',
    'B19101_003E': '10,000 to 14,999',
    'B19101_004E': '15,000 to 19,999',
    # upper bound is 24,999 so this bracket does not overlap the next one
    'B19101_005E': '20,000 to 24,999',
    'B19101_006E': '25,000 to 29,999',
    'B19101_007E': '30,000 to 34,999',
    'B19101_008E': '35,000 to 39,999',
    'B19101_009E': '40,000 to 44,999',
    'B19101_010E': '45,000 to 49,999',
    'B19101_011E': '50,000 to 59,999',
    'B19101_012E': '60,000 to 74,999',
    'B19101_013E': '75,000 to 99,999',
    'B19101_014E': '100,000 to 124,999',
    'B19101_015E': '125,000 to 149,999',
    'B19101_016E': '150,000 to 199,999',
    'B19101_017E': '200,000 +',
}

# The variables are split across two dictionaries so that each dictionary is sent as
# its own request (see get_tract_and_percent).
# NOTE(review): this comment originally cited a 25-variables-per-request API limit, but
# both dictionaries are larger than that; the Census Data API documents a limit of
# 50 variables per query -- confirm which limit actually applies here.

# Male and female estimates use the same age brackets, so the labels are generated from
# one shared list to keep the wording identical for both sexes.
_AGE_BRACKETS = (
    'under 5', '5 to 9', '10 to 14', '15 to 17', '18 to 19', '20', '21',
    '22 to 24', '25 to 29', '30 to 34', '35 to 39', '40 to 44', '45 to 49',
    '50 to 54', '55 to 59', '60 to 61', '62 to 64', '65 to 66', '67 to 69',
    '70 to 74', '75 to 79', '80 to 84', '85 and over',
)

# Male brackets occupy codes B01001_003E..B01001_025E and female brackets occupy
# B01001_027E..B01001_049E (codes 001, 002 and 026 are not requested here).
age_variables = {
    f'B01001_{first_code + offset:03d}E': f'{sex} {bracket}'
    for sex, first_code in (('Male', 3), ('Female', 27))
    for offset, bracket in enumerate(_AGE_BRACKETS)
}

def unpack_geo_json(geometry):
    """Convert a WKT geometry string into a GeoJSON string.

    The text is parsed into a shapely geometry with shapely.wkt.loads and then
    serialized with shapely's to_geojson, which is the form later handed to the
    census_area python package (after json.loads).

    geometry: WKT text describing the shape (e.g. the 'the_geom' column of the shapefile csv)

    returns: the GeoJSON serialization of that shape

    census_area documentation: https://census-area.readthedocs.io/en/latest/index.html
    shapely documentation: https://shapely.readthedocs.io/en/stable/manual.html
    """
    # parse first, then serialize; the input argument itself is left untouched
    parsed_shape = shapely.wkt.loads(geometry)
    return to_geojson(parsed_shape)

def request_with_ward_geometry(variable_dictionary, ward_geometry):
    '''Ask the census API (through the census_area package) for a set of variables
    for the tracts matched to one geometry.

    variable_dictionary: dict keyed by census variable code; only the keys are sent
    ward_geometry: GeoJSON string describing the geometry (it is parsed with json.loads)

    Relies on the module-level census_area client named "c": c = Census(CENSUS_KEY)

    returns: whatever c.acs5.geo_tract returns for those variables and that geometry
    '''

    # look up the tract-level ACS 5-year query on the shared client
    tract_query = c.acs5.geo_tract

    # the variable codes go in as a tuple and the geometry as a parsed GeoJSON mapping
    return tract_query(tuple(variable_dictionary), json.loads(ward_geometry))

def get_tract_and_percent(ward_geo, input_variables_list):
    '''Request every group of variables for one geometry and merge the responses per tract.

    One request is made per dictionary in input_variables_list (the variables are split
    across several dictionaries so that each request stays small), and the per-tract
    results of all requests are combined into a single dictionary.

    Using dictionaries keyed by variable code is just QOL to manage variables with
    descriptions because there are so many, e.g.

    age_variables = {'B01001_003E' : 'Male under 5',
                 'B01001_004E' : 'Male 5 to 9',
                 'B01001_005E' : 'Male 10 to 14'...}

    ward_geo: GeoJSON string for the geometry
    input_variables_list: list of variable dictionaries (variable code as key)

    returns: dict keyed by tract id, each value being
        {'out_variables': <merged variables dict for the tract>,
         '%_of_tract': <third element of the response item for that tract>}
        An empty input_variables_list yields an empty dict.
    '''

    out_dict = {}

    #make the API request for each variable dictionary in the list
    request_output_list = [
        request_with_ward_geometry(input_variables, ward_geo)
        for input_variables in input_variables_list
    ]

    #nothing was requested, so there is nothing to merge
    if not request_output_list:
        return out_dict

    first_output, *later_outputs = request_output_list

    #flatten out the first request response to the output dict
    #each response item is indexed as: [1] -> variables dict (has a 'tract' key), [2] -> tract share
    #presumably [2] is the share of the tract covered by the geometry -- TODO confirm against census_area
    for tract in first_output:
        out_dict[tract[1]['tract']] = {'out_variables': tract[1], '%_of_tract': tract[2]}

    #fold the remaining responses into the entries created above
    for later_output in later_outputs:
        for tract in later_output:
            #a tract missing from the first response gets a fresh entry instead of a KeyError
            entry = out_dict.setdefault(
                tract[1]['tract'],
                {'out_variables': {}, '%_of_tract': tract[2]},
            )
            #build a new merged dict so the response dictionaries themselves are never mutated
            entry['out_variables'] = entry['out_variables'] | tract[1]

    return out_dict

In [3]:
out_directory = 'demo_data_by_ward_2'

#combine input variable dicts into a list
input_variable_list = [race_and_income_variables, age_variables]

#load in the shapefile csv; 'the_geom' holds each shape as WKT text (it is parsed by unpack_geo_json)
data = pd.read_csv('data/ward_shapefile.csv')

#each key identifies an arbitrary geometry (ward, community area, etc.) and each value is
#that geometry converted to a geojson string
geojson_dict = {
    ward: unpack_geo_json(wkt_geometry)
    for ward, wkt_geometry in zip(data['WARD'], data['the_geom'])
}

#make sure the output directory exists so a fresh run does not fail on os.listdir / open
os.makedirs(out_directory, exist_ok=True)

#list the finished geometries once up front instead of re-reading the directory every iteration
already_processed = set(os.listdir(out_directory))

#i iterate geometry by geometry and store them in a directory to concat later
#the census API often times out or rejects requests for a variety of reasons, so every
#finished geometry is written to disk and a re-run of this cell resumes where it stopped
for key, ward_geojson in geojson_dict.items():
    #skip any geometry that already has an output file (and any key containing 'ipynb')
    if str(key) in already_processed or 'ipynb' in str(key):
        continue

    #process the geojson object with the input variables and save as a json in the directory;
    #the file is only opened after the request succeeded, so a failed request leaves no file
    demo_dict = get_tract_and_percent(ward_geojson, input_variable_list)
    with open(f'{out_directory}/{key}', 'w') as f:
        json.dump(demo_dict, f)
    already_processed.add(str(key))
    print(key)



12
16
15
20
49
23
29
14
3
4
2
35
21
24
13
48
31
47
38
33
30
34
28
40
44
25
50
22
41
18
17
6
5
43
8
42
7
39
46
32
1
19
9
36
37
27
10
11
26
45
