In [1]:
## Connect your google colab runtime to your Google Drive 
from google.colab import drive
drive.mount('/content/drive')
%cd "/content/drive/My Drive/Colab Notebooks/"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/Colab Notebooks


In [0]:
import requests
import json
from pandas import DataFrame, concat

In [0]:
# %%================================
# Constants
# ==================================

APIURL = 'https://api.chicagohealthatlas.org/api/v1/'

In [0]:
def get_dict_from_url(urlAppendStr):
  """get JSON response and convert to dict"""
  resp = requests.get(APIURL + urlAppendStr)
  return json.loads(resp.content)  

In [0]:
def parse_list_of_dicts_to_df(list_of_dicts, keep_keys, index_key=None):
  """parse list of dicts into easy to use dataframe"""
  
  df = DataFrame()
  for Dict in list_of_dicts:
    for k in keep_keys: 
      if index_key is None:
        idx = df.shape[0]
      else: 
        idx = Dict[index_key]
      df.loc[idx, k] = Dict[k]
  return df

In [0]:
# get list of places
places_dict = get_dict_from_url('places')

# parse community areas into a nice table
community_areas_df = parse_list_of_dicts_to_df(
    list_of_dicts=places_dict['community_areas'], 
    keep_keys=['geo_type', 'name', 'part'], 
    index_key='slug')

# parse zip codes into a nice table
zip_codes_df = parse_list_of_dicts_to_df(
    list_of_dicts=places_dict['zip_codes'], 
    keep_keys=['geo_type', 'id', 'name', 
               'adjacent_zips', 'adjacent_community_areas'], 
    index_key='slug')

In [7]:
community_areas_df.head()

Unnamed: 0,geo_type,name,part
albany-park,Community Area,Albany Park,Far North Side
archer-heights,Community Area,Archer Heights,Southwest side
armour-square,Community Area,Armour Square,South Side
ashburn,Community Area,Ashburn,Far Southwest side
auburn-gresham,Community Area,Auburn Gresham,Far Southwest side


In [8]:
zip_codes_df.head()

Unnamed: 0,geo_type,id,name,adjacent_zips,adjacent_community_areas
12311,Zip,12311.0,"60601, 60602, 60603, 60604, 60605 & 60611","[""60610"",""6761""]","[""32"",""33"",""8""]"
6761,Zip,6761.0,"60606, 60607 & 60661","[""60622"",""60608"",""60616"",""60612"",""12311"",""60610""]","[""24"",""28"",""31"",""32"",""33"",""8""]"
60608,Zip,60608.0,60608,"[""6761"",""60616"",""60609"",""60612"",""60632"",""60623""]","[""28"",""29"",""30"",""31"",""58"",""59"",""60""]"
60609,Zip,60609.0,60609,"[""60637"",""60615"",""60621"",""60636"",""60653"",""6061...","[""34"",""35"",""37"",""38"",""40"",""58"",""59"",""60"",""61"",..."
60610,Zip,60610.0,60610 & 60654,"[""60614"",""12311"",""6761"",""60622""]","[""24"",""32"",""7"",""8""]"


In [0]:
# get list of topics
topics_dict = get_dict_from_url('topics')

# parse topics into a nice table
topics_df = parse_list_of_dicts_to_df(
    list_of_dicts=topics_dict, keep_keys=['id', 'name'], index_key='slug')

In [10]:
topics_df.head()

Unnamed: 0,id,name
demography,7.0,Demography
clinical-care,4.0,Clinical Care
social-and-economic-factors,6.0,Social and Economic Factors
mortality,1.0,Mortality
morbidity,5.0,Morbidity


In [0]:
# parse topic sub-categories into a nice table

topic_subCategories_df = DataFrame()

for topicDict in topics_dict:
  # get details for subcategory
  df = parse_list_of_dicts_to_df(
    list_of_dicts=topicDict['sub_categories'], 
      keep_keys=['id', 'name'], index_key='slug')
  # add details about parent category
  for cn in ['slug',]:
    df.loc[:, 'topic-'+cn] = topicDict[cn]
    
  # concat with main
  topic_subCategories_df = concat((topic_subCategories_df, df), axis=0)

In [12]:
topic_subCategories_df.head()

Unnamed: 0,id,name,topic-slug
age-group,30.0,Age Group,demography
sex,18.0,Sex,demography
race-ethnicity,26.0,Race-ethnicity,demography
access-to-care,11.0,Access to Care,clinical-care
quality-of-care,5.0,Quality of Care,clinical-care


In [0]:
# parse indicators into a nice table

indicators_df = DataFrame()

for topicDict in topics_dict:
  for subcategoryDict in topicDict['sub_categories']:
    # get details for subcategory
    df = parse_list_of_dicts_to_df(
      list_of_dicts=subcategoryDict['indicators'], 
        keep_keys=['id', 'name', 'sub_category_id'], 
        index_key='slug')
    # add details about parent sub-category & category
    for cn in ['slug',]:
      df.loc[:, 'subcategory-'+cn] = subcategoryDict[cn]
      df.loc[:, 'topic-'+cn] = topicDict[cn]
    # concat with main
    indicators_df = concat((indicators_df, df), axis=0)  

In [14]:
indicators_df.head()

Unnamed: 0,id,name,sub_category_id,subcategory-slug,topic-slug
total-population,1234.0,Total population,30.0,age-group,demography
male-population,1235.0,Male population,18.0,sex,demography
female-population,1240.0,Female population,18.0,sex,demography
non-hispanic-african-american-or-black,1236.0,Non-Hispanic African American or Black,26.0,race-ethnicity,demography
non-hispanic-asian-or-pacific-islander,1237.0,Non-Hispanic Asian or Pacific Islander,26.0,race-ethnicity,demography


In [0]:
INDICATOR_LIST = [
    'total-population', 
    'non-hispanic-african-american-or-black'
]
# INDICATOR_LIST = []

GEO_LIST = [
    'albany-park',
    'ashburn'
]
# GEO_LIST = []

In [0]:
if len(INDICATOR_LIST) < 1:
  INDICATOR_LIST = list(indicators_df.index)

if len(GEO_LIST) < 1:
  GEO_LIST = list(community_areas_df.index)  

In [42]:
dict_of_dfs = dict()

for indicator_slug in INDICATOR_LIST:
  for geo_slug in GEO_LIST:
    
    print("Getting data for: %s: %s" % (indicator_slug, geo_slug))
    
    # get detailed data for indicator and community area
    data_dict = get_dict_from_url('topic_info/%s/%s' % (
        geo_slug, indicator_slug))
    
    for tDict in data_dict['area_data']:
      for kname in ['number', 'percent']:

        # Identify and/or init dataframe
        dfname = "from-%d-to-%d_%s" % (
            tDict['year_from'], tDict['year_to'], kname)
        if dfname not in dict_of_dfs.keys():
          dict_of_dfs[dfname] = DataFrame()

        # Now add data about this time period
        dict_of_dfs[dfname].loc[indicator_slug, geo_slug] = tDict[kname]

Getting data for: total-population: albany-park
Getting data for: total-population: ashburn
Getting data for: non-hispanic-african-american-or-black: albany-park
Getting data for: non-hispanic-african-american-or-black: ashburn


In [47]:
list(dict_of_dfs.keys())

['from-2012-to-2016_number',
 'from-2012-to-2016_percent',
 'from-2011-to-2015_number',
 'from-2011-to-2015_percent',
 'from-2006-to-2010_number',
 'from-2006-to-2010_percent']

In [48]:
dict_of_dfs['from-2012-to-2016_percent']

Unnamed: 0,albany-park,ashburn
total-population,,
non-hispanic-african-american-or-black,4.531265,47.873299


In [28]:
dict_of_dfs

[{'ave_annual_number': None,
  'category_group_name': 'Demography',
  'crude_rate': None,
  'cum_number': None,
  'demo_group': None,
  'id': 409459,
  'indicator': {'created_at': '2019-03-18T19:30:02.868Z',
   'id': 1234,
   'name': 'Total population',
   'partner': {'created_at': '2019-03-14T02:26:27.252Z',
    'description': None,
    'id': 1,
    'name': 'Chicago Department of Public Health',
    'slug': 'cdph',
    'updated_at': '2019-03-14T02:26:27.252Z'},
   'slug': 'total-population',
   'sub_category_id': 30,
   'updated_at': '2019-03-18T19:30:02.868Z'},
  'lower_95ci_crude_rate': None,
  'lower_95ci_percent': None,
  'lower_95ci_weight_percent': None,
  'map_key': None,
  'number': 51575,
  'percent': None,
  'sub_category_name': 'Age Group',
  'upper_95ci_crude_rate': None,
  'upper_95ci_percent': None,
  'upper_95ci_weight_percent': None,
  'weight_number': 51575,
  'weight_percent': None,
  'year_from': 2012,
  'year_to': 2016},
 {'ave_annual_number': None,
  'category_gro