In [1]:
# Dependencies
import pandas as pd
import json
import requests


In [2]:
# City of Chicago open-data endpoint. The records carry public-health and
# socioeconomic indicators (including per-capita income), apparently one
# record per community area -- TODO confirm against the dataset page.
base_url = "https://data.cityofchicago.org/resource/iqnk-2tcu.json"

# Fetch the records. The timeout keeps a stalled connection from hanging the
# kernel, and raise_for_status() surfaces HTTP errors here instead of letting
# an error payload be parsed as if it were data.
response = requests.get(base_url, timeout=30)
response.raise_for_status()
health_data = response.json()

In [3]:
# Sanity check: display the first record returned by the API to confirm the
# request succeeded and to see which fields are available.
health_data[0]

{'assault_homicide': '7.7',
 'below_poverty_level': '22.7',
 'birth_rate': '16.4',
 'breast_cancer_in_females': '23.3',
 'cancer_all_sites': '176.9',
 'childhood_blood_lead_level_screening': '364.7',
 'childhood_lead_poisoning': '0.5',
 'colorectal_cancer': '25.3',
 'community_area': '1',
 'community_area_name': 'Rogers Park',
 'crowded_housing': '7.9',
 'dependency': '28.8',
 'diabetes_related': '77.1',
 'firearm_related': '5.2',
 'general_fertility_rate': '62',
 'gonorrhea_in_females': '322.5',
 'gonorrhea_in_males': '423.3',
 'infant_mortality_rate': '6.4',
 'low_birth_weight': '11',
 'lung_cancer': '36.7',
 'no_high_school_diploma': '18.1',
 'per_capita_income': '23714',
 'prenatal_care_beginning_in_first_trimester': '73',
 'preterm_births': '11.2',
 'prostate_cancer_in_males': '21.7',
 'stroke_cerebrovascular_disease': '33.7',
 'teen_birth_rate': '40.8',
 'tuberculosis': '11.4',
 'unemployment': '7.5'}

In [4]:
# Build a DataFrame from the list of JSON records: each record becomes a row
# and each key becomes a column.
income_sum_df = pd.DataFrame(health_data)


In [5]:
# Preview the first rows of the income summary DataFrame
income_sum_df.head()

Unnamed: 0,assault_homicide,below_poverty_level,birth_rate,breast_cancer_in_females,cancer_all_sites,childhood_blood_lead_level_screening,childhood_lead_poisoning,colorectal_cancer,community_area,community_area_name,...,lung_cancer,no_high_school_diploma,per_capita_income,prenatal_care_beginning_in_first_trimester,preterm_births,prostate_cancer_in_males,stroke_cerebrovascular_disease,teen_birth_rate,tuberculosis,unemployment
0,7.7,22.7,16.4,23.3,176.9,364.7,0.5,25.3,1,Rogers Park,...,36.7,18.1,23714,73.0,11.2,21.7,33.7,40.8,11.4,7.5
1,5.8,15.1,17.3,20.2,155.9,331.4,1.0,17.3,2,West Ridge,...,36.0,19.6,21375,71.1,8.3,14.2,34.7,29.9,8.9,7.9
2,5.4,22.7,13.1,21.3,183.3,353.7,0.5,20.5,3,Uptown,...,50.5,13.6,32355,77.7,10.3,25.2,41.7,35.1,13.6,7.7
3,5.0,9.5,17.1,21.7,153.2,273.3,0.4,8.6,4,Lincoln Square,...,43.1,12.5,35503,80.5,9.7,27.6,36.9,38.4,8.5,6.8
4,1.0,7.1,22.4,16.6,152.1,178.1,0.9,26.1,5,North Center,...,42.4,5.4,51615,80.4,9.8,15.1,41.6,8.4,1.9,4.5


In [6]:
# Give the two columns used later for income levels their report-style
# labels. rename() returns a new frame, so rebinding the name avoids mutating
# the DataFrame in place and keeps this cell safe to re-run.
income_sum_df = income_sum_df.rename(
    columns={
        "community_area_name": "COMMUNITY AREA NAME",
        "per_capita_income": "PER CAPITA INCOME",
    }
)

# Check the renamed columns on a bounded preview instead of dumping every row
income_sum_df.head(10)

Unnamed: 0,assault_homicide,below_poverty_level,birth_rate,breast_cancer_in_females,cancer_all_sites,childhood_blood_lead_level_screening,childhood_lead_poisoning,colorectal_cancer,community_area,COMMUNITY AREA NAME,...,lung_cancer,no_high_school_diploma,PER CAPITA INCOME,prenatal_care_beginning_in_first_trimester,preterm_births,prostate_cancer_in_males,stroke_cerebrovascular_disease,teen_birth_rate,tuberculosis,unemployment
0,7.7,22.7,16.4,23.3,176.9,364.7,0.5,25.3,1,Rogers Park,...,36.7,18.1,23714,73,11.2,21.7,33.7,40.8,11.4,7.5
1,5.8,15.1,17.3,20.2,155.9,331.4,1,17.3,2,West Ridge,...,36,19.6,21375,71.1,8.3,14.2,34.7,29.9,8.9,7.9
2,5.4,22.7,13.1,21.3,183.3,353.7,0.5,20.5,3,Uptown,...,50.5,13.6,32355,77.7,10.3,25.2,41.7,35.1,13.6,7.7
3,5,9.5,17.1,21.7,153.2,273.3,0.4,8.6,4,Lincoln Square,...,43.1,12.5,35503,80.5,9.7,27.6,36.9,38.4,8.5,6.8
4,1,7.1,22.4,16.6,152.1,178.1,0.9,26.1,5,North Center,...,42.4,5.4,51615,80.4,9.8,15.1,41.6,8.4,1.9,4.5
5,1.4,10.5,13.5,20.1,126.9,179.2,0.4,13,6,Lake View,...,32.5,2.9,58227,79.1,8.1,17,24.4,15.8,3.2,4.7
6,0.7,11.8,13.2,23.7,152.9,173.3,0.6,16.7,7,Lincoln Park,...,40,4.3,71403,75.7,7.8,27.3,35.3,2.1,1.2,4.5
7,3.7,13.4,10.7,24,142.7,311.2,0.1,15.1,8,Near North Side,...,33.6,3.4,87163,69.7,9.6,15.1,22,34,5.5,5.2
8,0,5.1,11.3,13.8,189.7,134.7,0,15.1,9,Edison Park,...,45.2,8.5,38337,86.6,12.6,28,38.9,3.9,1.8,7.4
9,4.7,5.9,10.4,20.7,180.8,163.1,0,18.9,10,Norwood Park,...,44.5,13.5,31659,89.4,8.3,26.4,45.2,3.4,1.6,7.3


In [7]:
# Keep only the community name and per-capita income columns
income_columns = ["COMMUNITY AREA NAME", "PER CAPITA INCOME"]
communities_incomes_df = income_sum_df[income_columns]

# Check the pared-down communities_incomes_df
communities_incomes_df.head()

Unnamed: 0,COMMUNITY AREA NAME,PER CAPITA INCOME
0,Rogers Park,23714
1,West Ridge,21375
2,Uptown,32355
3,Lincoln Square,35503
4,North Center,51615


In [8]:
# Write the community area / per-capita income pairs to their own CSV file.
# The DataFrame index is written as well (pandas' default), so the file's
# first column is the unnamed row index.
output_csv = "community_and_per_capita_income.csv"
communities_incomes_df.to_csv(output_csv, index=True)