In [143]:
from census import Census
from config import (census_api_key)
import pandas as pd
import requests
import json
import os
import gmaps
import csv

In [296]:
# Fetch 2017 ACS 5-year estimates from the US Census API, one row per place.

# Variable codes to request. The order is kept exactly as in the original
# request string so the order of the returned columns does not change.
census_variables = [
    "B01003_001E",
    "B01001_020E", "B01001_021E", "B01001_022E", "B01001_023E", "B01001_024E", "B01001_025E",
    "B01001_044E", "B01001_045E", "B01001_046E", "B01001_047E", "B01001_048E", "B01001_049E",
    "B02001_002E", "B02001_003E", "B02001_004E", "B02001_005E", "B02001_006E", "B02001_007E",
    "B01001I_001E",
    "B09020_001E",
    "B15003_002E", "B15003_017E", "B15003_022E", "B15003_023E", "B15003_025E",
    "B17001_002E",
    "B27001_002E", "B27001_030E",
    "NAME",
]

url = (
    "https://api.census.gov/data/2017/acs/acs5?get="
    + ",".join(census_variables)
    + "&for=place:*&key="
    + census_api_key
)

# Make the API call. The timeout keeps a stalled connection from hanging the
# kernel, and raise_for_status() turns an HTTP error into a clear exception
# instead of a confusing JSON decoding failure on the next line.
response = requests.get(url, timeout=120)
response.raise_for_status()
response_json = response.json()

# The payload is a list of rows whose first row holds the column names.
# Build the frame directly from header + records rather than loading the
# header as data and slicing it off: slicing leaves a view-like frame that
# raises SettingWithCopyWarning when columns are assigned later.
header, *records = response_json

# Row labels start at 1, matching the labels the frame had when the header
# row (label 0) was dropped after loading.
census_df = pd.DataFrame(
    records,
    columns=header,
    index=pd.RangeIndex(1, len(records) + 1),
)

census_df.head()


Unnamed: 0,B01003_001E,B01001_020E,B01001_021E,B01001_022E,B01001_023E,B01001_024E,B01001_025E,B01001_044E,B01001_045E,B01001_046E,...,B15003_017E,B15003_022E,B15003_023E,B15003_025E,B17001_002E,B27001_002E,B27001_030E,NAME,state,place
1,174,0,0,0,0,0,0,0,0,0,...,0,0,0,0,45,69,105,"Abanda CDP, Alabama",1,100
2,2594,51,40,35,60,40,29,8,27,109,...,567,112,112,4,500,1248,1164,"Abbeville city, Alabama",1,124
3,4404,49,67,84,46,67,0,25,65,201,...,755,283,103,38,701,2063,2326,"Adamsville city, Alabama",1,460
4,725,15,12,38,13,5,0,24,14,29,...,171,59,38,0,246,385,340,"Addison town, Alabama",1,484
5,318,0,5,2,0,1,6,2,2,14,...,52,5,1,0,155,139,179,"Akron town, Alabama",1,676


In [297]:
# Dictionary to change dataframe column names from census code to meaningful names
column_names = {"B01003_001E":"total_pop", "B01001_020E":"male_65_66", "B01001_021E":"male_67_69",
                "B01001_022E":"male_70_74", "B01001_023E":"male_75_79", "B01001_024E":"male_80_84",
                "B01001_025E":"male_over_85", "B01001_044E":"female_65_66", "B01001_045E":"female_67_69",
                "B01001_046E":"female_70_74", "B01001_047E":"female_75_79", "B01001_048E":"female_80_84",
                "B01001_049E":"female_over_85", "B02001_002E":"white_pop", "B02001_003E":"black_pop",
                "B02001_004E":"native_amer_pop", "B02001_005E":"asian_pop", "B02001_006E":"pac_island_pop",
                "B02001_007E":"other_race_pop", "B01001I_001E":"hispanic_pop", "B15003_002E":"no_high_school",
                "B15003_017E":"high_school_grad", "B15003_022E":"bachelor_deg", "B15003_023E":"master_deg", 
                "B15003_025E":"doctorate_deg", "B17001_002E":"below_poverty", "B27001_002E":"male_w_health_ins",
                "B27001_030E":"female_w_health_ins", "place":"city_FIPS"}

# The raw "place" column only exists before the rename below. Remember that so
# re-running this cell does not prefix the state code onto city_FIPS a second time.
fips_not_yet_combined = "place" in census_df.columns

# rename() returns a new frame; rebinding avoids in-place mutation of a frame
# that may itself be a slice of the raw API result.
census_df = census_df.rename(columns=column_names)

# Convert every column except the text identifiers to numeric.
# Selecting by name (not by position) keeps this correct if the list of
# requested census fields changes.
identifier_columns = ["NAME", "state", "city_FIPS"]
numeric_columns = [col for col in census_df.columns if col not in identifier_columns]
census_df[numeric_columns] = census_df[numeric_columns].apply(pd.to_numeric)

# Combine health insurance data for male and female into one column
# NOTE(review): in the sample output above these two fields add up to total_pop
# for the first row (69 + 105 = 174), so B27001_002E / B27001_030E may be the
# male/female totals of the table rather than insured counts - confirm against
# the ACS variable definitions.
census_df["with_health_ins"] = census_df["male_w_health_ins"] + census_df["female_w_health_ins"]

# Combine state and city FIPS codes into one column to match the format of the health data set.
# Both columns are still strings here, so "+" concatenates the codes.
if fips_not_yet_combined:
    census_df["city_FIPS"] = census_df["state"] + census_df["city_FIPS"]

# Combine all age range columns into a single column: population aged 65 and over
over_65_columns = ["male_65_66", "male_67_69", "male_70_74", "male_75_79", "male_80_84", "male_over_85",
                   "female_65_66", "female_67_69", "female_70_74", "female_75_79", "female_80_84",
                   "female_over_85"]
census_df["pop_over_65"] = census_df[over_65_columns].sum(axis=1)

# Combine the bachelor's, master's and doctorate counts into a single column of people with a degree
degree_columns = ["bachelor_deg", "master_deg", "doctorate_deg"]
census_df["with_degree"] = census_df[degree_columns].sum(axis=1)

# Select desired columns for final data set.
# .copy() makes this an independent frame, so the assignment below does not
# write into a slice of census_df (SettingWithCopyWarning).
census_demographics_df = census_df[["total_pop", "with_health_ins", "pop_over_65", "white_pop", "black_pop", "native_amer_pop",
                                    "asian_pop", "pac_island_pop", "other_race_pop", "hispanic_pop", "no_high_school",
                                    "high_school_grad", "with_degree", "below_poverty", "city_FIPS"]].copy()

# Divide every population count by the total population to convert it to a
# share of the population (a fraction between 0 and 1, not multiplied by 100).
# Places with a total population of 0 end up with NaN/inf in these columns.
share_columns = [col for col in census_demographics_df.columns if col not in ("total_pop", "city_FIPS")]
census_demographics_df[share_columns] = census_demographics_df[share_columns].div(
    census_demographics_df["total_pop"], axis=0
)
census_demographics_df.head()