In [9]:
# Dependencies and Setup
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib import rcParams
import scipy.stats as sts
import os
from collections import Counter
import requests
import json
from census import Census
from uszipcode import SearchEngine

# API Key
from api_keys import census_key


## Using the Census API to get data for US zip codes

In [3]:
# Request census estimates for every zip code tabulation area through the
# `acs5` endpoint of the Census client (the client is configured with year=2019).
acs_fields = (
    "NAME", "B19013_001E", "B02001_002E", "B02001_003E",
    "B03001_003E", "B02001_005E", "B01003_001E", "B01002_001E",
    "B19301_001E", "B17001_002E", "B23025_005E", "B25077_001E", "B08136_003E",
    "B25035_001E", "B25088_002E", "B25064_001E", "B08301_001E",
    "B08301_010E", "B08301_003E", "B08136_007E", "B15003_017E",
    "B15003_022E", "B15003_002E", "B16001_002E", "B16001_003E",
)
# The wildcard asks for all zip code tabulation areas at once
zcta_geography = {'for': 'zip code tabulation area:*'}

c = Census(census_key, year=2019)
census_data = c.acs5.get(acs_fields, zcta_geography)

# Load the returned records into a DataFrame (one row per tabulation area)
census_df = pd.DataFrame(census_data)

# Replace the raw variable codes with readable column names.
# This only renames columns; it does not reorder or drop any of them,
# and codes that are not listed here keep their original name.
readable_names = {
    # Identification
    "NAME": "Name",
    "zip code tabulation area": "Zipcode",
    # Population and age
    "B01003_001E": "Population",
    "B02001_002E": "Population White",
    "B02001_003E": "Population Black",
    "B03001_003E": "Population Hispanic",
    "B02001_005E": "Population Asian",
    "B01002_001E": "Median Age",
    # Income, poverty and employment
    "B19013_001E": "Household Income",
    "B19301_001E": "Per Capita Income",
    "B17001_002E": "Poverty Count",
    "B23025_005E": "Unemployment Count",
    # Commuting
    "B08301_001E": "Total Transport",
    "B08301_010E": "Public Transport",
    "B08301_003E": "Personal Transport",
    "B08136_007E": "Commute Time Public",
    "B08136_003E": "Commute Time Car",
    # Education
    "B15003_017E": "High School Count",
    "B15003_022E": "College Count",
    "B15003_002E": "Uneducated Count",
}
census_df = census_df.rename(columns=readable_names)

# Derived percentage columns.
# Each entry maps a new rate column to the (numerator, denominator) count
# columns it is computed from:  rate = 100 * numerator / denominator.
# Entries are listed in the order the columns are added to census_df.
rate_definitions = {
    "Poverty Rate": ("Poverty Count", "Population"),
    "Unemployment Rate": ("Unemployment Count", "Population"),
    "High School Rate": ("High School Count", "Population"),
    "College Rate": ("College Count", "Population"),
    "Uneducated Rate": ("Uneducated Count", "Population"),
    # Transport shares are relative to "Total Transport", not to "Population"
    "Public Transport Rate": ("Public Transport", "Total Transport"),
    "Personal Transport Rate": ("Personal Transport", "Total Transport"),
    "White Population Rate": ("Population White", "Population"),
    "Black Population Rate": ("Population Black", "Population"),
    "Hispanic Population Rate": ("Population Hispanic", "Population"),
    "Asian Population Rate": ("Population Asian", "Population"),
}

# Cast every source column to int exactly once instead of once per rate.
# The casts live in a side dict so the columns stored in census_df keep
# their original values and dtype.
count_columns = {name for pair in rate_definitions.values() for name in pair}
counts_as_int = {name: census_df[name].astype(int) for name in count_columns}

for rate_column, (numerator, denominator) in rate_definitions.items():
    census_df[rate_column] = (
        100 * counts_as_int[numerator] / counts_as_int[denominator]
    )

# Trim the table to the columns used downstream: the zipcode, the raw
# measures that are reported as-is, and the derived rate columns.
final_columns = [
    "Zipcode",
    "Population",
    "Median Age",
    "Household Income",
    "Per Capita Income",
    "Poverty Rate",
    "Unemployment Rate",
    "Public Transport Rate",
    "Personal Transport Rate",
    "Commute Time Public",
    "Commute Time Car",
    "High School Rate",
    "College Rate",
    "Uneducated Rate",
    "White Population Rate",
    "Black Population Rate",
    "Hispanic Population Rate",
    "Asian Population Rate",
]
census_df = census_df[final_columns]


In [10]:
# Adding location data (city, county, state, coordinates) from the zipcode source

In [4]:
# Add City, County, Lat, Lng and State columns to census_df by looking up
# each row's zipcode with uszipcode's SearchEngine.

# New census_df column -> attribute read from the lookup result
zip_fields = {
    "City": "city",
    "County": "county",
    "Lat": "lat",
    "Lng": "lng",
    "State": "state",
}

# Build the search engine once; it does not depend on the row being
# processed, so constructing it inside the loop is wasted work.
search = SearchEngine()

# Collect values per column and assign whole columns afterwards, instead of
# writing one cell at a time with .loc inside an iterrows() loop.
looked_up = {column: [] for column in zip_fields}

for raw_zipcode in census_df["Zipcode"]:
    # int() validates/normalises the value as before; zfill(5) restores the
    # leading zeros that the int conversion strips (e.g. "01001" -> 1001).
    zipcode = str(int(raw_zipcode)).zfill(5)
    results = search.by_zipcode(zipcode)

    try:
        row_values = {
            column: getattr(results, attribute)
            for column, attribute in zip_fields.items()
        }
    except (AttributeError, KeyError, IndexError):
        # AttributeError covers a lookup that yields None (or an object
        # without one of the fields); previously that case was not caught
        # and aborted the whole loop.
        print(f"Missing field/result for {zipcode}... skipping.")
        # Same placeholder the columns were initialised with before
        row_values = dict.fromkeys(zip_fields, "")

    for column, value in row_values.items():
        looked_up[column].append(value)

# Columns are added in the same order as before: City, County, Lat, Lng, State
for column, values in looked_up.items():
    census_df[column] = values

In [5]:
# Write the census table, including the added location columns, to CSV
output_path = "data/census_data.csv"

# to_csv does not create missing parent directories, so make sure the
# target folder exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(output_path), exist_ok=True)

census_df.to_csv(output_path, encoding="utf-8", index=False)