In [11]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census

# Census API Key
# The key is imported from a local `config` module (not part of this notebook)
# instead of being written into the notebook source.
from config import api_key
# Shared client used by the query cell below. It is constructed with year=2016;
# presumably that is the survey year its queries default to — confirm against
# the census-wrapper documentation.
c = Census(api_key, year=2016)

In [12]:
# Retrieve ACS 5-year estimates for every ZIP Code Tabulation Area (ZCTA).
# The survey year is the one the `c` client was created with (year=2016).
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels

# Census variable code -> readable column name. The keys double as the list of
# variables requested from the API, so the request and the rename cannot drift apart.
VARIABLE_LABELS = {
    "NAME": "Name",
    "B19013_001E": "Household Income",
    "B01003_001E": "Population",
    "B01002_001E": "Median Age",
    "B19301_001E": "Per Capita Income",
    "B17001_002E": "Poverty Count (total)",
    # NOTE(review): B25058 looks like *median* contract rent rather than an
    # average — TODO confirm. The label is kept because it is part of the
    # final column list that gets exported.
    "B25058_001E": "average rent",
    "B25077_001E": "Median Home Value",
    "B17001_017E": "Female Poverty",
    "B17001B_002E": "African poverty",
    "B17001E_002E": "Native Hawaiian/Pacific islander poverty",
    "B17001I_002E": "Hispanic origin - poverty",
    "B17012_003E": "Family maried - poverty",
    "B17012_009E": "Single father - poverty",
    "B17012_014E": "Single mother - poverty",
    "B17001G_002E": "Mixed race people - poverty",
    "B17001D_002E": "Asian - poverty",
    "B17001A_002E": "Caucasian - poverty",
}

# Derived rate column -> count column it is computed from.
# Every rate is 100 * count / total Population, i.e. a share of the *whole*
# ZCTA population, not of the sub-group's own population.
# NOTE(review): the B17012 "Single father/mother" counts appear to count
# families rather than persons — TODO confirm before interpreting those rates.
RATE_SOURCES = {
    "Poverty Rate": "Poverty Count (total)",
    "African poverty rate": "African poverty",
    "Native Hawaiian / Pacific Islander poverty rate": "Native Hawaiian/Pacific islander poverty",
    "Hispanic poverty rate": "Hispanic origin - poverty",
    "Mix race poverty rate": "Mixed race people - poverty",
    "Single father poverty rate": "Single father - poverty",
    "Single mother poverty rate": "Single mother - poverty",
    "Asian poverty rate": "Asian - poverty",
    "Caucasian poverty rate": "Caucasian - poverty",
}

# Columns (and their order) kept in the final table.
FINAL_COLUMNS = [
    "Zipcode", "Name", "Population", "Median Age", "Household Income",
    "Per Capita Income", "Poverty Count (total)", "Poverty Rate",
    "average rent", "Median Home Value",
    "African poverty rate", "Native Hawaiian / Pacific Islander poverty rate",
    "Hispanic poverty rate", "Mix race poverty rate",
    "Single father poverty rate", "Single mother poverty rate",
    "Asian poverty rate", "Caucasian poverty rate",
]

# One API call for all ZCTAs; the result is a list of row dicts.
census_data = c.acs5.get(tuple(VARIABLE_LABELS), {'for': 'zip code tabulation area:*'})

# Convert to a DataFrame and rename the variable codes to readable labels.
census_labeled = pd.DataFrame(census_data).rename(
    columns={**VARIABLE_LABELS, "zip code tabulation area": "Zipcode"}
)

# pd.to_numeric tolerates missing values (they become NaN) where .astype(int)
# would raise, but still raises on non-numeric garbage. A zero population
# yields NaN/inf for that row's rates rather than an exception.
population = pd.to_numeric(census_labeled["Population"])
poverty_rates = {
    rate_column: 100 * pd.to_numeric(census_labeled[count_column]) / population
    for rate_column, count_column in RATE_SOURCES.items()
}

# Final DataFrame: add the rate columns, then keep only the reporting columns.
census_pd = census_labeled.assign(**poverty_rates)[FINAL_COLUMNS]

# Visualize
print(len(census_pd))
census_pd.head()

33120


Unnamed: 0,Zipcode,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count (total),Poverty Rate,average rent,Median Home Value,African poverty rate,Native Hawaiian / Pacific Islander poverty rate,Hispanic poverty rate,Mix race poverty rate,Single father poverty rate,Single mother poverty rate,Asian poverty rate,Caucasian poverty rate
0,1001,ZCTA5 01001,17423.0,45.0,56714.0,30430.0,1462.0,8.391207,835.0,202800.0,0.086093,0.0,3.128049,0.143488,0.0,0.843712,0.918326,7.007978
1,1002,ZCTA5 01002,29970.0,23.2,48923.0,26072.0,8351.0,27.864531,1108.0,344000.0,2.809476,0.02002,1.881882,0.583917,0.053387,1.044378,2.599266,21.491491
2,1003,ZCTA5 01003,11296.0,19.9,2499.0,3829.0,54.0,0.478045,1150.0,-666666666.0,0.274433,0.0,0.0,0.0,0.0,0.0,0.008853,0.13279
3,1005,ZCTA5 01005,5228.0,44.1,70568.0,32169.0,230.0,4.399388,761.0,213700.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.399388
4,1007,ZCTA5 01007,14888.0,42.5,80502.0,36359.0,1410.0,9.470715,787.0,258000.0,0.235089,0.0,1.30978,0.0,0.194788,0.711983,0.980656,8.25497


In [14]:
# Keep only the ZCTAs whose code falls in the California ZIP range.
# The bounds are strings, so this is a lexicographic comparison; it matches
# numeric order only when Zipcode holds 5-character strings (assumed here).
ca_zip_low, ca_zip_high = str(90001), str(96162)
in_california = census_pd["Zipcode"].between(ca_zip_low, ca_zip_high)  # inclusive on both ends
census_pd = census_pd.loc[in_california]
print(len(census_pd))
census_pd.head()

1763


Unnamed: 0,Zipcode,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count (total),Poverty Rate,average rent,Median Home Value,African poverty rate,Native Hawaiian / Pacific Islander poverty rate,Hispanic poverty rate,Mix race poverty rate,Single father poverty rate,Single mother poverty rate,Asian poverty rate,Caucasian poverty rate
29879,90001,ZCTA5 90001,57942.0,28.0,34323.0,11294.0,18902.0,32.622277,915.0,272200.0,3.299852,0.0,29.018674,0.47979,1.040696,2.820061,0.063857,10.954058
29880,90002,ZCTA5 90002,51826.0,27.5,32520.0,11212.0,18302.0,35.314321,902.0,248200.0,7.679543,0.00193,26.243584,0.501679,0.700421,3.573496,0.279782,12.146413
29881,90003,ZCTA5 90003,70208.0,28.0,31878.0,10611.0,24556.0,34.976071,958.0,268500.0,10.59281,0.0,24.42884,0.331871,0.965702,3.419838,0.044155,6.732851
29882,90004,ZCTA5 90004,63095.0,35.5,43180.0,29194.0,12919.0,20.475473,1044.0,838000.0,1.225137,0.0,13.363975,0.310643,0.505587,1.672082,3.650052,8.178144
29883,90005,ZCTA5 90005,39338.0,33.9,31485.0,20265.0,11520.0,29.284661,943.0,672600.0,1.769282,0.0,20.04169,0.470283,0.716864,2.854746,6.111139,5.389191


In [15]:
# Replace Census "no usable value" sentinel codes with NaN, then drop every
# row that still has any missing value.
# Previously only -666666666 was replaced, so any other sentinel would have
# survived as if it were a real (hugely negative) value. None of the retained
# columns can legitimately be negative, so masking the whole set is safe.
# NOTE(review): code list taken from the Census Bureau's ACS annotation-value
# notes — confirm it is complete for the survey year in use.
MISSING_VALUE_CODES = [
    -666666666.0,
    -999999999.0,
    -888888888.0,
    -555555555.0,
    -333333333.0,
    -222222222.0,
]
census = census_pd.replace(MISSING_VALUE_CODES, np.nan)
clear_census = census.dropna(how="any")
print(len(clear_census))
clear_census.head()

1493


Unnamed: 0,Zipcode,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count (total),Poverty Rate,average rent,Median Home Value,African poverty rate,Native Hawaiian / Pacific Islander poverty rate,Hispanic poverty rate,Mix race poverty rate,Single father poverty rate,Single mother poverty rate,Asian poverty rate,Caucasian poverty rate
29879,90001,ZCTA5 90001,57942.0,28.0,34323.0,11294.0,18902.0,32.622277,915.0,272200.0,3.299852,0.0,29.018674,0.47979,1.040696,2.820061,0.063857,10.954058
29880,90002,ZCTA5 90002,51826.0,27.5,32520.0,11212.0,18302.0,35.314321,902.0,248200.0,7.679543,0.00193,26.243584,0.501679,0.700421,3.573496,0.279782,12.146413
29881,90003,ZCTA5 90003,70208.0,28.0,31878.0,10611.0,24556.0,34.976071,958.0,268500.0,10.59281,0.0,24.42884,0.331871,0.965702,3.419838,0.044155,6.732851
29882,90004,ZCTA5 90004,63095.0,35.5,43180.0,29194.0,12919.0,20.475473,1044.0,838000.0,1.225137,0.0,13.363975,0.310643,0.505587,1.672082,3.650052,8.178144
29883,90005,ZCTA5 90005,39338.0,33.9,31485.0,20265.0,11520.0,29.284661,943.0,672600.0,1.769282,0.0,20.04169,0.470283,0.716864,2.854746,6.111139,5.389191


In [16]:
# Write the cleaned table to disk without the DataFrame index.
# The encoding is pinned to UTF-8 to avoid encoding issues when the file is read later.
output_csv = "census_data_updatePD.csv"
clear_census.to_csv(output_csv, index=False, encoding="utf-8")