In [11]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import gmaps
from census import Census 
from us import states

from config_pauline import g_keypa
# Census API Key
from config_pauline import census_keypa
c = Census(census_keypa, year=2018)
# print(census_keypa)


In [12]:
# Run Census Search to retrieve data on all zip codes 
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels

census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E", "B01002_002E",
                          "B01002_003E","B19301_001E"),
                           {'for': 'zip code tabulation area:*'})
                         
census_data
# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B01002_002E": "Median Male Age",
                                      "B01002_003E": "Median Female Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",                                      
                                      "NAME": "Name", "zip code tabulation area": "Zipcode"})
census_pd = census_pd[["Zipcode", "Population","Household Income", "Median Age", "Median Male Age","Median Female Age", "Per Capita Income"]]

# Visualize
print(len(census_pd))
census_pd.head()
                         



33120


Unnamed: 0,Zipcode,Population,Household Income,Median Age,Median Male Age,Median Female Age,Per Capita Income
0,601,17242.0,13092.0,40.5,39.5,41.3,6999.0
1,602,38442.0,16358.0,42.3,41.9,42.8,9277.0
2,603,48814.0,16603.0,41.1,39.1,42.9,11307.0
3,606,6437.0,12832.0,43.3,43.2,43.4,5943.0
4,610,27073.0,19309.0,42.1,40.4,44.2,10220.0


In [13]:
census_pd.dtypes

Zipcode               object
Population           float64
Household Income     float64
Median Age           float64
Median Male Age      float64
Median Female Age    float64
Per Capita Income    float64
dtype: object

In [14]:
# Save as a csv
# Note to avoid any issues later, use encoding="utf-8"
census_pd.to_csv("census_data_states.csv", encoding="utf-8", index=False)

In [19]:
# Import the us zip data. Use dtype="object" to ensure all data is read in accurately.
us_zips = pd.read_csv("Copy of uszips.csv",converters={'zip': lambda x: str(x)})

# Visualize
us_zips.head()

us_zips = us_zips.rename(columns={"zip":"Zipcode"})
us_zips

Unnamed: 0,Zipcode,lat,lng,city,state_id,state_name
0,601,18.18004,-66.75218,Adjuntas,PR,Puerto Rico
1,602,18.36073,-67.17517,Aguada,PR,Puerto Rico
2,603,18.45439,-67.12202,Aguadilla,PR,Puerto Rico
3,606,18.16724,-66.93828,Maricao,PR,Puerto Rico
4,610,18.29032,-67.12243,Anasco,PR,Puerto Rico
...,...,...,...,...,...,...
33092,99923,55.97796,-130.03671,Hyder,AK,Alaska
33093,99925,55.55767,-132.97627,Klawock,AK,Alaska
33094,99926,55.12617,-131.48928,Metlakatla,AK,Alaska
33095,99927,56.25100,-133.37571,Point Baker,AK,Alaska


In [20]:
# us_zips["Zipcode"]=us_zips["Zipcode"].astype(int)
us_zips.dtypes

Zipcode        object
lat           float64
lng           float64
city           object
state_id       object
state_name     object
dtype: object

In [21]:
# Merge the two data sets along zip code
census_data_complete = census_pd.merge(us_zips, how="left", left_on="Zipcode", right_on="Zipcode")


# Visualize
census_data_complete

Unnamed: 0,Zipcode,Population,Household Income,Median Age,Median Male Age,Median Female Age,Per Capita Income,lat,lng,city,state_id,state_name
0,00601,17242.0,13092.0,40.5,39.5,41.3,6999.0,,,,,
1,00602,38442.0,16358.0,42.3,41.9,42.8,9277.0,,,,,
2,00603,48814.0,16603.0,41.1,39.1,42.9,11307.0,,,,,
3,00606,6437.0,12832.0,43.3,43.2,43.4,5943.0,,,,,
4,00610,27073.0,19309.0,42.1,40.4,44.2,10220.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
33115,87515,363.0,,44.2,47.8,22.9,,36.50231,-106.37718,Canjilon,NM,New Mexico
33116,87518,9.0,,-666666666.0,-666666666.0,-666666666.0,,36.50733,-106.56265,Cebolla,NM,New Mexico
33117,87511,2896.0,,36.0,44.3,27.8,,36.10226,-105.92584,Alcalde,NM,New Mexico
33118,87578,245.0,,48.0,63.0,39.6,,36.03903,-105.76530,Truchas,NM,New Mexico


In [24]:
# clean up the data
census_data_complete.dropna()

Unnamed: 0,Zipcode,Population,Household Income,Median Age,Median Male Age,Median Female Age,Per Capita Income,lat,lng,city,state_id,state_name
2558,10001,22924.0,88526.0,36.7,40.1,34.3,84765.0,40.75061,-73.99716,New York,NY,New York
2559,10002,74993.0,35859.0,43.6,40.7,46.0,32694.0,40.71586,-73.98613,New York,NY,New York
2560,10003,54682.0,112131.0,32.3,33.3,30.7,92781.0,40.73180,-73.98911,New York,NY,New York
2561,10004,3028.0,157645.0,37.1,38.3,37.0,122165.0,40.69465,-74.02106,New York,NY,New York
2562,10005,8831.0,173333.0,30.1,30.4,29.9,106702.0,40.70616,-74.00907,New York,NY,New York
...,...,...,...,...,...,...,...,...,...,...,...,...
33079,99922,330.0,34028.0,39.5,39.4,39.8,18213.0,55.30211,-133.03248,Hydaburg,AK,Alaska
33081,99925,927.0,57375.0,43.6,42.0,45.1,25840.0,55.55767,-132.97627,Klawock,AK,Alaska
33082,99926,1635.0,53409.0,34.5,34.6,34.3,22453.0,55.12617,-131.48928,Metlakatla,AK,Alaska
33083,99927,38.0,-666666666.0,55.5,-666666666.0,-666666666.0,13658.0,56.25100,-133.37571,Point Baker,AK,Alaska


In [29]:
# Save as a csv
# Note to avoid any issues later, use encoding="utf-8"
census_data_complete.to_csv("census_data_complete.csv", encoding="utf-8", index=False)

In [30]:
#configure gmaps
gmaps.configure(api_key= g_keypa)

In [28]:
# # Create a map using uszips coordinates to set markers
# marker_locations = us_zips[['lat', 'lng']].astype(int)

# # # Create a marker_layer using the poverty list to fill the info box
# # fig = gmaps.figure()
# # markers = gmaps.marker_layer(marker_locations,
# #     info_box_content=[f"Poverty Rate: {rate}" for rate in poverty_rate])
# # fig.add_layer(markers)
# # fig