In [1]:
from census import Census
from config import (census_key, google_key)
import gmaps
import numpy as np
import pandas as pd
import requests
import time
from us import states
# Census API client pinned to the 2017 data year; `c` is reused by the
# data-download cell that follows.
c = Census(
    census_key,
    year=2017,
)

In [2]:
# Download ACS 5-year estimates for every ZIP Code Tabulation Area (ZCTA)
# using the client `c` created above. The variable codes requested here are
# given readable column names in the rename below.
census_data = c.acs5.get(
    ("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
     "B19301_001E", "B17001_002E"),
    {'for': 'zip code tabulation area:*'})

# One row per ZCTA, with the census variable codes replaced by readable names.
census_pd = pd.DataFrame(census_data).rename(columns={
    "B01003_001E": "Population",
    "B01002_001E": "Median Age",
    "B19013_001E": "Household Income",
    "B19301_001E": "Per Capita Income",
    "B17001_002E": "Poverty Count",
    "NAME": "Name",
    "zip code tabulation area": "Zipcode",
})

# Poverty rate as a percentage of total population.
# pd.to_numeric(errors="coerce") turns missing or non-numeric cells into NaN
# instead of raising the way astype(int) does, and masking out zero-population
# rows makes an undefined rate NaN rather than inf. Rows with valid inputs get
# exactly the same value as before.
poverty_count = pd.to_numeric(census_pd["Poverty Count"], errors="coerce")
population = pd.to_numeric(census_pd["Population"], errors="coerce")
census_pd["Poverty Rate"] = (
    100 * poverty_count / population.where(population > 0))

# Keep only the columns used downstream ("Name" is dropped here).
census_pd = census_pd[["Zipcode", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rate"]]

print(len(census_pd))
census_pd.head()

33120


Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate
0,601,17599.0,38.9,11757.0,7041.0,11282.0,64.105915
1,602,39209.0,40.9,16190.0,8978.0,20428.0,52.100283
2,603,50135.0,40.4,16645.0,10897.0,25176.0,50.216416
3,606,6304.0,42.8,13387.0,5960.0,4092.0,64.911168
4,610,27590.0,41.4,18741.0,9266.0,12553.0,45.498369


In [3]:
# Narrow the census table to the join key and the metric to be mapped.
zip_poverty_df = census_pd.loc[:, ["Zipcode", "Poverty Rate"]]
zip_poverty_df.head()

Unnamed: 0,Zipcode,Poverty Rate
0,601,64.105915
1,602,52.100283
2,603,50.216416
3,606,64.911168
4,610,45.498369


In [4]:
# Non-null values per column; a "Poverty Rate" count below the "Zipcode" count
# means some rows have no computable rate.
zip_poverty_df.count()

Zipcode         33120
Poverty Rate    32803
dtype: int64

In [5]:
# Load the ZIP -> latitude/longitude lookup. dtype="object" reads every column
# as-is (no numeric parsing), so ZIP codes stay as text.
zip_df = pd.read_csv(
    "../jupyter_notebook/zip.csv",
    dtype="object",
    encoding="utf-8",
)

zip_df.head()

Unnamed: 0,Zip,Latitude,Longitude
0,71937,34.398483,-94.39398
1,72044,35.624351,-92.16056
2,56171,43.660847,-94.74357
3,49430,43.010337,-85.89754
4,52585,41.194129,-91.98027


In [6]:
# Give the key column the same name as in the census table so the two frames
# can be merged on it.
zip_df = zip_df.rename({"Zip": "Zipcode"}, axis="columns")
zip_df.head()

Unnamed: 0,Zipcode,Latitude,Longitude
0,71937,34.398483,-94.39398
1,72044,35.624351,-92.16056
2,56171,43.660847,-94.74357
3,49430,43.010337,-85.89754
4,52585,41.194129,-91.98027


In [7]:
# Inspect column dtypes: everything was read with dtype="object", so the
# coordinate columns still need converting to float.
zip_df.dtypes

Zipcode      object
Latitude     object
Longitude    object
dtype: object

In [8]:
# Latitude was loaded as text; convert it to float.
zip_df["Latitude"] = zip_df["Latitude"].astype(float)

In [9]:
# Longitude was loaded as text; convert it to float.
zip_df["Longitude"] = zip_df["Longitude"].astype(float)

In [10]:
# Merge the two data sets along zip code.
# ZIP codes are identifiers, not numbers: left-pad both key columns to five
# characters so a code stored as "601" still matches "00601". Padding is a
# no-op for keys that are already five characters long.
# NOTE(review): whether either source actually lost its leading zeros is not
# visible from this notebook -- confirm against zip.csv.
poverty_keyed = zip_poverty_df.assign(
    Zipcode=zip_poverty_df["Zipcode"].str.zfill(5))
coords_keyed = zip_df.assign(Zipcode=zip_df["Zipcode"].str.zfill(5))

# how="right" keeps every row of the coordinate table; ZIP codes without a
# census match get NaN in "Poverty Rate". The join key is named once
# (previously it was listed twice in `on`).
zip_complete = pd.merge(
    poverty_keyed, coords_keyed, how="right", on="Zipcode")

# Save the revised Data Frame as a csv
zip_complete.to_csv(
    "../jupyter_notebook/zip_complete.csv", encoding="utf-8", index=False)

# Visualize
zip_complete.head()

Unnamed: 0,Zipcode,Poverty Rate,Latitude,Longitude
0,10001,17.584666,40.750742,-73.99653
1,10002,27.223612,40.71704,-73.987
2,10003,7.219042,40.732509,-73.98935
3,10004,3.846154,40.699226,-74.04118
4,10005,10.506912,40.706019,-74.00858


In [11]:
# Non-null values per column after the right merge; the gap between "Zipcode"
# and "Poverty Rate" is the number of rows with no poverty rate.
zip_complete.count()

Zipcode         43191
Poverty Rate    30057
Latitude        43191
Longitude       43191
dtype: int64

In [12]:
# Check the merged frame's column dtypes before plotting.
zip_complete.dtypes

Zipcode          object
Poverty Rate    float64
Latitude        float64
Longitude       float64
dtype: object

In [13]:
# Discard every row that has a missing value in any column (the defaults of
# dropna, spelled out), leaving only ZIP codes with both a rate and coordinates.
drop_na_df = zip_complete.dropna(axis=0, how="any")
drop_na_df.head()

Unnamed: 0,Zipcode,Poverty Rate,Latitude,Longitude
0,10001,17.584666,40.750742,-73.99653
1,10002,27.223612,40.71704,-73.987
2,10003,7.219042,40.732509,-73.98935
3,10004,3.846154,40.699226,-74.04118
4,10005,10.506912,40.706019,-74.00858


In [14]:
# After dropna every column should report the same non-null count.
drop_na_df.count()

Zipcode         30057
Poverty Rate    30057
Latitude        30057
Longitude       30057
dtype: int64

In [15]:
# Keep just the heat-map inputs: the weight column and the two coordinates.
povlatlon_df = drop_na_df.loc[:, ["Poverty Rate", "Latitude", "Longitude"]]
povlatlon_df.head()

Unnamed: 0,Poverty Rate,Latitude,Longitude
0,17.584666,40.750742,-73.99653
1,27.223612,40.71704,-73.987
2,7.219042,40.732509,-73.98935
3,3.846154,40.699226,-74.04118
4,10.506912,40.706019,-74.00858


In [16]:
# Register the Google API key (imported from the local config module) with
# gmaps before any figure is created.
gmaps.configure(api_key=google_key)

In [17]:
# Heat-map weights: one poverty rate per point.
poverty_rate = povlatlon_df["Poverty Rate"].astype(float)

# Heat-map points: (latitude, longitude) pairs, in that column order.
locations = povlatlon_df[["Latitude", "Longitude"]].astype(float)

In [18]:
# Heat map of poverty rate, one weighted point per ZIP-code location.
fig = gmaps.figure()

# All layer options are set once, at construction. The original re-assigned the
# same three values as attributes afterwards, which had no additional effect.
# NOTE(review): per the gmaps docs, max_intensity is the weight mapped to the
# hottest colour and point_radius is the size of each point's influence --
# confirm that 20 and 1 are the intended values.
heat_layer = gmaps.heatmap_layer(
    locations,
    weights=poverty_rate,
    dissipating=True,
    max_intensity=20,
    point_radius=1,
)

fig.add_layer(heat_layer)
fig

Figure(layout=FigureLayout(height='420px'))

In [19]:
# Export the heat-map inputs (with a header row, without the index column).
povlatlon_df.to_csv(
    "poverty.csv",
    header=True,
    index=False,
)