In [1]:
import warnings
warnings.filterwarnings('ignore')

# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
from citipy import citipy
import gmaps
import os

# Census API Key
from config import (census_api_key, g_key)
c = Census(census_api_key, year=2014)

In [2]:
# Run Census Search to retrieve data on all zip codes (2013 ACS5 Census)
# See: https://github.com/datamade/census for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels
census_data = c.acs5.get(("NAME", "B08301_001E", "B01003_001E", "B01002_001E", "B08301_010E"), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B08301_001E": "Transportation (total)",
                                      "B08301_010E": "Public Transportation",
                                      "NAME": "Name", "zip code tabulation area": "Zipcode"})

# Add in Public Transportation Rate (Public Transportation / Transportation (total))
census_pd["Public Transportation Rate"] = 100 * \
    census_pd["Public Transportation"].astype(
        int) / census_pd["Transportation (total)"].astype(int)


# Final DataFrame
census_pd = census_pd[["Zipcode", "Population", "Median Age", "Transportation (total)",
                       "Public Transportation","Public Transportation Rate"]]

# Visualize
print(len(census_pd))
census_pd.to_csv("Resources/census_data.csv", encoding="utf-8", index=False)
census_pd.head()

33120


Unnamed: 0,Zipcode,Population,Median Age,Transportation (total),Public Transportation,Public Transportation Rate
0,1740,5019.0,42.7,2623.0,72.0,2.744949
1,1741,4967.0,48.5,2211.0,14.0,0.633198
2,1742,18948.0,46.7,7623.0,571.0,7.490489
3,1745,309.0,50.4,162.0,15.0,9.259259
4,1746,14008.0,42.6,6904.0,260.0,3.765933


In [3]:
# Add columns for city, latitude and longitude
census_pd["City"] = ""
census_pd["State"] = ""
census_pd["Lat"] = ""
census_pd["Lng"] = ""
census_pd.head()

Unnamed: 0,Zipcode,Population,Median Age,Transportation (total),Public Transportation,Public Transportation Rate,City,State,Lat,Lng
0,1740,5019.0,42.7,2623.0,72.0,2.744949,,,,
1,1741,4967.0,48.5,2211.0,14.0,0.633198,,,,
2,1742,18948.0,46.7,7623.0,571.0,7.490489,,,,
3,1745,309.0,50.4,162.0,15.0,9.259259,,,,
4,1746,14008.0,42.6,6904.0,260.0,3.765933,,,,


In [4]:
# filtering census data to get the population size of zip codes between 30k and 50k
filtered_small_census_pd = census_pd.loc[(census_pd["Population"]>=30000) & (census_pd["Population"]<=50000)]
filtered_small_census_pd.count()

Zipcode                       2555
Population                    2555
Median Age                    2555
Transportation (total)        2555
Public Transportation         2555
Public Transportation Rate    2555
City                          2555
State                         2555
Lat                           2555
Lng                           2555
dtype: int64

In [5]:
# create a params dict that will be updated with new city each iteration
params = {"key": g_key}
base_url = "https://maps.googleapis.com/maps/api/geocode/json"

count = 0
# Loop through the cities_pd and run a lat/long search for each city
for index, row in filtered_small_census_pd.iterrows():
    
#     if count == 15:
#         break

    zipcode = row['Zipcode']

    # update address key value
    params['address'] = f"{zipcode}"

    # make request
    cities_lat_lng = requests.get(base_url, params=params)
    
#     convert to json
    cities_lat_lng = cities_lat_lng.json()
    
    try:
        filtered_small_census_pd.loc[index, "Lat"] = cities_lat_lng["results"][0]["geometry"]["location"]["lat"]
        filtered_small_census_pd.loc[index, "Lng"] = cities_lat_lng["results"][0]["geometry"]["location"]["lng"]
        filtered_small_census_pd.loc[index, "City"] = cities_lat_lng["results"][0]["address_components"][1]["long_name"]
        filtered_small_census_pd.loc[index, "State"] = cities_lat_lng["results"][0]["address_components"][3]["short_name"]
#         print(f"count {count}")
    except (KeyError, IndexError):
        print("Missing field/result... skipping.")
    
    count = count + 1

# Print to csv
filtered_small_census_pd.to_csv("Resources/census_data_with_city_lat_lng.csv", encoding="utf-8", index=False)
    
# Visualize to confirm lat lng city appear
filtered_small_census_pd.head()

Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/

Unnamed: 0,Zipcode,Population,Median Age,Transportation (total),Public Transportation,Public Transportation Rate,City,State,Lat,Lng
8,1752,39141.0,40.0,22183.0,331.0,1.492134,Marlborough,MA,42.3474,-71.5368
12,1760,34043.0,41.7,18658.0,1611.0,8.634366,Natick,MA,42.2775,-71.3468
19,1801,39315.0,40.0,21322.0,1005.0,4.713442,Woburn,MA,42.4885,-71.133
21,1810,34251.0,41.5,16374.0,811.0,4.952974,Andover,Essex County,42.6569,-71.1408
22,1821,31157.0,40.7,16757.0,481.0,2.870442,Billerica,MA,42.5465,-71.2518


In [6]:
filtered_small_census_pd = filtered_small_census_pd.dropna(how="any")
filtered_small_census_pd = filtered_small_census_pd.sort_values("Public Transportation Rate", ascending=False)
filtered_small_census_pd.to_csv("Resources/30-50k_population_top_public_trans.csv", encoding="utf-8", index=False)
filtered_small_census_pd.head()

Unnamed: 0,Zipcode,Population,Median Age,Transportation (total),Public Transportation,Public Transportation Rate,City,State,Lat,Lng
4161,10035,34760.0,34.7,12680.0,9752.0,76.908517,Manhattan,New York County,40.7941,-73.9272
4152,10026,38372.0,34.3,16906.0,12748.0,75.405182,Manhattan,New York County,40.8032,-73.9526
4394,11106,38633.0,36.1,20206.0,14520.0,71.859844,Long Island City,Queens County,40.7595,-73.9272
4214,10454,38751.0,28.6,12048.0,8572.0,71.148738,The Bronx,NY,40.8023,-73.9154
4411,11217,38567.0,35.4,21880.0,15519.0,70.927788,Brooklyn,NY,40.6816,-73.9786


In [7]:
# filtering census data to get the population size of zip codes between 100k and 200k
filtered_large_census_pd = census_pd.loc[(census_pd["Population"]>=100000) & (census_pd["Population"]<=200000)]
filtered_large_census_pd.count()

Zipcode                       12
Population                    12
Median Age                    12
Transportation (total)        12
Public Transportation         12
Public Transportation Rate    12
City                          12
State                         12
Lat                           12
Lng                           12
dtype: int64

In [8]:
# create a params dict that will be updated with new city each iteration
params = {"key": g_key}
base_url = "https://maps.googleapis.com/maps/api/geocode/json"

count = 0
# Loop through the cities_pd and run a lat/long search for each city
for index, row in filtered_large_census_pd.iterrows():
    
#     if count == 15:
#         break

    zipcode = row['Zipcode']

    # update address key value
    params['address'] = f"{zipcode}"

    # make request
    cities_lat_lng = requests.get(base_url, params=params)
    
#     convert to json
    cities_lat_lng = cities_lat_lng.json()
    
    try:
        filtered_large_census_pd.loc[index, "Lat"] = cities_lat_lng["results"][0]["geometry"]["location"]["lat"]
        filtered_large_census_pd.loc[index, "Lng"] = cities_lat_lng["results"][0]["geometry"]["location"]["lng"]
        filtered_large_census_pd.loc[index, "City"] = cities_lat_lng["results"][0]["address_components"][1]["long_name"]
        filtered_large_census_pd.loc[index, "State"] = cities_lat_lng["results"][0]["address_components"][3]["short_name"]
#         print(f"count {count}")
    except (KeyError, IndexError):
        print("Missing field/result... skipping.")
    
    count = count + 1

# Print to csv
filtered_large_census_pd.to_csv("Resources/census_data_with_city_lat_lng.csv", encoding="utf-8", index=False)
    
# Visualize to confirm lat lng city appear
filtered_large_census_pd.head()

Missing field/result... skipping.


Unnamed: 0,Zipcode,Population,Median Age,Transportation (total),Public Transportation,Public Transportation Rate,City,State,Lat,Lng
626,926,105093.0,40.6,41370.0,1214.0,2.934494,San Juan,,18.3586,-66.0703
4414,11220,101715.0,32.5,43505.0,26155.0,60.119526,Brooklyn,NY,40.6385,-74.0153
4448,11368,110385.0,31.6,52359.0,34637.0,66.152906,Corona,Queens County,40.7506,-73.8478
4453,11373,100713.0,36.3,48188.0,34797.0,72.210924,Elmhurst,Queens County,40.738,-73.8801
4459,11385,100132.0,35.3,45510.0,27766.0,61.010767,Glendale,Queens County,40.6981,-73.8948


In [9]:
filtered_large_census_pd = filtered_large_census_pd.dropna(how="any")
filtered_large_census_pd = filtered_large_census_pd.sort_values("Public Transportation Rate", ascending=False)
filtered_large_census_pd.to_csv("Resources/100-200k_population_top_public_trans.csv", encoding="utf-8", index=False)
filtered_large_census_pd.head()

Unnamed: 0,Zipcode,Population,Median Age,Transportation (total),Public Transportation,Public Transportation Rate,City,State,Lat,Lng
4453,11373,100713.0,36.3,48188.0,34797.0,72.210924,Elmhurst,Queens County,40.738,-73.8801
4448,11368,110385.0,31.6,52359.0,34637.0,66.152906,Corona,Queens County,40.7506,-73.8478
4459,11385,100132.0,35.3,45510.0,27766.0,61.010767,Glendale,Queens County,40.6981,-73.8948
4414,11220,101715.0,32.5,43505.0,26155.0,60.119526,Brooklyn,NY,40.6385,-74.0153
27419,90011,102926.0,27.8,38934.0,8496.0,21.821544,South Los Angeles,Los Angeles County,34.0079,-118.259


In [10]:
# Configure gmaps
gmaps.configure(api_key=g_key)

In [11]:
# Store Lat and Lng into Locations
locations = filtered_small_census_pd[["Lat","Lng"]].astype(float)
public_transit_rate = filtered_small_census_pd["Public Transportation Rate"].astype(float)

# Create a public transit heatmap
fig = gmaps.figure()
heat_layer = gmaps.heatmap_layer(locations, weights=public_transit_rate,dissipating=False,max_intensity=100,point_radius=1)
fig.add_layer(heat_layer)
fig

Figure(layout=FigureLayout(height='420px'))