In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
from citipy import citipy

# Census API Key
from config import (census_api_key, g_key)
c = Census(census_api_key, year=2014)

In [2]:
# Run Census Search to retrieve data on all zip codes (2013 ACS5 Census)
# See: https://github.com/datamade/census for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels
census_data = c.acs5.get(("NAME", "B08301_001E", "B01003_001E", "B01002_001E", "B08301_010E"), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B08301_001E": "Transportation (total)",
                                      "B08301_010E": "Public Transportation",
                                      "NAME": "Name", "zip code tabulation area": "Zipcode"})

# Add in Public Transportation Rate (Public Transportation / Transportation (total))
census_pd["Public Transportation Rate"] = 100 * \
    census_pd["Public Transportation"].astype(
        int) / census_pd["Transportation (total)"].astype(int)


# Final DataFrame
census_pd = census_pd[["Zipcode", "Population", "Median Age", "Transportation (total)",
                       "Public Transportation","Public Transportation Rate"]]

# Visualize
print(len(census_pd))
census_pd.to_csv("Resources/census_data.csv", encoding="utf-8", index=False)
census_pd.head()

33120


Unnamed: 0,Zipcode,Population,Median Age,Transportation (total),Public Transportation,Public Transportation Rate
0,1740,5019.0,42.7,2623.0,72.0,2.744949
1,1741,4967.0,48.5,2211.0,14.0,0.633198
2,1742,18948.0,46.7,7623.0,571.0,7.490489
3,1745,309.0,50.4,162.0,15.0,9.259259
4,1746,14008.0,42.6,6904.0,260.0,3.765933


In [3]:
# Add columns for city, latitude and longitude
census_pd["City"] = ""
census_pd["State"] = ""
census_pd["Lat"] = ""
census_pd["Lng"] = ""
census_pd.head()

Unnamed: 0,Zipcode,Population,Median Age,Transportation (total),Public Transportation,Public Transportation Rate,City,State,Lat,Lng
0,1740,5019.0,42.7,2623.0,72.0,2.744949,,,,
1,1741,4967.0,48.5,2211.0,14.0,0.633198,,,,
2,1742,18948.0,46.7,7623.0,571.0,7.490489,,,,
3,1745,309.0,50.4,162.0,15.0,9.259259,,,,
4,1746,14008.0,42.6,6904.0,260.0,3.765933,,,,


In [4]:
filtered_census_pd = census_pd.loc[census_pd["Population"]>30000]
filtered_census_pd.count()

Zipcode                       3342
Population                    3342
Median Age                    3342
Transportation (total)        3342
Public Transportation         3342
Public Transportation Rate    3342
City                          3342
State                         3342
Lat                           3342
Lng                           3342
dtype: int64

In [12]:
# create a params dict that will be updated with new city each iteration
params = {"key": g_key}
base_url = "https://maps.googleapis.com/maps/api/geocode/json"

count = 0
# Loop through the cities_pd and run a lat/long search for each city
for index, row in filtered_census_pd.iterrows():
    
#     if count == 15:
#         break

    zipcode = row['Zipcode']

    # update address key value
    params['address'] = f"{zipcode}"

    # make request
    cities_lat_lng = requests.get(base_url, params=params)
    
    # print the cities_lat_lng url, avoid doing for public github repos in order to avoid exposing key
#     print(cities_lat_lng.url)
    
#     convert to json
    cities_lat_lng = cities_lat_lng.json()
    
#     print(f'adding index {index} - {city},{state}')
    
    try:
        filtered_census_pd.loc[index, "Lat"] = cities_lat_lng["results"][0]["geometry"]["location"]["lat"]
        filtered_census_pd.loc[index, "Lng"] = cities_lat_lng["results"][0]["geometry"]["location"]["lng"]
        filtered_census_pd.loc[index, "City"] = cities_lat_lng["results"][0]["address_components"][1]["long_name"]
        filtered_census_pd.loc[index, "State"] = cities_lat_lng["results"][0]["address_components"][3]["short_name"]
#         print(f"count {count}")
    except (KeyError, IndexError):
        print("Missing field/result... skipping.")
    
    count = count + 1

# Print to csv
filtered_census_pd.to_csv("Resources/census_data_with_city_lat_lng.csv", encoding="utf-8", index=False)
    
# Visualize to confirm lat lng city appear
filtered_census_pd.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/

Unnamed: 0,Zipcode,Population,Median Age,Transportation (total),Public Transportation,Public Transportation Rate,City,State,Lat,Lng
8,1752,39141.0,40.0,22183.0,331.0,1.492134,Marlborough,MA,42.3474,-71.5368
12,1760,34043.0,41.7,18658.0,1611.0,8.634366,Natick,MA,42.2775,-71.3468
19,1801,39315.0,40.0,21322.0,1005.0,4.713442,Woburn,MA,42.4885,-71.133
21,1810,34251.0,41.5,16374.0,811.0,4.952974,Andover,Essex County,42.6569,-71.1408
22,1821,31157.0,40.7,16757.0,481.0,2.870442,Billerica,MA,42.5465,-71.2518


In [15]:
filtered_census_pd = filtered_census_pd.dropna(how="any")
filtered_census_pd.head()

Unnamed: 0,Zipcode,Population,Median Age,Transportation (total),Public Transportation,Public Transportation Rate,City,State,Lat,Lng
8,1752,39141.0,40.0,22183.0,331.0,1.492134,Marlborough,MA,42.3474,-71.5368
12,1760,34043.0,41.7,18658.0,1611.0,8.634366,Natick,MA,42.2775,-71.3468
19,1801,39315.0,40.0,21322.0,1005.0,4.713442,Woburn,MA,42.4885,-71.133
21,1810,34251.0,41.5,16374.0,811.0,4.952974,Andover,Essex County,42.6569,-71.1408
22,1821,31157.0,40.7,16757.0,481.0,2.870442,Billerica,MA,42.5465,-71.2518
