In [17]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import gmaps
from census import Census 
from us import states

from config_pauline import g_keypa
# Census API Key
from config_pauline import census_keypa
c = Census(census_keypa, year=2018)
# print(census_keypa)


In [18]:
# Run Census Search to retrieve data on all zip codes 
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels

census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E", "B01002_002E",
                          "B01002_003E","B19301_001E"),
                           {'for': 'zip code tabulation area:*'})
                         
census_data
# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B01002_002E": "Median Male Age",
                                      "B01002_003E": "Median Female Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",                                      
                                      "NAME": "Name", "zip code tabulation area": "Zipcode"})
census_pd = census_pd[["Zipcode", "Population","Household Income", "Median Age", "Median Male Age","Median Female Age", "Per Capita Income"]]

# Visualize
print(len(census_pd))
census_pd.head()
                         



33120


Unnamed: 0,Zipcode,Population,Household Income,Median Age,Median Male Age,Median Female Age,Per Capita Income
0,601,17242.0,13092.0,40.5,39.5,41.3,6999.0
1,602,38442.0,16358.0,42.3,41.9,42.8,9277.0
2,603,48814.0,16603.0,41.1,39.1,42.9,11307.0
3,606,6437.0,12832.0,43.3,43.2,43.4,5943.0
4,610,27073.0,19309.0,42.1,40.4,44.2,10220.0


In [19]:
census_pd.dtypes

Zipcode               object
Population           float64
Household Income     float64
Median Age           float64
Median Male Age      float64
Median Female Age    float64
Per Capita Income    float64
dtype: object

In [20]:
# Save as a csv
# Note to avoid any issues later, use encoding="utf-8"
census_pd.to_csv("census_data_states.csv", encoding="utf-8", index=False)

In [21]:
# Import the us zip data. Use dtype="object" to ensure all data is read in accurately.
us_zips = pd.read_csv("Copy of uszips.csv",converters={'zip': lambda x: str(x)})

# Visualize
us_zips.head()

us_zips = us_zips.rename(columns={"zip":"Zipcode"})
us_zips

Unnamed: 0,Zipcode,lat,lng,city,state_id,state_name
0,601,18.18004,-66.75218,Adjuntas,PR,Puerto Rico
1,602,18.36073,-67.17517,Aguada,PR,Puerto Rico
2,603,18.45439,-67.12202,Aguadilla,PR,Puerto Rico
3,606,18.16724,-66.93828,Maricao,PR,Puerto Rico
4,610,18.29032,-67.12243,Anasco,PR,Puerto Rico
...,...,...,...,...,...,...
33092,99923,55.97796,-130.03671,Hyder,AK,Alaska
33093,99925,55.55767,-132.97627,Klawock,AK,Alaska
33094,99926,55.12617,-131.48928,Metlakatla,AK,Alaska
33095,99927,56.25100,-133.37571,Point Baker,AK,Alaska


In [22]:
# us_zips["Zipcode"]=us_zips["Zipcode"].astype(int)
us_zips.dtypes

Zipcode        object
lat           float64
lng           float64
city           object
state_id       object
state_name     object
dtype: object

In [23]:
# Merge the two data sets along zip code
census_data_complete = census_pd.merge(us_zips, how="left", left_on="Zipcode", right_on="Zipcode")


# Visualize
census_data_complete

Unnamed: 0,Zipcode,Population,Household Income,Median Age,Median Male Age,Median Female Age,Per Capita Income,lat,lng,city,state_id,state_name
0,00601,17242.0,13092.0,40.5,39.5,41.3,6999.0,,,,,
1,00602,38442.0,16358.0,42.3,41.9,42.8,9277.0,,,,,
2,00603,48814.0,16603.0,41.1,39.1,42.9,11307.0,,,,,
3,00606,6437.0,12832.0,43.3,43.2,43.4,5943.0,,,,,
4,00610,27073.0,19309.0,42.1,40.4,44.2,10220.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
33115,87515,363.0,,44.2,47.8,22.9,,36.50231,-106.37718,Canjilon,NM,New Mexico
33116,87518,9.0,,-666666666.0,-666666666.0,-666666666.0,,36.50733,-106.56265,Cebolla,NM,New Mexico
33117,87511,2896.0,,36.0,44.3,27.8,,36.10226,-105.92584,Alcalde,NM,New Mexico
33118,87578,245.0,,48.0,63.0,39.6,,36.03903,-105.76530,Truchas,NM,New Mexico


In [24]:
# clean up the data
census_data_complete.dropna()

Unnamed: 0,Zipcode,Population,Household Income,Median Age,Median Male Age,Median Female Age,Per Capita Income,lat,lng,city,state_id,state_name
2558,10001,22924.0,88526.0,36.7,40.1,34.3,84765.0,40.75061,-73.99716,New York,NY,New York
2559,10002,74993.0,35859.0,43.6,40.7,46.0,32694.0,40.71586,-73.98613,New York,NY,New York
2560,10003,54682.0,112131.0,32.3,33.3,30.7,92781.0,40.73180,-73.98911,New York,NY,New York
2561,10004,3028.0,157645.0,37.1,38.3,37.0,122165.0,40.69465,-74.02106,New York,NY,New York
2562,10005,8831.0,173333.0,30.1,30.4,29.9,106702.0,40.70616,-74.00907,New York,NY,New York
...,...,...,...,...,...,...,...,...,...,...,...,...
33079,99922,330.0,34028.0,39.5,39.4,39.8,18213.0,55.30211,-133.03248,Hydaburg,AK,Alaska
33081,99925,927.0,57375.0,43.6,42.0,45.1,25840.0,55.55767,-132.97627,Klawock,AK,Alaska
33082,99926,1635.0,53409.0,34.5,34.6,34.3,22453.0,55.12617,-131.48928,Metlakatla,AK,Alaska
33083,99927,38.0,-666666666.0,55.5,-666666666.0,-666666666.0,13658.0,56.25100,-133.37571,Point Baker,AK,Alaska


In [25]:
# Save as a csv for teammate to be able to use for zscores
# Note to avoid any issues later, use encoding="utf-8"
census_data_complete.to_csv("census_data_complete.csv", encoding="utf-8", index=False)

In [26]:
#configure gmaps
gmaps.configure(api_key= g_keypa)

In [27]:
#read and add the census file that has been updated with the zscores  file
census_zscores= pd.read_csv("census_zscores.csv")

# Visualize
census_zscores


Unnamed: 0,Zipcode,Population,Household Income,Median Age,Per Capita Income,lat,lng,city,state_id,Age_sqd,Popn_sqd,Income_sqd,zscore_age,zscore_pop,zscore_inc
0,10001,22924.0,88526.0,36.7,84765.0,40.75061,-73.99716,New York,NY,44.89,1.543594e+08,9.565317e+08,0.548956,0.173792,-0.306582
1,10002,74993.0,35859.0,43.6,32694.0,40.71586,-73.98613,New York,NY,184.96,4.159366e+09,4.725908e+08,0.084447,-3.288808,0.081605
2,10003,54682.0,112131.0,32.3,92781.0,40.73180,-73.98911,New York,NY,5.29,1.952062e+09,2.973831e+09,0.845164,-1.938122,-1.303802
3,10004,3028.0,157645.0,37.1,122165.0,40.69465,-74.02106,New York,NY,50.41,5.582859e+07,1.000937e+10,0.522028,0.503120,-3.226593
4,10005,8831.0,173333.0,30.1,106702.0,40.70616,-74.00907,New York,NY,0.01,2.785072e+06,1.339455e+10,0.993268,0.889021,-3.889351
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28415,99921,1986.0,58571.0,41.4,34414.0,55.45479,-132.77209,Craig,AK,129.96,7.248570e+07,9.464321e+05,0.232551,0.433827,0.958901
28416,99922,330.0,34028.0,39.5,18213.0,55.30211,-133.03248,Hydaburg,AK,90.25,1.034259e+08,5.555521e+08,0.360459,0.323703,0.004252
28417,99925,927.0,57375.0,43.6,25840.0,55.55767,-132.97627,Klawock,AK,184.96,9.163952e+07,4.979707e+04,0.084447,0.363403,0.990573
28418,99926,1635.0,53409.0,34.5,22453.0,55.12617,-131.48928,Metlakatla,AK,20.25,7.858563e+07,1.754900e+07,0.697060,0.410485,0.823024


In [28]:

new_dataframe=census_zscores.loc[(census_zscores["zscore_age"]>0)&(census_zscores["zscore_pop"]>0)&(census_zscores["zscore_inc"]>0)] 


new_dataframe["c_zscore"] = new_dataframe["zscore_age"]*new_dataframe["zscore_pop"]*new_dataframe["zscore_inc"]
new_dataframe

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_dataframe["c_zscore"] = new_dataframe["zscore_age"]*new_dataframe["zscore_pop"]*new_dataframe["zscore_inc"]


Unnamed: 0,Zipcode,Population,Household Income,Median Age,Per Capita Income,lat,lng,city,state_id,Age_sqd,Popn_sqd,Income_sqd,zscore_age,zscore_pop,zscore_inc,c_zscore
33,10037,21139.0,45041.0,35.3,30750.0,40.81286,-73.93739,New York,NY,28.09,1.131914e+08,1.576821e+08,0.643204,0.292494,0.469509,0.088330
46,10302,18204.0,64103.0,34.9,27428.0,40.63043,-74.13772,Staten Island,NY,24.01,5.935387e+07,4.231304e+07,0.670132,0.487673,0.725195,0.236997
51,10307,15232.0,80893.0,39.7,39923.0,40.50794,-74.23965,Staten Island,NY,94.09,2.239321e+07,5.426499e+08,0.346995,0.685311,0.015883,0.003777
54,10310,23262.0,72093.0,38.8,32884.0,40.63235,-74.11617,Staten Island,NY,77.44,1.628724e+08,2.101006e+08,0.407583,0.151314,0.387649,0.023908
76,10470,15432.0,57160.0,40.5,31829.0,40.89594,-73.86812,Bronx,NY,110.25,2.432607e+07,1.919777e+05,0.293139,0.672011,0.981490,0.193347
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28412,99901,13818.0,70356.0,39.7,34886.0,55.56908,-131.01272,Ketchikan,AK,94.09,1.101010e+07,1.627627e+08,0.346995,0.779343,0.461030,0.124676
28415,99921,1986.0,58571.0,41.4,34414.0,55.45479,-132.77209,Craig,AK,129.96,7.248570e+07,9.464321e+05,0.232551,0.433827,0.958901,0.096741
28416,99922,330.0,34028.0,39.5,18213.0,55.30211,-133.03248,Hydaburg,AK,90.25,1.034259e+08,5.555521e+08,0.360459,0.323703,0.004252,0.000496
28417,99925,927.0,57375.0,43.6,25840.0,55.55767,-132.97627,Klawock,AK,184.96,9.163952e+07,4.979707e+04,0.084447,0.363403,0.990573,0.030399


In [29]:
#top_three_dataframe = new_dataframe["c_zscore"].sort_values(ascending=False)
top_three_dataframe = new_dataframe.loc[new_dataframe["c_zscore"]>=0.88963]
top_three_dataframe


Unnamed: 0,Zipcode,Population,Household Income,Median Age,Per Capita Income,lat,lng,city,state_id,Age_sqd,Popn_sqd,Income_sqd,zscore_age,zscore_pop,zscore_inc,c_zscore
15693,57107,10102.0,56953.0,30.9,27131.0,43.59488,-96.82121,Sioux Falls,SD,0.81,158287.3,416221.836789,0.939412,0.973543,0.972745,0.889631
25051,86040,10821.0,57377.0,30.9,20904.0,36.69612,-111.41907,Page,AZ,0.81,103135.1,48908.458604,0.939412,0.978644,0.990657,0.91076
26411,93926,9262.0,57246.0,30.1,18743.0,36.52923,-121.4077,Gonzales,CA,0.01,1532281.0,124011.431581,0.993268,0.917682,0.985123,0.897944


In [30]:
# Create a map using lat & long coordinates to set markers
# Create a population Heatmap layer
locations = new_dataframe[['lat', 'lng']].astype(float)
pzscore_pop = new_dataframe["zscore_pop"].astype(float)
pzscore_age = new_dataframe["zscore_age"].astype(float)
pzscore_inc = new_dataframe["zscore_inc"].astype(float)
fig = gmaps.figure()
fig
heat_layer_pop = gmaps.heatmap_layer(locations, weights=pzscore_pop, 
                                 dissipating=False, max_intensity=100,
                                 point_radius = 1)
heat_layer_age = gmaps.heatmap_layer(locations, weights=pzscore_age, 
                                 dissipating=False, max_intensity=100,
                                 point_radius = 1)
heat_layer_inc = gmaps.heatmap_layer(locations, weights=pzscore_inc, 
                                 dissipating=False, max_intensity=100,
                                 point_radius = 1)
fig.add_layer(heat_layer_pop)
fig.add_layer(heat_layer_age)
fig.add_layer(heat_layer_inc )

fig


Figure(layout=FigureLayout(height='420px'))

In [31]:
# Using the template add the hotel marks to the heatmap
info_box_template = """
<dl>
<dt>City</dt><dd>{city}</dd>
<dt>State</dt><dd>{state_id}</dd>
</dl>
"""
# Store the DataFrame Row
# NOTE: be sure to update with your DataFrame name
top_three_labels = [info_box_template.format(**row) for index, row in top_three_dataframe.iterrows()]
locations = top_three_dataframe[["lat", "lng"]]

In [33]:
markers = gmaps.marker_layer(locations, info_box_content=top_three_labels)
# Add the layer to the map
fig.add_layer(heat_layer_pop)
fig.add_layer(heat_layer_age)
fig.add_layer(heat_layer_inc )
fig.add_layer(markers)
# Display figure
fig
plt.savefig("Heatmap.png")

<Figure size 432x288 with 0 Axes>