## Import Necessary Libraries

In [297]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium # map rendering library
import wget

from bs4 import BeautifulSoup
import requests
import geocoder 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import os
from PIL import Image
import time 
from selenium import webdriver

delay=5

In [298]:
#pip install geopy
#conda install folium
#pip install wget
#pip install selenium

## New York Data

In [299]:
#!wget -P C:/Users/andre.mercanzini/Desktop/ -q -O 'newyork_data.json' https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs/newyork_data.json
NY_data = "C://Users//andre.mercanzini//Documents//Python Scripts//newyork_data.json"
with open(NY_data) as json_data:
    newyork_data = json.load(json_data)

In [300]:
neighborhoods_data = newyork_data['features']
len(neighborhoods_data)

# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)    

In [302]:
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [303]:
# k means
kmeans = KMeans(n_clusters=5, random_state=0)
neighborhoods['cluster'] = kmeans.fit_predict(neighborhoods[['Latitude', 'Longitude']])

# get centroids
centroids = kmeans.cluster_centers_
cen_x = [i[0] for i in centroids] 
cen_y = [i[1] for i in centroids]

## add to df
neighborhoods['cen_x'] = neighborhoods.cluster.map({0:cen_x[0], 1:cen_x[1], 2:cen_x[2], 3:cen_x[3], 4:cen_x[4]})
neighborhoods['cen_y'] = neighborhoods.cluster.map({0:cen_y[0], 1:cen_y[1], 2:cen_y[2], 3:cen_y[3], 4:cen_y[4]})

# define and map colors
colors = ['blue', 'green', 'red', 'orange', 'yellow']
neighborhoods['color'] = neighborhoods.cluster.map({0:colors[0], 1:colors[1], 2:colors[2], 3:colors[3], 4:colors[4]})

neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,cluster,cen_x,cen_y,color
0,Bronx,Wakefield,40.894705,-73.847201,4,40.843482,-73.876488,yellow
1,Bronx,Co-op City,40.874294,-73.829939,4,40.843482,-73.876488,yellow
2,Bronx,Eastchester,40.887556,-73.827806,4,40.843482,-73.876488,yellow
3,Bronx,Fieldston,40.895437,-73.905643,4,40.843482,-73.876488,yellow
4,Bronx,Riverdale,40.890834,-73.912585,4,40.843482,-73.876488,yellow


In [304]:
manhattan_data = neighborhoods[neighborhoods['Borough'] == 'Manhattan'].reset_index(drop=True)
manhattan_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,cluster,cen_x,cen_y,color
0,Manhattan,Marble Hill,40.876551,-73.91066,4,40.843482,-73.876488,yellow
1,Manhattan,Chinatown,40.715618,-73.994279,2,40.734036,-73.966889,red
2,Manhattan,Washington Heights,40.851903,-73.9369,4,40.843482,-73.876488,yellow
3,Manhattan,Inwood,40.867684,-73.92121,4,40.843482,-73.876488,yellow
4,Manhattan,Hamilton Heights,40.823604,-73.949688,4,40.843482,-73.876488,yellow


In [305]:
# k means
kmeans = KMeans(n_clusters=5, random_state=0)
manhattan_data['cluster'] = kmeans.fit_predict(manhattan_data[['Latitude', 'Longitude']])

# get centroids
centroids = kmeans.cluster_centers_
cen_x = [i[0] for i in centroids] 
cen_y = [i[1] for i in centroids]

## add to df
manhattan_data['cen_x'] = manhattan_data.cluster.map({0:cen_x[0], 1:cen_x[1], 2:cen_x[2], 3:cen_x[3], 4:cen_x[4]})
manhattan_data['cen_y'] = manhattan_data.cluster.map({0:cen_y[0], 1:cen_y[1], 2:cen_y[2], 3:cen_y[3], 4:cen_y[4]})

# define and map colors
colors = ['blue', 'green', 'red', 'orange', 'yellow']
manhattan_data['color'] = manhattan_data.cluster.map({0:colors[0], 1:colors[1], 2:colors[2], 3:colors[3], 4:colors[4]})

manhattan_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,cluster,cen_x,cen_y,color
0,Manhattan,Marble Hill,40.876551,-73.91066,2,40.865379,-73.922923,red
1,Manhattan,Chinatown,40.715618,-73.994279,1,40.720266,-73.99946,green
2,Manhattan,Washington Heights,40.851903,-73.9369,2,40.865379,-73.922923,red
3,Manhattan,Inwood,40.867684,-73.92121,2,40.865379,-73.922923,red
4,Manhattan,Hamilton Heights,40.823604,-73.949688,3,40.809012,-73.953775,orange


## Toronto Data

In [309]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
website_url = requests.get(url).text

soup = BeautifulSoup(website_url)

table_contents=[]
table=soup.find('table')

for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

Toronto_data=pd.DataFrame(table_contents)
Toronto_data['Borough']=Toronto_data['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

In [310]:
file_location = "C:\\Users\\andre.mercanzini\\Documents\\Python Scripts"
basefile_name = "Geospatial_Coordinates.csv"
file_name = os.path.join(file_location, basefile_name)
dfGEO = pd.read_csv(file_name)

Lat_list = []
Long_list = []

for index, row in Toronto_data.iterrows():
    GEOindex = (dfGEO[dfGEO['Postal Code']==row['PostalCode']].index.values)
    Lat = dfGEO._get_value(int(GEOindex), 'Latitude')
    Long = dfGEO._get_value(int(GEOindex), 'Longitude')
    Lat_list.append(Lat)
    Long_list.append(Long)
    #df._set_value(index, 'Latitude', Lat)
    #df._set_value(index, 'Longitude', Long)
    
Toronto_data.insert(loc = 3, column='Latitude', value=Lat_list)
Toronto_data.insert(loc = 4, column='Longitude', value=Long_list)

In [311]:
Toronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


In [312]:
# k means
kmeans = KMeans(n_clusters=5, random_state=0)
Toronto_data['cluster'] = kmeans.fit_predict(Toronto_data[['Latitude', 'Longitude']])

# get centroids
centroids = kmeans.cluster_centers_
cen_x = [i[0] for i in centroids] 
cen_y = [i[1] for i in centroids]

## add to df
Toronto_data['cen_x'] = Toronto_data.cluster.map({0:cen_x[0], 1:cen_x[1], 2:cen_x[2], 3:cen_x[3], 4:cen_x[4]})
Toronto_data['cen_y'] = Toronto_data.cluster.map({0:cen_y[0], 1:cen_y[1], 2:cen_y[2], 3:cen_y[3], 4:cen_y[4]})

# define and map colors
colors = ['blue', 'green', 'red', 'orange', 'yellow']
Toronto_data['color'] = Toronto_data.cluster.map({0:colors[0], 1:colors[1], 2:colors[2], 3:colors[3], 4:colors[4]})

for idx, val in Toronto_data.iterrows():
    x = [val.Latitude, val.cen_x]
    y = [val.Longitude, val.cen_y]
    Toronto_data.at[idx, 'x_line'] = x[0]
    Toronto_data.at[idx, 'y_line'] = y[0] 

Toronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,cluster,cen_x,cen_y,color,x_line,y_line
0,M3A,North York,Parkwoods,43.753259,-79.329656,4,43.700213,-79.324393,yellow,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572,4,43.700213,-79.324393,yellow,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,2,43.662429,-79.397089,red,43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,3,43.750577,-79.414192,orange,43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494,2,43.662429,-79.397089,red,43.662301,-79.389494


In [327]:
central_list = ["Queen's Park", "East York", "East Toronto Business", "Downtown Toronto Stn A", 
                "Downtown Toronto","Central Toronto", "Etobicoke", "West Toronto", "East York/East Toronto","West Toronto"]
Toronto_downtown = Toronto_data.loc[Toronto_data['Borough'].isin(central_list)]

In [329]:
# k means
kmeans = KMeans(n_clusters=5, random_state=0)
Toronto_downtown['cluster'] = kmeans.fit_predict(Toronto_downtown[['Latitude', 'Longitude']])

# get centroids
centroids = kmeans.cluster_centers_
cen_x = [i[0] for i in centroids] 
cen_y = [i[1] for i in centroids]

## add to df
Toronto_downtown['cen_x'] = Toronto_downtown.cluster.map({0:cen_x[0], 1:cen_x[1], 2:cen_x[2], 3:cen_x[3], 4:cen_x[4]})
Toronto_downtown['cen_y'] = Toronto_downtown.cluster.map({0:cen_y[0], 1:cen_y[1], 2:cen_y[2], 3:cen_y[3], 4:cen_y[4]})

# define and map colors
colors = ['blue', 'green', 'red', 'orange', 'yellow']
Toronto_downtown['color'] = Toronto_downtown.cluster.map({0:colors[0], 1:colors[1], 2:colors[2], 3:colors[3], 4:colors[4]})

Toronto_downtown.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Toronto_downtown['cluster'] = kmeans.fit_predict(Toronto_downtown[['Latitude', 'Longitude']])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Toronto_downtown['cen_x'] = Toronto_downtown.cluster.map({0:cen_x[0], 1:cen_x[1], 2:cen_x[2], 3:cen_x[3], 4:cen_x[4]})
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,cluster,cen_x,cen_y,color,x_line,y_line
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1,43.654949,-79.390928,green,43.65426,-79.360636
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494,1,43.654949,-79.390928,green,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242,2,43.681159,-79.556595,red,43.667856,-79.532242
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,3,43.69104,-79.327472,orange,43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,1,43.654949,-79.390928,green,43.657162,-79.378937


## New York Map

In [335]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [336]:
# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=11, control_scale = True, tiles='cartodbpositron')

# add markers to map
for lat, lng, cen_x, cen_y, color in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['cen_x'], neighborhoods['cen_y'], neighborhoods['color']):
    x = [lat, lng,]
    y = [cen_x, cen_y]
    folium.PolyLine([x,y], color=color, weight=2, opacity=0.5).add_to(map_newyork)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)
    folium.CircleMarker(
        [cen_x, cen_y],
        radius=50,
        color=color,
        fill=False,
        fill_color=color,
        fill_opacity=0.01,
        parse_html=False).add_to(map_newyork)

map_newyork

In [337]:
map_newyork.save("NY.html")

fn='mapimageNY.html'
tmpurl='file://{path}/{mapfile}'.format(path=os.getcwd(),mapfile=fn)
map_newyork.save(fn)

browser = webdriver.Chrome("C://Users//andre.mercanzini//Documents//Python Scripts//phantomjs-2.1.1-windows//bin//chromedriver.exe")
browser.get(tmpurl)
#Give the map tiles some time to load
time.sleep(delay)
browser.save_screenshot('map_NY.png')
browser.quit()

## Manhattan Map

In [338]:
address = 'Manhattan, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 40.7896239, -73.9598939.


In [339]:
# create map of Manhattan using latitude and longitude values
map_manhattan = folium.Map(location=[latitude - 0.01, longitude], zoom_start=12, control_scale = True) 

# add markers to map
for lat, lng, label in zip(manhattan_data['Latitude'], manhattan_data['Longitude'], manhattan_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_manhattan)  
    
map_manhattan


In [340]:
# create map of New York using latitude and longitude values
map_manhat_clustered = folium.Map(location=[latitude, longitude], zoom_start=11, control_scale = True, tiles='cartodbpositron')

# add markers to map
for lat, lng, cen_x, cen_y, color in zip(manhattan_data['Latitude'], manhattan_data['Longitude'], manhattan_data['cen_x'], manhattan_data['cen_y'], manhattan_data['color']):
    x = [lat, lng,]
    y = [cen_x, cen_y]
    folium.PolyLine([x,y], color=color, weight=2, opacity=0.5).add_to(map_manhat_clustered)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7,
        parse_html=False).add_to(map_manhat_clustered)
    folium.CircleMarker(
        [cen_x, cen_y],
        radius=50,
        color=color,
        fill=False,
        fill_color=color,
        fill_opacity=0.01,
        parse_html=False).add_to(map_manhat_clustered)

map_manhat_clustered

### save Manhattan map as png file

In [342]:
map_manhat_clustered.save("manhat_cluster.html")

fn='mapimageMANclus.html'
tmpurl='file://{path}/{mapfile}'.format(path=os.getcwd(),mapfile=fn)
map_manhat_clustered.save(fn)

browser = webdriver.Chrome("C://Users//andre.mercanzini//Documents//Python Scripts//phantomjs-2.1.1-windows//bin//chromedriver.exe")
browser.get(tmpurl)
#Give the map tiles some time to load
time.sleep(delay)
browser.save_screenshot('manhat_cluster.png')
browser.quit()

In [None]:
map_manhattan.save("manhat.html")

fn='mapimageMAN.html'
tmpurl='file://{path}/{mapfile}'.format(path=os.getcwd(),mapfile=fn)
map_manhattan.save(fn)

browser = webdriver.Chrome("C://Users//andre.mercanzini//Documents//Python Scripts//phantomjs-2.1.1-windows//bin//chromedriver.exe")
browser.get(tmpurl)
#Give the map tiles some time to load
time.sleep(delay)
browser.save_screenshot('map_Manhat.png')
browser.quit()

## Toronto Map

In [320]:
address = 'Toronto, Ontario'

#geolocator = Nominatim(user_agent="ny_explorer")
T_location = geolocator.geocode(address)
T_latitude = T_location.latitude
T_longitude = T_location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(T_latitude, T_longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [321]:
# create map of Toronto using latitude and longitude values
map_Toronto_neigh = folium.Map(location=[T_latitude, T_longitude], zoom_start=11, control_scale = True)

# add markers to map
for lat, lng, cen_x, cen_y, color in zip(Toronto_data['Latitude'], Toronto_data['Longitude'], Toronto_data['cen_x'], Toronto_data['cen_y'], Toronto_data['color']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto_neigh)  
    
map_Toronto_neigh

In [322]:
# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[T_latitude, T_longitude], zoom_start=11, control_scale = True, tiles='cartodbpositron')

# add markers to map
for lat, lng, cen_x, cen_y, color in zip(Toronto_data['Latitude'], Toronto_data['Longitude'], Toronto_data['cen_x'], Toronto_data['cen_y'], Toronto_data['color']):
    label = folium.Popup(label, parse_html=True)
    x = [lat, lng,]
    y = [cen_x, cen_y]
    folium.PolyLine([x,y], color=color, weight=2, opacity=0.5).add_to(map_Toronto)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    folium.CircleMarker(
        [cen_x, cen_y],
        radius=50,
        color=color,
        fill=False,
        fill_color= color,
        fill_opacity=0.01,
        parse_html=False).add_to(map_Toronto)
    
map_Toronto

In [323]:
map_Toronto.save("Toronto.html")

fn='mapimageTOR.html'
tmpurl='file://{path}/{mapfile}'.format(path=os.getcwd(),mapfile=fn)
map_Toronto.save(fn)

browser = webdriver.Chrome("C://Users//andre.mercanzini//Documents//Python Scripts//phantomjs-2.1.1-windows//bin//chromedriver.exe")
browser.get(tmpurl)
#Give the map tiles some time to load
time.sleep(delay)
browser.save_screenshot('map_Toronto.png')
browser.quit()

### Downtown Toronto Map

In [324]:
address = 'Downtown Toronto, Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Downtown Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Downtown Toronto are 43.6541457, -79.38079276646786.


In [325]:
# create map of Downtown Toronto using latitude and longitude values
map_DT_Toronto = folium.Map(location=[latitude, longitude], zoom_start=12, control_scale = True) 

# add markers to map
for lat, lng, label in zip(Toronto_downtown['Latitude'], Toronto_downtown['Longitude'], Toronto_downtown['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
        scrollWheelZoom = False).add_to(map_DT_Toronto)  
    
map_DT_Toronto

In [331]:
# create map of Downtown Toronto using latitude and longitude values
map_TorCore_clustered = folium.Map(location=[latitude, longitude], zoom_start=11, control_scale = True, tiles='cartodbpositron')

# add markers to map
for lat, lng, cen_x, cen_y, color in zip(Toronto_downtown['Latitude'], Toronto_downtown['Longitude'], Toronto_downtown['cen_x'], Toronto_downtown['cen_y'], Toronto_downtown['color']):
    x = [lat, lng,]
    y = [cen_x, cen_y]
    folium.PolyLine([x,y], color=color, weight=2, opacity=0.5).add_to(map_TorCore_clustered)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7,
        parse_html=False).add_to(map_TorCore_clustered)
    folium.CircleMarker(
        [cen_x, cen_y],
        radius=50,
        color=color,
        fill=False,
        fill_color=color,
        fill_opacity=0.01,
        parse_html=False).add_to(map_TorCore_clustered)

map_TorCore_clustered

In [333]:
map_TorCore_clustered.save("TorCore.html")

fn='mapimageTorCore.html'
tmpurl='file://{path}/{mapfile}'.format(path=os.getcwd(),mapfile=fn)
map_TorCore_clustered.save(fn)

browser = webdriver.Chrome("C://Users//andre.mercanzini//Documents//Python Scripts//phantomjs-2.1.1-windows//bin//chromedriver.exe")
browser.get(tmpurl)
#Give the map tiles some time to load
time.sleep(delay)
browser.save_screenshot('map_TorCore.png')
browser.quit()

### Overlay two Maps, Toronto and Manhattan

In [332]:
background = Image.open("map_NY.png")
overlay = Image.open("map_Toronto.png")

background = background.convert("RGBA")
overlay = overlay.convert("RGBA")

new_img = Image.blend(background, overlay, 0.5)
new_img.save("map_Overlayed.png","PNG")

In [343]:
background = Image.open('manhat_cluster.png')
overlay = Image.open("map_TorCore.png")

background = background.convert("RGBA")
overlay = overlay.convert("RGBA")

new_img = Image.blend(background, overlay, 0.5)
new_img.save("map_Overlayed_Core.png","PNG")

## Foursquare Data

In [57]:
CLIENT_ID = 'EAJBJLI0PIE1VHOLA0X5ITPDU0VRXW1NENB3LO3LAHTPT5P0' # your Foursquare ID
CLIENT_SECRET = 'PNKSAT5YIMLRTXHTYJILRREXRU2B3CXCQTFFRTIT3BRHLHLO' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value
ACCESS_TOKEN = 'SFHWKEPJOAQSCY5D23XDMBJBFSZGPGHKMO0AP1ENDC1MLY1J' # your FourSquare Access Token

In [222]:
manhattan_data.loc[0, 'Neighborhood']
neighborhood_latitude = manhattan_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = manhattan_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = manhattan_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))
# type your answer here
search_query = 'Italian'
radius = 500
#url = 'https://api.foursquare.com/v2/venues/trending?client_id={}&client_secret={}&ll={},{}&v={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION)
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, neighborhood_latitude, neighborhood_longitude, radius, LIMIT) # search_query
url

Latitude and longitude values of Marble Hill are 40.87655077879964, -73.91065965862981.


'https://api.foursquare.com/v2/venues/explore?client_id=EAJBJLI0PIE1VHOLA0X5ITPDU0VRXW1NENB3LO3LAHTPT5P0&client_secret=PNKSAT5YIMLRTXHTYJILRREXRU2B3CXCQTFFRTIT3BRHLHLO&v=20180605&ll=40.87655077879964,-73.91065965862981&radius=500&limit=100'

In [223]:
results = requests.get(url).json()

In [224]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [225]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,Arturo's,Pizza Place,40.874412,-73.910271
1,Bikram Yoga,Yoga Studio,40.876844,-73.906204
2,Dunkin',Donut Shop,40.877136,-73.906666
3,Tibbett Diner,Diner,40.880404,-73.908937
4,Starbucks,Coffee Shop,40.877531,-73.905582


In [227]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [228]:
manhattan_venues = getNearbyVenues(names=manhattan_data['Neighborhood'],
                                   latitudes=manhattan_data['Latitude'],
                                   longitudes=manhattan_data['Longitude']
                                  )

Marble Hill
Chinatown
Washington Heights
Inwood
Hamilton Heights
Manhattanville
Central Harlem
East Harlem
Upper East Side
Yorkville
Lenox Hill
Roosevelt Island
Upper West Side
Lincoln Square
Clinton
Midtown
Murray Hill
Chelsea
Greenwich Village
East Village
Lower East Side
Tribeca
Little Italy
Soho
West Village
Manhattan Valley
Morningside Heights
Gramercy
Battery Park City
Financial District
Carnegie Hill
Noho
Civic Center
Midtown South
Sutton Place
Turtle Bay
Tudor City
Stuyvesant Town
Flatiron
Hudson Yards


In [232]:
manhattan_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Battery Park City,90,90,90,90,90,90
Carnegie Hill,89,89,89,89,89,89
Central Harlem,45,45,45,45,45,45
Chelsea,100,100,100,100,100,100
Chinatown,100,100,100,100,100,100
Civic Center,100,100,100,100,100,100
Clinton,100,100,100,100,100,100
East Harlem,38,38,38,38,38,38
East Village,100,100,100,100,100,100
Financial District,100,100,100,100,100,100


In [233]:
# one hot encoding
manhattan_onehot = pd.get_dummies(manhattan_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
manhattan_onehot['Neighborhood'] = manhattan_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [manhattan_onehot.columns[-1]] + list(manhattan_onehot.columns[:-1])
manhattan_onehot = manhattan_onehot[fixed_columns]

manhattan_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auditorium,Australian Restaurant,Austrian Restaurant,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Beach Bar,Beer Bar,Beer Garden,Beer Store,Big Box Store,Bike Rental / Bike Share,Bike Trail,Bistro,Board Shop,Boat or Ferry,Bookstore,Boutique,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Bridal Shop,Bridge,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Camera Store,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Caucasian Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Circus,Climbing Gym,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Academic Building,College Arts Building,College Bookstore,College Cafeteria,Comedy Club,Community Center,Concert Hall,Convenience Store,Cooking School,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Cycle Studio,Czech Restaurant,Dance Studio,Daycare,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dive Shop,Doctor's Office,Dog Run,Donut Shop,Drugstore,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Ethiopian Restaurant,Event Space,Exhibit,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish Market,Flea Market,Flower Shop,Food & Drink Shop,Food Court,Food Stand,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,German Restaurant,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Heliport,High School,Hill,Himalayan Restaurant,Historic Site,History Museum,Hobby Shop,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Indoor Play Area,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kebab Restaurant,Kids Store,Korean Restaurant,Kosher Restaurant,Latin American Restaurant,Laundry Service,Leather Goods Store,Lebanese Restaurant,Library,Lingerie Store,Liquor Store,Lounge,Malay Restaurant,Market,Martial Arts School,Massage Studio,Mattress Store,Medical Center,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Motorcycle Shop,Movie Theater,Moving Target,Museum,Music School,Music Store,Music Venue,Nail Salon,New American Restaurant,Newsstand,Nightclub,Non-Profit,Noodle House,North Indian Restaurant,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Outdoor Sculpture,Outdoors & Recreation,Paella Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Pedestrian Plaza,Performing Arts Venue,Persian Restaurant,Peruvian Restaurant,Peruvian Roast Chicken Joint,Pet Café,Pet Service,Pet Store,Pharmacy,Photography Studio,Physical Therapist,Piano Bar,Pie Shop,Pier,Pilates Studio,Pizza Place,Playground,Plaza,Poke Place,Pool,Portuguese Restaurant,Post Office,Pub,Public Art,Ramen Restaurant,Record Shop,Rental Car Location,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,River,Rock Climbing Spot,Rock Club,Roof Deck,Russian Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,School,Seafood Restaurant,Shanghai Restaurant,Shipping Store,Shoe Store,Shopping Mall,Skate Park,Smoke Shop,Snack Place,Soccer Field,Social Club,Soup Place,South Indian Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Stables,Stationery Store,Steakhouse,Street Art,Strip Club,Supermarket,Supplement Shop,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Tech Startup,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Park Ride / Attraction,Thrift / Vintage Store,Tiki Bar,Tourist Information Center,Toy / Game Store,Trail,Tree,Turkish Restaurant,Udon Restaurant,Used Bookstore,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Volleyball Court,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [234]:
manhattan_grouped = manhattan_onehot.groupby('Neighborhood').mean().reset_index()

In [235]:
num_top_venues = 5

for hood in manhattan_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = manhattan_grouped[manhattan_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Battery Park City----
            venue  freq
0            Park  0.10
1     Coffee Shop  0.07
2  Clothing Store  0.06
3           Hotel  0.04
4             Gym  0.03


----Carnegie Hill----
         venue  freq
0  Coffee Shop  0.08
1         Café  0.07
2    Wine Shop  0.04
3  Yoga Studio  0.03
4  Pizza Place  0.03


----Central Harlem----
                 venue  freq
0   African Restaurant  0.07
1    French Restaurant  0.04
2  American Restaurant  0.04
3           Public Art  0.04
4   Chinese Restaurant  0.04


----Chelsea----
                venue  freq
0         Coffee Shop  0.06
1         Art Gallery  0.05
2              Bakery  0.05
3      Ice Cream Shop  0.03
4  Seafood Restaurant  0.03


----Chinatown----
                 venue  freq
0   Chinese Restaurant  0.09
1               Bakery  0.08
2         Cocktail Bar  0.05
3  American Restaurant  0.04
4         Dessert Shop  0.04


----Civic Center----
                  venue  freq
0           Coffee Shop  0.07
1  Gym / Fitness C

In [236]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [237]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = manhattan_grouped['Neighborhood']

for ind in np.arange(manhattan_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(manhattan_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Battery Park City,Park,Coffee Shop,Clothing Store,Hotel,Gym,Women's Store,Boat or Ferry,Memorial Site,Playground,Pizza Place
1,Carnegie Hill,Coffee Shop,Café,Wine Shop,Yoga Studio,Pizza Place,Bookstore,French Restaurant,Gym / Fitness Center,Bar,Cosmetics Shop
2,Central Harlem,African Restaurant,French Restaurant,American Restaurant,Public Art,Chinese Restaurant,Gym / Fitness Center,Seafood Restaurant,Bar,Tapas Restaurant,Fried Chicken Joint
3,Chelsea,Coffee Shop,Art Gallery,Bakery,Ice Cream Shop,Seafood Restaurant,American Restaurant,Wine Shop,Hotel,French Restaurant,Tapas Restaurant
4,Chinatown,Chinese Restaurant,Bakery,Cocktail Bar,American Restaurant,Dessert Shop,Optical Shop,Salon / Barbershop,Boutique,Coffee Shop,Spa


In [72]:
# set number of clusters
kclusters = 5

manhattan_grouped_clustering = manhattan_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(manhattan_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 1, 1, 1, 1, 0, 1, 1])

In [73]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

manhattan_merged = manhattan_data

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
manhattan_merged = manhattan_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

manhattan_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Manhattan,Marble Hill,40.876551,-73.91066,4,Gym,Coffee Shop,Sandwich Place,Video Game Store,Tennis Stadium,Pharmacy,Bank,Supplement Shop,Ice Cream Shop,Pizza Place
1,Manhattan,Chinatown,40.715618,-73.994279,1,Chinese Restaurant,Bakery,Cocktail Bar,American Restaurant,Dessert Shop,Optical Shop,Salon / Barbershop,Boutique,Coffee Shop,Spa
2,Manhattan,Washington Heights,40.851903,-73.9369,0,Café,Bakery,Bank,Mobile Phone Shop,Restaurant,Grocery Store,Chinese Restaurant,Spanish Restaurant,Coffee Shop,Tapas Restaurant
3,Manhattan,Inwood,40.867684,-73.92121,0,Mexican Restaurant,Restaurant,Café,Bakery,Lounge,Caribbean Restaurant,Wine Bar,Chinese Restaurant,Park,Pizza Place
4,Manhattan,Hamilton Heights,40.823604,-73.949688,0,Pizza Place,Café,Deli / Bodega,Coffee Shop,Mexican Restaurant,Yoga Studio,Bakery,Caribbean Restaurant,Cocktail Bar,Park


In [74]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(manhattan_merged['Latitude'], manhattan_merged['Longitude'], manhattan_merged['Neighborhood'], manhattan_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [None]:
df