# Segmenting and Clustering Neighborhoods in Toronto

In [1]:
# Task to explore, segment, and cluster the neighborhoods in the city of Toronto.
# As there is no readily available Toronto neighborhood data, it is required to scrape from wiki and complete data wrangel like 
# convert to dataframe using either Python pandas or Beautifulsoap, I have used Python pandas library. 
# 
# Once the data is in a structured format, explore and cluster the neighborhoods in the city of Toronto.

# Store the notebook in Github repository.

In [2]:
# import libraries 
import requests
import pandas as pd

In [3]:
# Obtian the HTML of wiki, convert into table using read_html
url_wiki_toronto = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki_page_toronto = requests.get(url_wiki_toronto)

# convert to dataframe 
df_toronto_preprocess = pd.read_html(wiki_page_toronto.content, header = 0)[0]
df_toronto_preprocess.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [4]:
# remove cell that contain not assigned from Borough column
df_toronto_preprocess = df_toronto_preprocess[df_toronto_preprocess.Borough !='Not assigned']
df_toronto_preprocess.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
# Find if there are any Not assigned in Neighbourhood and then assign Borough name to it.
df_toronto_preprocess.loc[df_toronto_preprocess.Neighbourhood == 'Not assigned']

Unnamed: 0,Postal Code,Borough,Neighbourhood


In [6]:
# Looks like there are no 'Not assigned' neighbourhood in the table, therefore dataframe is fully formatted

In [7]:
# Rename as formatted data frame.
df_toronto_formatted = df_toronto_preprocess
df_toronto_formatted.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [8]:
# Group the neighbourhood with same postal code
df_toronto_formatted = df_toronto_formatted.groupby(['Postal Code', 'Borough'])['Neighbourhood'].apply(lambda x: '.'.join(x))

# reset the index in the dataframe
df_toronto_formatted = df_toronto_formatted.reset_index()
df_toronto_formatted.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [9]:
df_toronto_formatted.shape


(103, 3)

# this answers first part of submission

In [10]:
# Get the latitude and the longitude coordinates of each neighborhood using csv file 

url = 'http://cocl.us/Geospatial_data'
df_toronto_geo=pd.read_csv(url)
df_toronto_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
#check the sshape o the csv file
df_toronto_geo.shape

(103, 3)

In [12]:
# Both tables have the same number of columns and rows, can join longitude and latitude as new colums to df_toronto_formatted dataframe.

# No need to change name of column as they are same


df_toronto_formatted = pd.merge(df_toronto_formatted, df_toronto_geo)

df_toronto_formatted.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# This answers second part of submission

In [13]:
# Use the foursquere API to segment and cluster the neighborhoods of Toronto
# import geocoder library
!conda install -c conda-forge geocoder --yes
import geocoder
from geopy.geocoders import Nominatim

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



In [14]:
address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent = "toronto explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geographical co-ordiantes of Toronto are {}, {}.'.format(latitude, longitude))

The geographical co-ordiantes of Toronto are 43.6534817, -79.3839347.


In [15]:
import folium
# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, Borough, Neighbourhood in zip(df_toronto_formatted['Latitude'], df_toronto_formatted['Longitude'], df_toronto_formatted['Borough'], df_toronto_formatted['Neighbourhood']):
    label = '{}, {}'.format(Neighbourhood, Borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.6,
        ).add_to(map_Toronto)  
    
map_Toronto

In [16]:
# Define foursquare cridentials 
CLIENT_ID = 'PK4BH4MYKQKYUVXDUY403J3KMCAGSQ2TSPQJKDCECO5VLTIC' 
CLIENT_SECRET = 'KDAHVE0IMPH0XDGSV2JEXW41QI1E3MD2LGJPCWZ4OHU0EZUH' 
VERSION = '20180604' 

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: PK4BH4MYKQKYUVXDUY403J3KMCAGSQ2TSPQJKDCECO5VLTIC
CLIENT_SECRET:KDAHVE0IMPH0XDGSV2JEXW41QI1E3MD2LGJPCWZ4OHU0EZUH


In [17]:
# Explore the data, and get the venues in 500 meters range from first entry

neighborhood_latitude = df_toronto_formatted.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df_toronto_formatted.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = df_toronto_formatted.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Malvern, Rouge are 43.806686299999996, -79.19435340000001.


In [18]:
# Create a GET request URL
Limit = 100
radius = 1000
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v= {}&ll={},{}&radius={}&limit{}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    Limit)
url

'https://api.foursquare.com/v2/venues/explore?client_id=PK4BH4MYKQKYUVXDUY403J3KMCAGSQ2TSPQJKDCECO5VLTIC&client_secret=KDAHVE0IMPH0XDGSV2JEXW41QI1E3MD2LGJPCWZ4OHU0EZUH&v= 20180604&ll=43.806686299999996,-79.19435340000001&radius=1000&limit100'

In [19]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f8a0e2a0bf98854152875bd'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Malvern',
  'headerFullLocation': 'Malvern, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 19,
  'suggestedBounds': {'ne': {'lat': 43.81568630900001,
    'lng': -79.18190576146081},
   'sw': {'lat': 43.797686290999984, 'lng': -79.20680103853921}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4d669cba83865481c948fa53',
       'name': 'Images Salon & Spa',
       'location': {'address': '8130 Sheppard Ave E',
        'crossStreet': 'Morningside Ave',
        'lat': 43.80228301948931,
        'lng': -79.19856472801668,
        'labeledLatLngs'

In [20]:
# define function to extract the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [21]:
import json
from pandas.io.json import json_normalize

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,Images Salon & Spa,Spa,43.802283,-79.198565
1,Harvey's,Restaurant,43.80002,-79.198307
2,Caribbean Wave,Caribbean Restaurant,43.798558,-79.195777
3,Staples Morningside,Paper / Office Supplies Store,43.800285,-79.196607
4,Wendy's,Fast Food Restaurant,43.802008,-79.19808


In [22]:
# Generalize to obtain the venues from all neighbourhoods in Toronto
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            Limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [23]:
toronto_venues = getNearbyVenues(names=df_toronto_formatted['Neighbourhood'],
                                   latitudes=df_toronto_formatted['Latitude'],
                                   longitudes=df_toronto_formatted['Longitude']
                                )

Malvern, Rouge
Rouge Hill, Port Union, Highland Creek
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Golden Mile, Clairlea, Oakridge
Cliffside, Cliffcrest, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Wexford Heights, Scarborough Town Centre
Wexford, Maryvale
Agincourt
Clarks Corners, Tam O'Shanter, Sullivan
Milliken, Agincourt North, Steeles East, L'Amoreaux East
Steeles West, L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
York Mills, Silver Hills
Willowdale, Newtonbrook
Willowdale, Willowdale East
York Mills West
Willowdale, Willowdale West
Parkwoods
Don Mills
Don Mills
Bathurst Manor, Wilson Heights, Downsview North
Northwood Park, York University
Downsview
Downsview
Downsview
Downsview
Victoria Village
Parkview Hill, Woodbine Gardens
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto, Broadview North (Old East York)
The Danforth West, 

In [24]:
# size of the dataframe
toronto_venues.shape

(4876, 7)

In [25]:
# dataframe head
toronto_venues.head()

Unnamed: 0,Neighbourhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,Images Salon & Spa,43.802283,-79.198565,Spa
1,"Malvern, Rouge",43.806686,-79.194353,Harvey's,43.80002,-79.198307,Restaurant
2,"Malvern, Rouge",43.806686,-79.194353,Caribbean Wave,43.798558,-79.195777,Caribbean Restaurant
3,"Malvern, Rouge",43.806686,-79.194353,Staples Morningside,43.800285,-79.196607,Paper / Office Supplies Store
4,"Malvern, Rouge",43.806686,-79.194353,Wendy's,43.802008,-79.19808,Fast Food Restaurant


In [26]:
# groupby venues in neighbourhood and count
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,50,50,50,50,50,50
"Alderwood, Long Branch",24,24,24,24,24,24
"Bathurst Manor, Wilson Heights, Downsview North",29,29,29,29,29,29
Bayview Village,14,14,14,14,14,14
"Bedford Park, Lawrence Manor East",41,41,41,41,41,41
...,...,...,...,...,...,...
"Willowdale, Willowdale West",11,11,11,11,11,11
Woburn,8,8,8,8,8,8
Woodbine Heights,26,26,26,26,26,26
York Mills West,20,20,20,20,20,20


In [27]:
# No of categories using one hot encoding

toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,African Restaurant,Airport,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [28]:
toronto_onehot.shape

(4876, 335)

In [29]:
# group by neighbourhood

toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,African Restaurant,Airport,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,...,0.02439,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0


In [30]:
toronto_grouped.shape

(98, 335)

In [31]:
# Get the top 10 for each neighbourhood
#define the function for most common venues

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [32]:
import numpy as np
top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Shopping Mall,Bakery,Coffee Shop,Caribbean Restaurant,Sandwich Place,Lounge,Skating Rink,Latin American Restaurant,Sushi Restaurant
1,"Alderwood, Long Branch",Discount Store,Pharmacy,Convenience Store,Pizza Place,Gas Station,Shopping Mall,Liquor Store,Donut Shop,Park,Sandwich Place
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Frozen Yogurt Shop,Dog Run,Gas Station,Chinese Restaurant,Sushi Restaurant,Supermarket,Middle Eastern Restaurant,Trail
3,Bayview Village,Bank,Grocery Store,Japanese Restaurant,Gas Station,Chinese Restaurant,Park,Restaurant,Café,Skating Rink,Trail
4,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Park,Pizza Place,Sandwich Place,Bank,Café,Bagel Shop,Thai Restaurant,Bakery


In [33]:
# Cluster neighbourhood
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([3, 3, 3, 3, 2, 2, 0, 2, 2, 2])

In [34]:
# combine top 10 and cluster from each neighbourhood
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_toronto_formatted

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

# check last columns.
toronto_merged.head() 

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,2.0,Coffee Shop,Trail,Fast Food Restaurant,Bank,Restaurant,Chinese Restaurant,Paper / Office Supplies Store,Bakery,Gym,Park
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,0.0,Playground,Burger Joint,Italian Restaurant,Park,Breakfast Spot,Food & Drink Shop,Field,Escape Room,Ethiopian Restaurant,Event Space
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,3.0,Pizza Place,Fast Food Restaurant,Bank,Coffee Shop,Pharmacy,Liquor Store,Bus Line,Sandwich Place,Supermarket,Greek Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0.0,Coffee Shop,Park,Chinese Restaurant,Fast Food Restaurant,Indian Restaurant,Mobile Phone Shop,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,3.0,Coffee Shop,Pharmacy,Bank,Gas Station,Indian Restaurant,Bakery,Music Store,Hakka Restaurant,Caribbean Restaurant,Chinese Restaurant


In [35]:
# Plot the clusters in the map
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# drop NaN in the dataframe

toronto_merged_nonan = toronto_merged.dropna(subset=['Cluster Labels'])

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged_nonan['Latitude'], toronto_merged_nonan['Longitude'], toronto_merged_nonan['Neighbourhood'], toronto_merged_nonan['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [36]:
# Examine clusters - Cluster 1
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 0, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,0.0,Playground,Burger Joint,Italian Restaurant,Park,Breakfast Spot,Food & Drink Shop,Field,Escape Room,Ethiopian Restaurant,Event Space
3,Scarborough,0.0,Coffee Shop,Park,Chinese Restaurant,Fast Food Restaurant,Indian Restaurant,Mobile Phone Shop,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant
9,Scarborough,0.0,Park,Convenience Store,Auto Workshop,Gym Pool,Gym,General Entertainment,Diner,Restaurant,College Stadium,Skating Rink
23,North York,0.0,Park,Restaurant,Coffee Shop,Gym,Pet Store,Dog Run,Chinese Restaurant,Bowling Alley,Playground,Grocery Store
50,Downtown Toronto,0.0,Coffee Shop,Park,Grocery Store,Breakfast Spot,Bistro,Bank,Sandwich Place,BBQ Joint,Filipino Restaurant,Athletics & Sports
94,Etobicoke,0.0,Park,Pizza Place,Restaurant,Bank,Grocery Store,Gym,Mexican Restaurant,Clothing Store,Fish & Chips Shop,Hotel
97,North York,0.0,Auto Workshop,Park,Golf Course,Convenience Store,Bakery,Intersection,Storage Facility,Discount Store,Gas Station,Zoo


In [37]:
# Examine clusters - Cluster 2
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 1, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
102,Etobicoke,1.0,Lounge,Coffee Shop,Zoo,Field,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant


In [38]:
# Examine clusters - Cluster 3
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 2, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,2.0,Coffee Shop,Trail,Fast Food Restaurant,Bank,Restaurant,Chinese Restaurant,Paper / Office Supplies Store,Bakery,Gym,Park
18,North York,2.0,Coffee Shop,Clothing Store,Japanese Restaurant,Bank,Bakery,Sandwich Place,Restaurant,Juice Bar,Toy / Game Store,Salon / Barbershop
21,North York,2.0,Korean Restaurant,Café,Park,Diner,Coffee Shop,Pizza Place,Middle Eastern Restaurant,Shopping Mall,Supermarket,Sandwich Place
22,North York,2.0,Coffee Shop,Korean Restaurant,Bubble Tea Shop,Ramen Restaurant,Pizza Place,Japanese Restaurant,Fast Food Restaurant,Sandwich Place,Sushi Restaurant,Bank
26,North York,2.0,Restaurant,Coffee Shop,Japanese Restaurant,Gym,Supermarket,Bank,Burger Joint,Mobile Phone Shop,Café,Asian Restaurant
27,North York,2.0,Restaurant,Coffee Shop,Japanese Restaurant,Gym,Supermarket,Bank,Burger Joint,Mobile Phone Shop,Café,Asian Restaurant
36,East York,2.0,Coffee Shop,Park,Café,Skating Rink,Pizza Place,Sandwich Place,Dance Studio,Athletics & Sports,Curling Ice,Farmers Market
37,East Toronto,2.0,Pub,Coffee Shop,Pizza Place,Breakfast Spot,Beach,Japanese Restaurant,Health Food Store,Caribbean Restaurant,Sandwich Place,Bar
38,East York,2.0,Sporting Goods Shop,Coffee Shop,Furniture / Home Store,Electronics Store,Grocery Store,Burger Joint,Restaurant,Sports Bar,Bank,Brewery
40,East York,2.0,Café,Coffee Shop,Greek Restaurant,Beer Bar,Fast Food Restaurant,Ethiopian Restaurant,Pharmacy,Pizza Place,Bakery,Sandwich Place


In [39]:
# Examine clusters - Cluster 4
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 3, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Scarborough,3.0,Pizza Place,Fast Food Restaurant,Bank,Coffee Shop,Pharmacy,Liquor Store,Bus Line,Sandwich Place,Supermarket,Greek Restaurant
4,Scarborough,3.0,Coffee Shop,Pharmacy,Bank,Gas Station,Indian Restaurant,Bakery,Music Store,Hakka Restaurant,Caribbean Restaurant,Chinese Restaurant
5,Scarborough,3.0,Ice Cream Shop,Grocery Store,Restaurant,Fast Food Restaurant,Sandwich Place,Pizza Place,Bowling Alley,Japanese Restaurant,Coffee Shop,Convenience Store
6,Scarborough,3.0,Coffee Shop,Chinese Restaurant,Grocery Store,Fast Food Restaurant,Pizza Place,Discount Store,Bank,Asian Restaurant,Sandwich Place,Light Rail Station
7,Scarborough,3.0,Bus Line,Coffee Shop,Bakery,Intersection,Pharmacy,Soccer Field,Metro Station,Beer Store,General Entertainment,Sandwich Place
8,Scarborough,3.0,Pizza Place,Ice Cream Shop,Beach,Hardware Store,Restaurant,Park,Sports Bar,Hookah Bar,Farm,Eastern European Restaurant
10,Scarborough,3.0,Electronics Store,Coffee Shop,Restaurant,Indian Restaurant,Fast Food Restaurant,Chinese Restaurant,Asian Restaurant,Bakery,Pharmacy,Discount Store
11,Scarborough,3.0,Middle Eastern Restaurant,Pizza Place,Grocery Store,Supermarket,Bakery,Korean Restaurant,Soccer Field,Coffee Shop,Gas Station,Seafood Restaurant
12,Scarborough,3.0,Chinese Restaurant,Shopping Mall,Bakery,Coffee Shop,Caribbean Restaurant,Sandwich Place,Lounge,Skating Rink,Latin American Restaurant,Sushi Restaurant
13,Scarborough,3.0,Coffee Shop,Pharmacy,Bank,Pizza Place,Fast Food Restaurant,Convenience Store,Sandwich Place,Intersection,Chinese Restaurant,Taiwanese Restaurant


In [40]:
# Examine clusters - Cluster 5
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 4, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
20,North York,4.0,Park,Pool,Zoo,Field,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market
91,Etobicoke,4.0,Park,Ice Cream Shop,Italian Restaurant,Bus Stop,Shopping Mall,Eastern European Restaurant,Gym / Fitness Center,Dessert Shop,Design Studio,Event Space
