# Segmenting and Clustering Neighborhoods in Toronto

## The first part

In [1]:
import pandas as pd
print('library imported')

library imported


In [2]:
tables = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
print('table parsed')

table parsed


In [3]:
# The dataframe consist of three columns: PostalCode, Borough, and Neighborhood
postalcode = tables[0]
postalcode

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [4]:
#  Ignore cells with a borough that is Not assigned.
postalcode = postalcode[postalcode['Borough'] != "Not assigned"]
postalcode

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [5]:
# More than one neighborhood can exist in one postal code area, for example 'M5A'
postalcode.iloc[2,:]

Postal Code                            M5A
Borough                   Downtown Toronto
Neighbourhood    Regent Park, Harbourfront
Name: 4, dtype: object

In [6]:
# If a cell has a borough but a Not assigned  neighborhood, then the neighborhood will be the same as the borough.
# split the table into two table. postcodeA for cell has a borough but a Not assigned  neighborhood
postalcodeA = postalcode[postalcode['Neighbourhood'] == 'Not assigned']
postalcodeA['Neighbourhood'] = postalcodeA['Borough']
#postalcodeA

# postcodeB for the rest
postalcodeB = postalcode[postalcode['Neighbourhood'] != 'Not assigned']
#postalcodeB

# combine the two tables. postcodeA & postcodeB
postalcode = postalcodeA.append(postalcodeB)
postalcode

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [7]:
# use the .shape method to print the number of rows of your dataframe
postalcode.shape

(103, 3)

## The second part

In [8]:
# install library -- grocoder
!pip install geocoder

# import geocoder
import geocoder
print('library imported')

library imported


In [9]:
# create a function to get latitude and longitude
def getll(postal_code):
    # initialize your variable to None
    lat_lng_coords = None
    
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        if True:
            g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code))
        if g.latlng is None:
            g = geocoder.geolytica('{}, Toronto, Ontario'.format(postal_code))
        if g.latlng is None:
            g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng
    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    return latitude, longitude
        

print('function created')

function created


In [10]:
# define neighbourhoods range
#postalcode_q = postalcode #uncomment for all
#postalcode_q = postalcode.sample(n=2) #uncomment for test
postalcode_q = postalcode[postalcode['Borough'].str.contains('Toronto')] #uncomment for boroughs that contain the word Toronto

i = postalcode_q.shape[0]
print('neighbourhoods range defined')

neighbourhoods range defined


In [11]:
# create table with latitude and longitude
column_names = ['Postal Code', 'Borough', 'Neighbourhood', 'Latitude', 'Longitude'] 
neighbourhoods = pd.DataFrame(columns=column_names)
for P, B, N in zip(postalcode_q['Postal Code'], postalcode_q['Borough'], postalcode_q['Neighbourhood']):
    print(i, P, B, N)
    i += -1
    lat, lon = getll(P)
    neighbourhoods = neighbourhoods.append({'Postal Code': P,
                                          'Borough': B,
                                          'Neighbourhood': N,
                                          'Latitude': lat,
                                          'Longitude': lon
                                         }, ignore_index=True)
neighbourhoods

39 M5A Downtown Toronto Regent Park, Harbourfront
38 M7A Downtown Toronto Queen's Park, Ontario Provincial Government
37 M5B Downtown Toronto Garden District, Ryerson
36 M5C Downtown Toronto St. James Town
35 M4E East Toronto The Beaches
34 M5E Downtown Toronto Berczy Park
33 M5G Downtown Toronto Central Bay Street
32 M6G Downtown Toronto Christie
31 M5H Downtown Toronto Richmond, Adelaide, King
30 M6H West Toronto Dufferin, Dovercourt Village
29 M5J Downtown Toronto Harbourfront East, Union Station, Toronto Islands
28 M6J West Toronto Little Portugal, Trinity
27 M4K East Toronto The Danforth West, Riverdale
26 M5K Downtown Toronto Toronto Dominion Centre, Design Exchange
25 M6K West Toronto Brockton, Parkdale Village, Exhibition Place
24 M4L East Toronto India Bazaar, The Beaches West
23 M5L Downtown Toronto Commerce Court, Victoria Hotel
22 M4M East Toronto Studio District
21 M4N Central Toronto Lawrence Park
20 M5N Central Toronto Roselawn
19 M4P Central Toronto Davisville North
18 

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.65739,-79.37804
3,M5C,Downtown Toronto,St. James Town,43.65215,-79.37587
4,M4E,East Toronto,The Beaches,43.67709,-79.29547
5,M5E,Downtown Toronto,Berczy Park,43.64536,-79.37306
6,M5G,Downtown Toronto,Central Bay Street,43.65609,-79.38493
7,M6G,Downtown Toronto,Christie,43.66869,-79.42071
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.6497,-79.38258
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.66505,-79.43891


## The third part

In [12]:
import requests
print('library imported')

library imported


In [13]:
# create a function to get nearby venues
def getNearbyVenues(names, latitudes, longitudes, radius = 500):
    venues_list = []
    
    for name, lat, lon in zip(names, latitudes, longitudes):
        url = 'https://api.foursquare.com/v2/venues/explore?ll={},{}&radius={}&limit={}&client_id={}&client_secret={}&v={}'.format(\
            lat,lon,radius,100,'OSAYJYNU10234WU1ECUOQWQOTCRIOZI50LZMGS5HIFM2UXDJ','3LS42SYJOQ3PBY2JK21DTQ53CHDMSMJKIM2G0ZTZE4K1HM3G','20210101')
        r = requests.get(url).json()['response']['groups'][0]['items']
        venues_list.append([(name, lat, lon, 
            v['venue']['name'], v['venue']['location']['lat'], v['venue']['location']['lng'], v['venue']['categories'][0]['name']) for v in r])
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

print('function created')

function created


In [14]:
Toronto_venues = getNearbyVenues(neighbourhoods['Neighbourhood'],neighbourhoods['Latitude'],neighbourhoods['Longitude'])

print("there are {} unique category".format(len(Toronto_venues['Venue Category'].unique())))

Toronto_venues.groupby('Neighbourhood').count()


there are 225 unique category


Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,62,62,62,62,62,62
"Brockton, Parkdale Village, Exhibition Place",86,86,86,86,86,86
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",100,100,100,100,100,100
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",75,75,75,75,75,75
Central Bay Street,61,61,61,61,61,61
Christie,11,11,11,11,11,11
Church and Wellesley,80,80,80,80,80,80
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,27,27,27,27,27,27
Davisville North,7,7,7,7,7,7


In [15]:
# Analyze
# one hot encoding
toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = Toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1697,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1698,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1699,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1700,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
# group by neighbourhoods and take mean of the frequency
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.016129,0.0,0.016129,0.0,0.0,0.0,...,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.016129
1,"Brockton, Parkdale Village, Exhibition Place",0.011628,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,...,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.0,0.0,0.011628
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.03,...,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013333,...,0.0,0.0,0.0,0.0,0.013333,0.0,0.0,0.0,0.0,0.013333
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.016393,0.016393,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.016393,0.016393,0.016393,0.0,0.0
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.0,0.0,0.0125,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0125
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.04,0.0,0.0,0.01,0.0,0.0,0.01,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
# print each neighbourhoods along with top 5 venues
num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                venue  freq
0         Coffee Shop  0.08
1        Cocktail Bar  0.05
2  Seafood Restaurant  0.05
3            Beer Bar  0.03
4      Breakfast Spot  0.03


----Brockton, Parkdale Village, Exhibition Place----
         venue  freq
0          Bar  0.07
1  Coffee Shop  0.06
2         Café  0.06
3   Restaurant  0.05
4    Nightclub  0.03


----Business reply mail Processing Centre, South Central Letter Processing Plant Toronto----
              venue  freq
0       Coffee Shop  0.07
1             Hotel  0.06
2              Café  0.04
3  Asian Restaurant  0.03
4  Sushi Restaurant  0.03


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
                venue  freq
0         Coffee Shop  0.07
1  Italian Restaurant  0.07
2                Café  0.07
3                 Bar  0.04
4   French Restaurant  0.04


----Central Bay Street----
                       venue  freq
0                Coffee Shop  0

In [18]:
# create function to sort the venues in descending order

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]
print('function created')

function created


In [19]:
import numpy as np
print('library imported')

library imported


In [20]:
# top 10 venues for each neighboourhoods
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Seafood Restaurant,Cocktail Bar,Farmers Market,Restaurant,Breakfast Spot,Cheese Shop,Beer Bar,Bakery,Museum
1,"Brockton, Parkdale Village, Exhibition Place",Bar,Coffee Shop,Café,Restaurant,Sandwich Place,Nightclub,Gift Shop,Lounge,Japanese Restaurant,Italian Restaurant
2,"Business reply mail Processing Centre, South C...",Coffee Shop,Hotel,Café,Sushi Restaurant,Asian Restaurant,Concert Hall,Mediterranean Restaurant,Steakhouse,Seafood Restaurant,Sandwich Place
3,"CN Tower, King and Spadina, Railway Lands, Har...",Italian Restaurant,Coffee Shop,Café,Park,French Restaurant,Bar,Speakeasy,Lounge,Bakery,Sandwich Place
4,Central Bay Street,Coffee Shop,Clothing Store,Plaza,Cosmetics Shop,Middle Eastern Restaurant,Restaurant,Bubble Tea Shop,Sandwich Place,Shoe Store,Department Store


In [21]:
from sklearn.cluster import KMeans
print('library imported')

library imported


In [22]:
# cluster neighbourhoods
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:100] 

#toronto_grouped_clustering

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 2, 0,
       0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [23]:
# create a new data frame to includes top10 venues for each neeighbourhoods
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = neighbourhoods

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')
toronto_merged = toronto_merged.replace(np.nan, 0)

toronto_merged

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264,0.0,Coffee Shop,Breakfast Spot,Yoga Studio,Thai Restaurant,Spa,Event Space,Food Truck,Electronics Store,Restaurant,Bakery
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188,0.0,Coffee Shop,Sandwich Place,Park,Theater,Mediterranean Restaurant,Café,Falafel Restaurant,Fried Chicken Joint,Burrito Place,Bank
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.65739,-79.37804,0.0,Coffee Shop,Clothing Store,Cosmetics Shop,Movie Theater,Café,Middle Eastern Restaurant,Japanese Restaurant,Italian Restaurant,Furniture / Home Store,Sandwich Place
3,M5C,Downtown Toronto,St. James Town,43.65215,-79.37587,0.0,Coffee Shop,Italian Restaurant,Hotel,Café,Gastropub,Cosmetics Shop,Clothing Store,Cocktail Bar,Theater,Seafood Restaurant
4,M4E,East Toronto,The Beaches,43.67709,-79.29547,0.0,Health Food Store,Asian Restaurant,Coffee Shop,Pub,Trail,Neighborhood,Farm,Falafel Restaurant,Event Space,Dry Cleaner
5,M5E,Downtown Toronto,Berczy Park,43.64536,-79.37306,0.0,Coffee Shop,Seafood Restaurant,Cocktail Bar,Farmers Market,Restaurant,Breakfast Spot,Cheese Shop,Beer Bar,Bakery,Museum
6,M5G,Downtown Toronto,Central Bay Street,43.65609,-79.38493,0.0,Coffee Shop,Clothing Store,Plaza,Cosmetics Shop,Middle Eastern Restaurant,Restaurant,Bubble Tea Shop,Sandwich Place,Shoe Store,Department Store
7,M6G,Downtown Toronto,Christie,43.66869,-79.42071,0.0,Café,Grocery Store,Baby Store,Coffee Shop,Italian Restaurant,Playground,Candy Store,Athletics & Sports,Yoga Studio,Ethiopian Restaurant
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.6497,-79.38258,0.0,Hotel,Restaurant,Café,Coffee Shop,Gym,Japanese Restaurant,American Restaurant,Asian Restaurant,Steakhouse,Salad Place
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.66505,-79.43891,0.0,Park,Athletics & Sports,Middle Eastern Restaurant,Smoke Shop,Bar,Bank,Bakery,Pool,Furniture / Home Store,Café


In [24]:
# install library -- folium
!pip install folium

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
print('library imported')

library imported


In [25]:
# visualize the result
lat = getll('')[0]
lon = getll('')[1]
map_clusters = folium.Map(location=[lat, lon], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine cluster


In [26]:
# Cluster n
n = 0
toronto_merged.loc[toronto_merged['Cluster Labels'] == n, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,0.0,Coffee Shop,Breakfast Spot,Yoga Studio,Thai Restaurant,Spa,Event Space,Food Truck,Electronics Store,Restaurant,Bakery
1,Downtown Toronto,0.0,Coffee Shop,Sandwich Place,Park,Theater,Mediterranean Restaurant,Café,Falafel Restaurant,Fried Chicken Joint,Burrito Place,Bank
2,Downtown Toronto,0.0,Coffee Shop,Clothing Store,Cosmetics Shop,Movie Theater,Café,Middle Eastern Restaurant,Japanese Restaurant,Italian Restaurant,Furniture / Home Store,Sandwich Place
3,Downtown Toronto,0.0,Coffee Shop,Italian Restaurant,Hotel,Café,Gastropub,Cosmetics Shop,Clothing Store,Cocktail Bar,Theater,Seafood Restaurant
4,East Toronto,0.0,Health Food Store,Asian Restaurant,Coffee Shop,Pub,Trail,Neighborhood,Farm,Falafel Restaurant,Event Space,Dry Cleaner
5,Downtown Toronto,0.0,Coffee Shop,Seafood Restaurant,Cocktail Bar,Farmers Market,Restaurant,Breakfast Spot,Cheese Shop,Beer Bar,Bakery,Museum
6,Downtown Toronto,0.0,Coffee Shop,Clothing Store,Plaza,Cosmetics Shop,Middle Eastern Restaurant,Restaurant,Bubble Tea Shop,Sandwich Place,Shoe Store,Department Store
7,Downtown Toronto,0.0,Café,Grocery Store,Baby Store,Coffee Shop,Italian Restaurant,Playground,Candy Store,Athletics & Sports,Yoga Studio,Ethiopian Restaurant
8,Downtown Toronto,0.0,Hotel,Restaurant,Café,Coffee Shop,Gym,Japanese Restaurant,American Restaurant,Asian Restaurant,Steakhouse,Salad Place
9,West Toronto,0.0,Park,Athletics & Sports,Middle Eastern Restaurant,Smoke Shop,Bar,Bank,Bakery,Pool,Furniture / Home Store,Café


In [27]:
# Cluster n
n = 1
toronto_merged.loc[toronto_merged['Cluster Labels'] == n, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,Central Toronto,1.0,Gym,Trail,Lawyer,Yoga Studio,Dry Cleaner,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm


In [28]:
# Cluster n
n = 2
toronto_merged.loc[toronto_merged['Cluster Labels'] == n, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,Central Toronto,2.0,Gym Pool,Playground,Park,Yoga Studio,Donut Shop,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
33,Downtown Toronto,2.0,Park,Playground,Bike Trail,Yoga Studio,Dumpling Restaurant,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm


In [29]:
# Cluster n
n = 3
toronto_merged.loc[toronto_merged['Cluster Labels'] == n, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Central Toronto,3.0,Bus Line,Swim School,Yoga Studio,Eastern European Restaurant,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm


In [30]:
# Cluster n
n = 4
toronto_merged.loc[toronto_merged['Cluster Labels'] == n, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,Central Toronto,4.0,Home Service,Fast Food Restaurant,Yoga Studio,Dumpling Restaurant,Flea Market,Fish Market,Fish & Chips Shop,Farmers Market,Farm,Falafel Restaurant


In [31]:
# Thank you and have a nice day!