# Applied Data Science Capstone - Notebook Week 3


__Segmenting and Clustering Neighborhoods in Toronto__


### Section #1 - Prepare neighborhood data

<span style="color:darkred">*Import the dependencies*</span>

In [1]:
import os
import time

import numpy as np
import pandas as pd

<span style="color:darkred">*Retrieve neighborhood data from Wikipedia*</span><br>
<span style="color:darkred">*We use a specific version of the page since it has been modified several times*</span>

In [2]:
# Pinned revision (oldid) of the Wikipedia page, so the scraped table does not change over time.
wiki_url = ('https://en.wikipedia.org/w/index.php'
            '?title=List_of_postal_codes_of_Canada:_M&direction=prev&oldid=926287641')
# read_html returns one dataframe per HTML table found; the postal codes are in the first one.
wiki_tables = pd.read_html(wiki_url)
df = wiki_tables[0]
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
...,...,...,...
283,M8Z,Etobicoke,Mimico NW
284,M8Z,Etobicoke,The Queensway West
285,M8Z,Etobicoke,Royal York South West
286,M8Z,Etobicoke,South of Bloor


<span style="color:darkred">*Fix the column names*</span>

In [3]:
# Positional rename of the three scraped columns to the names used in the rest of the notebook.
column_names = ['PostalCode', 'Borough', 'Neighborhood']
df.columns = column_names

<span style="color:darkred">*Filter out the post codes with no assigned borough*</span>

In [4]:
# Keep only the rows whose borough is known, then renumber the rows from 0.
has_borough = df['Borough'].ne('Not assigned')
df = df.loc[has_borough].reset_index(drop=True)
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights
...,...,...,...
206,M8Z,Etobicoke,Kingsway Park South West
207,M8Z,Etobicoke,Mimico NW
208,M8Z,Etobicoke,The Queensway West
209,M8Z,Etobicoke,Royal York South West


<span style="color:darkred">*Assign unassigned neighborhoods to same as borough, if any*</span>

In [5]:
# Rows whose neighborhood is missing fall back to the name of their borough.
# An explicit boolean mask is used instead of Series.replace(value=<Series>),
# which is not a documented form of `replace`.
unassigned = df['Neighborhood'] == 'Not assigned'
print('Before: {} neighborhoods not assigned'.format(unassigned.sum()))
df.loc[unassigned, 'Neighborhood'] = df.loc[unassigned, 'Borough']
print('After: {} neighborhoods not assigned'.format((df['Neighborhood'] == 'Not assigned').sum()))

Before: 1 neighborhoods not assigned
After: 0 neighborhoods not assigned


<span style="color:darkred">*Group together neighborhoods with same post code*</span>

In [6]:
# One row per (postal code, borough); the neighborhood names of a group are
# concatenated into a single comma-separated string.
neighborhoods_by_code = df.groupby(['PostalCode', 'Borough'])['Neighborhood']
df = neighborhoods_by_code.apply(', '.join).reset_index()
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."


### Section #2 - Add geospatial data

<span style="color:darkred">*Define the function that retrieves the geospatial coordinates for a given postal code.*</span><br>
<span style="color:darkred">*We use geocoder.arcgis instead of geocoder.google as the latter doesn't seem to work. As a consequence, we will retrieve latitude and longitude coordinates that are slightly different from what is provided in the csv file, but that shouldn't change the final clustering results.*</span>

In [7]:
import geocoder # import geocoder

def retrieve_geo_data(postal_code, max_attempts=10, retry_delay=1.0):
    """Return the (latitude, longitude) of a Toronto postal code using ArcGIS.

    Parameters
    ----------
    postal_code : str
        Postal code to look up; it is geocoded as
        '<postal_code>, Toronto, Ontario'.
    max_attempts : int, optional
        Maximum number of geocoding requests before giving up.
    retry_delay : float, optional
        Seconds to wait between two consecutive attempts.

    Returns
    -------
    tuple
        (latitude, longitude), the first two items of the geocoder's `latlng`.

    Raises
    ------
    RuntimeError
        If no coordinates were obtained after `max_attempts` requests.
    """
    query = '{}, Toronto, Ontario'.format(postal_code)

    for attempt in range(1, max_attempts + 1):
        lat_lng_coords = geocoder.arcgis(query).latlng
        # Both None and an empty result are treated as "no answer yet".
        if lat_lng_coords:
            return lat_lng_coords[0], lat_lng_coords[1]
        # Pause before retrying instead of hammering the service in a tight loop.
        if attempt < max_attempts:
            time.sleep(retry_delay)

    raise RuntimeError(
        'No coordinates found for {!r} after {} attempts'.format(query, max_attempts))

<span style="color:darkred">*Iterate through postal codes and add the corresponding latitudes and longitudes in the dataframe*</span>

In [8]:
# geocode every postal code once, in dataframe row order
coordinates = [retrieve_geo_data(postal_code) for postal_code in df['PostalCode']]

# split the (latitude, longitude) pairs into the two new columns
latitude = [pair[0] for pair in coordinates]
longitude = [pair[1] for pair in coordinates]
df['Latitude'] = latitude
df['Longitude'] = longitude

# show results
df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.81139,-79.19662
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.78574,-79.15875
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.76575,-79.17470
3,M1G,Scarborough,Woburn,43.76812,-79.21761
4,M1H,Scarborough,Cedarbrae,43.76944,-79.23892
...,...,...,...,...,...
98,M9N,York,Weston,43.70507,-79.51804
99,M9P,Etobicoke,Westmount,43.69630,-79.52926
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.68681,-79.55728
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.74453,-79.58624


### Section #3 - Clustering

##### <span style="color:darkred">1- Display Toronto neighborhoods</span>

<span style="color:darkred">*Determine Toronto coordinates that will be used as the center of the Folium map*</span>

In [9]:
from geopy.geocoders import Nominatim

# Look up the city center once; it is used as the center of the Folium maps below.
geolocator = Nominatim(user_agent="capstone_explorer")
loc = geolocator.geocode("Toronto, ON")
# geocode() yields None when nothing matches: fail here with a clear message
# rather than with an AttributeError on the next line.
if loc is None:
    raise RuntimeError("Nominatim returned no result for 'Toronto, ON'")
toronto_coord = [loc.latitude, loc.longitude]

<span style="color:darkred">*Define a function that adds circle markers of a specific color to the map*</span>

In [10]:
import folium

def add_markers_to_map(df, color, add_label, map):
    """Add one colored circle marker per row of `df` to a Folium map.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the 'Latitude', 'Longitude' and 'Neighborhood' columns.
    color : str
        Color used for both the outline and the fill of every marker.
    add_label : str
        Text shown in parentheses in front of the neighborhood name in the popup.
    map : folium.Map
        Map (or any object accepted by `add_to`) that receives the markers.
    """
    rows = zip(df['Latitude'], df['Longitude'], df['Neighborhood'])
    for latitude, longitude, neighborhood in rows:
        popup_text = "({}) {}".format(add_label, neighborhood)
        marker = folium.CircleMarker(
            [latitude, longitude],
            radius=5,
            popup=folium.Popup(popup_text, parse_html=True),
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
            parse_html=False)
        marker.add_to(map)

<span style="color:darkred">*Test the function: show the neighborhoods with a color corresponding to the borough*</span>

In [11]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# base map centered on Toronto
map_toronto = folium.Map(location=[loc.latitude, loc.longitude], zoom_start=11)

# one rainbow color per distinct borough
borough_list = df['Borough'].unique().tolist()
colors_array = cm.rainbow(np.linspace(0, 1, len(borough_list)))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# draw the neighborhoods of each borough with that borough's color
for borough, borough_color in zip(borough_list, rainbow):
    borough_rows = df[df['Borough'] == borough].reset_index()
    borough_label = "Borough='{}'".format(borough)
    add_markers_to_map(borough_rows, borough_color, borough_label, map_toronto)

# show map
map_toronto

##### <span style="color:darkred">2- Explore neighborhoods' nearby venues</span>

<span style="color:darkred">*We will cluster Toronto's neighborhoods according to the similarities of their nearby venues. We'll use the __proportion of venues of each type__ (category) found within __500 meters__ of the center of the neighborhoods as the criterion for the K-means algorithm, and we will use __4 clusters__.*</span>

In [12]:
RADIUS = 500  # search radius around each neighborhood center, passed to the Foursquare request
NB_CLUSTERS = 4  # number of clusters requested from k-means

<span style="color:darkred">*First we retrieve all venues from Foursquare*</span>

In [13]:
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Foursquare credentials are read from the environment so that they never end up in the
# notebook; export FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting Jupyter.
# NOTE: any key that was previously committed in this cell should be revoked and rotated.
CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID'] # Foursquare ID
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET'] # Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # maximum number of venues requested per neighborhood

EXPLORE_URL = 'https://api.foursquare.com/v2/venues/explore'
VENUE_COLUMNS = ['Neighborhood Name', 'Neighborhood Latitude', 'Neighborhood Longitude',
    'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category']

# iterate over neighborhoods, collecting one row per nearby venue
venue_rows = []
for name, lat, lng in zip(df['Neighborhood'], df['Latitude'], df['Longitude']):
    # let requests build and encode the query string
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lat, lng),
        'radius': RADIUS,
        'limit': LIMIT,
    }
    # a timeout keeps a stalled request from hanging the cell; HTTP errors are raised
    # here instead of surfacing later as a KeyError on the JSON payload
    response = requests.get(EXPLORE_URL, params=params, timeout=30)
    response.raise_for_status()
    groups = response.json()['response'].get('groups') or []
    items = groups[0]['items'] if groups else []
    # keep only relevant information for each nearby venue
    for item in items:
        venue = item['venue']
        categories = venue.get('categories') or []
        if not categories:
            # a venue without category carries no information for the clustering
            continue
        venue_rows.append((name, lat, lng,
            venue['name'],
            venue['location']['lat'],
            venue['location']['lng'],
            categories[0]['name']))

# build dataframe (column names are given up front so that an empty result is still valid)
nearby_venues = pd.DataFrame(venue_rows, columns=VENUE_COLUMNS)

# display results and size
nearby_venues

Unnamed: 0,Neighborhood Name,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.81139,-79.19662,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,"Highland Creek, Rouge Hill, Port Union",43.78574,-79.15875,Chris Effects Painting,43.784343,-79.163742,Construction & Landscaping
2,"Highland Creek, Rouge Hill, Port Union",43.78574,-79.15875,Royal Canadian Legion,43.782533,-79.163085,Bar
3,"Guildwood, Morningside, West Hill",43.76575,-79.17470,Homestead Roofing Repair,43.765140,-79.178663,Construction & Landscaping
4,"Guildwood, Morningside, West Hill",43.76575,-79.17470,Heron Park Community Centre,43.768867,-79.176958,Gym / Fitness Center
...,...,...,...,...,...,...,...
2374,Northwest,43.71174,-79.57941,Petro-Canada,43.714398,-79.581868,Gas Station
2375,Northwest,43.71174,-79.57941,Rexdale Hyundai,43.713733,-79.578123,Auto Dealership
2376,Northwest,43.71174,-79.57941,U-Haul Moving & Storage at Rexdale Blvd,43.713500,-79.577307,Storage Facility
2377,Northwest,43.71174,-79.57941,Fitness Two 4,43.714482,-79.582903,Gym


In [14]:
# dropna=False keeps a missing category counted as its own value, like len(unique()) does
category_count = nearby_venues['Venue Category'].nunique(dropna=False)
print('There are {} uniques categories.'.format(category_count))

There are 266 uniques categories.


In [15]:
# number of distinct neighborhoods for which at least one venue was returned
nearby_venues['Neighborhood Name'].nunique(dropna=False)

101

<span style="color:darkred">*We can notice that Foursquare found venues around 101 neighborhoods out of the 103 initially listed, we will cluster the missing 2 neighborhoods together at the end.*</span>

<span style="color:darkred">*Now we create a dataframe with all the categories using __one-hot encoding__ then calculate the average of venues of each type per neighborhood using __groupby / mean__ aggregation method.*</span>

In [16]:
# one indicator column per venue category (one-hot encoding)
venues_onehot = pd.get_dummies(nearby_venues[['Venue Category']], prefix="", prefix_sep="")

# append the neighborhood name, then move that last column to the front
venues_onehot['Neighborhood Name'] = nearby_venues['Neighborhood Name']
name_first = venues_onehot.columns[-1:].tolist() + venues_onehot.columns[:-1].tolist()
venues_onehot = venues_onehot[name_first]

# the mean of the indicators is the proportion of each venue type per neighborhood
venues_by_neighborhood = venues_onehot.groupby('Neighborhood Name')
venues_avg = venues_by_neighborhood.mean().reset_index()

# show results
venues_avg

Unnamed: 0,Neighborhood Name,ATM,Accessories Store,Adult Boutique,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,...,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,...,0.01,0.0,0.0,0.0000,0.0,0.01,0.0,0.0,0.0,0.0
1,Agincourt,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.0,...,0.00,0.0,0.0,0.0000,0.0,0.00,0.0,0.0,0.0,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.0,...,0.00,0.0,0.0,0.0000,0.0,0.00,0.0,0.0,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.0,...,0.00,0.0,0.0,0.0625,0.0,0.00,0.0,0.0,0.0,0.0
4,"Alderwood, Long Branch",0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.0,...,0.00,0.0,0.0,0.0000,0.0,0.00,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,Willowdale West,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.0,...,0.00,0.0,0.0,0.0000,0.0,0.00,0.0,0.0,0.0,0.0
97,Woburn,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.0,...,0.00,0.0,0.0,0.0000,0.0,0.00,0.0,0.0,0.0,0.0
98,"Woodbine Gardens, Parkview Hill",0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.0,...,0.00,0.0,0.0,0.0000,0.0,0.00,0.0,0.0,0.0,0.0
99,Woodbine Heights,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.0,...,0.00,0.0,0.0,0.0000,0.0,0.00,0.0,0.0,0.0,0.0


<span style="color:darkred">*Finally we create a dataframe showing the 5 most common venues for each neighborhood.*</span>

In [17]:
# function that returns the most common venue categories of one neighborhood (row)
def return_most_common_venues(row, num_top_venues=5):
    """Return the names of the highest-valued categories of a neighborhood row.

    Parameters
    ----------
    row : pandas.Series
        One row whose first entry is skipped (the neighborhood name in
        `venues_avg`) and whose remaining entries are per-category values
        indexed by category name.
    num_top_venues : int, optional
        How many category names to return (5 by default).

    Returns
    -------
    numpy.ndarray
        Category names ordered from the highest to the lowest value; shorter than
        `num_top_venues` when the row holds fewer categories.
    """
    # skip the leading non-category entry
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

# column headers: the neighborhood name followed by the five ranked venue columns
df_columns = ['Neighborhood Name'] + [
    '#{} Most Common Venue'.format(rank) for rank in range(1, 6)]

# rank the categories of every neighborhood, then assemble the dataframe in one step
top_venues = [return_most_common_venues(venues_avg.iloc[position, :])
              for position in range(venues_avg.shape[0])]
venues_sorted = pd.DataFrame(top_venues, columns=df_columns[1:], index=venues_avg.index)
venues_sorted.insert(0, 'Neighborhood Name', venues_avg['Neighborhood Name'])

# show results
venues_sorted.head()

Unnamed: 0,Neighborhood Name,#1 Most Common Venue,#2 Most Common Venue,#3 Most Common Venue,#4 Most Common Venue,#5 Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Hotel,Restaurant,Japanese Restaurant
1,Agincourt,Chinese Restaurant,Hong Kong Restaurant,Discount Store,Bubble Tea Shop,Skating Rink
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Pharmacy,Intersection,Yoga Studio,Farm,Elementary School
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Pharmacy,Beer Store,Fried Chicken Joint,Japanese Restaurant
4,"Alderwood, Long Branch",Convenience Store,Gym,Performing Arts Venue,Pub,Elementary School


##### <span style="color:darkred">3- Run k-Means algorithm on neighborhoods' nearby venues</span>

<span style="color:darkred">*Run k-Means algorithm with 4 clusters*</span>

In [18]:
from sklearn.cluster import KMeans

# keep only the per-category proportions as clustering features
# (`columns=` replaces the positional axis argument, which pandas 2.0 no longer accepts)
venues_clustering = venues_avg.drop(columns='Neighborhood Name')

# run k-means clustering; n_init is set explicitly so that the result does not depend
# on the default of the installed scikit-learn version
kmeans = KMeans(n_clusters=NB_CLUSTERS, n_init=10, random_state=0).fit(venues_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 3, 2, 2, 0, 2, 0, 2, 2,
       2, 2, 0, 2, 0, 2, 2, 2, 0, 3, 0, 0, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 0, 3, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2,
       2, 1, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2])

<span style="color:darkred">*Add clustering and geospatial information to the sorted dataframe*</span>

In [19]:
# add clustering labels as the first column; a label column left over from a previous
# run of this cell is dropped first so that the cell can be re-executed safely
venues_sorted = venues_sorted.drop(columns='Cluster Labels', errors='ignore')
venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# attach cluster label and top venues to each row of df (postal code, borough, coordinates),
# matching on the neighborhood name; neighborhoods without venues get NaN in the new columns
toronto_merged = df.join(venues_sorted.set_index('Neighborhood Name'), on='Neighborhood')

# show results
toronto_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,#1 Most Common Venue,#2 Most Common Venue,#3 Most Common Venue,#4 Most Common Venue,#5 Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.81139,-79.19662,3.0,Fast Food Restaurant,Yoga Studio,Electronics Store,Food,Flower Shop
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.78574,-79.15875,3.0,Construction & Landscaping,Bar,Farmers Market,Escape Room,Ethiopian Restaurant
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.76575,-79.17470,0.0,Construction & Landscaping,Park,Bus Stop,Gym / Fitness Center,Fish & Chips Shop
3,M1G,Scarborough,Woburn,43.76812,-79.21761,2.0,Business Service,Korean BBQ Restaurant,Park,Coffee Shop,Farm
4,M1H,Scarborough,Cedarbrae,43.76944,-79.23892,2.0,Gaming Cafe,Trail,Yoga Studio,Elementary School,Escape Room
...,...,...,...,...,...,...,...,...,...,...,...
98,M9N,York,Weston,43.70507,-79.51804,2.0,Pizza Place,Park,Diner,Fried Chicken Joint,Convenience Store
99,M9P,Etobicoke,Westmount,43.69630,-79.52926,2.0,Pizza Place,Coffee Shop,Middle Eastern Restaurant,Sandwich Place,Chinese Restaurant
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.68681,-79.55728,2.0,Pizza Place,Bus Line,Clothing Store,Music Venue,Bus Stop
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.74453,-79.58624,2.0,Grocery Store,Pharmacy,Beer Store,Fried Chicken Joint,Japanese Restaurant


<span style="color:darkred">*Analyze the number of neighborhoods per cluster*</span>

In [20]:
# Number of neighborhoods per cluster label; value_counts() leaves out missing (NaN) labels by default.
toronto_merged['Cluster Labels'].value_counts()

2.0    81
0.0    13
3.0     4
1.0     3
Name: Cluster Labels, dtype: int64

<span style="color:darkred">*Group the 2 neighborhoods with no nearby venues together in an additional cluster*</span>

In [21]:
print('There are {} unassigned neighborhoods.'.format(toronto_merged['Cluster Labels'].isnull().sum()))
# Neighborhoods without any venue have no label: put them in one extra cluster whose id
# is NB_CLUSTERS. The column is reassigned rather than filled with inplace=True on the
# selected column, because that chained form does not update the dataframe under
# pandas Copy-on-Write.
toronto_merged['Cluster Labels'] = toronto_merged['Cluster Labels'].fillna(NB_CLUSTERS)
print('There are {} unassigned neighborhoods.'.format(toronto_merged['Cluster Labels'].isnull().sum()))
toronto_merged['Cluster Labels'].value_counts()

There are 2 unassigned neighborhoods.
There are 0 unassigned neighborhoods.


2.0    81
0.0    13
3.0     4
1.0     3
4.0     2
Name: Cluster Labels, dtype: int64

<span style="color:darkred">*Visualize the clusters on the map*</span>

In [22]:
# fresh base map centered on Toronto
map_toronto = folium.Map(location=[loc.latitude, loc.longitude], zoom_start=11)

# one rainbow color per cluster, including the extra "no venues" cluster
colors_array = cm.rainbow(np.linspace(0, 1, NB_CLUSTERS+1))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# draw the neighborhoods of each cluster with that cluster's color
for cluster, cluster_color in enumerate(rainbow):
    in_cluster = toronto_merged['Cluster Labels'] == cluster
    cluster_rows = toronto_merged[in_cluster].reset_index()
    add_markers_to_map(cluster_rows, cluster_color, "Cluster='{}'".format(cluster), map_toronto)

# show map
map_toronto

##### <span style="color:darkred">4- Analyze clusters</span>

<span style="color:darkred">*Let's display the neighborhoods details cluster by cluster to spot the common types of venues*</span>

In [23]:
# neighborhoods assigned to cluster 0
toronto_merged.loc[toronto_merged['Cluster Labels'].eq(0)]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,#1 Most Common Venue,#2 Most Common Venue,#3 Most Common Venue,#4 Most Common Venue,#5 Most Common Venue
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.76575,-79.1747,0.0,Construction & Landscaping,Park,Bus Stop,Gym / Fitness Center,Fish & Chips Shop
17,M2H,North York,Hillcrest Village,43.80225,-79.35558,0.0,Park,Residential Building (Apartment / Condo),Yoga Studio,Farm,Elementary School
20,M2L,North York,"Silver Hills, York Mills",43.75698,-79.3806,0.0,Park,Yoga Studio,Farmers Market,Elementary School,Escape Room
25,M3A,North York,Parkwoods,43.75245,-79.32991,0.0,Food & Drink Shop,Park,Farmers Market,Elementary School,Escape Room
34,M4A,North York,Victoria Village,43.73057,-79.31306,0.0,German Restaurant,Park,Grocery Store,Yoga Studio,Falafel Restaurant
40,M4J,East York,East Toronto,43.68811,-79.33418,0.0,Convenience Store,Intersection,Park,Farm,Elementary School
46,M4R,Central Toronto,North Toronto West,43.71458,-79.40668,0.0,Playground,Park,Gym Pool,Yoga Studio,Farm
50,M4W,Downtown Toronto,Rosedale,43.6819,-79.37829,0.0,Park,Playground,Bike Trail,Yoga Studio,Farm
64,M5P,Central Toronto,"Forest Hill North, Forest Hill West",43.69479,-79.4144,0.0,Locksmith,Park,Yoga Studio,Farmers Market,Escape Room
79,M6L,North York,"Downsview, North Park, Upwood Park",43.71381,-79.48874,0.0,Bakery,Basketball Court,Park,Farmers Market,Escape Room


In [24]:
# neighborhoods assigned to cluster 1
toronto_merged.loc[toronto_merged['Cluster Labels'].eq(1)]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,#1 Most Common Venue,#2 Most Common Venue,#3 Most Common Venue,#4 Most Common Venue,#5 Most Common Venue
28,M3H,North York,"Bathurst Manor, Downsview North, Wilson Heights",43.75788,-79.44847,1.0,Home Service,Business Service,Electronics Store,Flower Shop,Flea Market
81,M6N,York,"The Junction North, Runnymede",43.67646,-79.48272,1.0,Brewery,Home Service,Seafood Restaurant,Furniture / Home Store,Farm
96,M9L,North York,Humber Summit,43.75948,-79.55707,1.0,Home Service,Hobby Shop,Rental Car Location,Falafel Restaurant,Electronics Store


In [25]:
# neighborhoods assigned to cluster 2
toronto_merged.loc[toronto_merged['Cluster Labels'].eq(2)]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,#1 Most Common Venue,#2 Most Common Venue,#3 Most Common Venue,#4 Most Common Venue,#5 Most Common Venue
3,M1G,Scarborough,Woburn,43.76812,-79.21761,2.0,Business Service,Korean BBQ Restaurant,Park,Coffee Shop,Farm
4,M1H,Scarborough,Cedarbrae,43.76944,-79.23892,2.0,Gaming Cafe,Trail,Yoga Studio,Elementary School,Escape Room
5,M1J,Scarborough,Scarborough Village,43.74446,-79.23117,2.0,Spa,Indian Restaurant,Restaurant,Park,Grocery Store
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.72582,-79.26461,2.0,Convenience Store,Chinese Restaurant,Discount Store,Coffee Shop,Department Store
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.71289,-79.28506,2.0,Intersection,Bakery,Coffee Shop,Soccer Field,Bus Line
...,...,...,...,...,...,...,...,...,...,...,...
98,M9N,York,Weston,43.70507,-79.51804,2.0,Pizza Place,Park,Diner,Fried Chicken Joint,Convenience Store
99,M9P,Etobicoke,Westmount,43.69630,-79.52926,2.0,Pizza Place,Coffee Shop,Middle Eastern Restaurant,Sandwich Place,Chinese Restaurant
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.68681,-79.55728,2.0,Pizza Place,Bus Line,Clothing Store,Music Venue,Bus Stop
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.74453,-79.58624,2.0,Grocery Store,Pharmacy,Beer Store,Fried Chicken Joint,Japanese Restaurant


In [26]:
# neighborhoods assigned to cluster 3
toronto_merged.loc[toronto_merged['Cluster Labels'].eq(3)]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,#1 Most Common Venue,#2 Most Common Venue,#3 Most Common Venue,#4 Most Common Venue,#5 Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.81139,-79.19662,3.0,Fast Food Restaurant,Yoga Studio,Electronics Store,Food,Flower Shop
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.78574,-79.15875,3.0,Construction & Landscaping,Bar,Farmers Market,Escape Room,Ethiopian Restaurant
32,M3M,North York,Downsview Central,43.73224,-79.50178,3.0,Construction & Landscaping,Insurance Office,Farm,Elementary School,Escape Room
80,M6M,York,"Del Ray, Keelesdale, Mount Dennis, Silverthorn",43.69517,-79.48397,3.0,ATM,Fast Food Restaurant,Playground,Coffee Shop,Construction & Landscaping


<span style="color:darkred">*Analyzing the common venues per cluster, we can highlight the following patterns:*</span><br>
<span style="color:darkred">- *Cluster __0__ regroups the 13 places having __parks__ in their neighborhood*</span><br>
<span style="color:darkred">- *Cluster __1__ regroups the 3 places having a high number of __home services__ in their neighborhood*</span> <br>
<span style="color:darkred">- *Cluster __3__ regroups the 4 places having __Construction / Landscaping venues__ in their neighborhood*</span> <br>
<span style="color:darkred">- *Cluster __2__ regroups all the other places, which have more __restaurants and other food venues__ in their direct neighborhood*</span> 