# Toronto neighbourhood analysis

### Importing libraries

In [1]:
#!pip install lxml # comment when lxml is installed
#!pip install geocoder # comment when geocoder is installed

import requests, folium, json
import lxml.html as lh
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors

### Getting and cleaning data

We define a function to scrape a table from the wikipedia article. It will append any element with "length" columns. In our case, we want length = 3 (because there are 3 columns). The function isn't all that robust, because who knows that would happen with multiple tables in a given url, but it works for our case. 

In [2]:
def scrape_table(url, length):
    req = requests.get(url)
    doc = lh.fromstring(req.content)
    tr_elements = doc.xpath('//tr')
    col=[] # Create empty list
    
    for t in tr_elements[0]:  # create header of the table
        name = t.text_content()
        col.append((name.replace("\n",""),[]))

    for j in range(1,len(tr_elements)): # populate the table
        raw_row = tr_elements[j]
        row = [word.text_content().replace("\n", "") for word in raw_row.iterchildren()] # get words in each column
        
        if len(row)!= length: # if the row doesn't have "length" columns, then it isn't part of our table
            break
            
        for i, data in enumerate(row):
            col[i][1].append(data)

    Dict={title:column for (title,column) in col}
    return pd.DataFrame(Dict)

Once we have our function, we call it, and then look at the first 7 rows.

In [3]:
df_ugly = scrape_table("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", 3)
df_ugly.head(12)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
8,M8A,Not assigned,Not assigned
9,M9A,Queen's Park,Not assigned


Let's clean our dataframe so that only the rows with assigned boroughs remain.

In [4]:
df_clean = df_ugly.loc[df_ugly['Borough'] != "Not assigned"].reset_index(drop=True)
df_clean.head(12)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor
5,M7A,Downtown Toronto,Queen's Park
6,M9A,Queen's Park,Not assigned
7,M1B,Scarborough,Rouge
8,M1B,Scarborough,Malvern
9,M3B,North York,Don Mills North


We then assign the corresponding borough to any neighbourhood that says "Not assigned".

In [5]:
indexes = df_clean.loc[df_clean["Neighbourhood"] == "Not assigned"].index # getting indexes of not assigned neighbourhoods
df_clean.loc[indexes, 'Neighbourhood'] = df_clean.loc[indexes, 'Borough'] # overwritting neighbourhoods
df_clean.head(12)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor
5,M7A,Downtown Toronto,Queen's Park
6,M9A,Queen's Park,Queen's Park
7,M1B,Scarborough,Rouge
8,M1B,Scarborough,Malvern
9,M3B,North York,Don Mills North


Looking at the data, we see that a single borough can have multiple postcodes, but a single postcode cannot have multiple boroughs. Therefore, we can group different neighbourhoods with the same postcode, as:

In [6]:
codes = df_clean['Postcode'].unique()

new_frame = np.zeros((len(codes), 3), dtype = object)

for i, code in enumerate(codes):
    data = df_clean.loc[df_clean['Postcode'] == code]
    start = data.index[0]    
    neigh_list = ''
    
    for neighbourhood in enumerate(data['Neighbourhood']):
        neigh_list += ', ' + neighbourhood[1]
    
    new_frame[i, 0] = code 
    new_frame[i, 1] = data.loc[start, 'Borough']
    new_frame[i, 2] = neigh_list[2:]

df = pd.DataFrame(data=new_frame, columns=['Postcode', 'Borough', 'Neighbourhood'])
df.head(12)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park
5,M9A,Queen's Park,Queen's Park
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


We now see the shape of our dataframe:

In [7]:
df.shape

(103, 3)

### Adding coordinates to the dataframe

Loading the coordinates from a file onto a new dataframe:

In [8]:
coordinates = pd.read_csv('Geospatial_Coordinates.csv', delimiter=',')
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Let's join the dataframes using their postcodes, and drop every row that contains the word "Toronto":

In [9]:
dframe = df.join(coordinates.set_index('Postal Code'), on='Postcode')
dframe = dframe[~dframe['Borough'].str.contains("Toronto")].reset_index(drop=True)
dframe.head(12)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
3,M9A,Queen's Park,Queen's Park,43.667856,-79.532242
4,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
5,M3B,North York,Don Mills North,43.745906,-79.352188
6,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
7,M6B,North York,Glencairn,43.709577,-79.445073
8,M9B,Etobicoke,"Cloverdale, Islington, Martin Grove, Princess ...",43.650943,-79.554724
9,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497


In [10]:
# create map of Manhattan using latitude and longitude values
latitude = 43.7001100
longitude = -79.4163000

map_tor = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(dframe['Latitude'], dframe['Longitude'], dframe['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_tor)  
    
map_tor

## Setting up foursquare

In the following cell, I will hide my credentials.

Let's grab the function from the last lab to get every venue near our neighbourhoods.

In [12]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&oauth_token={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            TOKEN,
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [13]:
venues = getNearbyVenues(names = dframe['Neighbourhood'],
                                   latitudes = dframe['Latitude'],
                                   longitudes = dframe['Longitude']
                                  )

In [14]:
venues.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Careful & Reliable Painting,43.752622,-79.331957,Construction & Landscaping
2,Parkwoods,43.753259,-79.329656,649 Variety,43.754513,-79.331942,Convenience Store
3,Parkwoods,43.753259,-79.329656,Sun Life,43.75476,-79.332783,Construction & Landscaping
4,Parkwoods,43.753259,-79.329656,GTA Restoration,43.753396,-79.333477,Fireworks Store


One-hot encoding gives:

In [15]:
# one hot encoding
tor_onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
tor_onehot['Neighbourhood'] = venues['Neighbourhood'] 

# move neighbourhood column to the first column
fixed_columns = [tor_onehot.columns[-1]] + list(tor_onehot.columns[:-1])
tor_onehot = tor_onehot[fixed_columns]

tor_onehot.head()

Unnamed: 0,Neighbourhood,ATM,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,Automotive Shop,...,Toy / Game Store,Trail,Train Station,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Grouping by the mean of the frequency of each venue type, yields:

In [16]:
tor_grouped = tor_onehot.groupby('Neighbourhood').mean().reset_index()
tor_grouped.head()

Unnamed: 0,Neighbourhood,ATM,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,Automotive Shop,...,Toy / Game Store,Trail,Train Station,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0
3,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.076923,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bathurst Manor, Downsview North, Wilson Heights",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,...,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0


Let's create a new dataframe with the top 10 venues per neighbourhood:

In [17]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [18]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
venues_sorted = pd.DataFrame(columns=columns)
venues_sorted['Neighbourhood'] = tor_grouped['Neighbourhood']

for ind in np.arange(tor_grouped.shape[0]):
    venues_sorted.iloc[ind, 1:] = return_most_common_venues(tor_grouped.iloc[ind, :], num_top_venues)
    
venues.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Careful & Reliable Painting,43.752622,-79.331957,Construction & Landscaping
2,Parkwoods,43.753259,-79.329656,649 Variety,43.754513,-79.331942,Convenience Store
3,Parkwoods,43.753259,-79.329656,Sun Life,43.75476,-79.332783,Construction & Landscaping
4,Parkwoods,43.753259,-79.329656,GTA Restoration,43.753396,-79.333477,Fireworks Store


Going through the whole venues data, we can see that there are no venues nearby the Upper Rouge neighbourhood, which will yield NaN values after clustering.



In [19]:
print(" Upper Rouge appears", len(venues.loc[venues['Neighbourhood'] == 'Upper Rouge']), "times in venues dataframe\n",
      "Upper Rouge appears", len(dframe.loc[dframe['Neighbourhood'] == 'Upper Rouge']), "times in original dataframe.")

 Upper Rouge appears 0 times in venues dataframe
 Upper Rouge appears 1 times in original dataframe.


Let's drop the row from the original dataframe:

In [20]:
dframe = dframe[dframe.Neighbourhood != 'Upper Rouge']
dframe.reset_index(drop=True)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
3,M9A,Queen's Park,Queen's Park,43.667856,-79.532242
4,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
...,...,...,...,...,...
58,M8W,Etobicoke,"Alderwood, Long Branch",43.602414,-79.543484
59,M9W,Etobicoke,Northwest,43.706748,-79.594054
60,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
61,M8Y,Etobicoke,"Humber Bay, King's Mill Park, Kingsway Park So...",43.636258,-79.498509


# Clustering

In [21]:
# set number of clusters
kclusters = 5

tor_grouped_clustering = tor_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(tor_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 0, 3, 3, 3, 0, 3,
       3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0,
       3, 3, 3, 2, 0, 4, 1, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 1],
      dtype=int32)

In [22]:
# add clustering labels
venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

tor_merged = dframe

#merge tor_grouped with venues_sorted to add latitude/longitude for each neighborhood
tor_merged = tor_merged.join(venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

In [23]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(tor_merged['Latitude'], tor_merged['Longitude'], tor_merged['Neighbourhood'], tor_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.9).add_to(map_clusters)
       
map_clusters

## Cluster examination

### Cluster 0

Given the fact that every element in this cluster has "Home Service", I would label this cluster "Home Cluster"

In [24]:
tor_merged.loc[tor_merged['Cluster Labels'] == 0, tor_merged.columns[[1] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Scarborough,0,Home Service,Fast Food Restaurant,Empanada Restaurant,Fireworks Store,Financial or Legal Service,Film Studio,Field,Farm,Falafel Restaurant,Fabric Shop
8,Etobicoke,0,Home Service,Print Shop,Gift Shop,Deli / Bodega,Empanada Restaurant,Financial or Legal Service,Film Studio,Field,Fast Food Restaurant,Farm
33,North York,0,Home Service,Empanada Restaurant,Pharmacy,Pizza Place,Hardware Store,Drugstore,Field,Fast Food Restaurant,Farm,Falafel Restaurant
35,North York,0,Home Service,Empanada Restaurant,Fireworks Store,Financial or Legal Service,Film Studio,Field,Fast Food Restaurant,Farm,Falafel Restaurant,Fabric Shop
36,North York,0,Home Service,Korean Restaurant,Business Service,Baseball Field,Fireworks Store,Financial or Legal Service,Film Studio,Field,Fast Food Restaurant,Farm


### Cluster 1

There are many services in this cluster. It might contain mainly residential areas. This would be the 'Residential Cluster'.

In [25]:
tor_merged.loc[tor_merged['Cluster Labels'] == 1, tor_merged.columns[[1] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,York,1,Park,Spa,Women's Store,Fast Food Restaurant,Market,Event Space,Fireworks Store,Financial or Legal Service,Film Studio,Field
25,East York,1,Film Studio,Convenience Store,Park,Metro Station,Yoga Studio,Empanada Restaurant,Financial or Legal Service,Field,Fast Food Restaurant,Farm
30,North York,1,Martial Arts Dojo,Park,Cafeteria,Event Space,Fireworks Store,Financial or Legal Service,Film Studio,Field,Fast Food Restaurant,Farm
44,York,1,Park,Convenience Store,Electronics Store,Yoga Studio,Empanada Restaurant,Financial or Legal Service,Film Studio,Field,Fast Food Restaurant,Farm
46,North York,1,Flower Shop,Convenience Store,Bank,Park,Electronics Store,Dim Sum Restaurant,Event Space,Financial or Legal Service,Film Studio,Field
54,Scarborough,1,Park,Yoga Studio,Electronics Store,Financial or Legal Service,Film Studio,Field,Fast Food Restaurant,Farm,Falafel Restaurant,Fabric Shop


### Cluster 2

Cluster 1 contains only Queen's Park. There might not be many things around here. This cluster might be an outlier, so it would be best to call it 'Queen's Park Cluster'.

In [26]:
tor_merged.loc[tor_merged['Cluster Labels'] == 2, tor_merged.columns[[1] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Queen's Park,2,Pizza Place,Yoga Studio,Electronics Store,Financial or Legal Service,Film Studio,Field,Fast Food Restaurant,Farm,Falafel Restaurant,Fabric Shop


### Cluster 3

Due to the frequency of fast food places, restaurants and stores in this cluster, I would label it 'The Business Cluster'. If you were to open a restaurant, you would want to avoid this cluster, given the amount of competition.

In [27]:
tor_merged.loc[tor_merged['Cluster Labels'] == 3, tor_merged.columns[[1] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,3,Construction & Landscaping,Food & Drink Shop,Bus Stop,Park,Convenience Store,BBQ Joint,Fireworks Store,Yoga Studio,Event Space,Film Studio
1,North York,3,Hockey Arena,Intersection,Pizza Place,French Restaurant,Coffee Shop,Financial or Legal Service,Portuguese Restaurant,Dim Sum Restaurant,Fabric Shop,Deli / Bodega
2,North York,3,Clothing Store,Furniture / Home Store,Accessories Store,Home Service,Women's Store,Tailor Shop,Health & Beauty Service,Construction & Landscaping,Miscellaneous Shop,Event Space
5,North York,3,Pharmacy,Basketball Court,Café,Gym / Fitness Center,Caribbean Restaurant,Construction & Landscaping,Japanese Restaurant,Restaurant,Baseball Field,Field
6,East York,3,Pizza Place,Fast Food Restaurant,Pet Store,Intersection,Café,Bus Line,Spa,Breakfast Spot,Furniture / Home Store,Bank
7,North York,3,Pizza Place,Spa,Japanese Restaurant,Park,Pub,Metro Station,Sushi Restaurant,Pharmacy,Asian Restaurant,Greek Restaurant
9,Scarborough,3,Moving Target,Construction & Landscaping,Golf Course,Yoga Studio,Financial or Legal Service,Film Studio,Field,Fast Food Restaurant,Farm,Falafel Restaurant
10,North York,3,Gym,Beer Store,Asian Restaurant,Japanese Restaurant,Clothing Store,Sporting Goods Shop,Coffee Shop,Restaurant,Fast Food Restaurant,Bubble Tea Shop
11,East York,3,Skating Rink,ATM,Beer Store,Park,Curling Ice,Pharmacy,Cosmetics Shop,Construction & Landscaping,Salon / Barbershop,Bus Stop
12,York,3,Playground,Trail,Field,Business Service,Tennis Court,Hockey Arena,Asian Restaurant,Flower Shop,Fireworks Store,Financial or Legal Service


### Cluster 4

Since this cluster only contains one element, I would label it after the element. This would be the 'Scarborough Cluster'.

In [28]:
tor_merged.loc[tor_merged['Cluster Labels'] == 4, tor_merged.columns[[1] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Scarborough,4,Women's Store,Flower Shop,Fish & Chips Shop,Fireworks Store,Financial or Legal Service,Film Studio,Field,Fast Food Restaurant,Farm,Falafel Restaurant
