# Segmenting and Clustering Neighborhoods in Toronto

First and foremost let's import all the necessary libraries

In [2]:
import os

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests as rq
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

## Part 1. Creating base dataframe

Download and parse the wiki page

In [3]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# Fail fast on network problems or HTTP error pages instead of parsing them as data
response = rq.get(url, timeout=30)
response.raise_for_status()
# Name the parser explicitly so the result does not depend on which parsers are installed
soup = BeautifulSoup(response.text, 'html.parser')

Now extract the data rows from the table

In [4]:
items = []
rows = soup.table.find_all('tr')
# Skip the first row because it only holds the column titles
for row in rows[1:]:
    # Strip surrounding whitespace (cell text ends with "\n") from every cell
    cells = [col.text.strip() for col in row.find_all('td')]
    # Ignore malformed rows that do not carry all three fields
    if len(cells) < 3:
        continue
    items.append(tuple(cells[:3]))

Create the dataframe from the data

In [5]:
columns = ["PostalCode", "Borough", "Neighborhood"]

# Build the cleaned frame in a single chain: each step returns a new frame, so the
# cell is safe to re-run and does not rely on in-place mutation of a column view
df_raw = (
    pd.DataFrame(data=items, columns=columns)
    .replace("Not assigned", np.nan)
    # We don't need rows with empty Borough
    .dropna(subset=["Borough"])
    # Empty Neighborhood is replaced with the corresponding Borough
    .assign(Neighborhood=lambda df: df["Neighborhood"].fillna(df["Borough"]))
)

# Join the neighborhoods with the same postal code. dict.fromkeys removes duplicates
# while keeping the order of appearance (a set would give a different order per run),
# and the postal code becomes the index
df_combined = (
    df_raw
    .groupby(by=["PostalCode", "Borough"], as_index=False)
    .agg({"Neighborhood": lambda places: ", ".join(dict.fromkeys(places))})
    .set_index("PostalCode")
)

Check first rows of the resulting dataframe

In [6]:
# Preview the first ten rows of the combined frame
df_combined.head(10)

Unnamed: 0_level_0,Borough,Neighborhood
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern, Rouge"
M1C,Scarborough,"Highland Creek, Port Union, Rouge Hill"
M1E,Scarborough,"Morningside, Guildwood, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
M1J,Scarborough,Scarborough Village
M1K,Scarborough,"East Birchmount Park, Kennedy Park, Ionview"
M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
M1M,Scarborough,"Cliffside, Scarborough Village West, Cliffcrest"
M1N,Scarborough,"Cliffside West, Birch Cliff"


Seems pretty good. Also check the shape while we're at it

In [7]:
# (number of rows, number of columns) of the combined frame
df_combined.shape

(103, 2)

Good. Time to move further

## Part 2. Adding geospatial data

The Google geocoder is not working reliably for me, so I'll stick to the CSV file with coordinates.
Let's open it and make a corresponding dataframe.

In [8]:
# CSV file holding the latitude/longitude of every postal code
filename = "Geospatial_Coordinates.csv"
# Use the postal code as the index so the frame can later be joined on it
df_coords = pd.read_csv(filepath_or_buffer=filename, index_col="Postal Code")

Alright. Check the first rows to see if it loaded correctly

In [9]:
# Preview the first ten rows of the coordinates frame
df_coords.head(10)

Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476
M1J,43.744734,-79.239476
M1K,43.727929,-79.262029
M1L,43.711112,-79.284577
M1M,43.716316,-79.239476
M1N,43.692657,-79.264848


And the shape. It should be the same as the shape of <b>df_combined</b>

In [10]:
# (number of rows, number of columns) of the coordinates frame
df_coords.shape

(103, 2)

Good. Now merge it into one dataframe for further usage using the index columns

In [11]:
# Left join on the index: every row of df_combined is kept and gains its coordinates
df_geo = df_combined.join(other=df_coords, how="left")
df_geo.head(n=5)

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
M1C,Scarborough,"Highland Creek, Port Union, Rouge Hill",43.784535,-79.160497
M1E,Scarborough,"Morningside, Guildwood, West Hill",43.763573,-79.188711
M1G,Scarborough,Woburn,43.770992,-79.216917
M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## Part 3. Clustering

First of all let us drop the unnecessary boroughs, leaving only those containing *Toronto*

In [12]:
# Boolean mask of the rows whose borough name contains "Toronto"
in_toronto = df_geo["Borough"].str.contains("Toronto")
df_toronto = df_geo.loc[in_toronto]
df_toronto.head(10)

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M4E,East Toronto,The Beaches,43.676357,-79.293031
M4K,East Toronto,"Riverdale, The Danforth West",43.679557,-79.352188
M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
M4M,East Toronto,Studio District,43.659526,-79.340923
M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
M4P,Central Toronto,Davisville North,43.712751,-79.390197
M4R,Central Toronto,North Toronto West,43.715383,-79.405678
M4S,Central Toronto,Davisville,43.704324,-79.38879
M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
M4V,Central Toronto,"Deer Park, South Hill, Forest Hill SE, Rathnel...",43.686412,-79.400049


Configuration for the Foursquare API

In [36]:
# Read the Foursquare credentials from the environment so they are never stored in
# the notebook; they fall back to empty strings when the variables are not set
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20180605'  # sent as the `v` query parameter of every request
LIMIT = 100  # sent as the `limit` query parameter of every request

# Report only whether the credentials are present: printing the values themselves
# would leak them into the saved notebook output
print('Foursquare credentials:')
print('CLIENT_ID: ' + ('set' if CLIENT_ID else 'missing'))
print('CLIENT_SECRET: ' + ('set' if CLIENT_SECRET else 'missing'))

Your credentails:
CLIENT_ID: 
CLIENT_SECRET:


I'll be using the same idea that was used in New-York clustering, so let's copy the required functions and go through the whole process until we need to do the clustering itself.

In [14]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect the venues that Foursquare reports around each given location.

    One request is sent to the ``venues/explore`` endpoint per
    (name, latitude, longitude) triple; the three iterables are zipped together.
    The module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT are sent with
    every request, and each name is printed as it is processed.

    Parameters
    ----------
    names : iterable
        Label stored in the ``Neighborhood`` column of every resulting row.
    latitudes, longitudes : iterable
        Coordinates sent as the ``ll`` parameter of the request.
    radius : int, default 500
        Sent as the ``radius`` parameter of the request.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns ``Neighborhood``,
        ``Neighborhood Latitude``, ``Neighborhood Longitude``, ``Venue``,
        ``Venue Latitude``, ``Venue Longitude`` and ``Venue Category``.
        The frame is empty (but keeps these columns) when no venue is found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and encode the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # make the GET request and stop on HTTP errors instead of failing later
        # with an unrelated KeyError on the error payload
        response = rq.get(endpoint, params=params, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # a venue may come without any category; record it as missing
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # passing the column names to the constructor also works for an empty result
    return pd.DataFrame(venue_rows, columns=column_names)

In [15]:
# Query the venues around every row of df_toronto (one API request per row)
toronto_venues_raw = getNearbyVenues(
    df_toronto['Neighborhood'],
    df_toronto['Latitude'],
    df_toronto['Longitude'],
)

The Beaches
Riverdale, The Danforth West
The Beaches West, India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, South Hill, Forest Hill SE, Rathnelly, Summerhill West
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront, Regent Park
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond, Adelaide, King
Harbourfront East, Toronto Islands, Union Station
Toronto Dominion Centre, Design Exchange
Victoria Hotel, Commerce Court
Roselawn
Forest Hill North, Forest Hill West
North Midtown, Yorkville, The Annex
Harbord, University of Toronto
Kensington Market, Grange Park, Chinatown
South Niagara, Bathurst Quay, Island airport, CN Tower, Harbourfront West, Railway Lands, King and Spadina
Stn A PO Boxes 25 The Esplanade
Underground city, First Canadian Place
Christie
Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Parkdale Village, Exhibition Place
The Junction So

In [16]:
# Preview the first rows of the collected venues
toronto_venues_raw.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
1,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
2,The Beaches,43.676357,-79.293031,Starbucks,43.678798,-79.298045,Coffee Shop
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"Riverdale, The Danforth West",43.679557,-79.352188,Dolce Gelato,43.677773,-79.351187,Ice Cream Shop


In [17]:
# (number of venue rows, number of columns)
toronto_venues_raw.shape

(1705, 7)

In [18]:
# Count the non-null entries of every column per neighborhood; the "Venue" column
# therefore holds the number of venues found for that neighborhood
groups = toronto_venues_raw.groupby(by='Neighborhood').count()
groups

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,56,56,56,56,56,56
"Brockton, Parkdale Village, Exhibition Place",22,22,22,22,22,22
Business Reply Mail Processing Centre 969 Eastern,17,17,17,17,17,17
"Cabbagetown, St. James Town",44,44,44,44,44,44
Central Bay Street,86,86,86,86,86,86
Christie,16,16,16,16,16,16
Church and Wellesley,88,88,88,88,88,88
Davisville,34,34,34,34,34,34
Davisville North,10,10,10,10,10,10
"Deer Park, South Hill, Forest Hill SE, Rathnelly, Summerhill West",14,14,14,14,14,14


There's definitely a problem with this data - some of the neighborhoods do not contain enough venues nearby  
In clustering they will probably be the bottleneck, falling into their own small clusters.
To deal with that we can either:
- Drop them entirely
- Raise the radius of our search

Because of the daily limitations of the Foursquare API the first option is easier to do. So we will drop every neighborhood with 10 or fewer venues

In [19]:
# Names of the neighborhoods that have strictly more than 10 venues
venue_counts = groups["Venue"]
to_leave = venue_counts[venue_counts > 10].index
# Keep only the venue rows that belong to one of those neighborhoods
mask = toronto_venues_raw["Neighborhood"].isin(to_leave)
toronto_venues = toronto_venues_raw.loc[mask]

Check the updated data

In [20]:
# Per-neighborhood venue counts after the filtering step
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,56,56,56,56,56,56
"Brockton, Parkdale Village, Exhibition Place",22,22,22,22,22,22
Business Reply Mail Processing Centre 969 Eastern,17,17,17,17,17,17
"Cabbagetown, St. James Town",44,44,44,44,44,44
Central Bay Street,86,86,86,86,86,86
Christie,16,16,16,16,16,16
Church and Wellesley,88,88,88,88,88,88
Davisville,34,34,34,34,34,34
"Deer Park, South Hill, Forest Hill SE, Rathnelly, Summerhill West",14,14,14,14,14,14
"Dovercourt Village, Dufferin",20,20,20,20,20,20


In [21]:
# Distinct venue categories among the remaining venues
unique_categories = toronto_venues['Venue Category'].unique()
print('There are {} uniques categories.'.format(len(unique_categories)))

There are 237 uniques categories.


In [22]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
# Mean of every indicator column per neighborhood, i.e. the share of each venue
# category; as_index=False keeps the neighborhood name as a regular first column
toronto_grouped = toronto_onehot.groupby('Neighborhood', as_index=False).mean()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.011628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.011628,0.0,0.0,0.011628,0.0,0.0


In [24]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in ``row``, ignoring its first entry.

    The first element of the row is skipped, the remaining values are sorted in
    descending order and the index labels of the first ``num_top_venues`` of them
    are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [25]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create column names according to the number of top venues: 1st, 2nd, 3rd, then
# "th" for every later rank (explicit check instead of catching an IndexError)
columns = ['Neighborhood']
for rank in range(1, num_top_venues + 1):
    suffix = indicators[rank - 1] if rank <= len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# rank the venue categories of every neighborhood, one result row per neighborhood
top_venues = [
    return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)
    for ind in range(toronto_grouped.shape[0])
]

# build the frame in one go instead of filling it row by row
neighborhoods_venues_sorted = pd.DataFrame(top_venues, columns=columns[1:])
neighborhoods_venues_sorted.insert(0, 'Neighborhood', toronto_grouped['Neighborhood'].values)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Seafood Restaurant,Café,Restaurant
1,"Brockton, Parkdale Village, Exhibition Place",Breakfast Spot,Café,Coffee Shop,Yoga Studio,Stadium
2,Business Reply Mail Processing Centre 969 Eastern,Burrito Place,Fast Food Restaurant,Spa,Auto Workshop,Light Rail Station
3,"Cabbagetown, St. James Town",Coffee Shop,Restaurant,Pizza Place,Bakery,Café
4,Central Bay Street,Coffee Shop,Italian Restaurant,Bubble Tea Shop,Bar,Ice Cream Shop


Now we need to define the number of clusters. One of the possible methods is checking silhouette score.  
In general, the highest silhouette score indicates the best K to use, so let's compute it for every candidate K

In [26]:
# Feature matrix: the per-neighborhood category shares without the name column.
# `columns=` is used because the positional axis argument was removed in pandas 2
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighborhood')

# silhouette_score needs between 2 and n_samples - 1 clusters, so cap the search
# for small datasets instead of failing in the middle of the loop
max_k = min(30, len(toronto_grouped_clustering) - 1)

for k in range(2, max_k + 1):
    # run k-means clustering; n_init is pinned so results do not change with the
    # library's default
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(toronto_grouped_clustering)
    label = kmeans.labels_
    sil_coeff = silhouette_score(toronto_grouped_clustering, label, metric='euclidean')
    print("k:", k, " score:", sil_coeff)

k: 2  score: 0.3765193517686561
k: 3  score: 0.07558690957154172
k: 4  score: 0.14736253120299056
k: 5  score: 0.06150155567340832
k: 6  score: 0.13153744357426322
k: 7  score: 0.050465991448622474
k: 8  score: 0.09310984546735267
k: 9  score: 0.13643285146354625
k: 10  score: 0.1141432054059709
k: 11  score: 0.10982528616892849
k: 12  score: 0.11751093363154289
k: 13  score: 0.03989819417445684
k: 14  score: 0.0326707922058533
k: 15  score: 0.1131754868776899
k: 16  score: 0.09607663795090296
k: 17  score: 0.033551511604645035
k: 18  score: 0.04575504439131467
k: 19  score: 0.05583678090857979
k: 20  score: 0.031914032520115404
k: 21  score: 0.04877705936023582
k: 22  score: 0.04874475753771182
k: 23  score: 0.052097905015142235
k: 24  score: 0.057122481945183164
k: 25  score: 0.0633770064994425
k: 26  score: 0.06292718927622461
k: 27  score: 0.05594664155772434
k: 28  score: 0.04476385421954023
k: 29  score: 0.03278986193178851
k: 30  score: 0.010826804979078963


As we can see the highest score is achieved by using k=2.  It's still quite low though so the results might not be that good

In [27]:
# set number of clusters
kclusters = 2

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [28]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_toronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
# drop the NaN values (because we dropped some of the neighborhoods before)
toronto_merged.dropna(inplace=True)

toronto_merged.head() # check the last columns!

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
M4K,East Toronto,"Riverdale, The Danforth West",43.679557,-79.352188,0.0,Greek Restaurant,Coffee Shop,Ice Cream Shop,Bookstore,Italian Restaurant
M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,0.0,Pet Store,Movie Theater,Italian Restaurant,Sushi Restaurant,Burrito Place
M4M,East Toronto,Studio District,43.659526,-79.340923,0.0,Café,Coffee Shop,American Restaurant,Bakery,Italian Restaurant
M4R,Central Toronto,North Toronto West,43.715383,-79.405678,0.0,Coffee Shop,Clothing Store,Sporting Goods Shop,Yoga Studio,Fast Food Restaurant
M4S,Central Toronto,Davisville,43.704324,-79.38879,0.0,Dessert Shop,Sandwich Place,Pizza Place,Café,Coffee Shop


In [29]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; fail with a clear message instead
# of an AttributeError on the next line
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [30]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=13)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### First group

In [31]:
# Columns to show: position 1 plus every column from position 5 onwards
cluster_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
# Rows whose cluster label is 0
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, cluster_columns]

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
M4K,"Riverdale, The Danforth West",Greek Restaurant,Coffee Shop,Ice Cream Shop,Bookstore,Italian Restaurant
M4L,"The Beaches West, India Bazaar",Pet Store,Movie Theater,Italian Restaurant,Sushi Restaurant,Burrito Place
M4M,Studio District,Café,Coffee Shop,American Restaurant,Bakery,Italian Restaurant
M4R,North Toronto West,Coffee Shop,Clothing Store,Sporting Goods Shop,Yoga Studio,Fast Food Restaurant
M4S,Davisville,Dessert Shop,Sandwich Place,Pizza Place,Café,Coffee Shop
M4V,"Deer Park, South Hill, Forest Hill SE, Rathnel...",Coffee Shop,Pub,American Restaurant,Convenience Store,Light Rail Station
M4X,"Cabbagetown, St. James Town",Coffee Shop,Restaurant,Pizza Place,Bakery,Café
M4Y,Church and Wellesley,Coffee Shop,Japanese Restaurant,Gay Bar,Sushi Restaurant,Restaurant
M5A,"Harbourfront, Regent Park",Coffee Shop,Park,Café,Pub,Bakery
M5B,"Garden District, Ryerson",Coffee Shop,Clothing Store,Café,Middle Eastern Restaurant,Cosmetics Shop


Most of the places are here. Mainly food venues (especially coffee shops and cafes). Overall it may be described as "entertainment centers".

### Second group

In [32]:
# Columns to show: position 1 plus every column from position 5 onwards
cluster_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
# Rows whose cluster label is 1
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, cluster_columns]

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
M5V,"South Niagara, Bathurst Quay, Island airport, ...",Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Sculpture Garden


This includes only one place which is mainly focused on airport facilities. It's definitely different from the first group

Final groups:

In [35]:
# Print the neighborhood names of each cluster as one comma-separated line
for template, cluster_id in (("1st group: {}", 0), ("2nd group: {}", 1)):
    members = toronto_merged[toronto_merged["Cluster Labels"] == cluster_id]["Neighborhood"]
    print(template.format(', '.join(members)))

1st group: Riverdale, The Danforth West, The Beaches West, India Bazaar, Studio District, North Toronto West, Davisville, Deer Park, South Hill, Forest Hill SE, Rathnelly, Summerhill West, Cabbagetown, St. James Town, Church and Wellesley, Harbourfront, Regent Park, Garden District, Ryerson, St. James Town, Berczy Park, Central Bay Street, Richmond, Adelaide, King, Harbourfront East, Toronto Islands, Union Station, Toronto Dominion Centre, Design Exchange, Victoria Hotel, Commerce Court, North Midtown, Yorkville, The Annex, Harbord, University of Toronto, Kensington Market, Grange Park, Chinatown, Stn A PO Boxes 25 The Esplanade, Underground city, First Canadian Place, Christie, Dovercourt Village, Dufferin, Little Portugal, Trinity, Brockton, Parkdale Village, Exhibition Place, The Junction South, High Park, Roncesvalles, Parkdale, Runnymede, Swansea, Business Reply Mail Processing Centre 969 Eastern
2nd group: South Niagara, Bathurst Quay, Island airport, CN Tower, Harbourfront West,