##### Segmenting and Clustering Neighborhoods in Toronto Part III

* Firstly we are importing the required libraries

In [24]:
import os
import requests
import numpy as np
import pandas as pd
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans


import folium # map rendering library
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

* Getting data which ingested in previous task.

In [3]:
df = pd.read_csv(os.getcwd() + "\\data\\NeighboorhoodWithGoespatialCoordinates.csv", sep = ";")
df.drop(df.columns[0], axis = 1, inplace = True)
df

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


In [4]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(len(df['Borough'].unique()), df.shape[0]))

The dataframe has 10 boroughs and 103 neighborhoods.


* Create a map of New York with neighborhoods superimposed on top.

In [10]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="Toronto")
location = geolocator.geocode(address)
latitude_toronto = location.latitude
longitude_toronto = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude_toronto, longitude_toronto))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [13]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location = [latitude_toronto, longitude_toronto], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
                        [lat, lng],
                        radius=5,
                        popup=label,
                        color='blue',
                        fill=True,
                        fill_color='#3186cc',
                        fill_opacity=0.7,
                        parse_html=False).add_to(map_toronto)  
    
map_toronto

* Next, we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them

In [18]:
#https://foursquare.com/developers/apps/YTTSV5B0SMRPXVLXOT4OTQ2ZOE1V0OQ2AZCYTEMYKXO0KA0N/settings
CLIENT_ID = "YTTSV5B0SMRPXVLXOT4OTQ2ZOE1V0OQ2AZCYTEMYKXO0KA0N" # your Foursquare ID
CLIENT_SECRET = "DKLVJ1SOU1ZSBNNBHK4CD4K0IVEFJ4BKVGXHX2KLLDVJ4M40" # your Foursquare Secret
VERSION = '20180604'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: YTTSV5B0SMRPXVLXOT4OTQ2ZOE1V0OQ2AZCYTEMYKXO0KA0N
CLIENT_SECRET:DKLVJ1SOU1ZSBNNBHK4CD4K0IVEFJ4BKVGXHX2KLLDVJ4M40


#### Explore Neighborhoods in Toronto

*  Let's create a function to repeat the same process to all the neighborhoods in Manhattan

In [20]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [157]:
radius = 500 # define radius
LIMIT = 100 # limit of number of venues returned by Foursquare API

toronto_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

Let's check the size of the resulting dataframe

In [37]:
print(toronto_venues.shape)
toronto_venues.head()

(2131, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Parkwoods,43.753259,-79.329656,Corrosion Service Company Limited,43.752432,-79.334661,Construction & Landscaping
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant


Let's check how many venues were returned for each neighborhood

In [38]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,5,5,5,5,5,5
"Alderwood, Long Branch",8,8,8,8,8,8
"Bathurst Manor, Wilson Heights, Downsview North",20,20,20,20,20,20
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",23,23,23,23,23,23
...,...,...,...,...,...,...
"Willowdale, Willowdale West",6,6,6,6,6,6
Woburn,3,3,3,3,3,3
Woodbine Heights,6,6,6,6,6,6
York Mills West,2,2,2,2,2,2


* Let's find out how many unique categories can be curated from all the returned venues

In [42]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 273 uniques categories.


#### Analyzing Each Neighborhood

In [92]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="V_", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

print(toronto_onehot.shape)
toronto_onehot.head()

(2131, 274)


Unnamed: 0,Neighborhood,V_Accessories Store,V_Afghan Restaurant,V_Airport,V_Airport Food Court,V_Airport Gate,V_Airport Lounge,V_Airport Service,V_Airport Terminal,V_American Restaurant,...,V_Train Station,V_Vegetarian / Vegan Restaurant,V_Video Game Store,V_Vietnamese Restaurant,V_Warehouse Store,V_Wine Bar,V_Wine Shop,V_Wings Joint,V_Women's Store,V_Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


* Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [94]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
print(toronto_grouped.shape)
toronto_grouped.head()

(95, 274)


Unnamed: 0,Neighborhood,V_Accessories Store,V_Afghan Restaurant,V_Airport,V_Airport Food Court,V_Airport Gate,V_Airport Lounge,V_Airport Service,V_Airport Terminal,V_American Restaurant,...,V_Train Station,V_Vegetarian / Vegan Restaurant,V_Video Game Store,V_Vietnamese Restaurant,V_Warehouse Store,V_Wine Bar,V_Wine Shop,V_Wings Joint,V_Women's Store,V_Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


* Let's print each neighborhood along with the top 5 most common venues

In [95]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                         venue  freq
0                     V_Lounge   0.2
1               V_Skating Rink   0.2
2             V_Breakfast Spot   0.2
3             V_Clothing Store   0.2
4  V_Latin American Restaurant   0.2


----Alderwood, Long Branch----
              venue  freq
0     V_Pizza Place  0.25
1             V_Gym  0.12
2    V_Skating Rink  0.12
3     V_Coffee Shop  0.12
4  V_Sandwich Place  0.12


----Bathurst Manor, Wilson Heights, Downsview North----
              venue  freq
0            V_Bank  0.10
1     V_Coffee Shop  0.10
2        V_Pharmacy  0.05
3  V_Ice Cream Shop  0.05
4      V_Restaurant  0.05


----Bayview Village----
                   venue  freq
0                 V_Café  0.25
1                 V_Bank  0.25
2  V_Japanese Restaurant  0.25
3   V_Chinese Restaurant  0.25
4    V_Accessories Store  0.00


----Bedford Park, Lawrence Manor East----
                  venue  freq
0  V_Italian Restaurant  0.09
1         V_Coffee Shop  0.09
2          

4           V_Mobile Phone Shop   0.0


----Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West----
                    venue  freq
0         V_Grocery Store  0.07
1         V_Tanning Salon  0.07
2        V_Sandwich Place  0.07
3  V_Fast Food Restaurant  0.07
4       V_Supplement Shop  0.07


----Moore Park, Summerhill East----
                               venue  freq
0                             V_Park  0.33
1                       V_Restaurant  0.33
2                       V_Playground  0.33
3               V_Mexican Restaurant  0.00
4  V_Molecular Gastronomy Restaurant  0.00


----New Toronto, Mimico South, Humber Bay Shores----
                    venue  freq
0           V_Pizza Place  0.08
1   V_American Restaurant  0.08
2  V_Fast Food Restaurant  0.08
3          V_Liquor Store  0.08
4    V_Mexican Restaurant  0.08


----North Park, Maple Leaf Park, Upwood Park----
                          venue  freq
0                        V_Park  

                         venue  freq
0  V_Middle Eastern Restaurant  0.14
1                V_Coffee Shop  0.14
2             V_Discount Store  0.14
3             V_Sandwich Place  0.14
4         V_Chinese Restaurant  0.14


----Weston----
                          venue  freq
0                        V_Park   1.0
1           V_Accessories Store   0.0
2                 V_Men's Store   0.0
3  V_Modern European Restaurant   0.0
4           V_Mobile Phone Shop   0.0


----Wexford, Maryvale----
                         venue  freq
0  V_Middle Eastern Restaurant  0.33
1                V_Auto Garage  0.17
2                     V_Bakery  0.17
3             V_Breakfast Spot  0.17
4             V_Sandwich Place  0.17


----Willowdale, Willowdale East----
                venue  freq
0  V_Ramen Restaurant  0.09
1       V_Pizza Place  0.06
2    V_Sandwich Place  0.06
3  V_Sushi Restaurant  0.06
4        V_Restaurant  0.06


----Willowdale, Willowdale West----
              venue  freq
0        V_Ph

* First, let's write a function to sort the venues in descending order.

In [99]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

* Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [145]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,V_Breakfast Spot,V_Latin American Restaurant,V_Skating Rink,V_Lounge,V_Clothing Store,V_Doner Restaurant,V_Dim Sum Restaurant,V_Diner,V_Discount Store,V_Distribution Center
1,"Alderwood, Long Branch",V_Pizza Place,V_Coffee Shop,V_Sandwich Place,V_Pub,V_Pool,V_Skating Rink,V_Gym,V_Colombian Restaurant,V_Comfort Food Restaurant,V_Drugstore
2,"Bathurst Manor, Wilson Heights, Downsview North",V_Bank,V_Coffee Shop,V_Fried Chicken Joint,V_Supermarket,V_Ice Cream Shop,V_Sushi Restaurant,V_Shopping Mall,V_Middle Eastern Restaurant,V_Deli / Bodega,V_Mobile Phone Shop
3,Bayview Village,V_Chinese Restaurant,V_Café,V_Bank,V_Japanese Restaurant,V_Yoga Studio,V_Dim Sum Restaurant,V_Diner,V_Discount Store,V_Distribution Center,V_Dog Run
4,"Bedford Park, Lawrence Manor East",V_Restaurant,V_Coffee Shop,V_Sandwich Place,V_Italian Restaurant,V_Thai Restaurant,V_Indian Restaurant,V_Pub,V_Café,V_Sushi Restaurant,V_Hobby Shop


####  Clustering Neighborhoods  
Run k-means to cluster the neighborhood into 5 clusters.

In [146]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 3,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3,
       1, 1, 1, 1, 1, 3, 2])

* Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [147]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,1.0,V_Park,V_Food & Drink Shop,V_Construction & Landscaping,V_Dog Run,V_Dessert Shop,V_Dim Sum Restaurant,V_Diner,V_Discount Store,V_Distribution Center,V_Doner Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,1.0,V_Hockey Arena,V_Coffee Shop,V_Intersection,V_Pizza Place,V_Portuguese Restaurant,V_Yoga Studio,V_Department Store,V_Dessert Shop,V_Dim Sum Restaurant,V_Diner
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1.0,V_Coffee Shop,V_Pub,V_Bakery,V_Park,V_Theater,V_Restaurant,V_Breakfast Spot,V_Café,V_Electronics Store,V_Performing Arts Venue
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,1.0,V_Clothing Store,V_Accessories Store,V_Furniture / Home Store,V_Event Space,V_Vietnamese Restaurant,V_Coffee Shop,V_Boutique,V_Women's Store,V_Miscellaneous Shop,V_Athletics & Sports
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,1.0,V_Coffee Shop,V_Sushi Restaurant,V_Diner,V_Creperie,V_Sandwich Place,V_Distribution Center,V_Discount Store,V_Smoothie Shop,V_Italian Restaurant,V_Burrito Place


* Finally, let's visualize the resulting clusters

In [150]:
toronto_merged.dropna(axis = 0 , inplace = True)
toronto_merged["Cluster Labels"] = toronto_merged["Cluster Labels"].astype(int)

In [151]:
# create map
map_clusters = folium.Map(location=[latitude_toronto, longitude_toronto], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Examining Clusters

##### Cluster 1

In [152]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
51,Scarborough,0,V_American Restaurant,V_Motel,V_Deli / Bodega,V_Dessert Shop,V_Dim Sum Restaurant,V_Diner,V_Discount Store,V_Distribution Center,V_Dog Run,V_Yoga Studio


##### Cluster 2

In [153]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,1,V_Park,V_Food & Drink Shop,V_Construction & Landscaping,V_Dog Run,V_Dessert Shop,V_Dim Sum Restaurant,V_Diner,V_Discount Store,V_Distribution Center,V_Doner Restaurant
1,North York,1,V_Hockey Arena,V_Coffee Shop,V_Intersection,V_Pizza Place,V_Portuguese Restaurant,V_Yoga Studio,V_Department Store,V_Dessert Shop,V_Dim Sum Restaurant,V_Diner
2,Downtown Toronto,1,V_Coffee Shop,V_Pub,V_Bakery,V_Park,V_Theater,V_Restaurant,V_Breakfast Spot,V_Café,V_Electronics Store,V_Performing Arts Venue
3,North York,1,V_Clothing Store,V_Accessories Store,V_Furniture / Home Store,V_Event Space,V_Vietnamese Restaurant,V_Coffee Shop,V_Boutique,V_Women's Store,V_Miscellaneous Shop,V_Athletics & Sports
4,Downtown Toronto,1,V_Coffee Shop,V_Sushi Restaurant,V_Diner,V_Creperie,V_Sandwich Place,V_Distribution Center,V_Discount Store,V_Smoothie Shop,V_Italian Restaurant,V_Burrito Place
...,...,...,...,...,...,...,...,...,...,...,...,...
97,Downtown Toronto,1,V_Coffee Shop,V_Café,V_Restaurant,V_Hotel,V_Gym,V_Asian Restaurant,V_Japanese Restaurant,V_Steakhouse,V_American Restaurant,V_Seafood Restaurant
98,Etobicoke,1,V_Pool,V_River,V_Yoga Studio,V_Deli / Bodega,V_Department Store,V_Dessert Shop,V_Dim Sum Restaurant,V_Diner,V_Discount Store,V_Distribution Center
99,Downtown Toronto,1,V_Sushi Restaurant,V_Coffee Shop,V_Japanese Restaurant,V_Gay Bar,V_Restaurant,V_Yoga Studio,V_Pub,V_Burger Joint,V_Bubble Tea Shop,V_Hotel
100,East Toronto,1,V_Gym / Fitness Center,V_Auto Workshop,V_Garden Center,V_Garden,V_Fast Food Restaurant,V_Farmers Market,V_Light Rail Station,V_Comic Shop,V_Pizza Place,V_Butcher


##### Cluster 3

In [154]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
45,North York,2,V_Cafeteria,V_Yoga Studio,V_Dog Run,V_Dessert Shop,V_Dim Sum Restaurant,V_Diner,V_Discount Store,V_Distribution Center,V_Doner Restaurant,V_Deli / Bodega


##### Cluster 4

In [155]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,York,3,V_Park,V_Bakery,V_Women's Store,V_College Stadium,V_Colombian Restaurant,V_Ethiopian Restaurant,V_Electronics Store,V_Eastern European Restaurant,V_Dumpling Restaurant,V_Drugstore
35,East York,3,V_Park,V_Convenience Store,V_Ethiopian Restaurant,V_Electronics Store,V_Eastern European Restaurant,V_Dumpling Restaurant,V_Drugstore,V_Donut Shop,V_Doner Restaurant,V_Deli / Bodega
64,York,3,V_Park,V_Yoga Studio,V_Dog Run,V_Department Store,V_Dessert Shop,V_Dim Sum Restaurant,V_Diner,V_Discount Store,V_Distribution Center,V_Doner Restaurant
66,North York,3,V_Convenience Store,V_Park,V_Ethiopian Restaurant,V_Electronics Store,V_Eastern European Restaurant,V_Dumpling Restaurant,V_Drugstore,V_Donut Shop,V_Doner Restaurant,V_Deli / Bodega
83,Central Toronto,3,V_Park,V_Playground,V_Restaurant,V_Discount Store,V_Dance Studio,V_Deli / Bodega,V_Department Store,V_Dessert Shop,V_Dim Sum Restaurant,V_Diner
85,Scarborough,3,V_Park,V_Playground,V_Distribution Center,V_Deli / Bodega,V_Department Store,V_Dessert Shop,V_Dim Sum Restaurant,V_Diner,V_Discount Store,V_Dog Run
91,Downtown Toronto,3,V_Park,V_Trail,V_Playground,V_Deli / Bodega,V_Department Store,V_Dessert Shop,V_Dim Sum Restaurant,V_Diner,V_Discount Store,V_Distribution Center


##### Cluster 5

In [156]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
57,North York,4,V_Baseball Field,V_Yoga Studio,V_Dessert Shop,V_Dim Sum Restaurant,V_Diner,V_Discount Store,V_Distribution Center,V_Dog Run,V_Doner Restaurant,V_Falafel Restaurant
101,Etobicoke,4,V_Baseball Field,V_Yoga Studio,V_Dessert Shop,V_Dim Sum Restaurant,V_Diner,V_Discount Store,V_Distribution Center,V_Dog Run,V_Doner Restaurant,V_Falafel Restaurant
