# Segmenting and Clustering Toronto's Neighborhoods
### Clustering Toronto's neighborhoods based on common venue categories

## 0. Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import pandas as pd
import requests
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

## 1. Fetching Toronto's Neighborhood Data 

##### Scrape Wikipedia's page to read Toronto's neighbourhood data i.e. Postal Codes, Boroughs, and Neighborhood names

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
raw_toronto_data = pd.read_html(url)

##### Here we can see that all the required data is in the 0th index of the retreived list

In [3]:
raw_toronto_data[0]

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


##### So read data from 0th index of this list into a dataframe

In [4]:
pd_TN = pd.DataFrame(raw_toronto_data[0])
print(pd_TN.head())
print("\nTotal Number of Neighborhoods recieved: {}".format(pd_TN.shape[0]))
print("\nData types of columns: \n{}".format(pd_TN.dtypes))

  Postal Code           Borough              Neighbourhood
0         M1A      Not assigned               Not assigned
1         M2A      Not assigned               Not assigned
2         M3A        North York                  Parkwoods
3         M4A        North York           Victoria Village
4         M5A  Downtown Toronto  Regent Park, Harbourfront

Total Number of Neighborhoods recieved: 180

Data types of columns: 
Postal Code      object
Borough          object
Neighbourhood    object
dtype: object


## 2. Cleaning Data

##### Dropping entries where borough is not assigned

In [5]:
pd_TN.drop(pd_TN[pd_TN.Borough == 'Not assigned'].index, axis = 0, inplace =True)
pd_TN.reset_index(inplace = True)
pd_TN.drop(columns = 'index', inplace = True)
pd_TN.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


##### Replacing Neighborhood with corresponding Boroughs where Neigbourhoods are not assigned

In [6]:
condition = pd_TN.Neighbourhood == 'Not assigned'
pd_TN.loc[condition, 'Neighbourhood'] = pd_TN['Borough']

In [7]:
print("Number of neighborhoods in the cleaned DataFrame are: {}".format(pd_TN.shape[0]))

Number of neighborhoods in the cleaned DataFrame are: 103


##### Reading lat/lng of neighborhoods

In [8]:
lat_lng = pd.read_csv('https://cocl.us/Geospatial_data')
lat_lng.head(103)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


##### We can see that data in loaded .csv file is sorted w.r.t postal codes so I'm also gonna sort my dataframe w.r.t postal codes and now I can simply merge lat/lng columns with my original data frame.

In [9]:
pd_TN.sort_values('Postal Code',axis = 0, ascending = True, inplace = True)
pd_TN.reset_index(inplace = True)
pd_TN.drop(columns = 'index', inplace = True)
pd_TN.head(103)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [10]:
#Adding Lat/Lng columns to original DataFrame
pd_TN['Latitude'] = lat_lng['Latitude']
pd_TN['Longitude'] = lat_lng['Longitude']

#Renaming column to standardize names to American spellings
pd_TN.rename(columns = {'Neighbourhood':'Neighborhood'}, inplace =True) 

pd_TN.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


##### Now we have a clean DataFrame containing Toronto's Neighbourhoods Data

## 3. Getting top 100 venues within 500m radius of all neighborhoods using Forsquare API

#### My Forsquare Credentials

In [11]:
CLIENT_ID = 'GUUCORHSKBKU0EYPOMDW2RAIYV4Y5KXFQKA2EIU4FWOOKYQW' # your Foursquare ID
CLIENT_SECRET = 'UNKAZH4AI1ZWMWHCXIZNC0LE0JO2JXQ0NE2330FQ3S3C0P2I' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

#### Defining a function to get top 100 venues of all neighbourhoods of Toronto

In [12]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [15]:
toronto_venues = getNearbyVenues(names=pd_TN['Neighborhood'],
                                   latitudes=pd_TN['Latitude'],
                                   longitudes=pd_TN['Longitude']
                                  )

Malvern, Rouge
Rouge Hill, Port Union, Highland Creek
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Golden Mile, Clairlea, Oakridge
Cliffside, Cliffcrest, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Wexford Heights, Scarborough Town Centre
Wexford, Maryvale
Agincourt
Clarks Corners, Tam O'Shanter, Sullivan
Milliken, Agincourt North, Steeles East, L'Amoreaux East
Steeles West, L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
York Mills, Silver Hills
Willowdale, Newtonbrook
Willowdale, Willowdale East
York Mills West
Willowdale, Willowdale West
Parkwoods
Don Mills
Don Mills
Bathurst Manor, Wilson Heights, Downsview North
Northwood Park, York University
Downsview
Downsview
Downsview
Downsview
Victoria Village
Parkview Hill, Woodbine Gardens
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto, Broadview North (Old East York)
The Danforth West, 

In [16]:
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,"Guildwood, Morningside, West Hill",43.763573,-79.188711,RBC Royal Bank,43.76679,-79.191151,Bank
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Sail Sushi,43.765951,-79.191275,Restaurant


In [17]:
print("Total number of venues: {} \n".format(toronto_venues.shape[0]))
print("Total number of uniques venues: {}".format(len(toronto_venues['Venue Category'].unique())))

Total number of venues: 2156 

Total number of uniques venues: 272


##### We need to cluster neighborhoods on basis of popular venue types there. Here, I have venue category in Tabular form but I need it in numeric form for clustering. So, using one hot encoding, I converted this tabular data into numeric form.

In [18]:
# One hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# Adding neighborhood column back to the dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# Moving neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


##### Grouping DataFrame w.r.t. Neighborhood and taking mean, so that I could know which venues categories are most common in a particular neighborhood.

In [19]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
print('Number of rows in this grouped DataFrame: {}'.format(toronto_grouped.shape[0]))
toronto_grouped.head()

Number of rows in this grouped DataFrame: 95


Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


##### Finding most common venues in each neighborhood to get some insight.

In [20]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [21]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Latin American Restaurant,Lounge,Skating Rink,Breakfast Spot,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
1,"Alderwood, Long Branch",Pizza Place,Gym,Coffee Shop,Pharmacy,Sandwich Place,Pub,Pool,Women's Store,Diner,Deli / Bodega
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Middle Eastern Restaurant,Frozen Yogurt Shop,Deli / Bodega,Supermarket,Sushi Restaurant,Restaurant,Shopping Mall,Mobile Phone Shop
3,Bayview Village,Chinese Restaurant,Café,Bank,Japanese Restaurant,Women's Store,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
4,"Bedford Park, Lawrence Manor East",Italian Restaurant,Sandwich Place,Coffee Shop,Restaurant,Thai Restaurant,Pub,Café,Indian Restaurant,Sushi Restaurant,Fast Food Restaurant


## 4. Clustering Neighborhoods 
####  K - Means Clustering

In [22]:
# Number of clusters
k = 5
X = toronto_grouped.drop('Neighborhood', axis = 1)
kmeans = KMeans(n_clusters = k, init = 'k-means++').fit(X)

In [23]:
# Adding cluster labels to the grouped neighborhood data.
toronto_clustered = neighborhoods_venues_sorted
toronto_clustered.insert(1, 'Cluster Labels', kmeans.labels_)
toronto_clustered.head()

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,0,Latin American Restaurant,Lounge,Skating Rink,Breakfast Spot,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
1,"Alderwood, Long Branch",0,Pizza Place,Gym,Coffee Shop,Pharmacy,Sandwich Place,Pub,Pool,Women's Store,Diner,Deli / Bodega
2,"Bathurst Manor, Wilson Heights, Downsview North",0,Coffee Shop,Bank,Middle Eastern Restaurant,Frozen Yogurt Shop,Deli / Bodega,Supermarket,Sushi Restaurant,Restaurant,Shopping Mall,Mobile Phone Shop
3,Bayview Village,0,Chinese Restaurant,Café,Bank,Japanese Restaurant,Women's Store,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
4,"Bedford Park, Lawrence Manor East",0,Italian Restaurant,Sandwich Place,Coffee Shop,Restaurant,Thai Restaurant,Pub,Café,Indian Restaurant,Sushi Restaurant,Fast Food Restaurant


In [24]:
toronto_final = neighborhoods_venues_sorted

# Merging neighborhoods_venues_sorted with pd_TN to add latitude/longitude for each neighborhood
toronto_final = toronto_final.join(pd_TN.set_index('Neighborhood'), on='Neighborhood')

toronto_final.head()

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Postal Code,Borough,Latitude,Longitude
0,Agincourt,0,Latin American Restaurant,Lounge,Skating Rink,Breakfast Spot,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,M1S,Scarborough,43.7942,-79.262029
1,"Alderwood, Long Branch",0,Pizza Place,Gym,Coffee Shop,Pharmacy,Sandwich Place,Pub,Pool,Women's Store,Diner,Deli / Bodega,M8W,Etobicoke,43.602414,-79.543484
2,"Bathurst Manor, Wilson Heights, Downsview North",0,Coffee Shop,Bank,Middle Eastern Restaurant,Frozen Yogurt Shop,Deli / Bodega,Supermarket,Sushi Restaurant,Restaurant,Shopping Mall,Mobile Phone Shop,M3H,North York,43.754328,-79.442259
3,Bayview Village,0,Chinese Restaurant,Café,Bank,Japanese Restaurant,Women's Store,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,M2K,North York,43.786947,-79.385975
4,"Bedford Park, Lawrence Manor East",0,Italian Restaurant,Sandwich Place,Coffee Shop,Restaurant,Thai Restaurant,Pub,Café,Indian Restaurant,Sushi Restaurant,Fast Food Restaurant,M5M,North York,43.733283,-79.41975


## 5. Displaying Clustered Neighborhoods on map
##### Using Floium, displaying clustered neighborhoods on Toronto's map.

In [25]:
# Toronto Lat/Lng
latitude = 43.6532
longitude = -79.3832

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_final['Latitude'], toronto_final['Longitude'], toronto_final['Neighborhood'], toronto_final['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## 6. Insights of common venues in each cluster

#####  Examining each cluster and determining the discriminating venue categories that distinguish each cluster.

### Cluster 1

In [26]:
toronto_final.loc[toronto_final['Cluster Labels'] == 0, toronto_final.columns[[1] + list(range(2, toronto_final.shape[1] -4 ))]]

Unnamed: 0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,Latin American Restaurant,Lounge,Skating Rink,Breakfast Spot,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
1,0,Pizza Place,Gym,Coffee Shop,Pharmacy,Sandwich Place,Pub,Pool,Women's Store,Diner,Deli / Bodega
2,0,Coffee Shop,Bank,Middle Eastern Restaurant,Frozen Yogurt Shop,Deli / Bodega,Supermarket,Sushi Restaurant,Restaurant,Shopping Mall,Mobile Phone Shop
3,0,Chinese Restaurant,Café,Bank,Japanese Restaurant,Women's Store,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
4,0,Italian Restaurant,Sandwich Place,Coffee Shop,Restaurant,Thai Restaurant,Pub,Café,Indian Restaurant,Sushi Restaurant,Fast Food Restaurant
...,...,...,...,...,...,...,...,...,...,...,...
88,0,Middle Eastern Restaurant,Accessories Store,Auto Garage,Breakfast Spot,Shopping Mall,Bakery,Sandwich Place,Doner Restaurant,Diner,Discount Store
90,0,Ramen Restaurant,Sushi Restaurant,Pizza Place,Restaurant,Café,Sandwich Place,Coffee Shop,Movie Theater,Japanese Restaurant,Hotel
91,0,Pharmacy,Pizza Place,Coffee Shop,Bank,Women's Store,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
92,0,Coffee Shop,Soccer Field,Korean Restaurant,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant


### Cluster 2

In [27]:
toronto_final.loc[toronto_final['Cluster Labels'] == 1, toronto_final.columns[[1] + list(range(2, toronto_final.shape[1] -4 ))]]

Unnamed: 0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,1,Park,Pool,Women's Store,Golf Course,Ethiopian Restaurant,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant
26,1,Intersection,Convenience Store,Park,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Women's Store
30,1,Trail,Park,Jewelry Store,Sushi Restaurant,Women's Store,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run
32,1,Park,Sushi Restaurant,Japanese Restaurant,Pub,Women's Store,Dog Run,Dim Sum Restaurant,Diner,Discount Store,Distribution Center
44,1,Park,Sandwich Place,Mobile Phone Shop,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant,Event Space,Distribution Center
46,1,Park,Bus Line,Dim Sum Restaurant,Swim School,Dog Run,Diner,Discount Store,Distribution Center,Doner Restaurant,Department Store
50,1,Park,Playground,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop
52,1,Restaurant,Trail,Park,College Stadium,Colombian Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore
54,1,Basketball Court,Park,Bakery,Construction & Landscaping,Trail,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run
61,1,Convenience Store,Park,Food & Drink Shop,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Women's Store


### Cluster 3

In [28]:
toronto_final.loc[toronto_final['Cluster Labels'] == 2, toronto_final.columns[[1] + list(range(2, toronto_final.shape[1] -4 ))]]

Unnamed: 0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
87,2,Park,Women's Store,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Drugstore
89,2,Park,Women's Store,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Drugstore


### Cluster 4

In [29]:
toronto_final.loc[toronto_final['Cluster Labels'] == 3, toronto_final.columns[[1] + list(range(2, toronto_final.shape[1] -4 ))]]

Unnamed: 0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
39,3,Food Service,Baseball Field,Women's Store,Doner Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop,Dessert Shop
58,3,Baseball Field,Women's Store,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Farmers Market


### Cluster 5

In [30]:
toronto_final.loc[toronto_final['Cluster Labels'] == 4, toronto_final.columns[[1] + list(range(2, toronto_final.shape[1] -4 ))]]

Unnamed: 0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
49,4,Fast Food Restaurant,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant


### I hope you enjoyed this notebook.