## Exploring and clustering the neighborhoods in Toronto - Part 3

In [15]:
import pandas as pd
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# !conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

!pip install geopy
from geopy.geocoders import Nominatim
import json
import requests # library to handle requests
print('Libraries imported.')

Collecting geopy
[?25l  Downloading https://files.pythonhosted.org/packages/ab/97/25def417bf5db4cc6b89b47a56961b893d4ee4fec0c335f5b9476a8ff153/geopy-1.22.0-py2.py3-none-any.whl (113kB)
[K     |████████████████████████████████| 122kB 9.1MB/s eta 0:00:01
[?25hCollecting geographiclib<2,>=1.49 (from geopy)
  Downloading https://files.pythonhosted.org/packages/8b/62/26ec95a98ba64299163199e95ad1b0e34ad3f4e176e221c40245f211e425/geographiclib-1.50-py3-none-any.whl
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.50 geopy-1.22.0
Libraries imported.


### A. Data Preparation and Cleaning

In [16]:
df = pd.read_csv("Postal_Codes_of_Canada_Pt2.csv",index_col=0)
df.head() #Using Geocoder NOT the dataframe posted by COURSERA

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.752935,-79.335641
1,M4A,North York,Victoria Village,43.728102,-79.31189
2,M5A,Downtown Toronto,Regent Park,43.650964,-79.353041
3,M6A,North York,Lawrence Manor,43.723265,-79.451211
4,M7A,Downtown Toronto,Queen's Park,43.66179,-79.38939


1. Find out basic information about number of unique Boroughs and Neighborhoods

In [17]:
unique = []

for Neighborhood in df.Neighborhood:
    new_neighborhood = Neighborhood.split(",")
    unique = unique + new_neighborhood

print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df['Borough'].unique()),
        len(unique)))

The dataframe has 10 boroughs and 209 neighborhoods.


2. Visualizing all postal codes in the dataframe using Folium

In [18]:
from geopy.geocoders import Nominatim

In [19]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [20]:
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)

# add the trending venues as blue circle markers
for lat, lng, postcode, borough, neighborhood in \
    zip(df['Latitude'], df['Longitude'], df['Postal Code'], df['Borough'],df['Neighborhood']):
        label = '{}, {}, {}'.format(neighborhood, borough, postcode)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            fill=True,
            color='blue',
            fill_color='blue',
            fill_opacity=0.6,
            parse_html=False).add_to(toronto_map)
    
#Show the map
toronto_map

For simplification, I segment and cluster only the neighborhoods with borough containing 'Toronto'. So let's slice the original dataframe and create a new dataframe of the Toronto data.

In [21]:
df_toronto = df[df['Borough'].astype(str).str.contains('Toronto')]
df_toronto.reset_index(drop=True,inplace=True)
df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,Regent Park,43.650964,-79.353041
1,M7A,Downtown Toronto,Queen's Park,43.66179,-79.38939
2,M5B,Downtown Toronto,Garden District,43.657491,-79.377529
3,M5C,Downtown Toronto,St. James Town,43.651734,-79.375554
4,M4E,East Toronto,The Beaches,43.678148,-79.295349


In [22]:
df_toronto.shape

(75, 5)

3. Let's visualize again!

In [23]:
specific_toronto_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# add the trending venues as blue circle markers
for lat, lng, postcode, borough, neighborhood in \
    zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Postal Code'], df_toronto['Borough'],
        df_toronto['Neighborhood']):
    
        label = '{}, {}, {}'.format(neighborhood, borough, postcode)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            fill=True,
            color='blue',
            fill_color='blue',
            fill_opacity=0.6,
            parse_html=False).add_to(specific_toronto_map)
    
#Show the map
specific_toronto_map

4. Defining Foursquare Credential and Version (Hidden Code)

In [24]:
# @hidden_cell

CLIENT_ID = '0FM3Q3MSAFZWRTJNTY1QWHZVW2GU3M0Z30KHXGJDUW2BGNYZ' # your Foursquare ID
CLIENT_SECRET = 'TXHVYZPQLAKRO14HF31LCQVYFGDF1HSEVEHA4DMETAIAJDUC' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30

In [25]:
df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,Regent Park,43.650964,-79.353041
1,M7A,Downtown Toronto,Queen's Park,43.66179,-79.38939
2,M5B,Downtown Toronto,Garden District,43.657491,-79.377529
3,M5C,Downtown Toronto,St. James Town,43.651734,-79.375554
4,M4E,East Toronto,The Beaches,43.678148,-79.295349


5. Use a function to search venues around Toronto

In [26]:
def getNearbyVenues(names, latitudes, longitudes, radius=100):
    LIMIT = 100
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        try:
            results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
            venues_list.append([(
                name, 
                lat, 
                lng, 
                v['venue']['name'], 
                v['venue']['location']['lat'], 
                v['venue']['location']['lng'],  
                v['venue']['categories'][0]['name']) for v in results])
            print("Working on it")
        except:
            print("Error")

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [27]:
df_toronto.shape

(75, 5)

In [28]:
toronto_venues = getNearbyVenues(names=df_toronto['Neighborhood'],
                                   latitudes=df_toronto['Latitude'],
                                   longitudes=df_toronto['Longitude'])



Regent Park
Working on it
Queen's Park
Working on it
Garden District
Working on it
St. James Town
Working on it
The Beaches
Working on it
Berczy Park
Working on it
Central Bay Street
Working on it
Christie
Working on it
Richmond
Working on it
Dufferin
Working on it
Harbourfront East
Working on it
Little Portugal
Working on it
The Danforth West
Working on it
Toronto Dominion Centre
Working on it
Brockton
Working on it
India Bazaar
Working on it
Commerce Court
Working on it
Studio District
Working on it
Lawrence Park
Working on it
Roselawn
Working on it
Davisville North
Working on it
Forest Hill North & West
Working on it
High Park
Working on it
North Toronto West
Working on it
The Annex
Working on it
Parkdale
Working on it
Davisville
Working on it
University of Toronto
Working on it
Runnymede
Working on it
Moore Park
Working on it
Kensington Market
Working on it
Summerhill West
Working on it
CN Tower
Working on it
Rosedale
Working on it
Stn A PO Boxes
Working on it
St. James Town
Workin

In [29]:
print(toronto_venues.shape)
toronto_venues.head()

(249, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Garden District,43.657491,-79.377529,Oakham Café,43.658078,-79.378315,Café
1,Garden District,43.657491,-79.377529,Lake Devo,43.656994,-79.376898,Lake
2,Garden District,43.657491,-79.377529,Ryerson Square,43.656988,-79.376896,Other Great Outdoors
3,Garden District,43.657491,-79.377529,Tim Hortons,43.656907,-79.377329,Coffee Shop
4,Garden District,43.657491,-79.377529,It's All Grk,43.656781,-79.376828,Greek Restaurant


6. Checking how many venues were returned for each neighborhood

In [30]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,11,11,11,11,11,11
Bathurst Quay,1,1,1,1,1,1
Business reply mail Processing Centre,8,8,8,8,8,8
CN Tower,1,1,1,1,1,1
Central Bay Street,5,5,5,5,5,5
Chinatown,13,13,13,13,13,13
Christie,1,1,1,1,1,1
Church and Wellesley,5,5,5,5,5,5
Commerce Court,11,11,11,11,11,11
Design Exchange,7,7,7,7,7,7


In [31]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 64 uniques categories.


In [32]:
print(f'There are {len(toronto_venues.Neighborhood.unique())} unique neighborhoods')

There are 45 unique neighborhoods


### B. Analyzing Each Neighborhood

In [33]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,BBQ Joint,Bank,Bar,Breakfast Spot,Bubble Tea Shop,Building,Burger Joint,...,Smoothie Shop,Speakeasy,Steakhouse,Sushi Restaurant,Taco Place,Tea Room,Thai Restaurant,Theme Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Garden District,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Garden District,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Garden District,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Garden District,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Garden District,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Check the size of new dataframe

In [34]:
toronto_onehot.shape

(249, 65)

In [35]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,BBQ Joint,Bank,Bar,Breakfast Spot,Bubble Tea Shop,Building,Burger Joint,...,Smoothie Shop,Speakeasy,Steakhouse,Sushi Restaurant,Taco Place,Tea Room,Thai Restaurant,Theme Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Adelaide,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.090909,0.090909,0.090909,0.0,0.0,0.090909,0.0
1,Bathurst Quay,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Business reply mail Processing Centre,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,CN Tower,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,...,0.0,0.0,0.0,0.2,0.0,0.2,0.0,0.0,0.0,0.0


In [36]:
toronto_grouped.shape

(45, 65)

The result above shows 45 Neighborhoods and 65 Venue Category types

#### Let's print each neighborhood along with the top 5 most common venues

In [37]:
num_top_venues = 5

for nhood in toronto_grouped.Neighborhood:
    print("----"+nhood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == nhood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide----
                           venue  freq
0                    Coffee Shop  0.18
1  Vegetarian / Vegan Restaurant  0.09
2                       Tea Room  0.09
3                     Taco Place  0.09
4               Sushi Restaurant  0.09


----Bathurst Quay----
                   venue  freq
0             Restaurant   1.0
1    American Restaurant   0.0
2  Performing Arts Venue   0.0
3       Greek Restaurant   0.0
4          Grocery Store   0.0


----Business reply mail Processing Centre----
                  venue  freq
0           Pizza Place  0.25
1   Japanese Restaurant  0.12
2  Colombian Restaurant  0.12
3                  Café  0.12
4            Steakhouse  0.12


----CN Tower----
                   venue  freq
0             Restaurant   1.0
1    American Restaurant   0.0
2  Performing Arts Venue   0.0
3       Greek Restaurant   0.0
4          Grocery Store   0.0


----Central Bay Street----
                venue  freq
0  Italian Restaurant   0.2
1            Tea Room

#### Put these data into one dataframe

In [38]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Instead of top 5 venues, we now try top 10 venues

In [39]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Sushi Restaurant,Japanese Restaurant,Restaurant,Vegetarian / Vegan Restaurant,Garden,Taco Place,Tea Room,Food Court,Greek Restaurant
1,Bathurst Quay,Restaurant,Vietnamese Restaurant,Colombian Restaurant,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop
2,Business reply mail Processing Centre,Pizza Place,Colombian Restaurant,Asian Restaurant,Japanese Restaurant,Steakhouse,Speakeasy,Café,Falafel Restaurant,Deli / Bodega,Diner
3,CN Tower,Restaurant,Vietnamese Restaurant,Colombian Restaurant,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop
4,Central Bay Street,Bubble Tea Shop,Chinese Restaurant,Tea Room,Sushi Restaurant,Italian Restaurant,Fish & Chips Shop,Diner,Dumpling Restaurant,Falafel Restaurant,Fast Food Restaurant


### C. Clustering the Neighborhoods

We try with 5 clusters first and visualize

In [40]:
# set number of clusters
kclusters = 5

toronto_grp_clust = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grp_clust)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 2, 1, 2, 1, 1, 1, 1, 1, 0], dtype=int32)

In [41]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_toronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.merge(neighborhoods_venues_sorted,on='Neighborhood',how='right')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5B,Downtown Toronto,Garden District,43.657491,-79.377529,1,Coffee Shop,Sandwich Place,Lake,Bar,Greek Restaurant,Café,Other Great Outdoors,Vietnamese Restaurant,Diner,Dumpling Restaurant
1,M5C,Downtown Toronto,St. James Town,43.651734,-79.375554,1,Gay Bar,Italian Restaurant,Gastropub,Performing Arts Venue,Coffee Shop,Japanese Restaurant,Diner,Breakfast Spot,Flower Shop,Fish Market
2,M4X,Downtown Toronto,St. James Town,43.667656,-79.367326,1,Gay Bar,Italian Restaurant,Gastropub,Performing Arts Venue,Coffee Shop,Japanese Restaurant,Diner,Breakfast Spot,Flower Shop,Fish Market
3,M5G,Downtown Toronto,Central Bay Street,43.656072,-79.385653,1,Bubble Tea Shop,Chinese Restaurant,Tea Room,Sushi Restaurant,Italian Restaurant,Fish & Chips Shop,Diner,Dumpling Restaurant,Falafel Restaurant,Fast Food Restaurant
4,M6G,Downtown Toronto,Christie,43.668602,-79.420387,1,Grocery Store,Vietnamese Restaurant,Gay Bar,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop


In [42]:
toronto_merged.shape

(46, 16)

In [43]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged.Latitude, toronto_merged.Longitude, 
                                  toronto_merged.Neighborhood, toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### 4. Finding out more about the clusters

In [54]:
cluster_0 = toronto_merged[toronto_merged["Cluster Labels"]==0]
cluster_0.reset_index(drop=True,inplace=True)
cluster_0

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5K,Downtown Toronto,Toronto Dominion Centre,43.646923,-79.381626,0,Coffee Shop,Restaurant,Deli / Bodega,Gym,Fish & Chips Shop,Diner,Dumpling Restaurant,Falafel Restaurant,Fast Food Restaurant,Fish Market
1,M5S,Downtown Toronto,University of Toronto,43.663281,-79.398088,0,Food Truck,Coffee Shop,Vietnamese Restaurant,Cupcake Shop,Gastropub,Garden,French Restaurant,Food Court,Flower Shop,Fish Market
2,M5K,Downtown Toronto,Design Exchange,43.646923,-79.381626,0,Coffee Shop,Restaurant,Deli / Bodega,Gym,Fish & Chips Shop,Diner,Dumpling Restaurant,Falafel Restaurant,Fast Food Restaurant,Fish Market
3,M5S,Downtown Toronto,Harbord,43.663281,-79.398088,0,Food Truck,Coffee Shop,Vietnamese Restaurant,Cupcake Shop,Gastropub,Garden,French Restaurant,Food Court,Flower Shop,Fish Market


In [46]:
cluster_0.shape

(4, 16)

In [53]:
cluster_1 = toronto_merged[toronto_merged["Cluster Labels"]==1]
cluster_1.reset_index(drop=True,inplace=True)
cluster_1

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5B,Downtown Toronto,Garden District,43.657491,-79.377529,1,Coffee Shop,Sandwich Place,Lake,Bar,Greek Restaurant,Café,Other Great Outdoors,Vietnamese Restaurant,Diner,Dumpling Restaurant
1,M5C,Downtown Toronto,St. James Town,43.651734,-79.375554,1,Gay Bar,Italian Restaurant,Gastropub,Performing Arts Venue,Coffee Shop,Japanese Restaurant,Diner,Breakfast Spot,Flower Shop,Fish Market
2,M4X,Downtown Toronto,St. James Town,43.667656,-79.367326,1,Gay Bar,Italian Restaurant,Gastropub,Performing Arts Venue,Coffee Shop,Japanese Restaurant,Diner,Breakfast Spot,Flower Shop,Fish Market
3,M5G,Downtown Toronto,Central Bay Street,43.656072,-79.385653,1,Bubble Tea Shop,Chinese Restaurant,Tea Room,Sushi Restaurant,Italian Restaurant,Fish & Chips Shop,Diner,Dumpling Restaurant,Falafel Restaurant,Fast Food Restaurant
4,M6G,Downtown Toronto,Christie,43.668602,-79.420387,1,Grocery Store,Vietnamese Restaurant,Gay Bar,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop
5,M5H,Downtown Toronto,Richmond,43.650542,-79.384116,1,Coffee Shop,Sushi Restaurant,Japanese Restaurant,Restaurant,Vegetarian / Vegan Restaurant,Garden,Taco Place,Tea Room,Food Court,Greek Restaurant
6,M5L,Downtown Toronto,Commerce Court,43.648283,-79.37875,1,Café,American Restaurant,Breakfast Spot,Burrito Place,Fast Food Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Sushi Restaurant,Deli / Bodega
7,M5R,Central Toronto,The Annex,43.674911,-79.40394,1,American Restaurant,Pizza Place,Pub,Flower Shop,Fast Food Restaurant,Deli / Bodega,Diner,Dumpling Restaurant,Falafel Restaurant,Fish & Chips Shop
8,M6R,West Toronto,Parkdale,43.64784,-79.448388,1,BBQ Joint,Gourmet Shop,Sushi Restaurant,Café,Butcher,Fish Market,Vietnamese Restaurant,Fish & Chips Shop,Dumpling Restaurant,Falafel Restaurant
9,M6S,West Toronto,Runnymede,43.649725,-79.482692,1,Pizza Place,Smoothie Shop,Burrito Place,French Restaurant,Bar,Sushi Restaurant,Café,Falafel Restaurant,Fish & Chips Shop,Diner


In [48]:
cluster_1.shape

(26, 16)

From the dataframe above, we can deduce that venues located at cluster_1 are majority coffee shops, and pizza place.

In [52]:
cluster_2 = toronto_merged[toronto_merged["Cluster Labels"]==2]
cluster_2.reset_index(drop=True,inplace=True)
cluster_2

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5V,Downtown Toronto,CN Tower,43.640539,-79.397435,2,Restaurant,Vietnamese Restaurant,Colombian Restaurant,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop
1,M5V,Downtown Toronto,King and Spadina,43.640539,-79.397435,2,Restaurant,Vietnamese Restaurant,Colombian Restaurant,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop
2,M5V,Downtown Toronto,Railway Lands,43.640539,-79.397435,2,Restaurant,Vietnamese Restaurant,Colombian Restaurant,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop
3,M5V,Downtown Toronto,Harbourfront West,43.640539,-79.397435,2,Restaurant,Vietnamese Restaurant,Colombian Restaurant,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop
4,M5V,Downtown Toronto,Bathurst Quay,43.640539,-79.397435,2,Restaurant,Vietnamese Restaurant,Colombian Restaurant,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop
5,M5V,Downtown Toronto,South Niagara,43.640539,-79.397435,2,Restaurant,Vietnamese Restaurant,Colombian Restaurant,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop
6,M5V,Downtown Toronto,Island airport,43.640539,-79.397435,2,Restaurant,Vietnamese Restaurant,Colombian Restaurant,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop


In [50]:
cluster_2.shape

(7, 16)

From the dataframe above, it can be inferred that cluster_2 is mostly restaurants.

In [55]:
cluster_3 = toronto_merged[toronto_merged["Cluster Labels"]==3]
cluster_3.reset_index(drop=True,inplace=True)
cluster_3

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5J,Downtown Toronto,Harbourfront East,43.62375,-79.3692,3,Harbor / Marina,Vietnamese Restaurant,Gay Bar,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop
1,M5J,Downtown Toronto,Union Station,43.62375,-79.3692,3,Harbor / Marina,Vietnamese Restaurant,Gay Bar,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop
2,M5J,Downtown Toronto,Toronto Islands,43.62375,-79.3692,3,Harbor / Marina,Vietnamese Restaurant,Gay Bar,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop


In [56]:
cluster_3.shape

(3, 16)

From the dataframe, we see that cluster_3 is mostly filled with Harbors.

In [57]:
cluster_4 = toronto_merged[toronto_merged["Cluster Labels"]==4]
cluster_4.reset_index(drop=True,inplace=True)
cluster_4

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M6J,West Toronto,Little Portugal,43.648636,-79.417752,4,Park,Outdoor Sculpture,Vietnamese Restaurant,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop
1,M6P,West Toronto,High Park,43.65872,-79.46315,4,Park,Vietnamese Restaurant,Gay Bar,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop
2,M4T,Central Toronto,Moore Park,43.690328,-79.383522,4,Park,Vietnamese Restaurant,Gay Bar,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop
3,M6J,West Toronto,Trinity,43.648636,-79.417752,4,Park,Outdoor Sculpture,Vietnamese Restaurant,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop
4,M6P,West Toronto,The Junction South,43.65872,-79.46315,4,Park,Vietnamese Restaurant,Gay Bar,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop
5,M4T,Central Toronto,Summerhill East,43.690328,-79.383522,4,Park,Vietnamese Restaurant,Gay Bar,Garden,French Restaurant,Food Truck,Food Court,Flower Shop,Fish Market,Fish & Chips Shop


In [58]:
cluster_4.shape

(6, 16)

From the dataframe, we see that cluster_4 is mostly filled with Parks.