In [1]:
import pandas as pd
import numpy as np

### 1. Scrape the Wikipedia page

In [2]:
# use pd.read_html to scrape the page
# there are 3 tables, and the table I need is the first one
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]

In [3]:
# drop the row whose Borough is 'Not assigned' 
df_clean = df.drop(df[df['Borough'].str.contains('Not assigned')].index).reset_index(drop=True)

In [4]:
df_clean

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [5]:
# define a function to update the neighbourhood
def update_neighbourhood(row):
    if row['Neighbourhood'] == 'Not assigned':
        return row['Borough']
    else:
        return row['Neighbourhood']

In [6]:
df_clean['Neighbourhood'] = df_clean.apply(update_neighbourhood, axis=1)
df_clean

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [7]:
df_clean.shape

(103, 3)

### 2. Utilize the location data

**The package 'geocoder' does not work, so I download the csv file and then merge it with the dataframe.**

In [8]:
# read the csv file
geospatial_coordinates = pd.read_csv('D:/Code/Python/Coursera Capstone/Coursera_Capstone/Geospatial_Coordinates.csv')

In [9]:
# merge the dataframe
df_final = pd.merge(df_clean, geospatial_coordinates, on=['Postal Code'])

In [10]:
# change name of the first column
df_final.rename(columns={'Postal Code': 'PostalCode'}, inplace=True)

In [11]:
df_final

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


### 3. Get more info of neighbourhoods by using FourSquare

In [12]:
from geopy.geocoders import Nominatim
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import requests
from pandas.io.json import json_normalize
import json

In [13]:
toronto_data = df_final[df_final['Borough'].str.contains('Toronto')].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [14]:
# Make a list of all neighbourhoods in the dataframe
neighbourhood_lists = []
for i in toronto_data.Neighbourhood:
    neighbourhood_lists += i.split(', ')

In [15]:
print(len(neighbourhood_lists))
print(len(set(neighbourhood_lists)))
neighbourhood_lists = set(neighbourhood_lists)

78
76


In [16]:
toronto_data.Borough.value_counts()

Downtown Toronto    19
Central Toronto      9
West Toronto         6
East Toronto         5
Name: Borough, dtype: int64

**Get each neighbourhood's latitude and longitude, and delete the neighbourhood whose geographical coordinates are unknown.**

In [17]:
neighbourhood_latitude = []
neighbourhood_longitude = []
delete_list = []
for neighbourhood in neighbourhood_lists:
    address = '{}, Toronto, Ontario'.format(neighbourhood)
    geolocator = Nominatim(user_agent="ny_explorer")
    location = geolocator.geocode(address)
    try:
        neighbourhood_latitude.append(location.latitude)
        neighbourhood_longitude.append(location.longitude)
    except:
        delete_list.append(neighbourhood)

In [18]:
# delete the duplicates
neighbourhood_lists = list(neighbourhood_lists - set(delete_list))

In [19]:
toronto_neighbourhood = pd.DataFrame({'Neighbourhood': neighbourhood_lists,
                                      'Latitude': neighbourhood_latitude, 
                                      'Longitude': neighbourhood_longitude})
toronto_neighbourhood.head()

Unnamed: 0,Neighbourhood,Latitude,Longitude
0,Cabbagetown,43.623054,-79.394316
1,Studio District,43.671024,-79.296712
2,Regent Park,43.664473,-79.366986
3,Berczy Park,43.649585,-79.390683
4,Exhibition Place,43.64494,-79.478313


In [20]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [21]:
# create map of Toronto using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, neighborhood in zip(toronto_neighbourhood['Latitude'], 
                                   toronto_neighbourhood['Longitude'], 
                                   toronto_neighbourhood['Neighbourhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

**Define Foursquare Credentials and Version**

In [22]:
CLIENT_ID = '1QNBWZXCKW43RHFLX4GYWWZXCF2TBX423SI5E0L3WFDXUJPH' # your Foursquare ID
CLIENT_SECRET = 'NAZMCD34B1AXIWR3BAH1GCHDZF5VGCG3JFN1AEMWMKJ3Y3L5' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

**Get nearby venues of each neighbourhood.**

In [23]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [24]:
toronto_venues = getNearbyVenues(toronto_neighbourhood['Neighbourhood'], 
                                 toronto_neighbourhood['Latitude'], 
                                 toronto_neighbourhood['Longitude'])

**The size of resulting dataframe.**

In [25]:
print(toronto_venues.shape)
toronto_venues.head()

(3580, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Cabbagetown,43.623054,-79.394316,Bestival Toronto,43.62314,-79.391107,Music Venue
1,Cabbagetown,43.623054,-79.394316,The Tree of Knowledge,43.619602,-79.39423,Park
2,Cabbagetown,43.623054,-79.394316,Hanlan's Point - mooring wall,43.62024,-79.390266,Harbor / Marina
3,Cabbagetown,43.623054,-79.394316,The Mermaid Cafe,43.619264,-79.391311,Café
4,Studio District,43.671024,-79.296712,Kew Gardens,43.669038,-79.298538,Park


**The number of venue categorise in the toronto.**

In [26]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 287 uniques categories.


In [27]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,100,100,100,100,100,100
Bathurst Quay,13,13,13,13,13,13
Berczy Park,100,100,100,100,100,100
Brockton,100,100,100,100,100,100
CN Tower,100,100,100,100,100,100
...,...,...,...,...,...,...
Underground city,39,39,39,39,39,39
Union Station,3,3,3,3,3,3
University of Toronto,67,67,67,67,67,67
Victoria Hotel,57,57,57,57,57,57


### 4. Analyze Each Neighbourhood

In [28]:
# ont hot encoding
toronto_onehot = pd.get_dummies(toronto_venues['Venue Category'], prefix='', prefix_sep='')

# add neighbourhood column back to the dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighborhood']

# move neighbourhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Service,American Restaurant,Animal Shelter,Antique Shop,...,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Cabbagetown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Cabbagetown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Cabbagetown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Cabbagetown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Studio District,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


**Group rows by neighbourhood and by taking the mean of the frequency of occurrence of each category.**

In [29]:
toronto_grouped = toronto_onehot.groupby(['Neighbourhood']).mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Service,American Restaurant,Animal Shelter,Antique Shop,...,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Adelaide,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,...,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0
1,Bathurst Quay,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.04,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02
3,Brockton,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0
4,CN Tower,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,...,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0


**Get top-N common venues of each neighbourhood**

In [30]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [108]:
num_top_venues = 9

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue
0,Adelaide,Coffee Shop,Hotel,Café,American Restaurant,Restaurant,Japanese Restaurant,Seafood Restaurant,Salad Place,Asian Restaurant
1,Bathurst Quay,Sushi Restaurant,Skating Rink,Restaurant,Liquor Store,Gas Station,Fast Food Restaurant,Persian Restaurant,Deli / Bodega,Pub
2,Berczy Park,Coffee Shop,Café,Vegetarian / Vegan Restaurant,Cosmetics Shop,Clothing Store,Pizza Place,Japanese Restaurant,Yoga Studio,Bookstore
3,Brockton,Coffee Shop,Clothing Store,Café,Hotel,Middle Eastern Restaurant,Japanese Restaurant,Diner,Burger Joint,Pizza Place
4,CN Tower,Coffee Shop,Japanese Restaurant,Café,Cosmetics Shop,American Restaurant,Restaurant,Gym,Seafood Restaurant,Gastropub


### 5. Cluster Neighbourhoods with K-means

In [109]:
from sklearn.cluster import KMeans

In [110]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([4, 4, 4, 4, 4, 2, 4, 4, 4, 4])

In [111]:
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_neighbourhood

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighbourhood
toronto_merged = toronto_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue
0,Cabbagetown,43.623054,-79.394316,2,Café,Music Venue,Harbor / Marina,Park,Yoga Studio,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space
1,Studio District,43.671024,-79.296712,4,Beach,Japanese Restaurant,Breakfast Spot,Thai Restaurant,Pizza Place,Pub,Bar,Park,Martial Arts School
2,Regent Park,43.664473,-79.366986,4,Restaurant,Café,Coffee Shop,Indian Restaurant,Pub,Bakery,Japanese Restaurant,Italian Restaurant,Diner
3,Berczy Park,43.649585,-79.390683,4,Coffee Shop,Café,Vegetarian / Vegan Restaurant,Cosmetics Shop,Clothing Store,Pizza Place,Japanese Restaurant,Yoga Studio,Bookstore
4,Exhibition Place,43.64494,-79.478313,2,Park,Pilates Studio,Dance Studio,Bus Line,Yoga Studio,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant


In [112]:
def get_borough(row):
    for index, row_temp in toronto_data.iterrows():
        if row['Neighbourhood'] in row_temp['Neighbourhood'].split(', '):
            return row_temp['Borough']
    return 'Borough_unknown'

In [113]:
toronto_merged['Borough'] = toronto_merged.apply(get_borough, axis=1)

In [114]:
new_columns = [toronto_merged.columns[-1]] + list(toronto_merged.columns[:-1])
toronto_merged = toronto_merged[new_columns]
toronto_merged.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue
0,Downtown Toronto,Cabbagetown,43.623054,-79.394316,2,Café,Music Venue,Harbor / Marina,Park,Yoga Studio,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space
1,East Toronto,Studio District,43.671024,-79.296712,4,Beach,Japanese Restaurant,Breakfast Spot,Thai Restaurant,Pizza Place,Pub,Bar,Park,Martial Arts School
2,Downtown Toronto,Regent Park,43.664473,-79.366986,4,Restaurant,Café,Coffee Shop,Indian Restaurant,Pub,Bakery,Japanese Restaurant,Italian Restaurant,Diner
3,Downtown Toronto,Berczy Park,43.649585,-79.390683,4,Coffee Shop,Café,Vegetarian / Vegan Restaurant,Cosmetics Shop,Clothing Store,Pizza Place,Japanese Restaurant,Yoga Studio,Bookstore
4,West Toronto,Exhibition Place,43.64494,-79.478313,2,Park,Pilates Studio,Dance Studio,Bus Line,Yoga Studio,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant


In [115]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### 6. Check Clusters

**Obviously, the size of data set is not large enough, which leads to the poor result of KMeans**  
**The group with the maximum size is about food and coffee, which I think can make some sense.**

**In the Cluster 0, only one neighbourhood which is near to the college.**

In [116]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue
62,Ryerson,Skating Rink,Trail,Yoga Studio,Egyptian Restaurant,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant


**In the Cluster 1, only one neighbourhood which is far away from the center.**

In [117]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue
59,Harbourfront East,Beer Store,Yoga Studio,Fast Food Restaurant,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market


**In the Cluster 2, mix of the entertainment, park and food.**

In [118]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue
0,Cabbagetown,Café,Music Venue,Harbor / Marina,Park,Yoga Studio,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space
4,Exhibition Place,Park,Pilates Studio,Dance Studio,Bus Line,Yoga Studio,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant
13,Union Station,Playground,Bike Trail,Park,Fast Food Restaurant,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant
22,Grange Park,Park,Mexican Restaurant,French Restaurant,Pizza Place,Pub,Café,Liquor Store,BBQ Joint,Coffee Shop
46,Swansea,Bank,Park,Playground,Mediterranean Restaurant,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant


**In the Cluster 3, the majority is about the gym and park.**

In [119]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue
42,The Junction South,Gym,Park,Dumpling Restaurant,Egyptian Restaurant,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant


**In the Cluster 4, main group which is about food and coffee.**

In [120]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue
1,Studio District,Beach,Japanese Restaurant,Breakfast Spot,Thai Restaurant,Pizza Place,Pub,Bar,Park,Martial Arts School
2,Regent Park,Restaurant,Café,Coffee Shop,Indian Restaurant,Pub,Bakery,Japanese Restaurant,Italian Restaurant,Diner
3,Berczy Park,Coffee Shop,Café,Vegetarian / Vegan Restaurant,Cosmetics Shop,Clothing Store,Pizza Place,Japanese Restaurant,Yoga Studio,Bookstore
5,Davisville,Coffee Shop,Restaurant,Thai Restaurant,Pet Store,Indian Restaurant,Electronics Store,Auto Dealership,Grocery Store,Pub
6,Trinity,Coffee Shop,Restaurant,Café,Seafood Restaurant,Italian Restaurant,Japanese Restaurant,Breakfast Spot,Cocktail Bar,Beer Bar
...,...,...,...,...,...,...,...,...,...,...
65,Riverdale,Indian Restaurant,Grocery Store,Café,Platform,Asian Restaurant,Snack Place,Bus Stop,Brewery,Sandwich Place
66,South Hill,Bar,Café,Coffee Shop,Bakery,Restaurant,Cocktail Bar,Korean Restaurant,Italian Restaurant,Breakfast Spot
67,Underground city,Coffee Shop,Café,Bakery,Pizza Place,Bank,Dance Studio,Dessert Shop,Pub,Pilates Studio
68,Little Portugal,Italian Restaurant,Café,Bar,Thai Restaurant,Coffee Shop,Bakery,Mexican Restaurant,Grocery Store,Antique Shop
