# Week 3 project

In [1]:
!pip install folium

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/fd/a0/ccb3094026649cda4acd55bf2c3822bb8c277eb11446d13d384e5be35257/folium-0.10.1-py2.py3-none-any.whl (91kB)
[K     |████████████████████████████████| 92kB 8.2MB/s eta 0:00:011
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/81/6d/31c83485189a2521a75b4130f1fee5364f772a0375f81afff619004e5237/branca-0.4.0-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.0 folium-0.10.1


In [124]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import folium
import json
import geopy.distance
import numpy as np
from sklearn.cluster import KMeans

## Get data from website 

In [3]:
# Download the Wikipedia page listing Toronto ("M") postal codes.
# A timeout keeps the cell from hanging forever, and raise_for_status()
# stops the notebook early instead of parsing an HTTP error page.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()

## Parse response data and prepare dataframe

In [4]:
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find_all(class_='wikitable')[0]  # first table carrying the 'wikitable' class

# One list of stripped cell texts per <tr>; a row without any <td> cells
# contributes an empty list.
data = []
for tr in table.find_all("tr"):
    cells = tr.find_all("td")
    data.append([td.get_text().strip() for td in cells])

## Assign data to dataframe and do cleaning

In [5]:
postcodes_raw = pd.DataFrame(data, columns=['PostalCode', 'Borough', 'Neighbourhood'])

# Keep only rows that have a borough and whose borough is not the
# 'Not assigned' placeholder.
has_borough = postcodes_raw['Borough'].notnull() & (postcodes_raw['Borough'] != 'Not assigned')
df = postcodes_raw[has_borough]

df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor


## Join neighbourhood to post code and borough

In [6]:
# Collapse all neighbourhoods that share a (PostalCode, Borough) pair into a
# single comma-separated string.
neighbourhoods_by_code = df.groupby(['PostalCode', 'Borough'])['Neighbourhood'].apply(', '.join)
df_joined = neighbourhoods_by_code.reset_index()
df_joined.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [7]:
# (rows, columns) after grouping: one row per (PostalCode, Borough) pair.
df_joined.shape

(103, 3)

## Load geo data 

In [8]:
# Download the geospatial file into the working directory; it is read as
# 'Geospatial_data' by the next cell. -q silences wget's progress output.
!wget -q https://cocl.us/Geospatial_data

In [9]:
# Load the downloaded file as CSV and preview the first rows.
df_location = pd.read_csv('Geospatial_data')
df_location.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Add geo data to the original data frame using the postal code

In [10]:
# Index the coordinates by postal code so they can be looked up with the
# 'PostalCode' column of df_joined.
geo_by_postcode = df_location.set_index('Postal Code')
df_combined = df_joined.join(geo_by_postcode, on='PostalCode')
df_combined.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## Fetch venue data from foursquare

### Select Toronto

In [12]:
# Keep only boroughs whose name contains 'Toronto'.
in_toronto = df_combined['Borough'].str.contains('Toronto')
df_toronto = df_combined[in_toronto]
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [17]:
# (rows, columns) of the Toronto-only subset.
df_toronto.shape

(39, 5)

### Check distance between postcodes

In [85]:
import geopy.distance

In [103]:
# Pairwise distance in km between every pair of postal-code coordinates.
coordinates = list(zip(df_toronto['Latitude'], df_toronto['Longitude']))

distance_matrix = [
    [geopy.distance.distance(origin, target).km for target in coordinates]
    for origin in coordinates
]

df_distance = pd.DataFrame(data=distance_matrix, columns=df_toronto['PostalCode'].to_list())
# Zero distances (a postcode compared with itself) become NaN so that they
# do not enter later statistics.
df_distance = df_distance.replace(0, np.nan)

### Analyze distance

In [106]:
# Preview only the first rows instead of dumping the full distance matrix.
df_distance.head(10)

Unnamed: 0,M4E,M4K,M4L,M4M,M4N,M4P,M4R,M4S,M4T,M4V,...,M5X,M6G,M6H,M6J,M6K,M6P,M6R,M6S,M7A,M7Y
0,,4.783559,1.993202,4.291486,9.619156,8.815305,10.063112,8.322152,7.414142,8.701328,...,7.839071,10.473616,12.062312,10.698086,11.753554,13.946929,13.518335,15.683039,7.934935,2.753349
1,4.783559,,3.177377,2.403919,6.139873,4.79485,5.868387,4.034848,2.734048,3.933596,...,4.225191,5.783385,7.357686,6.484299,7.752742,9.295478,9.06127,11.111572,3.567574,3.097046
2,1.993202,3.177377,,2.29975,8.822699,7.734906,8.906815,7.089083,5.91016,7.08163,...,5.846215,8.629165,10.217456,8.723542,9.762329,12.06114,11.56999,13.759062,6.008462,0.846194
3,4.291486,2.403919,2.29975,,8.532692,7.124297,8.110172,6.29857,4.769478,5.62674,...,3.556822,6.678252,8.241071,6.488309,7.477977,9.99189,9.383259,11.61182,3.929919,1.602413
4,9.619156,6.139873,8.822699,8.532692,,1.70032,1.955339,2.632796,4.29566,4.711171,...,8.858646,7.044675,7.846785,9.242365,10.616427,9.589496,10.33484,11.472995,7.302015,9.054092
5,8.815305,4.79485,7.734906,7.124297,1.70032,,1.281433,0.943108,2.636836,3.032251,...,7.175008,5.464122,6.421939,7.586354,8.972707,8.272369,8.869405,10.196286,5.605548,7.841978
6,10.063112,5.868387,8.906815,8.110172,1.955339,1.281433,,1.833697,3.393753,3.250699,...,7.674536,5.272043,5.937171,7.580243,8.912698,7.641361,8.434622,9.51887,6.040353,8.955604
7,8.322152,4.034848,7.089083,6.29857,2.632796,0.943108,1.833697,,1.700517,2.187359,...,6.232436,4.727545,5.829628,6.745172,8.142737,7.749258,8.215747,9.688056,4.669344,7.122522
8,7.414142,2.734048,5.91016,4.769478,4.29566,2.636836,3.393753,1.700517,,1.406302,...,4.572009,3.879378,5.285242,5.488191,6.892903,7.277337,7.42847,9.195643,3.072917,5.793444
9,8.701328,3.933596,7.08163,5.62674,4.711171,3.032251,3.250699,2.187359,1.406302,,...,4.456843,2.609502,3.914871,4.561637,5.956454,5.901752,6.157602,7.830734,2.810844,6.854314


In [108]:
# Show mean distance (km) from each postcode to the others; the zero
# self-distances were replaced with NaN above, so they are left out of the mean.
df_distance.mean()

M4E    8.493136
M4K    4.823965
M4L    6.756491
M4M    5.159432
M4N    7.533915
M4P    6.064550
M4R    6.414585
M4S    5.372972
M4T    4.405812
M4V    4.278318
M4W    3.976880
M4X    3.881895
M4Y    3.509186
M5A    4.225477
M5B    3.554285
M5C    3.753176
M5E    4.137577
M5G    3.481975
M5H    3.664410
M5J    4.273168
M5K    3.830208
M5L    3.794566
M5N    6.364067
M5P    5.125036
M5R    3.876949
M5S    3.619454
M5T    3.780299
M5V    5.303702
M5W    3.992388
M5X    3.759525
M6G    4.434513
M6H    5.451730
M6J    4.648310
M6K    5.615818
M6P    6.875563
M6R    6.559978
M6S    8.440578
M7A    3.465936
M7Y    6.345275
dtype: float64

### Conclusion: A search radius of 1000 meters should be fine

In [46]:
# The code was removed by Watson Studio for sharing.

### Fetch data from foursquare

In [109]:
# Collect (PostalCode, venue category) pairs from Foursquare for every
# Toronto postal code.
# NOTE(review): CLIENT_ID, CLIENT_SECRET, VERSION and url are not defined in
# any visible cell; presumably they come from the cell that was removed for
# sharing. Confirm before running on a fresh kernel.
toronto_venues_list = []

for index, item in df_toronto.iterrows():
    print(item['PostalCode'])

    params = dict(
        client_id=CLIENT_ID,
        client_secret=CLIENT_SECRET,
        v=VERSION,
        ll=f"{item['Latitude']},{item['Longitude']}",
        radius=1000,
        limit=100
    )
    # Fail fast on network stalls and HTTP errors instead of hanging or
    # hitting a confusing KeyError on an error payload.
    venue_response = requests.get(url, params=params, timeout=30)
    venue_response.raise_for_status()
    results = venue_response.json()

    venues = results['response']['groups'][0]['items']

    for venue in venues:
        categories = venue['venue']['categories']
        if not categories:
            # A venue without any category cannot be classified; skip it
            # rather than raising IndexError on categories[0].
            continue
        toronto_venues_list.append((item['PostalCode'], categories[0]['name']))

df_toronto_venues = pd.DataFrame(data=toronto_venues_list, columns=['PostalCode', 'Venue Category'])
df_toronto_venues.head()


M4E
M4K
M4L
M4M
M4N
M4P
M4R
M4S
M4T
M4V
M4W
M4X
M4Y
M5A
M5B
M5C
M5E
M5G
M5H
M5J
M5K
M5L
M5N
M5P
M5R
M5S
M5T
M5V
M5W
M5X
M6G
M6H
M6J
M6K
M6P
M6R
M6S
M7A
M7Y


Unnamed: 0,PostalCode,Venue Category
0,M4E,Trail
1,M4E,Vegetarian / Vegan Restaurant
2,M4E,Gastropub
3,M4E,Indie Movie Theater
4,M4E,Bakery


In [110]:
# (rows, columns) of the venue table: one row per collected venue.
df_toronto_venues.shape

(3184, 2)

In [111]:
# Number of venue rows collected for each postal code.
df_toronto_venues.groupby('PostalCode').count()

Unnamed: 0_level_0,Venue Category
PostalCode,Unnamed: 1_level_1
M4E,76
M4K,100
M4L,81
M4M,100
M4N,8
M4P,100
M4R,44
M4S,100
M4T,62
M4V,79


### Find unique categories

In [112]:
# Count of distinct values in the 'Venue Category' column.
len(df_toronto_venues['Venue Category'].unique())

272

### Analyze by Postal code

In [113]:
# one hot encoding
torento_onehot = pd.get_dummies(df_toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add postal code
torento_onehot['PostalCode'] = df_toronto_venues['PostalCode']

# move column position
fixed_columns = [torento_onehot.columns[-1]] + list(torento_onehot.columns[:-1])
torento_onehot = torento_onehot[fixed_columns]

torento_onehot.head()

Unnamed: 0,PostalCode,Accessories Store,Afghan Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Yoga Studio,Zoo
0,M4E,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M4E,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,M4E,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M4E,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M4E,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [114]:
# (rows, columns) of the one-hot table: one row per venue, with the
# category indicator columns plus 'PostalCode'.
torento_onehot.shape

(3184, 273)

### Group by postal code using the mean frequency of each category

In [115]:
# Per postal code, the mean of each indicator column, i.e. the share of that
# postal code's venues falling in each category. 'PostalCode' stays a column.
toronto_grouped = torento_onehot.groupby('PostalCode', as_index=False).mean()
toronto_grouped.head()

Unnamed: 0,PostalCode,Accessories Store,Afghan Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Yoga Studio,Zoo
0,M4E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.013158,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M4K,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0
2,M4L,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0
3,M4M,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.01,0.0,...,0.01,0.0,0.0,0.04,0.0,0.01,0.0,0.0,0.01,0.0
4,M4N,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [116]:
# (rows, columns) after grouping: one row per postal code.
toronto_grouped.shape

(39, 273)

## Assign top venue categories to original data

In [119]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of ``row``.

    The first element of ``row`` is skipped, the remaining entries are
    ordered by value from largest to smallest, and the index labels of the
    first ``num_top_venues`` of them are returned as an array. If fewer
    entries remain than requested, all of them are returned.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [122]:
num_top_venues = 10

# Ordinal suffixes for ranks 1-3; every later rank is labelled with 'th'.
indicators = ['st', 'nd', 'rd']

# Create column names according to the number of top venues.
# An explicit bounds check replaces the former bare `except:`, which would
# also have hidden unrelated errors.
columns = ['PostalCode']
for ind in range(num_top_venues):
    if ind < len(indicators):
        columns.append('{}{} Most Common Venue'.format(ind + 1, indicators[ind]))
    else:
        columns.append('{}th Most Common Venue'.format(ind + 1))

# Create a new dataframe with one row per postal code.
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['PostalCode'] = toronto_grouped['PostalCode']

# Fill every row with the top categories of the matching toronto_grouped row.
for ind in range(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,PostalCode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,Pub,Coffee Shop,Pizza Place,Park,Japanese Restaurant,Breakfast Spot,Beach,Bakery,Caribbean Restaurant,Sandwich Place
1,M4K,Greek Restaurant,Coffee Shop,Café,Pub,Pizza Place,Fast Food Restaurant,Ice Cream Shop,Italian Restaurant,Bakery,Diner
2,M4L,Indian Restaurant,Grocery Store,Coffee Shop,Beach,Café,Gym,Sandwich Place,Brewery,Intersection,Burrito Place
3,M4M,Coffee Shop,Bar,Café,Bakery,American Restaurant,Brewery,Italian Restaurant,Vietnamese Restaurant,French Restaurant,Diner
4,M4N,College Gym,Café,Park,Bookstore,Trail,Gym / Fitness Center,College Quad,Coffee Shop,Eastern European Restaurant,Electronics Store


# Use K-Means to cluster data

In [127]:
kclusters = 5

# Feature matrix: every column except the postal-code key.
# `columns=` replaces the positional axis argument (`drop('PostalCode', 1)`),
# which newer pandas versions no longer accept.
toronto_grouped_clustering = toronto_grouped.drop(columns='PostalCode')

# Run k-means clustering. n_init is given explicitly so the number of
# restarts does not depend on the installed scikit-learn version's default;
# random_state makes the labels reproducible.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(toronto_grouped_clustering)

# Check cluster labels generated for each row in the dataframe.
kmeans.labels_

array([1, 1, 1, 1, 2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       3, 0, 1, 1, 1, 4, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0], dtype=int32)

Join data

In [128]:
# Add clustering labels as the first column. Any 'Cluster Labels' column left
# over from a previous run is dropped first, so re-running this cell does not
# fail with "cannot insert Cluster Labels, already exists".
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Attach the cluster label and top venue categories to the Toronto rows
# (which carry borough, neighbourhood and latitude/longitude) by postal code.
toronto_merged = df_toronto.join(neighborhoods_venues_sorted.set_index('PostalCode'), on='PostalCode')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,M4E,East Toronto,The Beaches,43.676357,-79.293031,1,Pub,Coffee Shop,Pizza Place,Park,Japanese Restaurant,Breakfast Spot,Beach,Bakery,Caribbean Restaurant,Sandwich Place
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,1,Greek Restaurant,Coffee Shop,Café,Pub,Pizza Place,Fast Food Restaurant,Ice Cream Shop,Italian Restaurant,Bakery,Diner
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,1,Indian Restaurant,Grocery Store,Coffee Shop,Beach,Café,Gym,Sandwich Place,Brewery,Intersection,Burrito Place
43,M4M,East Toronto,Studio District,43.659526,-79.340923,1,Coffee Shop,Bar,Café,Bakery,American Restaurant,Brewery,Italian Restaurant,Vietnamese Restaurant,French Restaurant,Diner
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,2,College Gym,Café,Park,Bookstore,Trail,Gym / Fitness Center,College Quad,Coffee Shop,Eastern European Restaurant,Electronics Store


## Create map

In [132]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [134]:
# create map
map_clusters = folium.Map(location=['43.6532', '-79.3832'], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['PostalCode'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine Clusters

Cluster 1

In [145]:
# Rows with k-means label 0. Columns by position: the first two, then
# everything from position 6 onward.
cluster_rows = toronto_merged['Cluster Labels'] == 0
summary_columns = toronto_merged.columns[[0, 1] + list(range(6, toronto_merged.shape[1]))]
toronto_merged.loc[cluster_rows, summary_columns]

Unnamed: 0,PostalCode,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
45,M4P,Central Toronto,Coffee Shop,Italian Restaurant,Café,Pizza Place,Dessert Shop,Fast Food Restaurant,Gym,Sushi Restaurant,Pub,Restaurant
46,M4R,Central Toronto,Coffee Shop,Park,Italian Restaurant,Mexican Restaurant,Diner,Sporting Goods Shop,Skating Rink,Café,Chinese Restaurant,Restaurant
47,M4S,Central Toronto,Coffee Shop,Italian Restaurant,Sushi Restaurant,Café,Dessert Shop,Pizza Place,Gym,Middle Eastern Restaurant,Restaurant,Pub
48,M4T,Central Toronto,Coffee Shop,Italian Restaurant,Grocery Store,Park,Gym,Restaurant,Sushi Restaurant,Thai Restaurant,Bagel Shop,Pizza Place
49,M4V,Central Toronto,Coffee Shop,Sushi Restaurant,Park,Thai Restaurant,Italian Restaurant,Restaurant,Café,Liquor Store,Gym,Pub
50,M4W,Downtown Toronto,Coffee Shop,Grocery Store,Park,Hostel,Athletics & Sports,Juice Bar,Office,Japanese Restaurant,Filipino Restaurant,Bank
53,M5A,Downtown Toronto,Coffee Shop,Café,Theater,Park,Diner,Pub,Breakfast Spot,Bakery,Restaurant,Gastropub
64,M5P,Central Toronto,Park,Coffee Shop,Café,Liquor Store,Burger Joint,Bank,Bakery,Japanese Restaurant,Sushi Restaurant,Trail
76,M6H,West Toronto,Café,Park,Coffee Shop,Sushi Restaurant,Bar,Italian Restaurant,Portuguese Restaurant,Bakery,Restaurant,Brewery
84,M6S,West Toronto,Coffee Shop,Café,Pizza Place,Bakery,Italian Restaurant,Sushi Restaurant,Pub,Park,Falafel Restaurant,Gastropub


Cluster 2

In [146]:
# Rows with k-means label 1. Columns by position: the first two, then
# everything from position 6 onward.
cluster_rows = toronto_merged['Cluster Labels'] == 1
summary_columns = toronto_merged.columns[[0, 1] + list(range(6, toronto_merged.shape[1]))]
toronto_merged.loc[cluster_rows, summary_columns]

Unnamed: 0,PostalCode,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,M4E,East Toronto,Pub,Coffee Shop,Pizza Place,Park,Japanese Restaurant,Breakfast Spot,Beach,Bakery,Caribbean Restaurant,Sandwich Place
41,M4K,East Toronto,Greek Restaurant,Coffee Shop,Café,Pub,Pizza Place,Fast Food Restaurant,Ice Cream Shop,Italian Restaurant,Bakery,Diner
42,M4L,East Toronto,Indian Restaurant,Grocery Store,Coffee Shop,Beach,Café,Gym,Sandwich Place,Brewery,Intersection,Burrito Place
43,M4M,East Toronto,Coffee Shop,Bar,Café,Bakery,American Restaurant,Brewery,Italian Restaurant,Vietnamese Restaurant,French Restaurant,Diner
51,M4X,Downtown Toronto,Diner,Park,Gastropub,Restaurant,Café,Japanese Restaurant,Taiwanese Restaurant,Thai Restaurant,Pub,Italian Restaurant
52,M4Y,Downtown Toronto,Coffee Shop,Japanese Restaurant,Burger Joint,Restaurant,Café,Park,Hotel,Bubble Tea Shop,Sandwich Place,Sushi Restaurant
54,M5B,Downtown Toronto,Coffee Shop,Clothing Store,Restaurant,Middle Eastern Restaurant,Italian Restaurant,Diner,Japanese Restaurant,Electronics Store,Sushi Restaurant,Bookstore
55,M5C,Downtown Toronto,Coffee Shop,Café,Restaurant,Hotel,Bakery,Breakfast Spot,Italian Restaurant,Gym,Seafood Restaurant,Cosmetics Shop
56,M5E,Downtown Toronto,Coffee Shop,Café,Hotel,Restaurant,Japanese Restaurant,Beer Bar,Bakery,Breakfast Spot,Pub,BBQ Joint
57,M5G,Downtown Toronto,Coffee Shop,Japanese Restaurant,Park,Italian Restaurant,Clothing Store,Art Gallery,Ramen Restaurant,Theater,Thai Restaurant,Plaza


Cluster 3

In [147]:
# Rows with k-means label 2. Columns by position: the first two, then
# everything from position 6 onward.
cluster_rows = toronto_merged['Cluster Labels'] == 2
summary_columns = toronto_merged.columns[[0, 1] + list(range(6, toronto_merged.shape[1]))]
toronto_merged.loc[cluster_rows, summary_columns]

Unnamed: 0,PostalCode,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
44,M4N,Central Toronto,College Gym,Café,Park,Bookstore,Trail,Gym / Fitness Center,College Quad,Coffee Shop,Eastern European Restaurant,Electronics Store


Cluster 4

In [148]:
# Rows with k-means label 3. Columns by position: the first two, then
# everything from position 6 onward.
cluster_rows = toronto_merged['Cluster Labels'] == 3
summary_columns = toronto_merged.columns[[0, 1] + list(range(6, toronto_merged.shape[1]))]
toronto_merged.loc[cluster_rows, summary_columns]

Unnamed: 0,PostalCode,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
63,M5N,Central Toronto,Sushi Restaurant,Spa,Pharmacy,Italian Restaurant,Bank,Coffee Shop,Japanese Restaurant,Asian Restaurant,Gastropub,Bakery


Cluster 5

In [149]:
# Rows with k-means label 4. Columns by position: the first two, then
# everything from position 6 onward.
cluster_rows = toronto_merged['Cluster Labels'] == 4
summary_columns = toronto_merged.columns[[0, 1] + list(range(6, toronto_merged.shape[1]))]
toronto_merged.loc[cluster_rows, summary_columns]

Unnamed: 0,PostalCode,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
68,M5V,Downtown Toronto,Harbor / Marina,Coffee Shop,Scenic Lookout,Track,Dog Run,Airport,Airport Lounge,Sculpture Garden,Dance Studio,Café
