In [1]:
#import standard libraries
import pandas as pd
import numpy as np

#import urllib and beautifulsoup for webcrawler
import urllib.request
from bs4 import BeautifulSoup

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


In [2]:
#define the website to scrape 
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
page = urllib.request.urlopen(url)

In [3]:
#load the html
soup = BeautifulSoup(page, "lxml")

#locate the table we want to retrieve 
tables = soup.find_all("table")
table = soup.find('table', class_ = 'wikitable sortable')

In [4]:
#define empty arrays
A = []
B = []
C = []

#loop through table and store columns in arrays
for row in table.findAll('tr'):
    cells = row.findAll('td')
    if len(cells) == 3:
        A.append(cells[0].find(text=True))
        B.append(cells[1].find(text=True))
        C.append(cells[2].find(text=True))

#create dataframe and transfer data        
df = pd.DataFrame(A,columns=['Postal Code'])
df['Borough'] = B
df['Neighborhood'] = C

In [5]:
#drop rows with columns 'not assigned'
missing = df[df['Borough']=='Not assigned\n'].index
missing
df.drop(missing, inplace = True)

In [12]:
#reset the indices 
df.reset_index(drop=True,inplace = True)
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A\n,North York\n,Parkwoods\n
1,M4A\n,North York\n,Victoria Village\n
2,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"
3,M6A\n,North York\n,"Lawrence Manor, Lawrence Heights\n"
4,M7A\n,Downtown Toronto\n,"Queen's Park, Ontario Provincial Government\n"
...,...,...,...
98,M8X\n,Etobicoke\n,"The Kingsway, Montgomery Road, Old Mill North\n"
99,M4Y\n,Downtown Toronto\n,Church and Wellesley\n
100,M7Y\n,East Toronto\n,"Business reply mail Processing Centre, South C..."
101,M8Y\n,Etobicoke\n,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [6]:
#check for the unique number of postal codes 
len(df['Postal Code'].unique())

103

In [7]:
#check the shape of the dataframe to see if there are any duplicate area codes
df.shape

(103, 3)

In [8]:
#load in the neighborhood coordinate data
coords = pd.read_csv('http://cocl.us/Geospatial_data')
coords.shape

(103, 3)

In [9]:
df['Postal Code'] = df['Postal Code'].str.strip()
coords['Postal Code'] = coords['Postal Code'].str.strip()
mdf = pd.merge(df,coords, on='Postal Code')
mdf

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York\n,Parkwoods\n,43.753259,-79.329656
1,M4A,North York\n,Victoria Village\n,43.725882,-79.315572
2,M5A,Downtown Toronto\n,"Regent Park, Harbourfront\n",43.654260,-79.360636
3,M6A,North York\n,"Lawrence Manor, Lawrence Heights\n",43.718518,-79.464763
4,M7A,Downtown Toronto\n,"Queen's Park, Ontario Provincial Government\n",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke\n,"The Kingsway, Montgomery Road, Old Mill North\n",43.653654,-79.506944
99,M4Y,Downtown Toronto\n,Church and Wellesley\n,43.665860,-79.383160
100,M7Y,East Toronto\n,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke\n,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


In [21]:
#get the latitude and longitude values for toronto
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [25]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(mdf['Latitude'], mdf['Longitude'], mdf['Borough'], mdf['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [26]:
#initialize credentials for Foursquare API
CLIENT_ID = '0FJGT5DZROG0WMMK3SOGFBSFJDFR3KFYSBKSBWKWRM3YDVLZ' #Foursquare ID
CLIENT_SECRET = '0T4C2N200OQD1UGPME0EVBA1BVTNEPDOHWB41YH33WTVQVGL' #Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [28]:
#define a function to get the name, category, and location of 100 venues within 500 meters of each neighborhood
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [29]:
#get venue data for venues in Toronto 
toronto_venues = getNearbyVenues(names=mdf['Neighborhood'],
                                   latitudes=mdf['Latitude'],
                                   longitudes=mdf['Longitude']
                                  )

Parkwoods

Victoria Village

Regent Park, Harbourfront

Lawrence Manor, Lawrence Heights

Queen's Park, Ontario Provincial Government

Islington Avenue, Humber Valley Village

Malvern, Rouge

Don Mills

Parkview Hill, Woodbine Gardens

Garden District, Ryerson

Glencairn

West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale

Rouge Hill, Port Union, Highland Creek

Don Mills

Woodbine Heights

St. James Town

Humewood-Cedarvale

Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood

Guildwood, Morningside, West Hill

The Beaches

Berczy Park

Caledonia-Fairbanks

Woburn

Leaside

Central Bay Street

Christie

Cedarbrae

Hillcrest Village

Bathurst Manor, Wilson Heights, Downsview North

Thorncliffe Park

Richmond, Adelaide, King

Dufferin, Dovercourt Village

Scarborough Village

Fairview, Henry Farm, Oriole

Northwood Park, York University

East Toronto, Broadview North (Old East York)

Harbourfront East, Union Station, Toronto Islands

Little Portugal, Trinit

In [30]:
#inspect dataframe to ensure data was properly loaded
print(toronto_venues.shape)
toronto_venues.head()

(2130, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods\n,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods\n,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Parkwoods\n,43.753259,-79.329656,Corrosion Service Company Limited,43.752432,-79.334661,Construction & Landscaping
3,Victoria Village\n,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village\n,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant


In [31]:
#check the number of venues returned for each neighborhood in Toronto
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt\n,5,5,5,5,5,5
"Alderwood, Long Branch\n",8,8,8,8,8,8
"Bathurst Manor, Wilson Heights, Downsview North\n",20,20,20,20,20,20
Bayview Village\n,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East\n",23,23,23,23,23,23
...,...,...,...,...,...,...
"Willowdale, Willowdale West\n",6,6,6,6,6,6
Woburn\n,3,3,3,3,3,3
Woodbine Heights\n,6,6,6,6,6,6
York Mills West\n,2,2,2,2,2,2


In [32]:
#check the number of different venue catergories in Toronto 
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 272 uniques categories.


In [33]:
#Find the number of each venue 
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe and more neighborhood to the first column
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

#visualize the dataframe
toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Now lets group the venues by finding the mean frequency of venues in each neighborhood

In [34]:
#Taking the mean frequency occurance of each neighborhood
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Agincourt\n,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch\n",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North\n",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village\n,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East\n",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90,"Willowdale, Willowdale West\n",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
91,Woburn\n,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
92,Woodbine Heights\n,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
93,York Mills West\n,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
#find the top 5 most prevalent venues for each neighborhood
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt
----
                       venue  freq
0                     Lounge   0.2
1  Latin American Restaurant   0.2
2             Clothing Store   0.2
3             Breakfast Spot   0.2
4               Skating Rink   0.2


----Alderwood, Long Branch
----
          venue  freq
0   Pizza Place  0.25
1  Skating Rink  0.12
2          Pool  0.12
3           Gym  0.12
4   Coffee Shop  0.12


----Bathurst Manor, Wilson Heights, Downsview North
----
               venue  freq
0               Bank  0.10
1        Coffee Shop  0.10
2  Mobile Phone Shop  0.05
3     Sandwich Place  0.05
4     Ice Cream Shop  0.05


----Bayview Village
----
                 venue  freq
0   Chinese Restaurant  0.25
1                 Café  0.25
2                 Bank  0.25
3  Japanese Restaurant  0.25
4          Yoga Studio  0.00


----Bedford Park, Lawrence Manor East
----
                venue  freq
0  Italian Restaurant  0.09
1         Coffee Shop  0.09
2          Restaurant  0.09
3      Sandwich Place  0.0

Define a funtion to return top venues in descending order

In [36]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Create dataframe to display the top 10 venues for each neightborhood

In [37]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt\n,Latin American Restaurant,Clothing Store,Lounge,Breakfast Spot,Skating Rink,Colombian Restaurant,Comfort Food Restaurant,Eastern European Restaurant,Dumpling Restaurant,Drugstore
1,"Alderwood, Long Branch\n",Pizza Place,Skating Rink,Coffee Shop,Gym,Pub,Pool,Sandwich Place,Dim Sum Restaurant,Dance Studio,Deli / Bodega
2,"Bathurst Manor, Wilson Heights, Downsview North\n",Coffee Shop,Bank,Sushi Restaurant,Mobile Phone Shop,Pharmacy,Pizza Place,Middle Eastern Restaurant,Deli / Bodega,Restaurant,Bridal Shop
3,Bayview Village\n,Café,Bank,Japanese Restaurant,Chinese Restaurant,Women's Store,Distribution Center,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
4,"Bedford Park, Lawrence Manor East\n",Coffee Shop,Sandwich Place,Italian Restaurant,Restaurant,Grocery Store,Pharmacy,Juice Bar,Liquor Store,Indian Restaurant,Pub


Now let's cluster the neighborhoods based on top venue data. We will use k_means clustering

In [57]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

Create a new dataframe that includes the cluster as well as the top 10 venues

In [43]:
# add clustering labels
#neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = mdf

#merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York\n,Parkwoods\n,43.753259,-79.329656,1.0,Park,Food & Drink Shop,Construction & Landscaping,Diner,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Discount Store,Curling Ice
1,M4A,North York\n,Victoria Village\n,43.725882,-79.315572,1.0,Coffee Shop,Portuguese Restaurant,Hockey Arena,Pizza Place,Intersection,Dim Sum Restaurant,Dance Studio,Deli / Bodega,Department Store,Dessert Shop
2,M5A,Downtown Toronto\n,"Regent Park, Harbourfront\n",43.65426,-79.360636,1.0,Coffee Shop,Park,Pub,Bakery,Café,Restaurant,Theater,Breakfast Spot,Yoga Studio,Shoe Store
3,M6A,North York\n,"Lawrence Manor, Lawrence Heights\n",43.718518,-79.464763,1.0,Clothing Store,Accessories Store,Furniture / Home Store,Women's Store,Arts & Crafts Store,Event Space,Miscellaneous Shop,Boutique,Athletics & Sports,Coffee Shop
4,M7A,Downtown Toronto\n,"Queen's Park, Ontario Provincial Government\n",43.662301,-79.389494,1.0,Coffee Shop,Diner,Sushi Restaurant,Distribution Center,Bar,Smoothie Shop,Beer Bar,Italian Restaurant,Sculpture Garden,Sandwich Place


Lets visualize the clusters

In [53]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        #color=rainbow[int(cluster-1)],
        fill=True,
        #fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Let's examine what makes each cluster different from the other

In [58]:
#Cluster number 1
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
51,Scarborough\n,0.0,Motel,American Restaurant,Women's Store,Curling Ice,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store


In [59]:
#Cluster number 2
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York\n,1.0,Park,Food & Drink Shop,Construction & Landscaping,Diner,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Discount Store,Curling Ice
1,North York\n,1.0,Coffee Shop,Portuguese Restaurant,Hockey Arena,Pizza Place,Intersection,Dim Sum Restaurant,Dance Studio,Deli / Bodega,Department Store,Dessert Shop
2,Downtown Toronto\n,1.0,Coffee Shop,Park,Pub,Bakery,Café,Restaurant,Theater,Breakfast Spot,Yoga Studio,Shoe Store
3,North York\n,1.0,Clothing Store,Accessories Store,Furniture / Home Store,Women's Store,Arts & Crafts Store,Event Space,Miscellaneous Shop,Boutique,Athletics & Sports,Coffee Shop
4,Downtown Toronto\n,1.0,Coffee Shop,Diner,Sushi Restaurant,Distribution Center,Bar,Smoothie Shop,Beer Bar,Italian Restaurant,Sculpture Garden,Sandwich Place
...,...,...,...,...,...,...,...,...,...,...,...,...
97,Downtown Toronto\n,1.0,Coffee Shop,Café,Restaurant,Hotel,Gym,Salad Place,American Restaurant,Steakhouse,Asian Restaurant,Japanese Restaurant
98,Etobicoke\n,1.0,River,Pool,Women's Store,Dim Sum Restaurant,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Cupcake Shop
99,Downtown Toronto\n,1.0,Sushi Restaurant,Coffee Shop,Japanese Restaurant,Gay Bar,Restaurant,Burger Joint,Hotel,Mediterranean Restaurant,Men's Store,Yoga Studio
100,East Toronto\n,1.0,Skate Park,Pizza Place,Brewery,Restaurant,Spa,Farmers Market,Fast Food Restaurant,Burrito Place,Butcher,Auto Workshop


In [60]:
#Cluster number 3
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
45,North York\n,2.0,Cafeteria,Women's Store,Discount Store,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Distribution Center,Curling Ice


In [61]:
#Cluster number 4
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,York\n,3.0,Park,Women's Store,Bakery,Discount Store,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Distribution Center
35,East York\n,3.0,Park,Convenience Store,Discount Store,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Distribution Center,Curling Ice
64,York\n,3.0,Park,Discount Store,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Distribution Center,College Arts Building
66,North York\n,3.0,Park,Convenience Store,Discount Store,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Distribution Center,Curling Ice
83,Central Toronto\n,3.0,Playground,Park,Restaurant,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store,Cupcake Shop
85,Scarborough\n,3.0,Park,Playground,Women's Store,Diner,Curling Ice,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant
91,Downtown Toronto\n,3.0,Park,Trail,Playground,Curling Ice,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner


In [62]:
#Cluster number 5
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
57,North York\n,4.0,Baseball Field,Distribution Center,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Women's Store,Ethiopian Restaurant
101,Etobicoke\n,4.0,Baseball Field,Distribution Center,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Women's Store,Ethiopian Restaurant
