Section 1: Web Scraping

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis

print('Libraries imported.')

Libraries imported.


<h2>Get the tables and read into dataframe</h2>

In [2]:
# Wikipedia page holding the table of Toronto ("M") postal codes.
link = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Parse the HTML tables on the page, treating row 0 of each table as its header.
tables = pd.read_html(link, header=0)
# Only the first parsed table is used from here on.
df=pd.DataFrame(tables[0])
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


<h2>Ignore cells with a borough that is Not assigned:</h2>

In [3]:
# Remove, in place, every row whose borough is the placeholder "Not assigned".
unassigned_rows = df.index[df['Borough'] == "Not assigned"]
df.drop(index=unassigned_rows, inplace=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


<h2>Combine rows with the same Postal Code:</h2>

In [4]:
def _join_unique(values):
    """Join the distinct strings in `values` with ',' in first-seen order.

    The aggregation runs on every non-key column (Borough as well as
    Neighbourhood). `dict.fromkeys` removes duplicates while keeping insertion
    order, so the result is the same on every run; iterating a `set` of
    strings is not ordered consistently between interpreter sessions.
    """
    return ','.join(dict.fromkeys(values))


# One row per postal code; repeated values within a group are collapsed.
df_pc = df.groupby("Postal Code", as_index=False).agg(_join_unique)
df_pc.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


<h2>If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.</h2>

In [5]:
# Rows that have a borough but whose neighbourhood is the placeholder value.
unassigned_neighbourhood = df_pc['Neighbourhood'] == 'Not assigned'

# Fall back to the borough name for those rows. The mask is computed once so
# both sides of the assignment address exactly the same rows.
df_pc.loc[unassigned_neighbourhood, 'Neighbourhood'] = df_pc.loc[unassigned_neighbourhood, 'Borough']
df_pc.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [6]:
# Dimensions (rows, columns) of the per-postal-code table.
df_pc.shape

(103, 3)

<h2>Adding Latitude and Longitude columns</h2>

In [7]:
# Create both coordinate columns up front, filled with NaN placeholders.
for coordinate_column in ('Latitude', 'Longitude'):
    df_pc[coordinate_column] = np.nan

In [8]:
import geocoder  # not used in this cell; kept in case other cells rely on it

# Published CSV with one latitude/longitude pair per postal code.
coordinates_df = pd.read_csv('http://cocl.us/Geospatial_data')

# Look coordinates up by postal code rather than by row position. Assigning
# one `.loc` selection to another aligns on the row index, which only gives
# the right answer when both frames happen to list postal codes in the same
# order. `drop_duplicates` keeps the lookup index unique, which `map` requires.
coordinates_by_code = coordinates_df.drop_duplicates('Postal Code').set_index('Postal Code')
df_pc['Latitude'] = df_pc['Postal Code'].map(coordinates_by_code['Latitude'])
df_pc['Longitude'] = df_pc['Postal Code'].map(coordinates_by_code['Longitude'])

# Postal codes missing from the CSV are left as NaN.
df_pc.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


<h2> Mapping Neighbourhoods in Toronto </h2>

In [9]:
from geopy.geocoders import Nominatim

# Resolve the city name to coordinates; these centre the Toronto map below.
address = 'Toronto'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)

# geocode() yields None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [10]:
import folium

# Base map centred on the coordinates geocoded for Toronto.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per postal-code row, with a "<neighbourhood>, <borough>" popup.
marker_rows = zip(df_pc['Latitude'], df_pc['Longitude'], df_pc['Borough'], df_pc['Neighbourhood'])
for lat, lng, borough, neighbourhood in marker_rows:
    popup = folium.Popup('{}, {}'.format(neighbourhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

<h2> Mapping Neighbourhoods in North York borough </h2>

In [11]:
# Keep only the North York rows and renumber them from 0.
is_north_york = df_pc['Borough'] == "North York"
north_york_data = df_pc.loc[is_north_york].reset_index(drop=True)
north_york_data.shape

(24, 5)

In [12]:
# Resolve the borough to coordinates. Note that this overwrites the
# `latitude` / `longitude` values set earlier for the Toronto map.
address = 'North York, Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)

# geocode() yields None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of North York are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of North York are 43.7543263, -79.44911696639593.


In [13]:
# create map of North York using the latitude and longitude values geocoded above
map_north_york = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one circle marker per North York row; the popup shows the neighbourhood name
for lat, lng, label in zip(north_york_data['Latitude'], north_york_data['Longitude'], north_york_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_north_york)  
    
map_north_york

<h2>Using the FoursquareAPI</h2>

In [14]:
import os
from getpass import getpass

# Credentials are read from the environment (or prompted for) so they are not
# stored in the notebook source or its saved outputs.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID') or getpass('Foursquare Client ID: ')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET') or getpass('Foursquare Client Secret: ')
VERSION = '20200708' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Never echo the secret: cell output is saved with the notebook.
print('CLIENT_SECRET:' + '*' * 8)

Your credentails:
CLIENT_ID: GXVOFCDKFJLQYEHM11FA1MBHGT2MVV0OXNYUB5KMC0QS2XZV
CLIENT_SECRET:private


Get the Neighbourhood's name and coordinates:

In [15]:
# Details of the first North York row (row label 0 after the index reset).
# The original opening line was a bare lookup whose value was discarded; it
# displayed nothing because it was not the last expression in the cell.
neighbourhood_name = north_york_data.loc[0, 'Neighbourhood'] # neighborhood name
neighbourhood_latitude = north_york_data.loc[0, 'Latitude'] # neighborhood latitude value
neighbourhood_longitude = north_york_data.loc[0, 'Longitude'] # neighborhood longitude value

print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name, 
                                                               neighbourhood_latitude, 
                                                               neighbourhood_longitude))

Latitude and longitude values of Hillcrest Village are 43.8037622, -79.3634517.


<h2>Now, let's get the top 100 venues that are in Hillcrest Village within a radius of 1000m.</h2>

In [16]:
import requests

LIMIT = 100    # maximum number of venues requested per call
radius = 1000  # search radius in metres

# Let requests build and encode the query string instead of formatting it by hand.
url = 'https://api.foursquare.com/v2/venues/explore'
params = {
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'll': '{},{}'.format(neighbourhood_latitude, neighbourhood_longitude),
    'v': VERSION,
    'radius': radius,
    'limit': LIMIT,
}

# A timeout stops the cell hanging forever; raise_for_status reports HTTP
# errors here rather than as a KeyError when the payload is read later.
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5f2d43f9f7e86f77937f094d'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 21,
  'suggestedBounds': {'ne': {'lat': 43.81276220900001,
    'lng': -79.35100467075661},
   'sw': {'lat': 43.79476219099999, 'lng': -79.37589872924339}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bd9842be914a593adbd56fa',
       'name': 'Tastee',
       'location': {'address': '3913 Don Mills Rd.',
        'crossStreet': 'at Cliffwood Rd.',
        'lat': 43.80772211146167,
        'lng': -79.35679781099806,
        'labeledLatLngs': [{'label': 'display',
      

In [17]:
def get_category_type(row):
    """Return the name of the first category of a venue record, or None.

    `row` is any mapping-like object (e.g. a DataFrame row). The category list
    is read from the 'categories' key, falling back to 'venue.categories' when
    that key is absent. Returns None when the list is empty.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; other errors propagate.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

<h2>Clean the JSON and load into a pandas dataframe:</h2>

In [18]:
# `pandas.io.json.json_normalize` has been deprecated since pandas 1.0 in
# favour of the top-level function; fall back to the old path on older pandas.
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize

venues = results['response']['groups'][0]['items']

nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

# replace each row's category list with the name of its first category
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns: keep only the last dotted component ('venue.location.lat' -> 'lat')
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.shape

(21, 4)

<h2> Let's do the same for all neighbourhoods in North York </h2>

In [19]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    """Query the Foursquare explore endpoint once per location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location; they are consumed in parallel.
    radius : int, default 1000
        Value sent as the `radius` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns listed in `columns` below.
        The frame is empty, but still has those columns, when nothing is found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Uses the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    columns = ['Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # make the GET request; requests builds and encodes the query string
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        response.raise_for_status()

        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # A venue may carry no categories; record None instead of raising.
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing `columns` to the constructor keeps the header even with zero rows.
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return(nearby_venues)

In [20]:
# Fetch nearby venues for every North York row; getNearbyVenues prints each
# neighbourhood name as it is queried.
north_york_venues = getNearbyVenues(names=north_york_data['Neighbourhood'],
                                   latitudes=north_york_data['Latitude'],
                                   longitudes=north_york_data['Longitude']
                                  )

Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
York Mills, Silver Hills
Willowdale, Newtonbrook
Willowdale, Willowdale East
York Mills West
Willowdale, Willowdale West
Parkwoods
Don Mills
Don Mills
Bathurst Manor, Wilson Heights, Downsview North
Northwood Park, York University
Downsview
Downsview
Downsview
Downsview
Victoria Village
Bedford Park, Lawrence Manor East
Lawrence Manor, Lawrence Heights
Glencairn
North Park, Maple Leaf Park, Upwood Park
Humber Summit
Humberlea, Emery


In [21]:
# Venue count per neighbourhood name. Rows that share a name (the query list
# above contains "Don Mills" and "Downsview" more than once) are pooled together.
north_york_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Wilson Heights, Downsview North",31,31,31,31,31,31
Bayview Village,15,15,15,15,15,15
"Bedford Park, Lawrence Manor East",44,44,44,44,44,44
Don Mills,74,74,74,74,74,74
Downsview,68,68,68,68,68,68
"Fairview, Henry Farm, Oriole",44,44,44,44,44,44
Glencairn,37,37,37,37,37,37
Hillcrest Village,21,21,21,21,21,21
Humber Summit,10,10,10,10,10,10
"Humberlea, Emery",10,10,10,10,10,10


In [22]:
# Number of distinct venue categories found across all neighbourhoods.
category_count = len(north_york_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(category_count))

There are 156 uniques categories.


<h1> Analyse each neighbourhood </h1>

In [23]:
# one hot encoding
north_york_onehot = pd.get_dummies(north_york_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
north_york_onehot['Neighbourhood'] = north_york_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [north_york_onehot.columns[-1]] + list(north_york_onehot.columns[:-1])
north_york_onehot = north_york_onehot[fixed_columns]

north_york_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,Automotive Shop,...,Theater,Toy / Game Store,Trail,Train Station,Turkish Restaurant,Video Game Store,Vietnamese Restaurant,Wings Joint,Women's Store,Yoga Studio
0,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
# Dimensions (rows, columns) of the one-hot encoded venue table.
north_york_onehot.shape

(639, 157)

In [25]:
# Mean of each indicator column per neighbourhood, i.e. the share of that
# neighbourhood's venues falling in each category. The name stays a regular column.
north_york_grouped = north_york_onehot.groupby('Neighbourhood', as_index=False).mean()
north_york_grouped.head()

Unnamed: 0,Neighbourhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,Automotive Shop,...,Theater,Toy / Game Store,Trail,Train Station,Turkish Restaurant,Video Game Store,Vietnamese Restaurant,Wings Joint,Women's Store,Yoga Studio
0,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0
3,Don Mills,0.0,0.0,0.013514,0.013514,0.0,0.027027,0.013514,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.013514,0.0
4,Downsview,0.0,0.014706,0.014706,0.0,0.0,0.0,0.029412,0.0,0.0,...,0.0,0.0,0.0,0.0,0.029412,0.0,0.073529,0.0,0.0,0.0


In [26]:
# Dimensions (rows, columns) of the per-neighbourhood frequency table.
north_york_grouped.shape

(20, 157)

In [27]:
num_top_venues = 5

# Print, for each neighbourhood, its most frequent venue categories.
for hood in north_york_grouped['Neighbourhood']:
    print("----"+hood+"----")

    # Turn the neighbourhood's row into a two-column (category, frequency)
    # table; the first transposed row is the neighbourhood name itself.
    hood_rows = north_york_grouped[north_york_grouped['Neighbourhood'] == hood]
    temp = hood_rows.T.reset_index().iloc[1:]
    temp.columns = ['venue', 'freq']
    temp = temp.astype({'freq': float}).round({'freq': 2})

    top_venues = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(top_venues.head(num_top_venues))
    print('\n')

----Bathurst Manor, Wilson Heights, Downsview North----
                 venue  freq
0                 Bank  0.06
1    Convenience Store  0.06
2          Coffee Shop  0.06
3       Ice Cream Shop  0.03
4  Fried Chicken Joint  0.03


----Bayview Village----
                 venue  freq
0                 Bank  0.13
1  Japanese Restaurant  0.13
2        Grocery Store  0.13
3          Gas Station  0.13
4         Skating Rink  0.07


----Bedford Park, Lawrence Manor East----
                  venue  freq
0    Italian Restaurant  0.09
1           Coffee Shop  0.07
2  Fast Food Restaurant  0.05
3        Sandwich Place  0.05
4            Restaurant  0.05


----Don Mills----
                 venue  freq
0          Coffee Shop  0.07
1  Japanese Restaurant  0.07
2           Restaurant  0.07
3                  Gym  0.05
4                 Café  0.04


----Downsview----
                   venue  freq
0  Vietnamese Restaurant  0.07
1            Coffee Shop  0.07
2            Pizza Place  0.06
3       

<h2> Let's put that in a dataframe </h2>

In [28]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the `num_top_venues` largest entries of `row`.

    The first element of `row` is skipped (callers pass rows whose first entry
    is the neighbourhood name). The result is an array of index labels ordered
    from highest to lowest value.
    """
    frequencies = row.iloc[1:]
    ranked_labels = frequencies.sort_values(ascending=False).index.values
    return ranked_labels[:num_top_venues]

In [29]:
num_top_venues = 10


def ordinal(n):
    """Return `n` with its English ordinal suffix: 1st, 2nd, 3rd, 4th, 11th, 21st, ..."""
    if 10 <= n % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return '{}{}'.format(n, suffix)


# create columns according to number of top venues
columns = ['Neighbourhood'] + [
    '{} Most Common Venue'.format(ordinal(rank))
    for rank in range(1, num_top_venues + 1)
]

# Build all rows first and construct the frame once, rather than filling an
# empty frame cell by cell.
neighbourhoods_venues_sorted = pd.DataFrame(
    [
        [row['Neighbourhood']] + list(return_most_common_venues(row, num_top_venues))
        for _, row in north_york_grouped.iterrows()
    ],
    columns=columns,
)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Convenience Store,Pharmacy,Mobile Phone Shop,Sandwich Place,Bridal Shop,Restaurant,Pizza Place,Pet Store
1,Bayview Village,Grocery Store,Gas Station,Bank,Japanese Restaurant,Park,Dog Run,Chinese Restaurant,Trail,Café,Skating Rink
2,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Bank,Fast Food Restaurant,Pizza Place,Restaurant,Sandwich Place,Pet Store,Breakfast Spot,Skating Rink
3,Don Mills,Restaurant,Coffee Shop,Japanese Restaurant,Gym,Burger Joint,Bank,Café,Pizza Place,Supermarket,Asian Restaurant
4,Downsview,Vietnamese Restaurant,Coffee Shop,Pizza Place,Hotel,Park,Gas Station,Grocery Store,Chinese Restaurant,Fast Food Restaurant,Liquor Store


<h1> Cluster Neighbourhoods </h1>

In [30]:
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 5

# Cluster on the category frequencies only. `columns=` is used because
# recent pandas versions no longer accept the axis as a positional argument.
north_york_grouped_clustering = north_york_grouped.drop(columns='Neighbourhood')

# run k-means clustering (fixed random_state so the labels are reproducible)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(north_york_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 3, 3, 3, 3, 3, 3, 0, 4, 2])

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [31]:
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

north_york_merged = north_york_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
north_york_merged = north_york_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

north_york_merged.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M2H,North York,Hillcrest Village,43.803762,-79.363452,0,Coffee Shop,Park,Pharmacy,Ice Cream Shop,Convenience Store,Chinese Restaurant,Recreation Center,Residential Building (Apartment / Condo),Restaurant,Sandwich Place
1,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556,3,Coffee Shop,Clothing Store,Restaurant,Juice Bar,Bank,Bakery,Japanese Restaurant,Sandwich Place,Fast Food Restaurant,Electronics Store
2,M2K,North York,Bayview Village,43.786947,-79.385975,3,Grocery Store,Gas Station,Bank,Japanese Restaurant,Park,Dog Run,Chinese Restaurant,Trail,Café,Skating Rink
3,M2L,North York,"York Mills, Silver Hills",43.75749,-79.374714,1,Park,Pool,Dessert Shop,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Dog Run,Discount Store,Diner,Dim Sum Restaurant
4,M2M,North York,"Willowdale, Newtonbrook",43.789053,-79.408493,3,Korean Restaurant,Café,Pizza Place,Park,Bus Station,Coffee Shop,Middle Eastern Restaurant,Bank,Shopping Mall,Diner


In [32]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour each
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Rows that found no match in the join have a NaN label, which also turns the
# whole column into floats; skip those rows and cast the rest back to int so
# the label can index the colour list.
clustered_rows = north_york_merged.dropna(subset=['Cluster Labels'])

# add markers to the map
for lat, lon, poi, cluster in zip(clustered_rows['Latitude'], clustered_rows['Longitude'], clustered_rows['Neighbourhood'], clustered_rows['Cluster Labels']):
    cluster = int(cluster)
    # cluster - 1 keeps the original colour assignment (cluster 0 wraps
    # around to the last colour via negative indexing)
    marker_color = rainbow[cluster - 1]
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

<h2>Examine Each Cluster</h2>

In [33]:
# Rows assigned to cluster 0, showing the column at position 1 plus every
# column from position 5 onwards.
in_cluster = north_york_merged['Cluster Labels'] == 0
summary_columns = north_york_merged.columns[[1] + list(range(5, north_york_merged.shape[1]))]
north_york_merged.loc[in_cluster, summary_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,0,Coffee Shop,Park,Pharmacy,Ice Cream Shop,Convenience Store,Chinese Restaurant,Recreation Center,Residential Building (Apartment / Condo),Restaurant,Sandwich Place
7,North York,0,Pharmacy,Bank,Convenience Store,Coffee Shop,Park,Pizza Place,Eastern European Restaurant,Bus Line,Bakery,Dumpling Restaurant
8,North York,0,Park,Bus Stop,Pharmacy,Convenience Store,Shopping Mall,Chinese Restaurant,Road,Café,Caribbean Restaurant,Pizza Place
11,North York,0,Bank,Coffee Shop,Convenience Store,Pharmacy,Mobile Phone Shop,Sandwich Place,Bridal Shop,Restaurant,Pizza Place,Pet Store
21,North York,0,Coffee Shop,Convenience Store,Athletics & Sports,Pizza Place,Dim Sum Restaurant,Bakery,Chinese Restaurant,Mediterranean Restaurant,Gas Station,Park


In [34]:
# Rows assigned to cluster 1, showing the column at position 1 plus every
# column from position 5 onwards.
in_cluster = north_york_merged['Cluster Labels'] == 1
summary_columns = north_york_merged.columns[[1] + list(range(5, north_york_merged.shape[1]))]
north_york_merged.loc[in_cluster, summary_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,North York,1,Park,Pool,Dessert Shop,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Dog Run,Discount Store,Diner,Dim Sum Restaurant


In [35]:
# Rows assigned to cluster 2, showing the column at position 1 plus every
# column from position 5 onwards.
in_cluster = north_york_merged['Cluster Labels'] == 2
summary_columns = north_york_merged.columns[[1] + list(range(5, north_york_merged.shape[1]))]
north_york_merged.loc[in_cluster, summary_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,North York,2,Convenience Store,Auto Workshop,Discount Store,Business Service,Storage Facility,Bakery,Intersection,Gas Station,Golf Course,Park


In [36]:
# Rows assigned to cluster 3, showing the column at position 1 plus every
# column from position 5 onwards.
in_cluster = north_york_merged['Cluster Labels'] == 3
summary_columns = north_york_merged.columns[[1] + list(range(5, north_york_merged.shape[1]))]
north_york_merged.loc[in_cluster, summary_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,3,Coffee Shop,Clothing Store,Restaurant,Juice Bar,Bank,Bakery,Japanese Restaurant,Sandwich Place,Fast Food Restaurant,Electronics Store
2,North York,3,Grocery Store,Gas Station,Bank,Japanese Restaurant,Park,Dog Run,Chinese Restaurant,Trail,Café,Skating Rink
4,North York,3,Korean Restaurant,Café,Pizza Place,Park,Bus Station,Coffee Shop,Middle Eastern Restaurant,Bank,Shopping Mall,Diner
5,North York,3,Coffee Shop,Bubble Tea Shop,Ramen Restaurant,Pizza Place,Japanese Restaurant,Korean Restaurant,Sandwich Place,Restaurant,Sushi Restaurant,Fast Food Restaurant
6,North York,3,Park,Restaurant,Coffee Shop,Bowling Alley,Grocery Store,Golf Course,Gas Station,French Restaurant,Intersection,Dog Run
9,North York,3,Restaurant,Coffee Shop,Japanese Restaurant,Gym,Burger Joint,Bank,Café,Pizza Place,Supermarket,Asian Restaurant
10,North York,3,Restaurant,Coffee Shop,Japanese Restaurant,Gym,Burger Joint,Bank,Café,Pizza Place,Supermarket,Asian Restaurant
12,North York,3,Coffee Shop,Furniture / Home Store,Pizza Place,Caribbean Restaurant,Sushi Restaurant,Sports Bar,Middle Eastern Restaurant,Fast Food Restaurant,Bar,Bank
13,North York,3,Vietnamese Restaurant,Coffee Shop,Pizza Place,Hotel,Park,Gas Station,Grocery Store,Chinese Restaurant,Fast Food Restaurant,Liquor Store
14,North York,3,Vietnamese Restaurant,Coffee Shop,Pizza Place,Hotel,Park,Gas Station,Grocery Store,Chinese Restaurant,Fast Food Restaurant,Liquor Store


In [37]:
# Rows assigned to cluster 4, showing the column at position 1 plus every
# column from position 5 onwards.
in_cluster = north_york_merged['Cluster Labels'] == 4
summary_columns = north_york_merged.columns[[1] + list(range(5, north_york_merged.shape[1]))]
north_york_merged.loc[in_cluster, summary_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,North York,4,Electronics Store,Pharmacy,Pizza Place,Park,Shopping Mall,Optical Shop,Italian Restaurant,Bakery,Bank,Dim Sum Restaurant
