Libraries necessary to extract data from the wiki page

In [1]:
import json
import os
from io import StringIO

import numpy as np
import pandas as pd 

import requests
from bs4 import BeautifulSoup

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

Read the wiki page to create a dataframe with Toronto geospatial data.


In [2]:
def prepare_toronto_dataframe():
    """Build the Toronto postal-code dataframe from Wikipedia and a local CSV.

    Downloads the "List of postal codes of Canada: M" wiki page, takes its
    first table, removes rows whose borough is "Not assigned", joins the
    neighborhood names that share a postcode and borough, and merges the
    result with the coordinates stored in ``geospatial_data.csv``.

    Returns:
        pandas.DataFrame: the cleaned wiki table joined with the columns of
        ``geospatial_data.csv`` (its 'Postal Code' key column is dropped).

    Raises:
        requests.HTTPError: if the wiki page request returns an error status.
        IndexError: if the page contains no table.
    """
    res = requests.get(
        "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
        timeout=30,
    )
    # Fail early on an HTTP error instead of trying to parse an error page.
    res.raise_for_status()
    # NOTE(review): the page is HTML but is parsed with the 'xml' feature, as before - confirm this is intended.
    soup = BeautifulSoup(res.content,'xml')
    table = soup.find_all('table')[0]
    # read_html expects a path/URL or a file-like object, so wrap the markup.
    df = pd.read_html(StringIO(str(table)))[0]
    # Remove rows where borough is "Not assigned"; copy so the edits below
    # modify an independent frame rather than a slice of the parsed table.
    df = df[df['Borough'] != 'Not assigned'].copy()
    # strange recent name change on wikipage
    if "Neighbourhood" in df.columns:
        df = df.rename(columns={"Neighbourhood": "Neighborhood"})
    # Combine Neighborhood values if postcode and borough are the same.
    df['Neighborhood'] = df.groupby(['Postcode', 'Borough'])['Neighborhood'].transform(','.join)
    df = df.drop_duplicates()
    # In case when Neighborhood is "Not assigned" make it the same as Borough.
    df['Neighborhood'] = df['Neighborhood'].where(df['Neighborhood'] != 'Not assigned', df['Borough'])
    # read geospatial data, geocoder didn't work for me (maybe outside USA location)
    df_geo = pd.read_csv('geospatial_data.csv')
    # merge on the postal code and drop the duplicated key column
    df_new = pd.merge(left=df, right=df_geo, left_on='Postcode', right_on='Postal Code')
    df_new = df_new.drop('Postal Code', axis=1)
    return df_new

When a Neighborhood is "Not assigned", it is set to the same value as its Borough.

In [3]:
# Load the prepared dataframe from the local cache when possible; rebuild it
# from the wiki page only when the cache file is missing or unreadable.
try:
    toronto_df = pd.read_csv("toronto_geodata.csv")
    print("data read from file")
except (FileNotFoundError, pd.errors.EmptyDataError, pd.errors.ParserError):
    # Only cache problems trigger a rebuild; any other error is surfaced.
    toronto_df = prepare_toronto_dataframe()
    toronto_df.to_csv("toronto_geodata.csv", index=False)

toronto_df
    

data read from file


Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.654260,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558
101,M8Y,Etobicoke,"Humber Bay,King's Mill Park,Kingsway Park Sout...",43.636258,-79.498509


Let's check how it looks on the map.

In [4]:
# Look up the coordinates of the city with Nominatim and compare them with the
# mean of the postal-code coordinates held in toronto_df.
address = 'Toronto'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude

mean_latitude = toronto_df["Latitude"].mean()
mean_longitude = toronto_df["Longitude"].mean()

print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))
print('The mean coordinate of Toronto are {}, {}.'.format(mean_latitude, mean_longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.
The mean coordinate of Toronto are 43.70460773398059, -79.39715291165048.


In [5]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per row of toronto_df, labelled "<neighborhood>, <borough>".
marker_columns = ('Latitude', 'Longitude', 'Borough', 'Neighborhood')
for lat, lng, borough, neighborhood in zip(*(toronto_df[column] for column in marker_columns)):
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto


In [6]:
# The venue responses are cached on disk, so credentials are only needed when
# a cache file is missing. They are read from the environment so that secrets
# never have to be pasted into the notebook; the default is still empty.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

Functions from DP0701EN/DP0701EN-3-3-2-Neighborhoods-New-York-py-v1.0.ipynb

In [7]:
# Cache file name pattern; the placeholders are latitude, longitude and radius.
VENUES_FILE = 'venues_explore_{}_{}_{}.json'


def save_venues_to_file(dataframe, latitude, longitude, radius):
    """Write a venues response to its JSON cache file.

    Args:
        dataframe: JSON-serialisable object to store (despite the name, it is
            passed straight to ``json.dump``).
        latitude, longitude, radius: values used to build the file name from
            ``VENUES_FILE``.
    """
    cache_path = VENUES_FILE.format(latitude, longitude, radius)
    with open(cache_path, 'w') as cache_file:
        json.dump(dataframe, cache_file)

def read_venues_from_file(latitude, longitude, radius):
    """Load a cached venues response, if one exists.

    Args:
        latitude, longitude, radius: values used to build the cache file name
            from ``VENUES_FILE``.

    Returns:
        The decoded JSON content of the cache file, or ``None`` when the file
        cannot be opened or does not contain valid JSON.
    """
    filename = VENUES_FILE.format(latitude, longitude, radius)
    try:
        with open(filename, 'r') as f:
            return json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: invalid JSON or text
        # encoding (json.JSONDecodeError and UnicodeDecodeError derive from it).
        # Anything else is a real bug and is allowed to propagate.
        return None

def getVenues(name, lat, lng, radius, LIMIT=100, timeout=30):
    """Query the Foursquare "explore" endpoint around one coordinate.

    Args:
        name: label of the location; not used for the request itself.
        lat, lng: coordinates placed in the ``ll`` query parameter.
        radius: value of the ``radius`` query parameter.
        LIMIT: value of the ``limit`` query parameter.
        timeout: seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON body of the response.
    """
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
    # make the GET request
    dataframe = requests.get(url, timeout=timeout).json()
    return dataframe
    

In [8]:
def _has_venue_groups(payload):
    """Return True when payload has the response -> groups[0] -> items layout."""
    if not isinstance(payload, dict):
        return False
    response = payload.get("response")
    if not isinstance(response, dict):
        return False
    groups = response.get('groups')
    if not isinstance(groups, list) or not groups:
        return False
    return isinstance(groups[0], dict) and 'items' in groups[0]


def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect the venues around every given location into one dataframe.

    For each (name, latitude, longitude) triple the cached response is used
    when available; otherwise the venues are requested with ``getVenues`` and
    the response is cached. Only responses that actually contain venue groups
    are cached, so a failed request (for example with missing credentials) is
    retried on the next run instead of being stored.

    Args:
        names: iterable of location labels (stored in the 'Borough' column).
        latitudes: iterable of latitudes, aligned with ``names``.
        longitudes: iterable of longitudes, aligned with ``names``.
        radius: search radius forwarded to the cache helpers and ``getVenues``.

    Returns:
        pandas.DataFrame with one row per venue and the columns 'Borough',
        'Borough Latitude', 'Borough Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude' and 'Venue Category'. The frame is empty (but keeps
        these columns) when no venues were found.
    """
    column_names = ['Borough', 
                    'Borough Latitude', 
                    'Borough Longitude', 
                    'Venue', 
                    'Venue Latitude', 
                    'Venue Longitude', 
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        dataframe = read_venues_from_file(lat, lng, radius)
        if dataframe is None:
            dataframe = getVenues(name, lat, lng, radius)
            # Do not cache error payloads: they would be read back forever.
            if _has_venue_groups(dataframe):
                save_venues_to_file(dataframe, lat, lng, radius)
        if not _has_venue_groups(dataframe):
            print("can't load data for {}, {}, {} ,{}".format(name, lat, lng, radius))
            continue
        results = dataframe["response"]['groups'][0]['items']
        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue.get('categories') or []
            venue_rows.append((
                name, 
                lat, 
                lng, 
                venue['name'], 
                venue['location']['lat'], 
                venue['location']['lng'],  
                # A venue without any category gets None instead of crashing.
                categories[0]['name'] if categories else None))

    # Passing the column names here also works when venue_rows is empty.
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)
    
    return nearby_venues

# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category of a venue record.

    Args:
        row: mapping-like record holding the category list under either
            'categories' or 'venue.categories'.

    Returns:
        The 'name' of the first category, or ``None`` when the category list
        is empty or ``None``.

    Raises:
        KeyError: if neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Fall back to the alternative key 'venue.categories'.
        categories_list = row['venue.categories']

    if not categories_list:
        return None
    return categories_list[0]['name']

Read the venues near the coordinates of each postal code.

In [9]:
# Collect the venues around every postal-code coordinate, labelled by borough.
toronto_venues = getNearbyVenues(
    names=toronto_df['Borough'],
    latitudes=toronto_df['Latitude'],
    longitudes=toronto_df['Longitude'],
)
# Write without the row index, like toronto_geodata.csv, so that reading the
# file back does not produce an extra unnamed column.
toronto_venues.to_csv("toronto_venues.csv", index=False)


North York
North York
Downtown Toronto
North York
Downtown Toronto
Queen's Park
Scarborough
North York
East York
Downtown Toronto
North York
Etobicoke
Scarborough
North York
East York
Downtown Toronto
York
Etobicoke
Scarborough
East Toronto
Downtown Toronto
York
Scarborough
East York
Downtown Toronto
Downtown Toronto
Scarborough
North York
North York
East York
Downtown Toronto
West Toronto
Scarborough
North York
North York
East York
Downtown Toronto
West Toronto
Scarborough
North York
North York
East Toronto
Downtown Toronto
West Toronto
Scarborough
North York
North York
East Toronto
Downtown Toronto
North York
North York
Scarborough
North York
North York
East Toronto
North York
York
North York
Scarborough
North York
North York
Central Toronto
Central Toronto
York
York
Scarborough
North York
Central Toronto
Central Toronto
West Toronto
Etobicoke
Scarborough
North York
Central Toronto
Central Toronto
West Toronto
Mississauga
Etobicoke
Scarborough
Central Toronto
Downtown Toronto
West To

Examine what data we have.

In [10]:
# Print the (rows, columns) shape of the venues table, then show its first rows.
print(toronto_venues.shape)
toronto_venues.head()


(2226, 7)


Unnamed: 0,Borough,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,North York,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,North York,43.753259,-79.329656,GTA Restoration,43.753396,-79.333477,Fireworks Store
2,North York,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,North York,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,North York,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


In [11]:
# Count the non-null entries of every column per borough (i.e. venues per borough).
toronto_venues.groupby('Borough').count()

Unnamed: 0_level_0,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Central Toronto,110,110,110,110,110,110
Downtown Toronto,1309,1309,1309,1309,1309,1309
East Toronto,125,125,125,125,125,125
East York,75,75,75,75,75,75
Etobicoke,73,73,73,73,73,73
Mississauga,11,11,11,11,11,11
North York,245,245,245,245,245,245
Scarborough,95,95,95,95,95,95
West Toronto,166,166,166,166,166,166
York,17,17,17,17,17,17


These are the same operations as for the New York data, with one improvement: if the re-added column does not end up as the last column, the code from the lecture does not work correctly, which is what happened to me.

In [12]:
# One-hot encode the venue categories: one indicator column per category.
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# Attach the borough of every venue to the encoded rows.
toronto_onehot['Borough'] = toronto_venues['Borough'] 

# Put 'Borough' first wherever it ended up, keeping the order of the others.
neighborhood_loc = toronto_onehot.columns.get_loc('Borough')
remaining_columns = toronto_onehot.columns.delete(neighborhood_loc)
fixed_columns = ['Borough'] + list(remaining_columns)
toronto_onehot = toronto_onehot[fixed_columns]



In [13]:
# Show the encoded columns, then average the indicators per borough, which
# gives the relative frequency of every venue category in that borough.
print(toronto_onehot.columns)
borough_means = toronto_onehot.groupby('Borough').mean()
toronto_grouped = borough_means.reset_index()
toronto_grouped

Index(['Borough', 'Accessories Store', 'Afghan Restaurant', 'Airport',
       'Airport Food Court', 'Airport Gate', 'Airport Lounge',
       'Airport Service', 'Airport Terminal', 'American Restaurant',
       ...
       'Vegetarian / Vegan Restaurant', 'Video Game Store', 'Video Store',
       'Vietnamese Restaurant', 'Warehouse Store', 'Wine Bar', 'Wine Shop',
       'Wings Joint', 'Women's Store', 'Yoga Studio'],
      dtype='object', length=274)


Unnamed: 0,Borough,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Central Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,...,0.009091,0.0,0.0,0.009091,0.0,0.0,0.0,0.0,0.0,0.009091
1,Downtown Toronto,0.0,0.000764,0.000764,0.000764,0.000764,0.001528,0.002292,0.001528,0.010695,...,0.012987,0.001528,0.0,0.005348,0.0,0.005348,0.000764,0.0,0.001528,0.002292
2,East Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.024,...,0.0,0.0,0.0,0.0,0.0,0.008,0.0,0.0,0.0,0.024
3,East York,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.013333,0.0,0.013333,0.0,0.0,0.0,0.0,0.013333
4,Etobicoke,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013699,0.0,0.0
5,Mississauga,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,North York,0.004082,0.0,0.004082,0.0,0.0,0.0,0.0,0.0,0.008163,...,0.0,0.004082,0.004082,0.008163,0.0,0.0,0.0,0.004082,0.012245,0.0
7,Scarborough,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,...,0.0,0.0,0.0,0.021053,0.0,0.0,0.0,0.0,0.0,0.0
8,West Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.012048,0.0,0.0,0.012048,0.0,0.006024,0.0,0.0,0.0,0.006024
9,York,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0


In [14]:
num_top_venues = 5

# Print the most frequent venue categories of every borough.
for hood in toronto_grouped['Borough']:
    print("----"+hood+"----")
    # Transpose the single row of this borough into a two-column table.
    temp = toronto_grouped[toronto_grouped['Borough'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    # Skip the first row (it holds the borough name) and derive the cleaned
    # table with assign() so that no slice of `temp` is modified in place.
    temp = (
        temp.iloc[1:]
        .assign(freq=lambda table: table['freq'].astype(float))
        .round({'freq': 2})
    )
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Central Toronto----
            venue  freq
0     Coffee Shop  0.07
1  Sandwich Place  0.06
2            Park  0.05
3            Café  0.05
4     Pizza Place  0.04


----Downtown Toronto----
         venue  freq
0  Coffee Shop  0.10
1         Café  0.05
2        Hotel  0.03
3   Restaurant  0.03
4          Bar  0.02


----East Toronto----
                venue  freq
0    Greek Restaurant  0.07
1         Coffee Shop  0.06
2  Italian Restaurant  0.05
3             Brewery  0.04
4      Ice Cream Shop  0.04


----East York----
                 venue  freq
0         Burger Joint  0.05
1          Coffee Shop  0.05
2  Sporting Goods Shop  0.04
3          Pizza Place  0.04
4                 Bank  0.04


----Etobicoke----
            venue  freq
0     Pizza Place  0.11
1  Sandwich Place  0.07
2     Coffee Shop  0.05
3        Pharmacy  0.05
4             Gym  0.04


----Mississauga----
                       venue  freq
0                      Hotel  0.18
1                Coffee Shop  0.18
2  

In [15]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values of a row, ignoring its first entry.

    Args:
        row: pandas Series whose first element is skipped and whose remaining
            values are ranked in descending order.
        num_top_venues: number of labels to return.

    Returns:
        numpy array with the index labels of the highest-ranked values.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]


In [16]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Borough']
for ind in np.arange(num_top_venues):
    rank = ind + 1
    # Pick the ordinal suffix explicitly instead of relying on an exception:
    # 1st/2nd/3rd (also 21st, 22nd, ...), but 11th, 12th and 13th.
    if rank % 10 in (1, 2, 3) and rank % 100 not in (11, 12, 13):
        suffix = indicators[rank % 10 - 1]
    else:
        suffix = 'th'
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# create a new dataframe with one row per borough
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Borough'] = toronto_grouped['Borough']

# fill every row with the borough's most common venue categories
for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,Coffee Shop,Sandwich Place,Café,Park,Sushi Restaurant,Pizza Place,Gym,Dessert Shop,Pub,Clothing Store
1,Downtown Toronto,Coffee Shop,Café,Restaurant,Hotel,Bakery,Italian Restaurant,Japanese Restaurant,Bar,Park,Steakhouse
2,East Toronto,Greek Restaurant,Coffee Shop,Italian Restaurant,Brewery,Ice Cream Shop,Café,Pizza Place,American Restaurant,Pub,Park
3,East York,Burger Joint,Coffee Shop,Bank,Sporting Goods Shop,Pizza Place,Park,Pharmacy,Pet Store,Supermarket,Breakfast Spot
4,Etobicoke,Pizza Place,Sandwich Place,Pharmacy,Coffee Shop,Discount Store,Fast Food Restaurant,Grocery Store,Gym,Beer Store,Liquor Store


In [17]:
# set number of clusters
kclusters = 5

# Cluster on the category frequencies only. The axis is passed by keyword
# because newer pandas versions no longer accept it positionally.
toronto_grouped_clustering = toronto_grouped.drop('Borough', axis=1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 3, 4, 2, 1, 3, 1, 0], dtype=int32)

In [18]:
# add clustering labels; drop labels left over from a previous run first so
# that this cell can be executed more than once without insert() failing
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop('Cluster Labels', axis=1)
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# join the cluster label and top venues of each borough onto the
# postal-code rows, which carry the latitude/longitude
toronto_merged = toronto_df.join(neighborhoods_venues_sorted.set_index('Borough'), on='Borough')
# Rows whose Borough has no entry in neighborhoods_venues_sorted get NaN from
# the join (presumably boroughs for which no venues were returned); drop them.
# The NaN also explains why 'Cluster Labels' is stored as float here.
toronto_merged = toronto_merged.dropna()
toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,1.0,Coffee Shop,Clothing Store,Fast Food Restaurant,Japanese Restaurant,Pizza Place,Sandwich Place,Park,Furniture / Home Store,Restaurant,Bank
1,M4A,North York,Victoria Village,43.725882,-79.315572,1.0,Coffee Shop,Clothing Store,Fast Food Restaurant,Japanese Restaurant,Pizza Place,Sandwich Place,Park,Furniture / Home Store,Restaurant,Bank
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,1.0,Coffee Shop,Café,Restaurant,Hotel,Bakery,Italian Restaurant,Japanese Restaurant,Bar,Park,Steakhouse
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763,1.0,Coffee Shop,Clothing Store,Fast Food Restaurant,Japanese Restaurant,Pizza Place,Sandwich Place,Park,Furniture / Home Store,Restaurant,Bank
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494,1.0,Coffee Shop,Café,Restaurant,Hotel,Bakery,Italian Restaurant,Japanese Restaurant,Bar,Park,Steakhouse


In [19]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour each
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Borough'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels are floats after the join; cluster 0 maps to index -1, i.e. the
    # last colour, so every cluster still gets its own colour.
    cluster_color = rainbow[int(cluster-1)]

    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [20]:
# Rows of cluster 0, showing column 1 plus every column from position 5 onwards.
shown_positions = [1, *range(5, toronto_merged.shape[1])]
in_cluster = toronto_merged['Cluster Labels'] == 0
toronto_merged.loc[in_cluster, toronto_merged.columns[shown_positions]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,York,0.0,Convenience Store,Fast Food Restaurant,Park,Hockey Arena,Caribbean Restaurant,Discount Store,Bus Line,Sandwich Place,Market,Brewery
21,York,0.0,Convenience Store,Fast Food Restaurant,Park,Hockey Arena,Caribbean Restaurant,Discount Store,Bus Line,Sandwich Place,Market,Brewery
56,York,0.0,Convenience Store,Fast Food Restaurant,Park,Hockey Arena,Caribbean Restaurant,Discount Store,Bus Line,Sandwich Place,Market,Brewery
63,York,0.0,Convenience Store,Fast Food Restaurant,Park,Hockey Arena,Caribbean Restaurant,Discount Store,Bus Line,Sandwich Place,Market,Brewery
64,York,0.0,Convenience Store,Fast Food Restaurant,Park,Hockey Arena,Caribbean Restaurant,Discount Store,Bus Line,Sandwich Place,Market,Brewery


In [21]:
# Rows of cluster 1, showing column 1 plus every column from position 5 onwards.
shown_positions = [1, *range(5, toronto_merged.shape[1])]
in_cluster = toronto_merged['Cluster Labels'] == 1
toronto_merged.loc[in_cluster, toronto_merged.columns[shown_positions]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,1.0,Coffee Shop,Clothing Store,Fast Food Restaurant,Japanese Restaurant,Pizza Place,Sandwich Place,Park,Furniture / Home Store,Restaurant,Bank
1,North York,1.0,Coffee Shop,Clothing Store,Fast Food Restaurant,Japanese Restaurant,Pizza Place,Sandwich Place,Park,Furniture / Home Store,Restaurant,Bank
2,Downtown Toronto,1.0,Coffee Shop,Café,Restaurant,Hotel,Bakery,Italian Restaurant,Japanese Restaurant,Bar,Park,Steakhouse
3,North York,1.0,Coffee Shop,Clothing Store,Fast Food Restaurant,Japanese Restaurant,Pizza Place,Sandwich Place,Park,Furniture / Home Store,Restaurant,Bank
4,Downtown Toronto,1.0,Coffee Shop,Café,Restaurant,Hotel,Bakery,Italian Restaurant,Japanese Restaurant,Bar,Park,Steakhouse
...,...,...,...,...,...,...,...,...,...,...,...,...
92,Downtown Toronto,1.0,Coffee Shop,Café,Restaurant,Hotel,Bakery,Italian Restaurant,Japanese Restaurant,Bar,Park,Steakhouse
96,Downtown Toronto,1.0,Coffee Shop,Café,Restaurant,Hotel,Bakery,Italian Restaurant,Japanese Restaurant,Bar,Park,Steakhouse
97,Downtown Toronto,1.0,Coffee Shop,Café,Restaurant,Hotel,Bakery,Italian Restaurant,Japanese Restaurant,Bar,Park,Steakhouse
99,Downtown Toronto,1.0,Coffee Shop,Café,Restaurant,Hotel,Bakery,Italian Restaurant,Japanese Restaurant,Bar,Park,Steakhouse


In [22]:
# Rows of cluster 2, showing column 1 plus every column from position 5 onwards.
shown_positions = [1, *range(5, toronto_merged.shape[1])]
in_cluster = toronto_merged['Cluster Labels'] == 2
toronto_merged.loc[in_cluster, toronto_merged.columns[shown_positions]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
76,Mississauga,2.0,Hotel,Coffee Shop,Sandwich Place,Gym,Middle Eastern Restaurant,American Restaurant,Fried Chicken Joint,Burrito Place,Mediterranean Restaurant,Yoga Studio


In [23]:
# Rows of cluster 3, showing column 1 plus every column from position 5 onwards.
shown_positions = [1, *range(5, toronto_merged.shape[1])]
in_cluster = toronto_merged['Cluster Labels'] == 3
toronto_merged.loc[in_cluster, toronto_merged.columns[shown_positions]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Scarborough,3.0,Coffee Shop,Fast Food Restaurant,Pizza Place,Bakery,Chinese Restaurant,Breakfast Spot,Pharmacy,Vietnamese Restaurant,Soccer Field,Skating Rink
8,East York,3.0,Burger Joint,Coffee Shop,Bank,Sporting Goods Shop,Pizza Place,Park,Pharmacy,Pet Store,Supermarket,Breakfast Spot
12,Scarborough,3.0,Coffee Shop,Fast Food Restaurant,Pizza Place,Bakery,Chinese Restaurant,Breakfast Spot,Pharmacy,Vietnamese Restaurant,Soccer Field,Skating Rink
14,East York,3.0,Burger Joint,Coffee Shop,Bank,Sporting Goods Shop,Pizza Place,Park,Pharmacy,Pet Store,Supermarket,Breakfast Spot
18,Scarborough,3.0,Coffee Shop,Fast Food Restaurant,Pizza Place,Bakery,Chinese Restaurant,Breakfast Spot,Pharmacy,Vietnamese Restaurant,Soccer Field,Skating Rink
22,Scarborough,3.0,Coffee Shop,Fast Food Restaurant,Pizza Place,Bakery,Chinese Restaurant,Breakfast Spot,Pharmacy,Vietnamese Restaurant,Soccer Field,Skating Rink
23,East York,3.0,Burger Joint,Coffee Shop,Bank,Sporting Goods Shop,Pizza Place,Park,Pharmacy,Pet Store,Supermarket,Breakfast Spot
26,Scarborough,3.0,Coffee Shop,Fast Food Restaurant,Pizza Place,Bakery,Chinese Restaurant,Breakfast Spot,Pharmacy,Vietnamese Restaurant,Soccer Field,Skating Rink
29,East York,3.0,Burger Joint,Coffee Shop,Bank,Sporting Goods Shop,Pizza Place,Park,Pharmacy,Pet Store,Supermarket,Breakfast Spot
32,Scarborough,3.0,Coffee Shop,Fast Food Restaurant,Pizza Place,Bakery,Chinese Restaurant,Breakfast Spot,Pharmacy,Vietnamese Restaurant,Soccer Field,Skating Rink


In [24]:
# Rows of cluster 4, showing column 1 plus every column from position 5 onwards.
shown_positions = [1, *range(5, toronto_merged.shape[1])]
in_cluster = toronto_merged['Cluster Labels'] == 4
toronto_merged.loc[in_cluster, toronto_merged.columns[shown_positions]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Etobicoke,4.0,Pizza Place,Sandwich Place,Pharmacy,Coffee Shop,Discount Store,Fast Food Restaurant,Grocery Store,Gym,Beer Store,Liquor Store
17,Etobicoke,4.0,Pizza Place,Sandwich Place,Pharmacy,Coffee Shop,Discount Store,Fast Food Restaurant,Grocery Store,Gym,Beer Store,Liquor Store
70,Etobicoke,4.0,Pizza Place,Sandwich Place,Pharmacy,Coffee Shop,Discount Store,Fast Food Restaurant,Grocery Store,Gym,Beer Store,Liquor Store
77,Etobicoke,4.0,Pizza Place,Sandwich Place,Pharmacy,Coffee Shop,Discount Store,Fast Food Restaurant,Grocery Store,Gym,Beer Store,Liquor Store
88,Etobicoke,4.0,Pizza Place,Sandwich Place,Pharmacy,Coffee Shop,Discount Store,Fast Food Restaurant,Grocery Store,Gym,Beer Store,Liquor Store
89,Etobicoke,4.0,Pizza Place,Sandwich Place,Pharmacy,Coffee Shop,Discount Store,Fast Food Restaurant,Grocery Store,Gym,Beer Store,Liquor Store
93,Etobicoke,4.0,Pizza Place,Sandwich Place,Pharmacy,Coffee Shop,Discount Store,Fast Food Restaurant,Grocery Store,Gym,Beer Store,Liquor Store
94,Etobicoke,4.0,Pizza Place,Sandwich Place,Pharmacy,Coffee Shop,Discount Store,Fast Food Restaurant,Grocery Store,Gym,Beer Store,Liquor Store
98,Etobicoke,4.0,Pizza Place,Sandwich Place,Pharmacy,Coffee Shop,Discount Store,Fast Food Restaurant,Grocery Store,Gym,Beer Store,Liquor Store
101,Etobicoke,4.0,Pizza Place,Sandwich Place,Pharmacy,Coffee Shop,Discount Store,Fast Food Restaurant,Grocery Store,Gym,Beer Store,Liquor Store
