# Segmenting and Clustering Neighborhoods in Toronto
Applied Data Science Capstone Course - IBM Coursera

## Part 1 - Scraping data into a dataframe

#### (Part 1) Import all required libraries for the assignment

In [2]:
# PART 1
# IMPORT REQUIRED LIBRARIES

import requests
from bs4 import BeautifulSoup

import pandas as pd
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

!pip install geopy
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

!pip install folium
import folium # map rendering library

print("Libraries imported")

Collecting folium
  Downloading folium-0.12.0-py2.py3-none-any.whl (94 kB)
[K     |████████████████████████████████| 94 kB 6.1 MB/s  eta 0:00:01
[?25hCollecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.12.0
Libraries imported


#### (Part 1) Scrape Toronto postal codes from Wikipedia into a dataframe and process it

In [3]:
# PART 1
# READ WEB PAGE INTO A VARIABLE AND CREATE DATAFRAME

# -----  READ WEB PAGE CONTENTS INTO VARIABLE
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
req = requests.get(url)
if req.status_code != 200:
    # STOP HERE WITH A CLEAR MESSAGE INSTEAD OF HANDING None TO THE HTML PARSER BELOW
    raise RuntimeError("Could not retrieve web page (HTTP status {})".format(req.status_code))
htmldoc = req.text

# -----  PARSE WEB PAGE CONTENTS TO FIND POSTAL CODE TABLE
soup = BeautifulSoup(htmldoc, 'html.parser')
pctable = soup.find('table', attrs={'class':'wikitable sortable'})  # GET POSTAL CODES TABLE
if pctable is None:
    raise ValueError("Could not find the postal code table in the web page")

# -----  COLLECT ONE LIST OF CELL TEXTS PER TABLE ROW
pclist = []
for tr in pctable.find_all('tr'):
    cells = tr.find_all('td')
    if cells:                   # HEADER ROWS HAVE NO td CELLS AND ARE SKIPPED
        pclist.append([cell.text for cell in cells])

# -----  CREATE DATAFRAME FROM LIST
pcdf = pd.DataFrame(pclist, columns = ['postalcode', 'borough', 'neighborhood'])
pcdf = pcdf.replace('\n','', regex=True)   # REPLACE NEW LINE (\n) CHARACTERS IN DATAFRAME

# -----  PROCESS THE DATAFRAME

#       1. KEEP ONLY ROWS WHOSE borough IS ASSIGNED (ORIGINAL INDEX LABELS ARE PRESERVED)
#          .copy() MAKES pcdf AN INDEPENDENT FRAME SO THE ASSIGNMENT BELOW WRITES TO IT DIRECTLY
pcdf = pcdf.loc[pcdf['borough'] != 'Not assigned'].copy()

#       2. REPLACE 'Not assigned' NEIGHBORHOOD NAMES WITH BOROUGH NAMES
#          SINGLE .loc ASSIGNMENT INSTEAD OF CHAINED INDEXING (pcdf[col][row] = ...)
unnamed = pcdf['neighborhood'] == 'Not assigned'
pcdf.loc[unnamed, 'neighborhood'] = pcdf.loc[unnamed, 'borough']

#### (Part 1) Print number of rows in the dataframe

In [4]:
# PART 1
# REPORT HOW MANY ROWS THE DATAFRAME HOLDS (len OF THE FRAME EQUALS THE FIRST ENTRY OF ITS shape TUPLE)

print("Number of rows in dataframe:", len(pcdf))

Number of rows in dataframe: 103


### --- End of Part 1 ---

## Part 2 - Adding latitude and longitude data for Toronto postal codes

#### (Part 2) Get geospatial data for Toronto postal codes

In [5]:
# PART 2
# GET GEOSPATIAL DATA (GEOCODER LIBRARY ATTEMPT LEFT DISABLED BELOW)

# !pip install geocoder
# import geocoder as gc  # NOT USED: GEOCODER SHOWED A "REQUEST DENIED" MESSAGE FROM GOOGLE

# USING Geospatial_Coordinates.csv INSTEAD
# -q SILENCES wget OUTPUT; -O SAVES THE DOWNLOAD AS geospatial_data.csv IN THE WORKING DIRECTORY
!wget -q -O 'geospatial_data.csv' http://cocl.us/Geospatial_data/Geospatial_Coordinates.csv

#### (Part 2) Create new dataframe that includes geospatial coordinates for postal codes

In [6]:
# PART 2
# ATTACH LATITUDE AND LONGITUDE TO EVERY POSTAL CODE

# -----  LOAD THE COORDINATES CSV AND RENAME ITS COLUMNS TO MATCH pcdf
gsdf = pd.read_csv('geospatial_data.csv').rename(
    columns={'Postal Code': 'postalcode', 'Latitude': 'latitude', 'Longitude': 'longitude'}
)

# -----  LEFT MERGE ON postalcode: EVERY ROW OF pcdf IS KEPT AND GAINS latitude / longitude
pcdf2 = pcdf.merge(gsdf, how='left', on='postalcode')

# ----- OPTIONAL SPOT CHECK OF A COUPLE OF POSTAL CODES AGAINST THE ASSIGNMENT SCREENSHOT
# pcdf2[(pcdf2['postalcode'].str.contains("M5G")) | (pcdf2['postalcode'].str.contains("M2H"))]

pcdf2.head(12)

Unnamed: 0,postalcode,borough,neighborhood,latitude,longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


### --- End of Part 2 ---

## Part 3 - Explore and cluster neighborhoods in Toronto

#### (Part 3) Select boroughs with 'Toronto' in the name for clustering

In [7]:
# PART 3
# KEEP ONLY ROWS WHOSE BOROUGH NAME CONTAINS "TORONTO" (CASE-INSENSITIVE), RENUMBERED FROM 0

tordf = (
    pcdf2
    .loc[pcdf2['borough'].str.contains('Toronto', case=False)]
    .reset_index(drop=True)
)
tordf.head()

Unnamed: 0,postalcode,borough,neighborhood,latitude,longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


#### (Part 3) Visualize neighborhoods on a map of Toronto

In [8]:
# PART 3
# GET LAT AND LONG FOR TORONTO, ON
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="torcan")
location = geolocator.geocode(address)
if location is None:
    # geocode() YIELDS None WHEN THE ADDRESS CANNOT BE RESOLVED; FAIL WITH A CLEAR MESSAGE
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geographic coordinates of Toronto are {}, {}.'.format(latitude, longitude))

# PLOT THE NEIGHBORHOODS ON A MAP OF TORONTO
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one circle marker per neighborhood, with a "neighborhood, borough" popup
for lat, lng, borough, neighborhood in zip(tordf['latitude'], tordf['longitude'], tordf['borough'], tordf['neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html=True so names containing characters such as ' or & are escaped in the popup
    label = folium.Popup(label, parse_html=True)
    # parse_html is a Popup option, not a marker option, so it is no longer passed to CircleMarker
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

map_toronto

The geographic coordinates of Toronto are 43.6534817, -79.3839347.


In [9]:
# The code was removed by Watson Studio for sharing.

#### (Part 3)  Get nearby venues for neighborhoods

In [10]:
# PART 3
# DEFINE FUNCTION TO GET NEARBY VENUES FOR NEIGHBORHOODS

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint around each location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        One entry per neighborhood: its label and the coordinates to search around.
    radius : int, default 500
        Sent to the API as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'neighborhood',
        'neighborhood latitude', 'neighborhood longitude', 'venue',
        'venue latitude', 'venue longitude' and 'venue category'.
        The frame is empty (but still has these columns) when nothing is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads the module-level names CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT;
    they are not defined in the visible cells (presumably in the cell removed
    for sharing -- confirm they are set before calling).
    Prints each neighborhood name as a progress indicator.
    """
    columns = ['neighborhood',
               'neighborhood latitude',
               'neighborhood longitude',
               'venue',
               'venue latitude',
               'venue longitude',
               'venue category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and escape the query string instead of formatting it by hand
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # make the GET request; the timeout keeps a stalled connection from hanging the notebook
        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()
        groups = response.json()['response']['groups']
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without any category gets None instead of raising IndexError
                categories[0]['name'] if categories else None))

    # passing columns= keeps the schema even when no venue was found at all
    return pd.DataFrame(rows, columns=columns)

# FETCH THE NEARBY VENUES FOR EVERY NEIGHBORHOOD IN tordf (DEFAULT SEARCH RADIUS)
torvenues = getNearbyVenues(
    names=tordf['neighborhood'],
    latitudes=tordf['latitude'],
    longitudes=tordf['longitude'],
)

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West, Forest Hill Road Park
High Park, The Junction South
North Toronto West,  Lawrence Park
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport


#### (Part 3)  Get an overview of venue data

In [11]:
# PART 3
# SHOW THE SIZE OF THE VENUE TABLE, THEN PREVIEW ITS FIRST FIVE ROWS
print("Venue data rows, cols:", torvenues.shape)
torvenues.head(n=5)

Venue data rows, cols: (1615, 7)


Unnamed: 0,neighborhood,neighborhood latitude,neighborhood longitude,venue,venue latitude,venue longitude,venue category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
4,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [12]:
# NUMBER OF NON-NULL VENUE NAMES RETURNED FOR EACH NEIGHBORHOOD
print("Venue Count by neighborhood")
(
    torvenues
    .groupby('neighborhood')['venue']
    .count()
)

Venue Count by neighborhood


neighborhood
Berczy Park                                                                                                    58
Brockton, Parkdale Village, Exhibition Place                                                                   22
Business reply mail Processing Centre, South Central Letter Processing Plant Toronto                           16
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport     15
Central Bay Street                                                                                             60
Christie                                                                                                       15
Church and Wellesley                                                                                           79
Commerce Court, Victoria Hotel                                                                                100
Davisville                                                                 

In [13]:
# COUNT THE DISTINCT VALUES IN THE 'venue category' COLUMN
uniqcat = len(pd.unique(torvenues['venue category']))
print('There are {} unique venue categories.'.format(uniqcat))

There are 235 unique venue categories.


#### (Part 3)  Analyze neighborhoods

In [17]:
# ONE-HOT ENCODE THE VENUE CATEGORIES
# one indicator column per category; empty prefix so the column name is the bare category name
tor1hot = pd.get_dummies(torvenues[['venue category']], prefix="", prefix_sep="")

# append the neighborhood name of each venue as a new last column
tor1hot['neighborhood'] = torvenues['neighborhood']

# rotate that last column to the front, keeping the other columns in their order
tor1hot = tor1hot[[tor1hot.columns[-1], *tor1hot.columns[:-1]]]

tor1hot.head()

Unnamed: 0,neighborhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
# (rows, columns) of the one-hot encoded venue table
print(tor1hot.shape)

(1615, 236)


In [19]:
# GROUP ROWS BY NEIGHBORHOOD AND TAKE THE MEAN OF EACH INDICATOR COLUMN
# (THE MEAN OF A 0/1 COLUMN IS THE FREQUENCY OF OCCURRENCE OF THAT CATEGORY)
torgrouped = (
    tor1hot
    .groupby('neighborhood')
    .mean()
    .reset_index()
)
torgrouped.head()

Unnamed: 0,neighborhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.066667,0.066667,0.066667,0.133333,0.066667,0.133333,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.016667,0.0,0.0,0.016667,0.016667


In [20]:
# (rows, columns) of the per-neighborhood frequency table
torgrouped.shape

(39, 236)

In [32]:
# PART 3 - RUN KMEANS CLUSTERING
#
def return_most_common_venues(row, num_top_venues):
    """Return the index labels of the largest values in `row`, ignoring its first entry.

    The first element of `row` is skipped, the remaining values are sorted in
    descending order, and the labels of at most `num_top_venues` leading
    entries are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# build the column names: '1st', '2nd', '3rd', then '<n>th Most Common Venue'
columns = ['neighborhood']
for ind in np.arange(num_top_venues):
    # explicit bounds check instead of a bare except used as control flow
    if ind < len(indicators):
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    else:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe with one row per neighborhood of torgrouped
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['neighborhood'] = torgrouped['neighborhood']

# fill every row with the names of its most frequent venue categories
for ind in np.arange(torgrouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(torgrouped.iloc[ind, :], num_top_venues)

# set number of clusters
kclusters = 5

# cluster on the frequency columns only; axis is given by keyword because
# newer pandas no longer accepts it positionally
torgrouped_clustering = torgrouped.drop(columns='neighborhood')

# run k-means clustering (fixed random_state for repeatable labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(torgrouped_clustering)

# add clustering labels as the first column
neighborhoods_venues_sorted.insert(0, 'cluster labels', kmeans.labels_)

# left-join the labels and top venues onto tordf to get latitude/longitude per neighborhood;
# tordf rows without a match in neighborhoods_venues_sorted get NaN in the joined columns
tormerged = tordf.join(neighborhoods_venues_sorted.set_index('neighborhood'), on='neighborhood')

tormerged.head() # check the last columns!

Unnamed: 0,postalcode,borough,neighborhood,latitude,longitude,cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,2,Coffee Shop,Café,Bakery,Pub,Park,Breakfast Spot,Theater,Yoga Studio,Cosmetics Shop,Brewery
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,2,Coffee Shop,Sushi Restaurant,College Cafeteria,Yoga Studio,Beer Bar,Sandwich Place,Burrito Place,Restaurant,Café,Portuguese Restaurant
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,2,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Japanese Restaurant,Cosmetics Shop,Café,Hotel,Bubble Tea Shop,Diner,Fast Food Restaurant
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,2,Coffee Shop,Café,Cocktail Bar,Gastropub,American Restaurant,Hotel,Cosmetics Shop,Moroccan Restaurant,Department Store,Lingerie Store
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,2,Health Food Store,Neighborhood,Trail,Pub,Yoga Studio,Doner Restaurant,Discount Store,Distribution Center,Dog Run,Eastern European Restaurant


In [33]:
# PLOT NEIGHBORHOODS ON MAP WITH COLOR CODED CLUSTER LABELS
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: kclusters evenly spaced colors from the rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(tormerged['latitude'], tormerged['longitude'], tormerged['neighborhood'], tormerged['cluster labels']):
    # rows that found no match in the join carry NaN instead of a label; they cannot be colored
    if pd.isna(cluster):
        continue
    # the label column is float when any NaN is present, so convert before indexing the color list
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster-1 is kept so colors match earlier renders; label 0 wraps around to the last color
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [70]:
# EXAMINE CHARACTERISTICS OF EACH CLUSTER SO WE CAN NAME THEM
# CLUSTER 1 (ROWS WITH CLUSTER LABEL 0): COLUMN 1 PLUS EVERY COLUMN FROM POSITION 5 ONWARD
tormerged.loc[
    tormerged['cluster labels'].eq(0),
    tormerged.columns[[1, *range(5, tormerged.shape[1])]],
]

Unnamed: 0,borough,cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,East Toronto,0,Neighborhood,Health Food Store,Pub,Trail,Yoga Studio,Dog Run,Diner,Discount Store,Distribution Center,Donut Shop


In [36]:
# CLUSTER 2 (ROWS WITH CLUSTER LABEL 1): COLUMN 1 PLUS EVERY COLUMN FROM POSITION 5 ONWARD
tormerged.loc[
    tormerged['cluster labels'].eq(1),
    tormerged.columns[[1, *range(5, tormerged.shape[1])]],
]

Unnamed: 0,borough,cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,Central Toronto,1,Park,Trail,Jewelry Store,Sushi Restaurant,Dessert Shop,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant
29,Central Toronto,1,Park,Playground,Trail,Tennis Court,Dog Run,Dessert Shop,Diner,Discount Store,Distribution Center,Donut Shop
33,Downtown Toronto,1,Park,Playground,Trail,Department Store,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant,Donut Shop


In [37]:
# CLUSTER 3 (ROWS WITH CLUSTER LABEL 2): COLUMN 1 PLUS EVERY COLUMN FROM POSITION 5 ONWARD
tormerged.loc[
    tormerged['cluster labels'].eq(2),
    tormerged.columns[[1, *range(5, tormerged.shape[1])]],
]

Unnamed: 0,borough,cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,2,Coffee Shop,Café,Bakery,Pub,Park,Breakfast Spot,Theater,Yoga Studio,Cosmetics Shop,Brewery
1,Downtown Toronto,2,Coffee Shop,Sushi Restaurant,College Cafeteria,Yoga Studio,Beer Bar,Sandwich Place,Burrito Place,Restaurant,Café,Portuguese Restaurant
2,Downtown Toronto,2,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Japanese Restaurant,Cosmetics Shop,Café,Hotel,Bubble Tea Shop,Diner,Fast Food Restaurant
3,Downtown Toronto,2,Coffee Shop,Café,Cocktail Bar,Gastropub,American Restaurant,Hotel,Cosmetics Shop,Moroccan Restaurant,Department Store,Lingerie Store
4,East Toronto,2,Health Food Store,Neighborhood,Trail,Pub,Yoga Studio,Doner Restaurant,Discount Store,Distribution Center,Dog Run,Eastern European Restaurant
5,Downtown Toronto,2,Coffee Shop,Cocktail Bar,Farmers Market,Seafood Restaurant,Bakery,Restaurant,Cheese Shop,Beer Bar,Lounge,Japanese Restaurant
6,Downtown Toronto,2,Coffee Shop,Café,Italian Restaurant,Sandwich Place,Bubble Tea Shop,Thai Restaurant,Salad Place,Burger Joint,Korean Restaurant,Indian Restaurant
8,Downtown Toronto,2,Coffee Shop,Café,Restaurant,Clothing Store,Gym,Hotel,Thai Restaurant,Deli / Bodega,American Restaurant,Salad Place
10,Downtown Toronto,2,Coffee Shop,Aquarium,Café,Hotel,Scenic Lookout,Restaurant,Brewery,Italian Restaurant,Fried Chicken Joint,History Museum
12,East Toronto,2,Greek Restaurant,Coffee Shop,Italian Restaurant,Bookstore,Furniture / Home Store,Ice Cream Shop,Yoga Studio,Bubble Tea Shop,Spa,Japanese Restaurant


In [38]:
# CLUSTER 4 (ROWS WITH CLUSTER LABEL 3): COLUMN 1 PLUS EVERY COLUMN FROM POSITION 5 ONWARD
tormerged.loc[
    tormerged['cluster labels'].eq(3),
    tormerged.columns[[1, *range(5, tormerged.shape[1])]],
]

Unnamed: 0,borough,cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Central Toronto,3,Park,Bus Line,Swim School,Yoga Studio,Diner,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant


In [39]:
# CLUSTER 5 (ROWS WITH CLUSTER LABEL 4): COLUMN 1 PLUS EVERY COLUMN FROM POSITION 5 ONWARD
tormerged.loc[
    tormerged['cluster labels'].eq(4),
    tormerged.columns[[1, *range(5, tormerged.shape[1])]],
]

Unnamed: 0,borough,cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,Central Toronto,4,Garden,Home Service,Dessert Shop,Falafel Restaurant,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant,Donut Shop


### --- End of Part 3 ---