In [1]:
# Beginning of Part 1 --> Webscraping for the Toronto information

In [2]:
# importing the required libraries

from bs4 import BeautifulSoup 
import requests
import pandas as pd

In [3]:
# souce is the website url and .text() is used for parsing the html code of the webpage
# reading the table using the 'table' attribute of the HTML
# reaidng each row using 'tr' attribute of the HTML

source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text 
soup = BeautifulSoup(source,'lxml')
table = soup.find('table').tbody
rows = table.find_all('tr')

In [4]:
# extracting the table headers from the html 
# table headers are in the first row of the 'rows' generated above
# column headers are extracted from that
# creating the data frame from the table --> tdf stands for toronto data frame
# tdf for now only contains the column headers

column_headers = [v.text.replace('\n','') for v in rows[0].find_all('th')] 

tdf = pd.DataFrame(columns=column_headers)

# at this point the tdf displays only the column headers
tdf

Unnamed: 0,Postal code,Borough,Neighborhood


In [195]:
# creating the new dataframe from the table, ignoring the rows where borough = not assigned

for i in range(1,len(rows)):
    row = [data.text.replace('\n','') for data in rows[i].find_all('td')]
    
    # assigning the empty neighborhoods to the names of the Boroughs
    
    if row[2]=='' or row[2]=='Not assigned':
        row[2]=row[1]                       
        
    # ignoring all the rows with Boroughs = Not Assigned and appending the remaining ones to tdf
    
    if row[1]!='Not assigned': 
        rowdf = pd.DataFrame([row],columns=column_headers)       
        tdf = tdf.append(rowdf, ignore_index=True) 

# dropping the rows with duplicate postal codes
tdf.drop_duplicates(subset = 'Postal code', inplace=True, ignore_index = True)
tdf

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,The Kingsway / Montgomery Road / Old Mill North
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,Business reply mail Processing CentrE
101,M8Y,Etobicoke,Old Mill South / King's Mill Park / Sunnylea /...


In [6]:
# dataframe and the shape


tdf.shape



(103, 3)

In [7]:
# End of Part 1 --> Webscraping for the Toronto information

In [8]:
# Beginning of Part 2 --> Concatenating the webscraped data frame and lat lon data frame to make a single data frame

In [9]:
# Reading the latitude and Longitude data into a data frame from the provided csv file

csv_path='C:\\Users\\karth\\Desktop\\Python\\Projects\\Capstone\\Toronto-Data-Project\\GC.csv'
df_latlng= pd.read_csv(csv_path)

In [10]:
# Changing the name of the postal code column for uniformity in the two data frames

df_latlng.rename(columns={'Postal Code':'Postal code'}, inplace=True)

In [11]:
# Concatenating the two data frames to make one
# tdf from the part 1 containing the boroughs and neighborhood data and df_latlng from part 2 containing the lat lng values

# first the indices in both of them are changed to the postal code as concatenation needs similar indices

df1 = df_latlng.set_index('Postal code')
df2 = tdf.set_index('Postal code')


# concatenation along y axis is done to add columns

tdf_new = pd.concat([df2,df1],axis=1)

# for the new data frame the index is reset and columns are renamed

tdf_new.reset_index(inplace=True)
tdf_new.rename(columns={'index':'Postal code'}, inplace=True)

tdf_new

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.654260,-79.360636
3,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,The Kingsway / Montgomery Road / Old Mill North,43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,Business reply mail Processing CentrE,43.662744,-79.321558
101,M8Y,Etobicoke,Old Mill South / King's Mill Park / Sunnylea /...,43.636258,-79.498509


In [12]:
# showing the first few rows from the dataframe example in the coursera assignment for verification purpose
codes = ['M5G','M2H','M4B','M1J','M4G']

df = pd.DataFrame(columns=tdf_new.columns)

for code in codes:
    df=df.append(tdf_new[tdf_new['Postal code']==code],ignore_index=True)

df

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
1,M2H,North York,Hillcrest Village,43.803762,-79.363452
2,M4B,East York,Parkview Hill / Woodbine Gardens,43.706397,-79.309937
3,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
4,M4G,East York,Leaside,43.70906,-79.363452


In [13]:
# End of Part 2 --> Concatenating the webscraped data frame and lat lon data frame to make a single data frame

In [14]:
# Begining of Part 3 --> Exploring and Clustering the neighborhoods in Toronto

In [15]:
# importing the plotting, map and other required libraries 

import numpy as np
import json
from pandas import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
from geopy.geocoders import Nominatim
print('All the packages imported')

All the packages imported


In [16]:
# Dropping the postal code column from the data frame tdf_new and assign to tdf1

tdf1 = tdf_new.drop('Postal code',axis=1)
tdf1

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,North York,Parkwoods,43.753259,-79.329656
1,North York,Victoria Village,43.725882,-79.315572
2,Downtown Toronto,Regent Park / Harbourfront,43.654260,-79.360636
3,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
4,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494
...,...,...,...,...
98,Etobicoke,The Kingsway / Montgomery Road / Old Mill North,43.653654,-79.506944
99,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,East Toronto,Business reply mail Processing CentrE,43.662744,-79.321558
101,Etobicoke,Old Mill South / King's Mill Park / Sunnylea /...,43.636258,-79.498509


In [78]:
# tdf1 is the data frame containing the Toronto neighborhoods
# as suggested in the coursera,making a data frame with only those Boroughs that contain 'Toronto'
#tdf1 will be updated with only those rows whose borough name contains 'Toronto'

boroughs = tdf1['Borough']
size = len(boroughs)

for i in range(size):
    if 'Toronto' not in boroughs[i]:
        tdf1.drop(index=i,inplace=True)
    
tdf1.reset_index(drop=True,inplace=True)
tdf1



Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
6,Downtown Toronto,Central Bay Street,43.657952,-79.387383


In [18]:
# tdf1 is the final data frame on which exploring will be done

# obtaining the coordinates of Tornoto usiong geopy

address = 'Toronto,Ontario'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [19]:
# creating the map of Toronto showing neighborhoods belonging to boroughs containing 'Toronto'

tmap = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(tdf1['Latitude'], tdf1['Longitude'], tdf1['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(tmap)  
    
tmap

In [20]:
# using foursquare api to explore the neighborhoods

CLIENT_ID = '25WDXR0CLBDJULQMQOO155NHAKNTZ0QFVZJPPTQPGURVZZQO' # your Foursquare ID
CLIENT_SECRET = 'MUHJSH3LKSVWCNNBAYFZWCTPLEL4LUAWLB5524OU30SVLEVS' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('My credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

My credentails:
CLIENT_ID: 25WDXR0CLBDJULQMQOO155NHAKNTZ0QFVZJPPTQPGURVZZQO
CLIENT_SECRET:MUHJSH3LKSVWCNNBAYFZWCTPLEL4LUAWLB5524OU30SVLEVS


In [21]:
# getting the venues for all the neighborehoods in the borough containing 'Toronto'

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    LIMIT = 100
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
                   
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)
    
    
#  getting the venues for all the neighborehoods in the borough containing 'Toronto'

tvenues = getNearbyVenues(names=tdf1['Neighborhood'],
                                   latitudes=tdf1['Latitude'],
                                   longitudes=tdf1['Longitude']
                                  )

print(tvenues.shape)

tvenues.head()

(1651, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Regent Park / Harbourfront,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Regent Park / Harbourfront,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Regent Park / Harbourfront,43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,Regent Park / Harbourfront,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,Regent Park / Harbourfront,43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot


In [39]:
# analyzing the neighborhoods

# identifying the unique categories

tvenues.groupby('Neighborhood').count()
print('There are {} unique categories.'.format(len(tvenues['Venue Category'].unique())))

There are 233 unique categories.


In [44]:
# venue category is a categorical variable
# we can quantize it by using get_dummies method that converts it into an indicator
# in this step we can see how many venues of each category are present near each neighborhood 
# the dataframe generated is expected to have 234 columns after adding the neighborhood column
# but one of the neighborhoods name is neighborhood, so to data frame the column name will be neighborhoods

t_hot = pd.get_dummies(tvenues[['Venue Category']], prefix="", prefix_sep="")

t_hot['Neighborhoods'] = tvenues['Neighborhood']

fixed_columns = [t_hot.columns[-1]] + list(t_hot.columns[:-1])
t_hot = t_hot[fixed_columns]
t_hot


Unnamed: 0,Neighborhoods,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Regent Park / Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Regent Park / Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Regent Park / Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Regent Park / Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Regent Park / Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1646,Business reply mail Processing CentrE,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1647,Business reply mail Processing CentrE,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1648,Business reply mail Processing CentrE,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1649,Business reply mail Processing CentrE,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [80]:
# grouping the data frame by neighborhood and taking the means of indicator values of each venue
# higher the mean, higher is the number of venues of that category

tgroup = t_hot.groupby('Neighborhoods').mean().reset_index()

tgroup

Unnamed: 0,Neighborhoods,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Brockton / Parkdale Village / Exhibition Place,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Business reply mail Processing CentrE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556
3,CN Tower / King and Spadina / Railway Lands / ...,0.0,0.0625,0.0625,0.0625,0.125,0.125,0.0625,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013699,0.0,...,0.0,0.0,0.013699,0.0,0.0,0.013699,0.0,0.0,0.0,0.013699
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.025974
7,Commerce Court / Victoria Hotel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,...,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [248]:
# now will display the top 'n' top venues for each of the neighborhoods
# tsorted dataframe will contain the dataframe

ntop = 6


# assigning column headers

tsorted_columns = ['Neighborhoods']
                   
for ind in np.arange(ntop):
    try:
        tsorted_columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        tsorted_columns.append('{}th Most Common Venue'.format(ind+1))


#creating an empty dataframe

tsorted = pd.DataFrame(columns=tsorted_columns)

# sorting and creating the data frame with top 'n' venues

for hood in tgroup['Neighborhoods']:
    row = tgroup[tgroup['Neighborhoods']==hood].T.reset_index()
    row.columns =['Venue','Freq']
    row = row.iloc[1:]
    row['Freq'] = row['Freq'].astype(float).round(2)
    row =row.sort_values('Freq',ascending=False).reset_index(drop=True).head(ntop)
    
 
    
    rowdf = pd.DataFrame(row['Venue']).T.reset_index(drop=True) 
    rowdf.columns= tsorted_columns[1:len(tsorted_columns)+1]
    rowdf['Neighborhoods'] = hood

    fixed_columns = [rowdf.columns[-1]] + list(rowdf.columns[:-1])
    rowdf = rowdf[fixed_columns]
   
    tsorted = tsorted.append(rowdf, ignore_index=True) 
    
    
tsorted.head()

Unnamed: 0,Neighborhoods,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue
0,Berczy Park,Coffee Shop,Beer Bar,Italian Restaurant,Farmers Market,Café,Cheese Shop
1,Brockton / Parkdale Village / Exhibition Place,Café,Breakfast Spot,Coffee Shop,Gym,Italian Restaurant,Burrito Place
2,Business reply mail Processing CentrE,Light Rail Station,Yoga Studio,Auto Workshop,Smoke Shop,Spa,Burrito Place
3,CN Tower / King and Spadina / Railway Lands / ...,Airport Lounge,Airport Service,Coffee Shop,Plane,Bar,Rental Car Location
4,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Japanese Restaurant,Middle Eastern Restaurant


In [270]:
# clustering the neighborhoods

# set number of clusters
kclusters = 12

tcluster = tgroup.drop('Neighborhoods', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(tcluster)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([10, 10, 10,  7,  5,  9, 10,  5,  0, 11])

In [265]:
# creating a dataframe with all the information

tsorted.insert(0,'Cluster Labels', kmeans.labels_)
tmerged = tdf1
tmerged = tmerged.join(tsorted.set_index('Neighborhoods'), on='Neighborhood')
tmerged

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue
0,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636,5,Coffee Shop,Pub,Park,Bakery,Breakfast Spot,Café
1,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494,5,Coffee Shop,Diner,Gym,Distribution Center,Burger Joint,Burrito Place
2,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,10,Clothing Store,Coffee Shop,Café,Middle Eastern Restaurant,Italian Restaurant,Bubble Tea Shop
3,Downtown Toronto,St. James Town,43.651494,-79.375418,10,Café,Coffee Shop,Cocktail Bar,American Restaurant,Restaurant,Beer Bar
4,East Toronto,The Beaches,43.676357,-79.293031,2,Neighborhood,Health Food Store,Pub,Trail,Monument / Landmark,Museum
5,Downtown Toronto,Berczy Park,43.644771,-79.373306,10,Coffee Shop,Beer Bar,Italian Restaurant,Farmers Market,Café,Cheese Shop
6,Downtown Toronto,Central Bay Street,43.657952,-79.387383,5,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Japanese Restaurant,Middle Eastern Restaurant
7,Downtown Toronto,Christie,43.669542,-79.422564,9,Grocery Store,Café,Park,Gas Station,Restaurant,Candy Store
8,Downtown Toronto,Richmond / Adelaide / King,43.650571,-79.384568,5,Coffee Shop,Café,Gym,Restaurant,Deli / Bodega,Hotel
9,West Toronto,Dufferin / Dovercourt Village,43.669005,-79.442259,10,Pharmacy,Bakery,Bank,Recording Studio,Brazilian Restaurant,Middle Eastern Restaurant


In [266]:
# visulaization of the clusters

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
#ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(tmerged['Latitude'], tmerged['Longitude'], tmerged['Neighborhood'], tmerged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [276]:
# examining the clusters

tmerged.loc[tmerged['Cluster Labels'] == 5, tmerged.columns[[1] + list(range(5, tmerged.shape[1]))]]

Unnamed: 0,Neighborhood,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue
0,Regent Park / Harbourfront,Coffee Shop,Pub,Park,Bakery,Breakfast Spot,Café
1,Queen's Park / Ontario Provincial Government,Coffee Shop,Diner,Gym,Distribution Center,Burger Joint,Burrito Place
6,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Japanese Restaurant,Middle Eastern Restaurant
8,Richmond / Adelaide / King,Coffee Shop,Café,Gym,Restaurant,Deli / Bodega,Hotel
10,Harbourfront East / Union Station / Toronto Is...,Coffee Shop,Aquarium,Hotel,Café,Italian Restaurant,Restaurant
12,The Danforth West / Riverdale,Greek Restaurant,Coffee Shop,Italian Restaurant,Furniture / Home Store,Ice Cream Shop,Restaurant
13,Toronto Dominion Centre / Design Exchange,Coffee Shop,Hotel,Café,Restaurant,Seafood Restaurant,Italian Restaurant
16,Commerce Court / Victoria Hotel,Coffee Shop,Café,Restaurant,Hotel,Gym,American Restaurant
34,Stn A PO Boxes,Coffee Shop,Italian Restaurant,Restaurant,Seafood Restaurant,Café,Hotel
36,First Canadian Place / Underground city,Coffee Shop,Café,Restaurant,Gym,Asian Restaurant,Steakhouse


In [277]:
# examining the clusters

tmerged.loc[tmerged['Cluster Labels'] == 0, tmerged.columns[[1] + list(range(5, tmerged.shape[1]))]]

Unnamed: 0,Neighborhood,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue
15,India Bazaar / The Beaches West,Pizza Place,Park,Sandwich Place,Gym,Fast Food Restaurant,Brewery
24,The Annex / North Midtown / Yorkville,Café,Sandwich Place,Coffee Shop,Liquor Store,BBQ Joint,Pub
25,Parkdale / Roncesvalles,Gift Shop,Bookstore,Movie Theater,Eastern European Restaurant,Bar,Bank
26,Davisville,Dessert Shop,Sandwich Place,Gym,Café,Sushi Restaurant,Italian Restaurant
28,Runnymede / Swansea,Café,Coffee Shop,Sushi Restaurant,Pizza Place,Pub,Italian Restaurant
31,Summerhill West / Rathnelly / South Hill / For...,Coffee Shop,Pub,Restaurant,Fried Chicken Joint,Pizza Place,Sushi Restaurant
35,St. James Town / Cabbagetown,Coffee Shop,Restaurant,Park,Pub,Bakery,Pizza Place


In [278]:
# examining the clusters

tmerged.loc[tmerged['Cluster Labels'] == 11, tmerged.columns[[1] + list(range(5, tmerged.shape[1]))]]

Unnamed: 0,Neighborhood,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue
20,Davisville North,Gym,Food & Drink Shop,Sandwich Place,Department Store,Hotel,Park


In [None]:
# End of Part 3 --> Exploring and Clustering the neighborhoods in Toronto