# Declare Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import math

%matplotlib inline

from sklearn.cluster import KMeans
from sklearn.datasets.samples_generator import make_blobs
from urllib.request import urlopen
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim

print('-- Libraries retrieved --')

ImportError: No module named 'seaborn'

Import Data

In [2]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
html = urlopen(url)
soup = BeautifulSoup(html, 'lxml')

Clean up the data

In [3]:
# get the table in question.
table = soup.find('table', attrs={'class':'wikitable sortable'})

# get the table headers
table_headers= table.findAll('th')
for i, head in enumerate(table_headers):
    table_headers[i] = str(table_headers[i]).replace("<th>", "").replace("</th>", "").replace("\n","")
    
# get the table rows
table_rows=table.findAll('tr')
table_rows=table_rows[1:len(table_rows)]

# get the row columns
for i, row in enumerate(table_rows):
    row_tds=row.find_all('td')
    
    for j, elem in enumerate(row_tds):
        row_tds[j]=str(row_tds[j]).replace("<td>","").replace("</td>", "").replace("\n", "")
        if  '<a' in row_tds[j]:
            row_tds[j]=row_tds[j][row_tds[j].find(">")+1:row_tds[j].find("</")]
  
    table_rows[i]=row_tds

In [4]:
# create dataframe 
df=pd.DataFrame(table_rows)
df.columns= table_headers
# filter out not assigned.
df = df.drop(df[(df.Borough == "Not assigned")].index)
df.Neighbourhood.replace("Not assigned", df.Borough, inplace=True)
df.Neighbourhood.fillna(df.Borough, inplace=True)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [5]:
# combine all the neigborhoods in the same postcode.
df_merge = pd.DataFrame([df.Postcode,df.Borough]).transpose()
df_merge.drop_duplicates(keep='last',inplace=True) 
df_merge.set_index("Postcode")
df_merge = df_merge.reset_index(drop=True)
df_merge['Neighborhood']=pd.Series(list(set(df['Neighbourhood'].loc[df['Postcode'] == x['Postcode']])) for i, x in df_merge.iterrows())
df_merge['Neighborhood']=df_merge['Neighborhood'].apply(lambda x: ', '.join(x))
df_merge.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Queen's Park


In [10]:
print('Rows {}, columns {}'.format(df_merge.shape[0], df_merge.shape[1]))

Rows 103, columns 3


Add GeoSpatial data added.


In [6]:
df_geo = pd.read_csv("http://cocl.us/Geospatial_data")
df_geo.rename(columns={'Postal Code':'Postcode'}, inplace=True)
df_geo.set_index("Postcode")
tor_data=pd.merge(df_merge, df_geo)
tor_data.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


Generate a map of the city

In [7]:
# get city coordinates
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

# create a map of Toronto
map_tor = folium.Map(location=[latitude, longitude], zoom_start=11)
# add the neighbourhood markers.
for lat, lng, borough, neighborhood in zip(tor_data['Latitude'], tor_data['Longitude'], tor_data['Borough'], tor_data['Neighborhood']):
    label = '{} - {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_tor)

map_tor

The geograpical coordinate of Toronto are 43.653963, -79.387207.


# Explore Neighborhoods of Toronto

In [8]:
# Setup forsquare
CLIENT_ID = 'VMYCK4YV4IASU4S1XH520XF3JD3W5SNHD1SLUOIDNDOMKMZA' # your Foursquare ID
CLIENT_SECRET = 'QWLPLWSTXIS0AZN5HL1VEIUQ24AYRLNEH5GQH2G2UTFTAOH1' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: VMYCK4YV4IASU4S1XH520XF3JD3W5SNHD1SLUOIDNDOMKMZA
CLIENT_SECRET:QWLPLWSTXIS0AZN5HL1VEIUQ24AYRLNEH5GQH2G2UTFTAOH1


In [9]:
# Function to retrieve neighborhood data.
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [10]:
tor_data_filter = tor_data.loc[tor_data['Borough'].str.contains('Toronto')]

toronto_venues = getNearbyVenues(names=tor_data_filter['Neighborhood'],
                                   latitudes=tor_data_filter['Latitude'],
                                   longitudes=tor_data_filter['Longitude']
                                  )

Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Adelaide, Richmond, King
Dovercourt Village, Dufferin
Harbourfront East, Toronto Islands, Union Station
Little Portugal, Trinity
Riverdale, The Danforth West
Design Exchange, Toronto Dominion Centre
Exhibition Place, Parkdale Village, Brockton
The Beaches West, India Bazaar
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill West, Forest Hill North
High Park, The Junction South
North Toronto West
Yorkville, The Annex, North Midtown
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Swansea, Runnymede
Summerhill East, Moore Park
Kensington Market, Grange Park, Chinatown
Summerhill West, Forest Hill SE, Deer Park, South Hill, Rathnelly
South Niagara, Bathurst Quay, King and Spadina, Railway Lands, CN Tower, Harbourfront West, Island airport
Rosedale
Stn A PO Boxes 25 The Esplanade
St. James Town, Cabbagetown
Und

Number of venues returned for each Neighborhood

In [13]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, Richmond, King",100,100,100,100,100,100
Berczy Park,57,57,57,57,57,57
Business Reply Mail Processing Centre 969 Eastern,17,17,17,17,17,17
Central Bay Street,88,88,88,88,88,88
Christie,16,16,16,16,16,16
Church and Wellesley,88,88,88,88,88,88
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,32,32,32,32,32,32
Davisville North,7,7,7,7,7,7
"Design Exchange, Toronto Dominion Centre",100,100,100,100,100,100


Number of unique categories

In [14]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 237 uniques categories.


Analyze Each Neighborhood

In [17]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [18]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,"Adelaide, Richmond, King",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0
2,Business Reply Mail Processing Centre 969 Eastern,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Central Bay Street,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.011364,0.0,0.0
4,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Church and Wellesley,0.011364,0.011364,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.011364,0.011364,0.0,0.011364,0.0
6,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0
7,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Design Exchange, Toronto Dominion Centre",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0


Print each neighborhood along with the top 5 most common venues

In [19]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, Richmond, King----
                 venue  freq
0          Coffee Shop  0.06
1                 Café  0.05
2  American Restaurant  0.04
3           Steakhouse  0.04
4      Thai Restaurant  0.04


----Berczy Park----
            venue  freq
0     Coffee Shop  0.09
1    Cocktail Bar  0.05
2      Restaurant  0.04
3  Farmers Market  0.04
4        Beer Bar  0.04


----Business Reply Mail Processing Centre 969 Eastern----
                venue  freq
0  Light Rail Station  0.12
1         Yoga Studio  0.06
2       Auto Workshop  0.06
3       Garden Center  0.06
4              Garden  0.06


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.15
1                Café  0.06
2  Italian Restaurant  0.05
3        Burger Joint  0.03
4      Ice Cream Shop  0.02


----Christie----
                venue  freq
0                Café  0.19
1       Grocery Store  0.19
2                Park  0.12
3   Convenience Store  0.06
4  Italian Restaurant  0.06


----Church an

Put it all in a dataframe

In [20]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [51]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, Richmond, King",Coffee Shop,Café,Steakhouse,Thai Restaurant,American Restaurant,Bakery,Bar,Hotel,Burger Joint,Salad Place
1,Berczy Park,Coffee Shop,Cocktail Bar,Café,Cheese Shop,Beer Bar,Farmers Market,Steakhouse,Seafood Restaurant,Restaurant,Bakery
2,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Yoga Studio,Auto Workshop,Garden Center,Garden,Fast Food Restaurant,Farmers Market,Comic Shop,Park,Recording Studio
3,Central Bay Street,Coffee Shop,Café,Italian Restaurant,Burger Joint,Indian Restaurant,Bubble Tea Shop,Ice Cream Shop,Japanese Restaurant,Restaurant,Bakery
4,Christie,Grocery Store,Café,Park,Athletics & Sports,Coffee Shop,Italian Restaurant,Diner,Baby Store,Nightclub,Convenience Store
5,Church and Wellesley,Japanese Restaurant,Coffee Shop,Gay Bar,Sushi Restaurant,Restaurant,Bubble Tea Shop,Gastropub,Mediterranean Restaurant,Nightclub,Men's Store
6,"Commerce Court, Victoria Hotel",Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Bakery,Seafood Restaurant,Italian Restaurant,Deli / Bodega,Gastropub
7,Davisville,Pizza Place,Sandwich Place,Dessert Shop,Café,Coffee Shop,Sushi Restaurant,Italian Restaurant,Greek Restaurant,Farmers Market,Deli / Bodega
8,Davisville North,Sandwich Place,Gym,Park,Clothing Store,Food & Drink Shop,Breakfast Spot,Hotel,Doner Restaurant,Diner,Discount Store
9,"Design Exchange, Toronto Dominion Centre",Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Seafood Restaurant,Gastropub,Deli / Bodega,Italian Restaurant,Pizza Place


#  Cluster Neighborhoods

In [24]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [55]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = tor_data_filter

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
toronto_merged

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,0,Coffee Shop,Bakery,Pub,Park,Café,Mexican Restaurant,Theater,Breakfast Spot,Bank,Italian Restaurant
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,0,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Middle Eastern Restaurant,Bubble Tea Shop,Pizza Place,Bookstore,Ramen Restaurant,Bakery
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Café,Hotel,Restaurant,Cosmetics Shop,Bakery,Breakfast Spot,Gastropub,Pizza Place,Italian Restaurant
19,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Music Venue,Health Food Store,Pub,Deli / Bodega,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Cocktail Bar,Café,Cheese Shop,Beer Bar,Farmers Market,Steakhouse,Seafood Restaurant,Restaurant,Bakery
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Café,Italian Restaurant,Burger Joint,Indian Restaurant,Bubble Tea Shop,Ice Cream Shop,Japanese Restaurant,Restaurant,Bakery
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564,0,Grocery Store,Café,Park,Athletics & Sports,Coffee Shop,Italian Restaurant,Diner,Baby Store,Nightclub,Convenience Store
30,M5H,Downtown Toronto,"Adelaide, Richmond, King",43.650571,-79.384568,0,Coffee Shop,Café,Steakhouse,Thai Restaurant,American Restaurant,Bakery,Bar,Hotel,Burger Joint,Salad Place
31,M6H,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259,0,Pharmacy,Supermarket,Bakery,Music Venue,Pool,Middle Eastern Restaurant,Café,Discount Store,Brewery,Brazilian Restaurant
36,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752,0,Coffee Shop,Aquarium,Hotel,Café,Italian Restaurant,Scenic Lookout,Fried Chicken Joint,Restaurant,Bakery,Brewery


Visualize the clusters

In [58]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    
    
    
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)

    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine Clusters

Cluster 1

In [59]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,0,Coffee Shop,Bakery,Pub,Park,Café,Mexican Restaurant,Theater,Breakfast Spot,Bank,Italian Restaurant
9,Downtown Toronto,0,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Middle Eastern Restaurant,Bubble Tea Shop,Pizza Place,Bookstore,Ramen Restaurant,Bakery
15,Downtown Toronto,0,Coffee Shop,Café,Hotel,Restaurant,Cosmetics Shop,Bakery,Breakfast Spot,Gastropub,Pizza Place,Italian Restaurant
19,East Toronto,0,Music Venue,Health Food Store,Pub,Deli / Bodega,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant
20,Downtown Toronto,0,Coffee Shop,Cocktail Bar,Café,Cheese Shop,Beer Bar,Farmers Market,Steakhouse,Seafood Restaurant,Restaurant,Bakery
24,Downtown Toronto,0,Coffee Shop,Café,Italian Restaurant,Burger Joint,Indian Restaurant,Bubble Tea Shop,Ice Cream Shop,Japanese Restaurant,Restaurant,Bakery
25,Downtown Toronto,0,Grocery Store,Café,Park,Athletics & Sports,Coffee Shop,Italian Restaurant,Diner,Baby Store,Nightclub,Convenience Store
30,Downtown Toronto,0,Coffee Shop,Café,Steakhouse,Thai Restaurant,American Restaurant,Bakery,Bar,Hotel,Burger Joint,Salad Place
31,West Toronto,0,Pharmacy,Supermarket,Bakery,Music Venue,Pool,Middle Eastern Restaurant,Café,Discount Store,Brewery,Brazilian Restaurant
36,Downtown Toronto,0,Coffee Shop,Aquarium,Hotel,Café,Italian Restaurant,Scenic Lookout,Fried Chicken Joint,Restaurant,Bakery,Brewery


Cluster 2

In [60]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
62,Central Toronto,1,Garden,Women's Store,Dessert Shop,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


Cluster 3

In [61]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
91,Downtown Toronto,2,Park,Playground,Trail,Women's Store,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


Cluster 4

In [62]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
61,Central Toronto,3,Bus Line,Park,Construction & Landscaping,Swim School,Women's Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


Cluster 5

In [64]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
83,Central Toronto,4,Playground,Tennis Court,Trail,Women's Store,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Donut Shop


Cluster 1: Is the busiest areas where people spendmost of their days.
Cluster 2,3,4,5: Is more residential area.

Cluster 1: Most common venue in the areas are coffee shops. 