# Segmenting and Clustering Neighbourhoods in Toronto

## Imports and installs

In [1]:
import numpy as np
import pandas as pd

!conda install -c conda-forge geocoder --yes 
!conda install -c conda-forge geopy --yes
!conda install -c conda-forge tqdm --yes
!conda install -c conda-forge folium=0.5.0 --yes


Collecting package metadata: done
Solving environment: done

# All requested packages already installed.

Collecting package metadata: done
Solving environment: done

# All requested packages already installed.

Collecting package metadata: done
Solving environment: done

# All requested packages already installed.

Collecting package metadata: done
Solving environment: done

# All requested packages already installed.



## Scrape the Wikipedia page

In [2]:
# read_html returns one DataFrame per HTML table found on the page;
# the postal-code table is the first of them.
postal_codes_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki = pd.read_html(postal_codes_url)
wiki[0]

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


## Create the frame

In [3]:
# The dataframe will consist of three columns: PostalCode, Borough, and Neighbourhood.
# rename() without inplace returns a new frame, so the scraped table in wiki[0]
# is never modified and this cell can be re-run on its own. Only the column is
# renamed; the row labels are left as they were scraped.
df = wiki[0].rename(columns={'Postcode': 'PostalCode'})
print(df.head())
print(df.shape)

  PostalCode           Borough     Neighbourhood
0        M1A      Not assigned      Not assigned
1        M2A      Not assigned      Not assigned
2        M3A        North York         Parkwoods
3        M4A        North York  Victoria Village
4        M5A  Downtown Toronto      Harbourfront
(289, 3)


## Clean the frame

### Ignore cells with a borough that is Not assigned.

In [4]:
# Keep only rows whose borough is known: drop the 'Not assigned' placeholder
# as well as any genuinely missing value. .copy() yields an independent frame
# so that later column assignments do not trigger SettingWithCopyWarning.
has_borough = df['Borough'].notna() & (df['Borough'] != 'Not assigned')
df = df.loc[has_borough].copy()
print(df.head())
print(df.shape)

  PostalCode           Borough     Neighbourhood
2        M3A        North York         Parkwoods
3        M4A        North York  Victoria Village
4        M5A  Downtown Toronto      Harbourfront
5        M5A  Downtown Toronto       Regent Park
6        M6A        North York  Lawrence Heights
(212, 3)


### If a neighbourhood is "Not assigned", use the borough name as the neighbourhood

In [5]:
# Wherever the neighbourhood is the 'Not assigned' placeholder, fall back to the borough name.
unnamed = df['Neighbourhood'] == 'Not assigned'
df['Neighbourhood'] = df['Neighbourhood'].mask(unnamed, df['Borough'])
# Sanity check: no placeholder is left, and the frame still has exactly three columns.
assert df[df['Neighbourhood'] == 'Not assigned'].shape == (0, 3)

### Merge neighbourhoods in one postal code area.

In [6]:
# One row per (PostalCode, Borough): neighbourhood names that share a postal
# code are concatenated into a single comma-separated string.
df = (
    df.groupby(['PostalCode', 'Borough'])['Neighbourhood']
      .agg(', '.join)
      .reset_index()
)
print(df.head())
print(df.shape)

  PostalCode      Borough                           Neighbourhood
0        M1B  Scarborough                          Rouge, Malvern
1        M1C  Scarborough  Highland Creek, Rouge Hill, Port Union
2        M1E  Scarborough       Guildwood, Morningside, West Hill
3        M1G  Scarborough                                  Woburn
4        M1H  Scarborough                               Cedarbrae
(103, 3)


## Adding Latitude and Longitude

In [7]:
# import geocoder # import geocoder
# import geopy
# from geopy.geocoders import Nominatim
# from geopy.extra.rate_limiter import RateLimiter
# from geopy.geocoders import Nominatim
# from tqdm import tqdm

# geopy.geocoders.options.default_user_agent = 'my_app/1'
# geopy.geocoders.options.default_timeout = 7
# geolocator = Nominatim()

# df['search_string'] = df['Borough'].str.cat(df[['Neighbourhood']], sep=', ')
# df['search_string'] = df['search_string'].astype(str) + ', Toronto, Ontario, Canada'

# geocode = RateLimiter(geolocator.geocode, min_delay_seconds = 1)

# tqdm.pandas()
# df['location'] = df['Neighbourhood'].apply(geocode)
# df['point'] = df['location'].apply(lambda loc: tuple(loc.point) if loc else None)
# print(df.head())
# df.drop(['search_string', 'location', 'point'], axis=1, inplace = True)

Geocoder proved unreliable: many lookups returned None, and some returned locations from other countries. We therefore use the coordinates published at https://cocl.us/Geospatial_data and merge them into our data frame.

In [8]:
# Load the published postal-code coordinates and give the key column the same name as in df.
gf = pd.read_csv('https://cocl.us/Geospatial_data').rename(
    index=str,
    columns={'Postal Code': 'PostalCode'},
)
# Inner join: only postal codes present in both tables are kept.
df = df.merge(gf, how='inner', on=['PostalCode'])
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## Clustering neighbourhoods

Create a map of Toronto with neighborhoods superimposed on top.

In [9]:
import folium

# Centre the map on the mean coordinate of all postal-code areas.
latitude = df["Latitude"].mean()
longitude = df["Longitude"].mean()
tmap = folium.Map(location=[latitude, longitude], zoom_start=10)

# Red marker at the map centre.
folium.CircleMarker(
    [latitude, longitude],
    radius=5, popup='Toronto', color='red',
    fill=True, fill_color='red', fill_opacity=0.6,
    parse_html=False,
).add_to(tmap)

# One blue marker per postal-code area, labelled "<neighbourhoods>, <borough>".
area_marker_style = dict(
    radius=5, color='blue',
    fill=True, fill_color='#3186cc', fill_opacity=0.7,
    parse_html=False,
)
area_columns = (df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood'])
for lat, lng, borough, neighbourhood in zip(*area_columns):
    label = folium.Popup('{}, {}'.format(neighbourhood, borough), parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **area_marker_style).add_to(tmap)

tmap

Add the parameters for the Foursquare API

In [10]:
import os
import warnings

# Foursquare credentials are read from the environment so that secrets are
# never stored in (or shared along with) the notebook. Export
# FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE: the credentials that were previously hardcoded in this cell have been
# exposed and should be rotated in the Foursquare developer console.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
if not (CLIENT_ID and CLIENT_SECRET):
    # Warn here rather than letting the API calls further down fail obscurely.
    warnings.warn(
        'FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
        'Foursquare requests will be rejected.'
    )
VERSION = '20180604'
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

In [11]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category of a venue record, or None.

    The category list is looked up under the key 'categories'; if that key
    is missing, 'venue.categories' is used instead.

    Parameters
    ----------
    row : mapping-like (e.g. dict or pandas Series)
        Venue record supporting ``row[key]`` lookups.

    Returns
    -------
    The 'name' entry of the first category, or None when the list is empty.

    Raises
    ------
    KeyError
        If neither key is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and must not be silently swallowed.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [12]:
#df['fs_url'] = df[['Latitude','Longitude']].apply(lambda x: retrieve_data(x['Latitude'], x['Longitude']), axis=1)
# dd['json'] = df['fs_url'].apply(lambda url: get_json_from_url(url))


Let's look at the Neighbourhoods in the center of Toronto, on a map.

In [13]:
# Restrict the frame to boroughs whose name contains 'Toronto'.
in_toronto = df['Borough'].str.contains('Toronto')
central = df[in_toronto]

# Centre the map on the mean coordinate of the selected areas.
latitude = central["Latitude"].mean()
longitude = central["Longitude"].mean()
cmap = folium.Map(location=[latitude, longitude], zoom_start=11)

# One blue marker per central postal-code area, labelled with its neighbourhood names.
central_marker_style = dict(
    radius=5, color='blue',
    fill=True, fill_color='#3186cc', fill_opacity=0.7,
    parse_html=False,
)
for lat, lng, label in zip(central['Latitude'], central['Longitude'], central['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **central_marker_style).add_to(cmap)

cmap

We retrieve the venues for these neighbourhoods

In [14]:
import requests
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint around each location and tabulate the venues.

    Uses the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT, and
    prints each name as it is processed.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences giving, per location, its label and coordinates.
    radius : int, default 500
        Value sent to the API as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue was returned. 'Venue Category'
        is None for venues that list no category.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    columns = ['Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    url = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # A timeout keeps a stalled connection from hanging the notebook, and
        # raise_for_status surfaces API errors instead of a confusing KeyError.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        groups = response.json().get('response', {}).get('groups', [])
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works for zero rows,
    # where assigning .columns afterwards would raise a length mismatch.
    return pd.DataFrame(rows, columns=columns)

# Fetch the venues around every central postal-code area (one request per row of `central`).
central_venues = getNearbyVenues(
    names=central['Neighbourhood'],
    latitudes=central['Latitude'],
    longitudes=central['Longitude'],
)

The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North, Forest Hill West
The Annex, North Midtown, Yorkville
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie
Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
High Park, The 

Let's see how many venues were returned for each neighbourhood

In [15]:
# Count the venues returned for each neighbourhood (non-null entries per column).
central_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,57,57,57,57,57,57
"Brockton, Exhibition Place, Parkdale Village",20,20,20,20,20,20
Business Reply Mail Processing Centre 969 Eastern,20,20,20,20,20,20
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",14,14,14,14,14,14
"Cabbagetown, St. James Town",46,46,46,46,46,46
Central Bay Street,81,81,81,81,81,81
"Chinatown, Grange Park, Kensington Market",100,100,100,100,100,100
Christie,16,16,16,16,16,16
Church and Wellesley,85,85,85,85,85,85


Let's find out how many unique categories can be curated from all the returned venues

In [16]:
# Number of distinct category labels across all returned venues.
category_count = len(central_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(category_count))

There are 234 uniques categories.


## Analyze Each Central Neighborhood

In [17]:
# One indicator column per venue category; the empty prefix keeps the bare category names.
category_dummies = pd.get_dummies(central_venues[['Venue Category']], prefix="", prefix_sep="")

# Attach the neighbourhood label, which lands in the last position.
category_dummies['Neighbourhood'] = central_venues['Neighbourhood']

# Rotate that last column to the front.
ordered_columns = list(category_dummies.columns)
ordered_columns.insert(0, ordered_columns.pop())
central_onehot = category_dummies[ordered_columns]

central_onehot.head()



Unnamed: 0,Neighbourhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Let's examine the new dataframe size.

In [18]:
# (rows, columns): one row per venue; one column per category plus the neighbourhood label.
central_onehot.shape

(1693, 235)

#### Next, let's group the rows by neighbourhood and take the mean of each category's frequency of occurrence

In [19]:
# Averaging the indicator columns per neighbourhood gives each category's
# share of that neighbourhood's venues.
central_grouped = central_onehot.groupby('Neighbourhood', as_index=False).mean()
central_grouped

Unnamed: 0,Neighbourhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,...,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.071429,0.071429,0.071429,0.142857,0.142857,0.142857,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,...,0.0,0.0,0.012346,0.0,0.0,0.012346,0.0,0.0,0.0,0.012346
7,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.05,0.0,0.04,0.01,0.0,0.0,0.01,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,...,0.0,0.0,0.0,0.011765,0.011765,0.0,0.011765,0.011765,0.0,0.011765


Let's confirm the new size

In [20]:
# (rows, columns) after averaging: one row per neighbourhood.
central_grouped.shape

(38, 235)

#### Let's print each neighborhood along with the top 5 most common venues

In [21]:
num_top_venues = 5

for hood in central_grouped['Neighbourhood']:
    print("----"+hood+"----")
    # Turn the neighbourhood's single row into a two-column (venue, freq) table.
    profile = central_grouped.loc[central_grouped['Neighbourhood'] == hood].T.reset_index()
    profile.columns = ['venue', 'freq']
    # The first row holds the neighbourhood name itself, not a frequency.
    profile = profile.iloc[1:].astype({'freq': float}).round({'freq': 2})
    ranked = profile.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
             venue  freq
0      Coffee Shop  0.06
1             Café  0.05
2  Thai Restaurant  0.04
3       Steakhouse  0.04
4              Bar  0.04


----Berczy Park----
                venue  freq
0         Coffee Shop  0.07
1        Cocktail Bar  0.05
2              Bakery  0.04
3         Cheese Shop  0.04
4  Seafood Restaurant  0.04


----Brockton, Exhibition Place, Parkdale Village----
            venue  freq
0  Breakfast Spot  0.10
1     Coffee Shop  0.10
2            Café  0.10
3   Grocery Store  0.05
4             Bar  0.05


----Business Reply Mail Processing Centre 969 Eastern----
                venue  freq
0  Light Rail Station  0.10
1       Auto Workshop  0.05
2             Brewery  0.05
3                 Spa  0.05
4         Pizza Place  0.05


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
                venue  freq
0      Airport Lounge  0.14
1     Airport Service  0.14

Let's create the new dataframe and display the top 10 venues for each neighborhood.

In [22]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first entry of ``row`` is skipped (callers pass a row whose first
    field is the neighbourhood name); the remaining entries are sorted in
    descending order and the index labels of the first ``num_top_venues``
    are returned as an array.
    """
    return row.iloc[1:].sort_values(ascending=False).index.values[:num_top_venues]

num_top_venues = 10


def _ordinal(rank):
    """Return ``rank`` with its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st, ...)."""
    if 10 <= rank % 100 <= 20:
        suffix = 'th'  # 11th, 12th, 13th are irregular
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(rank % 10, 'th')
    return '{}{}'.format(rank, suffix)


# create columns according to number of top venues
columns = ['Neighbourhood']
for rank in range(1, num_top_venues + 1):
    columns.append('{} Most Common Venue'.format(_ordinal(rank)))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = central_grouped['Neighbourhood']

# fill each row with that neighbourhood's top categories, most frequent first
for ind in range(central_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(central_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Bar,Thai Restaurant,Steakhouse,American Restaurant,Bakery,Burger Joint,Restaurant,Asian Restaurant
1,Berczy Park,Coffee Shop,Cocktail Bar,Italian Restaurant,Restaurant,Farmers Market,Seafood Restaurant,Cheese Shop,Café,Bakery,Steakhouse
2,"Brockton, Exhibition Place, Parkdale Village",Breakfast Spot,Café,Coffee Shop,Climbing Gym,Falafel Restaurant,Convenience Store,Burrito Place,Stadium,Caribbean Restaurant,Bar
3,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Yoga Studio,Park,Brewery,Skate Park,Burrito Place,Butcher,Restaurant,Recording Studio,Comic Shop
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Terminal,Airport Lounge,Airport Service,Plane,Sculpture Garden,Boutique,Boat or Ferry,Harbor / Marina,Airport Food Court,Airport
5,"Cabbagetown, St. James Town",Coffee Shop,Restaurant,Café,Bakery,Pizza Place,Italian Restaurant,Pub,Beer Store,Bank,Indian Restaurant
6,Central Bay Street,Coffee Shop,Café,Italian Restaurant,Bar,Bubble Tea Shop,Burger Joint,Thai Restaurant,Spa,Sandwich Place,Salad Place
7,"Chinatown, Grange Park, Kensington Market",Café,Bar,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Chinese Restaurant,Bakery,Coffee Shop,Caribbean Restaurant,Mexican Restaurant,Gaming Cafe
8,Christie,Café,Grocery Store,Park,Restaurant,Convenience Store,Baby Store,Italian Restaurant,Athletics & Sports,Coffee Shop,Diner
9,Church and Wellesley,Coffee Shop,Japanese Restaurant,Gay Bar,Sushi Restaurant,Restaurant,Burger Joint,Dance Studio,Gastropub,Mediterranean Restaurant,Men's Store


## Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.

In [23]:
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

# Feature matrix: every column except the neighbourhood label. The keyword
# form is used because pandas 2 no longer accepts the axis positionally.
grouped_for_clustering = central_grouped.drop(columns='Neighbourhood')

# run k-means clustering; n_init is pinned to the historical default of 10 so
# the result does not change with the scikit-learn version in use
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(grouped_for_clustering)

# check cluster labels generated for the first rows of the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [24]:
# add clustering labels; overwrite the column when it already exists so that
# re-running this cell does not fail on a duplicate insert
if 'Cluster Labels' in neighbourhoods_venues_sorted.columns:
    neighbourhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# join() returns a new frame, so `central` itself is left unchanged. Rows of
# `central` without a match keep NaN in the added columns.
central_merged = central.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')
central_merged.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,M4E,East Toronto,The Beaches,43.676357,-79.293031,4,Health Food Store,Coffee Shop,Pub,Neighborhood,Costume Shop,Coworking Space,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Ice Cream Shop,Bookstore,Italian Restaurant,Cosmetics Shop,Brewery,Bubble Tea Shop,Restaurant,Caribbean Restaurant
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,0,Park,Liquor Store,Fish & Chips Shop,Italian Restaurant,Steakhouse,Brewery,Movie Theater,Ice Cream Shop,Pub,Sandwich Place
43,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Café,Coffee Shop,Gastropub,Italian Restaurant,American Restaurant,Bakery,Yoga Studio,Convenience Store,Seafood Restaurant,Sandwich Place
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,3,Park,Swim School,Bus Line,Yoga Studio,Discount Store,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant


Finally, let's visualize the resulting clusters

In [25]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
rainbow = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, kclusters))]

# Rows without a cluster label (NaN) cannot be coloured, so they are skipped.
# A NaN in the column also turns the labels into floats, hence the int() cast
# before they are used as list indices.
clustered = central_merged.dropna(subset=['Cluster Labels'])

# add markers to the map
for lat, lon, poi, cluster in zip(clustered['Latitude'], clustered['Longitude'], clustered['Neighbourhood'], clustered['Cluster Labels']):
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

## Examine Clusters

In [26]:
# Cluster 0: show the borough plus every column from 'Cluster Labels' onwards.
in_cluster = central_merged['Cluster Labels'] == 0
shown_columns = central_merged.columns[[1] + list(range(5, central_merged.shape[1]))]
central_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
41,East Toronto,0,Greek Restaurant,Coffee Shop,Ice Cream Shop,Bookstore,Italian Restaurant,Cosmetics Shop,Brewery,Bubble Tea Shop,Restaurant,Caribbean Restaurant
42,East Toronto,0,Park,Liquor Store,Fish & Chips Shop,Italian Restaurant,Steakhouse,Brewery,Movie Theater,Ice Cream Shop,Pub,Sandwich Place
43,East Toronto,0,Café,Coffee Shop,Gastropub,Italian Restaurant,American Restaurant,Bakery,Yoga Studio,Convenience Store,Seafood Restaurant,Sandwich Place
45,Central Toronto,0,Burger Joint,Hotel,Dog Run,Food & Drink Shop,Clothing Store,Sandwich Place,Park,Breakfast Spot,Gym,Electronics Store
46,Central Toronto,0,Clothing Store,Sporting Goods Shop,Coffee Shop,Yoga Studio,Gift Shop,Ice Cream Shop,Fast Food Restaurant,Mexican Restaurant,Diner,Dessert Shop
47,Central Toronto,0,Pizza Place,Sandwich Place,Dessert Shop,Italian Restaurant,Café,Restaurant,Sushi Restaurant,Coffee Shop,Pharmacy,Seafood Restaurant
49,Central Toronto,0,Pub,Coffee Shop,Pizza Place,Light Rail Station,Sports Bar,Sandwich Place,Supermarket,Sushi Restaurant,Bagel Shop,Fried Chicken Joint
51,Downtown Toronto,0,Coffee Shop,Restaurant,Café,Bakery,Pizza Place,Italian Restaurant,Pub,Beer Store,Bank,Indian Restaurant
52,Downtown Toronto,0,Coffee Shop,Japanese Restaurant,Gay Bar,Sushi Restaurant,Restaurant,Burger Joint,Dance Studio,Gastropub,Mediterranean Restaurant,Men's Store
53,Downtown Toronto,0,Coffee Shop,Café,Bakery,Park,Pub,Theater,Breakfast Spot,Restaurant,Mexican Restaurant,Bank


In [27]:
# Cluster 1: show the borough plus every column from 'Cluster Labels' onwards.
in_cluster = central_merged['Cluster Labels'] == 1
shown_columns = central_merged.columns[[1] + list(range(5, central_merged.shape[1]))]
central_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
63,Central Toronto,1,Garden,Yoga Studio,Discount Store,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


In [28]:
# Cluster 2: show the borough plus every column from 'Cluster Labels' onwards.
in_cluster = central_merged['Cluster Labels'] == 2
shown_columns = central_merged.columns[[1] + list(range(5, central_merged.shape[1]))]
central_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
48,Central Toronto,2,Playground,Restaurant,Tennis Court,Yoga Studio,Dim Sum Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


In [29]:
# Cluster 3: show the borough plus every column from 'Cluster Labels' onwards.
in_cluster = central_merged['Cluster Labels'] == 3
shown_columns = central_merged.columns[[1] + list(range(5, central_merged.shape[1]))]
central_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
44,Central Toronto,3,Park,Swim School,Bus Line,Yoga Studio,Discount Store,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
50,Downtown Toronto,3,Park,Playground,Trail,Yoga Studio,Dim Sum Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
64,Central Toronto,3,Trail,Park,Jewelry Store,Bus Line,Sushi Restaurant,Yoga Studio,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant


In [30]:
# Cluster 4: show the borough plus every column from 'Cluster Labels' onwards.
in_cluster = central_merged['Cluster Labels'] == 4
shown_columns = central_merged.columns[[1] + list(range(5, central_merged.shape[1]))]
central_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,East Toronto,4,Health Food Store,Coffee Shop,Pub,Neighborhood,Costume Shop,Coworking Space,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store
