# Segmenting and Clustering Neighborhoods in Toronto

## part 1

## Dataframe of the postal code of each neighborhood

Import the required packages for retriving the data

In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

Extracting the data from the data source 

In [2]:
source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(source.content,'lxml')

select table by its html tag and css class name

In [3]:
table = soup.find_all('table', class_="wikitable sortable")[0]

Create dataframe by reading the table

In [4]:
df = pd.read_html(str(table))[0]
df.head()

Unnamed: 0,0,1,2
0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


Set first row to be the headers

In [5]:
headers = df.iloc[0]
df = pd.DataFrame(df.values[1:], columns=headers)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


Ignoring not assigned borough cells

In [6]:
df = df[df.Borough != 'Not assigned']
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


Group based on postal code, combine neighborhoods in one cell

In [7]:
df = df.groupby('Postcode').agg({'Postcode':'first', 'Borough':'first', 'Neighbourhood':', '.join})

In [8]:
df.head()

Unnamed: 0_level_0,Postcode,Borough,Neighbourhood
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
M1B,M1B,Scarborough,"Rouge, Malvern"
M1C,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
M1E,M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,M1G,Scarborough,Woburn
M1H,M1H,Scarborough,Cedarbrae


Replace not assigned neighborhood with borough name

In [9]:
df['Neighbourhood'] = np.where(df['Neighbourhood'] == 'Not assigned', df['Borough'], df['Neighbourhood'])

Reset the index from 0

In [10]:
df = df.reset_index(drop=True)
df.head(12)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


Verify is there any record with neighbourhood as not assigned, it's expected to be empty.

In [11]:
df.query("Neighbourhood == 'Not assigned'")

Unnamed: 0,Postcode,Borough,Neighbourhood


verify a known 'Not assigned' Neighborhood case, it should be equal to Borough.

In [12]:
df.query("Postcode == 'M7A'") 

Unnamed: 0,Postcode,Borough,Neighbourhood
85,M7A,Queen's Park,Queen's Park


check number of rows in dataframe using 'shape'

In [13]:
df.shape

(103, 3)

# PART 2

## Latitude and the longitude coordinates of each neighborhood

Read the Geospatial_Coordinates from the csv file provided

In [14]:
df_geocode = pd.read_csv('Geospatial_Coordinates.csv')
df_geocode.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [15]:
df_geocode.shape

(103, 3)

Concatinate the lat long with the original dataframe

In [16]:
neigh_hoods = pd.concat([df,df_geocode], axis=1)
neigh_hoods = neigh_hoods.drop(['Postal Code'], axis=1) 

Display the head of the concatinated dataframe

In [17]:
neigh_hoods.head(12)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [18]:
neigh_hoods.shape

(103, 5)

# Part 3

## Neighborhoods Clustering

#### Select Toronto boroughs with word "Toronto" in the name.

In [19]:
neigh_hoods = neigh_hoods[neigh_hoods['Borough'].str.contains('Toronto')].reset_index(drop=True)
print(neigh_hoods.shape)
neigh_hoods.head(10)

(38, 5)


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
5,M4P,Central Toronto,Davisville North,43.712751,-79.390197
6,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
7,M4S,Central Toronto,Davisville,43.704324,-79.38879
8,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
9,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049


#### Build Toronto map including markers for boroughs.

In [20]:
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim
import folium
import os
import requests
import json
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors

## 1. Creating Tornoto map

In [21]:
address = 'Toronto'
geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of {} is {}, {}.'.format(address, latitude, longitude))

  


The geograpical coordinate of Toronto is 43.653963, -79.387207.


In [22]:
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood in zip(neigh_hoods['Latitude'], neigh_hoods['Longitude'], \
                                           neigh_hoods['Borough'], neigh_hoods['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(toronto_map)  
    
toronto_map

In [42]:
# Foursquare ID and Secret masked as xxx for security purpose
CLIENT_ID = '0QDMBVLYECNIBYPUI3CMWTUKWB1VLP4HJOBVA3IIGZZMT31H'
CLIENT_SECRET = 'UNVEKYYZWINMOFN3GYDKFFLCYXVZ1DVHNLXLRFNVLSJZN5JA'
VERSION = '20180605' # Foursquare API version

print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

CLIENT_ID: 0QDMBVLYECNIBYPUI3CMWTUKWB1VLP4HJOBVA3IIGZZMT31H
CLIENT_SECRET:UNVEKYYZWINMOFN3GYDKFFLCYXVZ1DVHNLXLRFNVLSJZN5JA


Get category type

In [24]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

### Creating Dataframe with Foursquare data

In [25]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
        
        LIMIT = 100 
        radius = 500    
        
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        try:
            results = requests.get(url).json()["response"]['groups'][0]['items']
        except:
            print("ERROR: ", url)

        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [26]:
# Get venues for every neighborhood
toronto_venues = getNearbyVenues(names=neigh_hoods['Neighbourhood'],
                                   latitudes=neigh_hoods['Latitude'],
                                   longitudes=neigh_hoods['Longitude']
                                  )

In [27]:
print(toronto_venues.shape)
toronto_venues.head()

(1697, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
1,The Beaches,43.676357,-79.293031,Starbucks,43.678798,-79.298045,Coffee Shop
2,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
3,The Beaches,43.676357,-79.293031,Seaspray Restaurant,43.678888,-79.298167,Asian Restaurant
4,"The Danforth West, Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


In [28]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 238 uniques categories.


### 2. Processing data for clustering

### one-hot encoding

In [29]:
# Build venue categories dataframe
# Group by neighborhood and calculate mean value for each

toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 
toronto_onehot.head()

Unnamed: 0,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio,Neighbourhood
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,The Beaches
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,The Beaches
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,The Beaches
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,The Beaches
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"The Danforth West, Riverdale"


In [30]:

# add neighborhood column back to dataframe and move column to the first column
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 
col_index = toronto_onehot.columns.tolist().index('Neighbourhood')
col_order = [toronto_onehot.columns[col_index]] \
                + list(toronto_onehot.columns[0:col_index]) \
                + list(toronto_onehot.columns[col_index+1:])
toronto_onehot = toronto_onehot[col_order]
print("categories dataset shape {}".format(toronto_onehot.shape))
toronto_onehot.head()

toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
print("categories grouped by neighbourhood shape {}".format(toronto_grouped.shape))
toronto_grouped.head(10)

categories dataset shape (1697, 239)
categories grouped by neighbourhood shape (38, 239)


Unnamed: 0,Neighbourhood,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,...,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0,0.071429,0.071429,0.071429,0.142857,0.142857,0.142857,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012048,...,0.0,0.0,0.0,0.012048,0.0,0.0,0.012048,0.0,0.0,0.012048
7,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.05,0.0,0.05,0.01,0.0,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.011628,0.011628,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,...,0.0,0.0,0.0,0.0,0.011628,0.011628,0.0,0.011628,0.0,0.011628


## Dataframe containing 10 venues for each neighborhood

In [31]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [32]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

print(neighbourhoods_venues_sorted.shape)
neighbourhoods_venues_sorted.head(10)

(38, 11)


Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Steakhouse,Thai Restaurant,American Restaurant,Clothing Store,Gym,Hotel,Bakery,Bar
1,Berczy Park,Coffee Shop,Restaurant,Cocktail Bar,Seafood Restaurant,Café,Bakery,Italian Restaurant,Beer Bar,Steakhouse,Cheese Shop
2,"Brockton, Exhibition Place, Parkdale Village",Breakfast Spot,Coffee Shop,Café,Performing Arts Venue,Burrito Place,Stadium,Bar,Caribbean Restaurant,Furniture / Home Store,Climbing Gym
3,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Yoga Studio,Auto Workshop,Garden Center,Garden,Fast Food Restaurant,Farmers Market,Park,Comic Shop,Recording Studio
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Service,Airport Terminal,Airport Lounge,Plane,Sculpture Garden,Boutique,Harbor / Marina,Boat or Ferry,Airport Gate,Airport Food Court
5,"Cabbagetown, St. James Town",Coffee Shop,Restaurant,Park,Bakery,Italian Restaurant,Café,Pub,Pizza Place,Pharmacy,Breakfast Spot
6,Central Bay Street,Coffee Shop,Café,Italian Restaurant,Burger Joint,Bar,Thai Restaurant,Bubble Tea Shop,Salad Place,Chinese Restaurant,Sandwich Place
7,"Chinatown, Grange Park, Kensington Market",Café,Bar,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Coffee Shop,Bakery,Dumpling Restaurant,Chinese Restaurant,Mexican Restaurant,Gaming Cafe
8,Christie,Café,Grocery Store,Park,Convenience Store,Nightclub,Restaurant,Athletics & Sports,Diner,Italian Restaurant,Baby Store
9,Church and Wellesley,Japanese Restaurant,Sushi Restaurant,Coffee Shop,Gay Bar,Restaurant,Burger Joint,Café,Gastropub,Fast Food Restaurant,Mediterranean Restaurant


## Creating clusters

In [33]:
# Calculate clustering using k-means algorithm
kclusters = 5
toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)
kmeans.labels_

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       2, 0, 4, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [34]:
# Build cluster dataset and plot the map

toronto_merged = neigh_hoods
# add clustering labels
toronto_merged['Cluster Labels'] = kmeans.labels_
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

print(toronto_merged.shape)
toronto_merged.head(10)

(38, 16)


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Pub,Coffee Shop,Neighborhood,Asian Restaurant,Fast Food Restaurant,Farmers Market,Filipino Restaurant,Falafel Restaurant,Fish & Chips Shop,Event Space
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Ice Cream Shop,Italian Restaurant,Bookstore,Brewery,Fruit & Vegetable Store,Bakery,Juice Bar,Spa
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,0,Park,Gym,Ice Cream Shop,Movie Theater,Italian Restaurant,Steakhouse,Sandwich Place,Pub,Burrito Place,Burger Joint
3,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Café,Coffee Shop,Italian Restaurant,Bakery,American Restaurant,Yoga Studio,Park,Seafood Restaurant,Sandwich Place,Cheese Shop
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,0,Park,Dim Sum Restaurant,Swim School,Bus Line,Yoga Studio,Donut Shop,Fish & Chips Shop,Filipino Restaurant,Fast Food Restaurant,Farmers Market
5,M4P,Central Toronto,Davisville North,43.712751,-79.390197,0,Food & Drink Shop,Hotel,Burger Joint,Park,Gym,Sandwich Place,Breakfast Spot,Farmers Market,Donut Shop,Falafel Restaurant
6,M4R,Central Toronto,North Toronto West,43.715383,-79.405678,0,Coffee Shop,Clothing Store,Sporting Goods Shop,Yoga Studio,Bagel Shop,Dessert Shop,Chinese Restaurant,Diner,Rental Car Location,Salon / Barbershop
7,M4S,Central Toronto,Davisville,43.704324,-79.38879,0,Pizza Place,Sandwich Place,Dessert Shop,Italian Restaurant,Café,Sushi Restaurant,Coffee Shop,Thai Restaurant,Seafood Restaurant,Toy / Game Store
8,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,0,Playground,Tennis Court,Gym,Yoga Studio,Electronics Store,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Event Space
9,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049,0,Pub,Coffee Shop,Pizza Place,Light Rail Station,Supermarket,Bagel Shop,Sports Bar,Sushi Restaurant,American Restaurant,Vietnamese Restaurant


### visualizing clusters on map

In [35]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], \
                                  toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Analyzing Clusters

### Cluster 1

In [36]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, \
                   toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,0,Pub,Coffee Shop,Neighborhood,Asian Restaurant,Fast Food Restaurant,Farmers Market,Filipino Restaurant,Falafel Restaurant,Fish & Chips Shop,Event Space
1,East Toronto,0,Greek Restaurant,Coffee Shop,Ice Cream Shop,Italian Restaurant,Bookstore,Brewery,Fruit & Vegetable Store,Bakery,Juice Bar,Spa
2,East Toronto,0,Park,Gym,Ice Cream Shop,Movie Theater,Italian Restaurant,Steakhouse,Sandwich Place,Pub,Burrito Place,Burger Joint
3,East Toronto,0,Café,Coffee Shop,Italian Restaurant,Bakery,American Restaurant,Yoga Studio,Park,Seafood Restaurant,Sandwich Place,Cheese Shop
4,Central Toronto,0,Park,Dim Sum Restaurant,Swim School,Bus Line,Yoga Studio,Donut Shop,Fish & Chips Shop,Filipino Restaurant,Fast Food Restaurant,Farmers Market
5,Central Toronto,0,Food & Drink Shop,Hotel,Burger Joint,Park,Gym,Sandwich Place,Breakfast Spot,Farmers Market,Donut Shop,Falafel Restaurant
6,Central Toronto,0,Coffee Shop,Clothing Store,Sporting Goods Shop,Yoga Studio,Bagel Shop,Dessert Shop,Chinese Restaurant,Diner,Rental Car Location,Salon / Barbershop
7,Central Toronto,0,Pizza Place,Sandwich Place,Dessert Shop,Italian Restaurant,Café,Sushi Restaurant,Coffee Shop,Thai Restaurant,Seafood Restaurant,Toy / Game Store
8,Central Toronto,0,Playground,Tennis Court,Gym,Yoga Studio,Electronics Store,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Event Space
9,Central Toronto,0,Pub,Coffee Shop,Pizza Place,Light Rail Station,Supermarket,Bagel Shop,Sports Bar,Sushi Restaurant,American Restaurant,Vietnamese Restaurant


### Cluster 2

In [37]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, \
                   toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Downtown Toronto,1,Coffee Shop,Café,Italian Restaurant,Burger Joint,Bar,Thai Restaurant,Bubble Tea Shop,Salad Place,Chinese Restaurant,Sandwich Place
27,Downtown Toronto,1,Airport Service,Airport Terminal,Airport Lounge,Plane,Sculpture Garden,Boutique,Harbor / Marina,Boat or Ferry,Airport Gate,Airport Food Court


### Cluster 3

In [38]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, \
                   toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Central Toronto,2,Garden,Music Venue,Yoga Studio,Flea Market,Fish & Chips Shop,Filipino Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space


### Cluster 4

In [39]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, \
                   toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
28,Downtown Toronto,3,Coffee Shop,Restaurant,Café,Cocktail Bar,Seafood Restaurant,Pub,Hotel,Beer Bar,Italian Restaurant,Fast Food Restaurant


### Cluster 5

In [40]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, \
                   toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
24,Central Toronto,4,Coffee Shop,Sandwich Place,Café,Pizza Place,Cosmetics Shop,Burger Joint,Park,Flower Shop,Liquor Store,Jewish Restaurant
