# Toronto Neighbourhood Clustering 

### Step 1: Import Libaries

In [1]:
import numpy as np 
import pandas as pd 
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim 
import requests 
from pandas.io.json import json_normalize 

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium 
print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1.21.0-py_0       conda-forge

The following packages will be UPDATED:

    ca-

## Step 2: Scrape Data from Wikipedia Site

### Use Beautiful Soup to extract the table

In [2]:
from bs4 import BeautifulSoup
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
source = requests.get(url).text
soup = BeautifulSoup(source)


In [3]:
table_data = soup.find('div', class_='mw-parser-output')
table = table_data.table.tbody

columns = ['PostalCode', 'Borough', 'Neighbourhood']
data = dict({key:[]*len(columns) for key in columns})

for row in table.find_all('tr'):
    for i,column in zip(row.find_all('td'),columns):
        i = i.text
        i = i.replace('\n', '')
        data[column].append(i)

df = pd.DataFrame.from_dict(data=data)[columns]
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [4]:
print("shape:", df.shape)

shape: (287, 3)


## Step 3: Cleaning Data

Remove entries in the table where borough is not assigned

In [5]:
df = df[df['Borough'] != 'Not assigned'].reset_index(drop = True)
df.head(10)


Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor
5,M7A,Downtown Toronto,Queen's Park
6,M9A,Queen's Park,Not assigned
7,M1B,Scarborough,Rouge
8,M1B,Scarborough,Malvern
9,M3B,North York,Don Mills North


Rename neighbourhood to borough name if neighboorhood is not assigned

In [6]:
#create new dataframe
#if neigh == 'Not assigned' assign borough name
post, bor, ne = [], [], []
for postcode, borough, neigh in zip(df['PostalCode'], df['Borough'], df['Neighbourhood']):
    post.append(postcode)
    bor.append(borough)
    if neigh == 'Not assigned':
        ne.append(borough)
    else:
        ne.append(neigh)

df = pd.DataFrame({'PostalCode': post, 'Borough': bor, 'Neighbourhood':ne})[columns]
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


If there is the same postcode and borough but multiple neighbourhoods, combine neighbourhoods and separate with a comma



In [9]:
df2 = df.groupby(['PostalCode', 'Borough'])['Neighbourhood'].apply(', '.join).reset_index(drop=False)
#create new dataframe

print("shape of new df:", df2.shape)
df2.head(10)

shape of new df: (103, 3)


Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


## Step 4: Add Latitude and Longitude to the dataframe

In [10]:
#use CSV file
url = 'http://cocl.us/Geospatial_data'
df3 = pd.read_csv(url)

df4 = pd.DataFrame({'PostalCode':df2['PostalCode'], 
                            'Borough':df2['Borough'], 
                            'Neighbourhood':df2['Neighbourhood'], 
                            'Latitude':df3['Latitude'], 
                            'Longitude':df3['Longitude']})


print("Postal Codes: {} Boroughs: {}  Neighbourhoods: {} ".format(len(df4.PostalCode.unique()), len(df4.Borough.unique()), len(df4.Neighbourhood.unique())))

Postal Codes: 103 Boroughs: 11  Neighbourhoods: 102 


In [11]:
df4.head(10)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [12]:
print(df4.shape)

(103, 5)


## Step 5: Cluster the Neighbourhoods


In [13]:
address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('Coordinates of Toronto are {}, {}. '.format(latitude, longitude))

Coordinates of Toronto are 43.653963, -79.387207. 


In [14]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)
for lat, lng, borough, neighbourhood in zip(df4['Latitude'], df4['Longitude'], df4['Borough'], df4['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.5,
        parse_html=False).add_to(map_toronto)
    

In [15]:
map_toronto

Define Foursquare credentials

In [18]:
CLIENT_ID = '1JR4XCEU2NTPIM4WWUQCKMM2KQYP2N0ZP1IZ1SFKALSC2EAU'
CLIENT_SECRET = 'LJY0TOAEQ0XFUUQMW2EDKJUSYVVKWNPN1NOKU0LUTCVMUPLS'
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 1JR4XCEU2NTPIM4WWUQCKMM2KQYP2N0ZP1IZ1SFKALSC2EAU
CLIENT_SECRET:LJY0TOAEQ0XFUUQMW2EDKJUSYVVKWNPN1NOKU0LUTCVMUPLS


Exploring Toronto Neighbourhoods
Create Dataframe for each part of Toronto

In [23]:
downtown_toronto = df4[df4['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
east_toronto = df4[df4['Borough'] == 'East Toronto'].reset_index(drop=True)
west_toronto = df4[df4['Borough'] == 'West Toronto'].reset_index(drop=True)
cent_toronto = df4[df4['Borough'] == 'Central Toronto'].reset_index(drop=True)

#combine into 1 dataframe
combined = pd.concat([downtown_toronto, east_toronto, west_toronto, cent_toronto], sort=False)
toronto_dataframe = combined.reset_index(drop=True)

print(toronto_dataframe.shape)
toronto_dataframe.head(10)

(39, 5)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
1,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.667967,-79.367675
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
3,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
4,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
5,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
6,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
7,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
8,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568
9,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752


Map of Toronto Neighbourhoods

In [32]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# adding markers
for lat, lng, borough, neighbourhood in zip(toronto_dataframe['Latitude'], toronto_dataframe['Longitude'], toronto_dataframe['Borough'], toronto_dataframe['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.5,
        parse_html=False).add_to(map_toronto)
    
map_toronto

In [43]:
client_id = '1JR4XCEU2NTPIM4WWUQCKMM2KQYP2N0ZP1IZ1SFKALSC2EAU'
client_secret = 'LJY0TOAEQ0XFUUQMW2EDKJUSYVVKWNPN1NOKU0LUTCVMUPLS'
version = '20180605' # Foursquare API version
no_of_venues = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(client_id, client_secret, version, rosedale_latitude, rosedale_longitude, radius, no_of_venues)

response = requests.get(url).json()
response

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(client_id, client_secret, version, lat, lng, radius, no_of_venues)
        
        results = requests.get(url).json()["response"]['groups'][0]['items']
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'],
            v['venue']['location']['lat'],
            v['venue']['location']['lng'],
            v['venue']['categories'][0]['name']) for v in results])
        
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                            'Neighbourhood Latitude',
                            'Neighbourhood Longitude',
                            'Venue',
                            'Venue Latitude',
                            'Venue Longitude',
                            'Venue Category']
    
    return(nearby_venues)

NameError: name 'rosedale_latitude' is not defined

In [45]:
#reate a new dataframe called for venues
venues = getNearbyVenues(names=toronto_dataframe['Neighbourhood'],
                                 latitudes=toronto_dataframe['Latitude'],
                                 longitudes=toronto_dataframe['Longitude'])


Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie
Queen's Park
The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Business Reply Mail Processing Centre 969 Eastern
Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
High Park, The Junction South
Parkdale, Roncesvalles
Runnymede, Swansea
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, So

In [47]:
#Venues in each neighbourhood
venues.groupby('Neighbourhood')['Venue'].count()


Neighbourhood
Adelaide, King, Richmond                                                                                      100
Berczy Park                                                                                                    55
Brockton, Exhibition Place, Parkdale Village                                                                   21
Business Reply Mail Processing Centre 969 Eastern                                                              16
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara     16
Cabbagetown, St. James Town                                                                                    47
Central Bay Street                                                                                             83
Chinatown, Grange Park, Kensington Market                                                                      87
Christie                                                                  

In [48]:
# one hot encoding
toronto_onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.shape

(1713, 231)

In [62]:
#Group the Neighbourhoods
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
print(toronto_grouped.shape)
toronto_grouped.head(5)
num_top_venues = 5


(39, 231)


In [66]:
for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
         venue  freq
0  Coffee Shop  0.07
1         Café  0.04
2          Bar  0.04
3   Steakhouse  0.04
4       Bakery  0.03


----Berczy Park----
            venue  freq
0     Coffee Shop  0.09
1    Cocktail Bar  0.05
2      Steakhouse  0.04
3        Beer Bar  0.04
4  Farmers Market  0.04


----Brockton, Exhibition Place, Parkdale Village----
               venue  freq
0     Breakfast Spot  0.10
1               Café  0.10
2        Coffee Shop  0.10
3  Convenience Store  0.05
4          Nightclub  0.05


----Business Reply Mail Processing Centre 969 Eastern----
                  venue  freq
0    Light Rail Station  0.12
1           Pizza Place  0.06
2         Auto Workshop  0.06
3            Smoke Shop  0.06
4  Fast Food Restaurant  0.06


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
              venue  freq
0   Airport Service  0.19
1    Airport Lounge  0.12
2  Airport Terminal  0.

In [67]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [69]:
#dataframe for the 10 most common venues
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Steakhouse,Café,Bar,Burger Joint,Bakery,Asian Restaurant,Cosmetics Shop,Thai Restaurant,Restaurant
1,Berczy Park,Coffee Shop,Cocktail Bar,Beer Bar,Farmers Market,Bakery,Steakhouse,Seafood Restaurant,Cheese Shop,Café,Gourmet Shop
2,"Brockton, Exhibition Place, Parkdale Village",Breakfast Spot,Café,Coffee Shop,Nightclub,Stadium,Bar,Bakery,Intersection,Italian Restaurant,Restaurant
3,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Park,Auto Workshop,Comic Shop,Pizza Place,Restaurant,Burrito Place,Brewery,Skate Park,Spa
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Service,Airport Lounge,Airport Terminal,Airport,Airport Food Court,Airport Gate,Bar,Boutique,Harbor / Marina,Boat or Ferry


In [92]:
# set number of clusters
kclusters = 4

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [97]:
# add clustering labels
neighborhoods_venues_sorted
toronto_merged = toronto_dataframe

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,kmeans,Clusters,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,1,1,1,Park,Playground,Trail,Cupcake Shop,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store
1,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.667967,-79.367675,0,0,0,Coffee Shop,Park,Pub,Bakery,Pizza Place,Italian Restaurant,Café,Restaurant,Convenience Store,Japanese Restaurant
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,0,0,0,Coffee Shop,Japanese Restaurant,Gay Bar,Sushi Restaurant,Restaurant,Pub,Men's Store,Mediterranean Restaurant,Hotel,Gym
3,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,0,0,0,Coffee Shop,Pub,Park,Bakery,Café,Breakfast Spot,Mexican Restaurant,Restaurant,Event Space,Shoe Store
4,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,0,0,0,Coffee Shop,Clothing Store,Café,Japanese Restaurant,Cosmetics Shop,Restaurant,Tea Room,Electronics Store,Bakery,Bubble Tea Shop
5,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,0,0,Coffee Shop,Café,Restaurant,Clothing Store,Hotel,Beer Bar,Bakery,Cosmetics Shop,Italian Restaurant,Cocktail Bar
6,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,0,0,Coffee Shop,Cocktail Bar,Beer Bar,Farmers Market,Bakery,Steakhouse,Seafood Restaurant,Cheese Shop,Café,Gourmet Shop
7,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,0,0,Coffee Shop,Sandwich Place,Italian Restaurant,Ice Cream Shop,Burger Joint,Juice Bar,Japanese Restaurant,Café,Salad Place,Department Store
8,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568,0,0,0,Coffee Shop,Steakhouse,Café,Bar,Burger Joint,Bakery,Asian Restaurant,Cosmetics Shop,Thai Restaurant,Restaurant
9,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752,0,0,0,Coffee Shop,Aquarium,Café,Italian Restaurant,Hotel,Scenic Lookout,Restaurant,Fried Chicken Joint,Sporting Goods Shop,Brewery


In [95]:
toronto_merged.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,kmeans,Clusters,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,1,1,1,Park,Playground,Trail,Cupcake Shop,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store
1,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.667967,-79.367675,0,0,0,Coffee Shop,Park,Pub,Bakery,Pizza Place,Italian Restaurant,Café,Restaurant,Convenience Store,Japanese Restaurant
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,0,0,0,Coffee Shop,Japanese Restaurant,Gay Bar,Sushi Restaurant,Restaurant,Pub,Men's Store,Mediterranean Restaurant,Hotel,Gym
3,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,0,0,0,Coffee Shop,Pub,Park,Bakery,Café,Breakfast Spot,Mexican Restaurant,Restaurant,Event Space,Shoe Store
4,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,0,0,0,Coffee Shop,Clothing Store,Café,Japanese Restaurant,Cosmetics Shop,Restaurant,Tea Room,Electronics Store,Bakery,Bubble Tea Shop


In [96]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Clusters']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters