In [1]:
import pandas as pd 
import numpy as np

Use read_html function to scrape the table from Wikipedia page.

In [2]:
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')

There are 2 tables on the page, so I make sure that I select the correct object scraped [0]

In [3]:
df1 = df[0]
df1.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


Check how many rows we have. We can see that some values for Borough are "Not Assigned" and we need to delete them.

In [4]:
df1.shape

(180, 3)

Deleting "Not assigned" rows

In [5]:
df1.drop(df1[df1['Borough'] == 'Not assigned'].index, inplace=True)
df1.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


Checking if there are any NaN (missing data) values

In [6]:
missing_data = df1.isnull()
for column in missing_data.columns.values.tolist():
    print(column)
    print(missing_data[column].value_counts())
    print('')

Postal Code
False    103
Name: Postal Code, dtype: int64

Borough
False    103
Name: Borough, dtype: int64

Neighborhood
False    103
Name: Neighborhood, dtype: int64



There are no NaN values in Neighborhood column, meaning that the table is complete!

Geocoder not working, so we will try using read_csv function.

In [7]:
df_coord = pd.read_csv("http://cocl.us/Geospatial_data")

In [8]:
df_coord.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Now we need to merge df and df_coord using Left join, where df1 is Left and on "postal code column, so that for each postal code in dataframe df, we will get latitude and longitude coordinates

In [9]:
df_combined = df1.merge(df_coord, on = 'Postal Code', how = 'left')

In [10]:
df_combined.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [11]:
df_combined.dtypes

Postal Code      object
Borough          object
Neighborhood     object
Latitude        float64
Longitude       float64
dtype: object

In [12]:
missing_data2 = df_combined.isnull()
for column in missing_data2.columns.values.tolist():
    print(column)
    print(missing_data2[column].value_counts())
    print('')

Postal Code
False    103
Name: Postal Code, dtype: int64

Borough
False    103
Name: Borough, dtype: int64

Neighborhood
False    103
Name: Neighborhood, dtype: int64

Latitude
False    103
Name: Latitude, dtype: int64

Longitude
False    103
Name: Longitude, dtype: int64



So there are no NaN values and table looks complete!

Here we create a new dataframe, where "Borough" columns only contain "York", like "North York", "East York", in order to reduce size of the table.

In [13]:
df_york = df_combined[df_combined['Borough'].str.contains('York')].reset_index(drop=True)

In [14]:
df_york.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
3,M3B,North York,Don Mills,43.745906,-79.352188
4,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937


In [15]:
print('The dataframe has {} boroughs, {} neighborhoods and {} postal codes.'.format(
        len(df_york['Borough'].unique()),
        len(df_york['Neighborhood'].unique()),
        df_york.shape[0]
    )
)

The dataframe has 3 boroughs, 30 neighborhoods and 34 postal codes.


As we can see, number of postal codes reduced from 104 to 34.

In [16]:
CLIENT_ID = 'W40S05W1UCMBMYIKJLOPO41ZVGDUDAX2PH1XJACF3YRSKJVC' # your Foursquare ID
CLIENT_SECRET = 'MXTUAAC5F0X1HOQFVHLAT23HRYYAOI4X3MDTEQD0GCWK13HZ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: W40S05W1UCMBMYIKJLOPO41ZVGDUDAX2PH1XJACF3YRSKJVC
CLIENT_SECRET:MXTUAAC5F0X1HOQFVHLAT23HRYYAOI4X3MDTEQD0GCWK13HZ


In [17]:
#importing all necessary libraries to search for data, use clustering algorithm and draw it on a map. It will take a while!

import json # library to handle JSON files

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [18]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

In [19]:
!pip install folium==0.5.0 # uncomment this line if you haven't completed the Foursquare API lab



In [20]:
import folium # map rendering library

Define venue extracting function for all neighbourhoods in dataframe

In [21]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

run function on dataframe

In [23]:
LIMIT=100

york_venues = getNearbyVenues(names=df_york['Neighborhood'],
                                   latitudes=df_york['Latitude'],
                                   longitudes=df_york['Longitude']
                                  )


Parkwoods
Victoria Village
Lawrence Manor, Lawrence Heights
Don Mills
Parkview Hill, Woodbine Gardens
Glencairn
Don Mills
Woodbine Heights
Humewood-Cedarvale
Caledonia-Fairbanks
Leaside
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Bayview Village
Downsview
York Mills, Silver Hills
Downsview
North Park, Maple Leaf Park, Upwood Park
Humber Summit
Willowdale, Newtonbrook
Downsview
Bedford Park, Lawrence Manor East
Del Ray, Mount Dennis, Keelsdale and Silverthorn
Humberlea, Emery
Willowdale, Willowdale East
Downsview
Runnymede, The Junction North
Weston
York Mills West
Willowdale, Willowdale West


Check size of dataframe

In [24]:
print(york_venues.shape)
york_venues.head()

(340, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
3,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant


Check how many venues were returned for each neighborhood

In [25]:
york_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Wilson Heights, Downsview North",20,20,20,20,20,20
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",24,24,24,24,24,24
Caledonia-Fairbanks,4,4,4,4,4,4
"Del Ray, Mount Dennis, Keelsdale and Silverthorn",5,5,5,5,5,5
Don Mills,26,26,26,26,26,26
Downsview,16,16,16,16,16,16
"East Toronto, Broadview North (Old East York)",3,3,3,3,3,3
"Fairview, Henry Farm, Oriole",67,67,67,67,67,67
Glencairn,4,4,4,4,4,4


Create dummy variables

In [26]:
# one hot encoding
york_onehot = pd.get_dummies(york_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
york_onehot['Neighborhood'] = york_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [york_onehot.columns[-1]] + list(york_onehot.columns[:-1])
york_onehot = york_onehot[fixed_columns]

york_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,...,Theater,Toy / Game Store,Trail,Turkish Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Examine size

In [27]:
york_onehot.shape

(340, 122)

Calculate frequency of each venue

In [28]:
york_grouped = york_onehot.groupby('Neighborhood').mean().reset_index()
york_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,...,Theater,Toy / Game Store,Trail,Turkish Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Women's Store,Yoga Studio
0,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Caledonia-Fairbanks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0
4,"Del Ray, Mount Dennis, Keelsdale and Silverthorn",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0
5,Don Mills,0.0,0.0,0.0,0.038462,0.0,0.076923,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Downsview,0.0,0.0625,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"East Toronto, Broadview North (Old East York)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Fairview, Henry Farm, Oriole",0.0,0.0,0.014925,0.0,0.0,0.014925,0.0,0.0,0.014925,...,0.014925,0.014925,0.0,0.0,0.014925,0.0,0.0,0.0,0.014925,0.0
9,Glencairn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Create top 10 venues list

In [29]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [30]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = york_grouped['Neighborhood']

for ind in np.arange(york_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(york_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Gas Station,Ice Cream Shop,Mobile Phone Shop,Park,Fried Chicken Joint,Pharmacy,Pizza Place,Deli / Bodega
1,Bayview Village,Bank,Chinese Restaurant,Japanese Restaurant,Café,Yoga Studio,Dog Run,Discount Store,Diner,Dim Sum Restaurant,Department Store
2,"Bedford Park, Lawrence Manor East",Sandwich Place,Italian Restaurant,Coffee Shop,Restaurant,Grocery Store,Greek Restaurant,Ice Cream Shop,Indian Restaurant,Juice Bar,Liquor Store
3,Caledonia-Fairbanks,Park,Women's Store,Pool,Yoga Studio,Electronics Store,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Deli / Bodega
4,"Del Ray, Mount Dennis, Keelsdale and Silverthorn",Skating Rink,Turkish Restaurant,Sandwich Place,Discount Store,Restaurant,Yoga Studio,Deli / Bodega,Dim Sum Restaurant,Dessert Shop,Department Store


Now the dataframe is ready, we define and apply K-means algorithm

In [31]:
# set number of clusters
kclusters = 5

york_grouped_clustering = york_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(york_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 0, 1, 1, 1, 2, 1, 1], dtype=int32)

In [32]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

york_merged = df_york

# merge york_grouped with york_data to add latitude/longitude for each neighborhood
york_merged = york_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

york_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,0.0,Park,Food & Drink Shop,Yoga Studio,Electronics Store,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Deli / Bodega,Department Store
1,M4A,North York,Victoria Village,43.725882,-79.315572,1.0,Coffee Shop,Portuguese Restaurant,Hockey Arena,French Restaurant,Intersection,Yoga Studio,Department Store,Diner,Dim Sum Restaurant,Dessert Shop
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,1.0,Clothing Store,Accessories Store,Boutique,Furniture / Home Store,Event Space,Miscellaneous Shop,Coffee Shop,Gift Shop,Vietnamese Restaurant,Athletics & Sports
3,M3B,North York,Don Mills,43.745906,-79.352188,1.0,Coffee Shop,Japanese Restaurant,Asian Restaurant,Gym,Restaurant,Beer Store,Café,Supermarket,Sandwich Place,Shopping Mall
4,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,1.0,Pizza Place,Bank,Gym / Fitness Center,Intersection,Fast Food Restaurant,Pharmacy,Café,Breakfast Spot,Gastropub,Athletics & Sports


In [33]:
york_merged['Cluster Labels'].unique

<bound method Series.unique of 0     0.0
1     1.0
2     1.0
3     1.0
4     1.0
5     1.0
6     1.0
7     1.0
8     1.0
9     0.0
10    1.0
11    1.0
12    1.0
13    1.0
14    1.0
15    1.0
16    2.0
17    1.0
18    1.0
19    NaN
20    1.0
21    0.0
22    1.0
23    NaN
24    1.0
25    1.0
26    1.0
27    3.0
28    1.0
29    1.0
30    4.0
31    2.0
32    2.0
33    1.0
Name: Cluster Labels, dtype: float64>

Turns out that rows 19 and 23 resulted in NaN cluster, because there were 0 venues found. We will delete these rows for convenience.

In [34]:
york_merged.drop([19,23],inplace=True)

In [35]:
york_merged.reset_index()

Unnamed: 0,index,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,M3A,North York,Parkwoods,43.753259,-79.329656,0.0,Park,Food & Drink Shop,Yoga Studio,Electronics Store,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Deli / Bodega,Department Store
1,1,M4A,North York,Victoria Village,43.725882,-79.315572,1.0,Coffee Shop,Portuguese Restaurant,Hockey Arena,French Restaurant,Intersection,Yoga Studio,Department Store,Diner,Dim Sum Restaurant,Dessert Shop
2,2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,1.0,Clothing Store,Accessories Store,Boutique,Furniture / Home Store,Event Space,Miscellaneous Shop,Coffee Shop,Gift Shop,Vietnamese Restaurant,Athletics & Sports
3,3,M3B,North York,Don Mills,43.745906,-79.352188,1.0,Coffee Shop,Japanese Restaurant,Asian Restaurant,Gym,Restaurant,Beer Store,Café,Supermarket,Sandwich Place,Shopping Mall
4,4,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,1.0,Pizza Place,Bank,Gym / Fitness Center,Intersection,Fast Food Restaurant,Pharmacy,Café,Breakfast Spot,Gastropub,Athletics & Sports
5,5,M6B,North York,Glencairn,43.709577,-79.445073,1.0,Pizza Place,Pub,Japanese Restaurant,Electronics Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Deli / Bodega
6,6,M3C,North York,Don Mills,43.7259,-79.340923,1.0,Coffee Shop,Japanese Restaurant,Asian Restaurant,Gym,Restaurant,Beer Store,Café,Supermarket,Sandwich Place,Shopping Mall
7,7,M4C,East York,Woodbine Heights,43.695344,-79.318389,1.0,Bus Stop,Athletics & Sports,Park,Diner,Beer Store,Curling Ice,Skating Rink,Video Store,Pharmacy,Cosmetics Shop
8,8,M6C,York,Humewood-Cedarvale,43.693781,-79.428191,1.0,Park,Field,Hockey Arena,Trail,Yoga Studio,Department Store,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop
9,9,M6E,York,Caledonia-Fairbanks,43.689026,-79.453512,0.0,Park,Women's Store,Pool,Yoga Studio,Electronics Store,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Deli / Bodega


In [36]:
york_merged.shape

(32, 16)

In [37]:
york_merged['Cluster Labels'].unique

<bound method Series.unique of 0     0.0
1     1.0
2     1.0
3     1.0
4     1.0
5     1.0
6     1.0
7     1.0
8     1.0
9     0.0
10    1.0
11    1.0
12    1.0
13    1.0
14    1.0
15    1.0
16    2.0
17    1.0
18    1.0
20    1.0
21    0.0
22    1.0
24    1.0
25    1.0
26    1.0
27    3.0
28    1.0
29    1.0
30    4.0
31    2.0
32    2.0
33    1.0
Name: Cluster Labels, dtype: float64>

In [38]:
york_merged['Cluster Labels'] = york_merged['Cluster Labels'].astype('int32')

In [39]:
york_merged['Cluster Labels'].dtype

dtype('int32')

In [40]:
map_latitude = york_merged['Latitude'].mean()
map_longitude = york_merged['Longitude'].mean()

In [42]:
# create map
map_clusters = folium.Map(location=[map_latitude, map_longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(york_merged['Latitude'], york_merged['Longitude'], york_merged['Neighborhood'], york_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters