# Applied Data Science Capstone - Week 3 Project #

<I>Tracy Wang</I>

### PART 1: Make dataframe with PostalCode, Borough, Neighborhood ###

In [1]:
# Setup environment

# standard libraries
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Foursquare
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# JSON files
import json # library to handle JSON files
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# plotting data
import matplotlib.cm as cm
import matplotlib.colors as colors

# k-means clustering
from sklearn.cluster import KMeans

# map display
!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

# other functions
!pip install lxml   # for read_html

print('Libraries imported.')

Solving environment: done


  current version: 4.5.11
  latest version: 4.7.12

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.

Solving environment: done


  current version: 4.5.11
  latest version: 4.7.12

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.

Libraries imported.


In [2]:
# Use the Notebook to build the code to scrape the following Wikipedia page, 
# https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M, in order 
# to obtain the data that is in the table of postal codes and to transform 
# the data into a pandas dataframe like the one shown below

lPostal = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
dfPostal = pd.DataFrame(lPostal[0])    # add [0] to capture table properly
dfPostal.head(10)                      # view first rows to verify dataframe looks correct

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


In [3]:
# Only process the cells that have an assigned borough. 
# Ignore cells with a borough that is Not assigned.

dfClean = dfPostal[dfPostal['Borough'] != 'Not assigned'].copy(deep=False)
dfClean.head(10)                      # view first rows to verify dataframe looks correct

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [4]:
# If a cell has a borough but a Not assigned neighborhood, then the 
# neighborhood will be the same as the borough. So for the 9th cell 
# in the table on the Wikipedia page, the value of the Borough and 
# the Neighborhood columns will be Queen's Park.

dfClean.loc[dfClean['Neighbourhood'] == 'Not assigned', 'Neighbourhood'] = dfClean['Borough']
dfClean.head(10)                      # view first rows to verify dataframe looks correct

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [5]:
# More than one neighborhood can exist in one postal code area. 
# For example, in the table on the Wikipedia page, you will notice 
# that M5A is listed twice and has two neighborhoods: Harbourfront 
# and Regent Park. These two rows will be combined into one row with 
# the neighborhoods separated with a comma as shown in row 11 in the 
# above table.

# create dfNew: define the list of columns & create blank data frame
dfNew = pd.DataFrame(columns=['PostalCode', 'Borough', 'Neighborhood'])
print(dfNew)

# generate a list of unique & sorted postal codes
lPostalCode = np.sort(dfClean['Postcode'].unique())
print(lPostalCode)

# populate the new table dfNew with items correponding to each postal code:

for sPostalCode in lPostalCode:
    dfResults = dfClean[dfClean['Postcode'] == sPostalCode]
    # print(dfResults)
    lBoroughs = np.sort(dfResults['Borough'].unique()).tolist()[0]
    # df['value'] = df.lookup(df.index, df['best'])
    # lBoroughs = dfClean.lookup(dfResults, dfClean['Borough'])
    # print(dfClean.lookup(dfResults, dfClean['Borough']))
    lNeighborhoods = np.sort(dfResults['Neighbourhood'].unique()).tolist()
    dfNew = dfNew.append({'PostalCode': sPostalCode,
                          'Borough': lBoroughs,
                          'Neighborhood': lNeighborhoods},
                         ignore_index=True)
dfNew

Empty DataFrame
Columns: [PostalCode, Borough, Neighborhood]
Index: []
['M1B' 'M1C' 'M1E' 'M1G' 'M1H' 'M1J' 'M1K' 'M1L' 'M1M' 'M1N' 'M1P' 'M1R'
 'M1S' 'M1T' 'M1V' 'M1W' 'M1X' 'M2H' 'M2J' 'M2K' 'M2L' 'M2M' 'M2N' 'M2P'
 'M2R' 'M3A' 'M3B' 'M3C' 'M3H' 'M3J' 'M3K' 'M3L' 'M3M' 'M3N' 'M4A' 'M4B'
 'M4C' 'M4E' 'M4G' 'M4H' 'M4J' 'M4K' 'M4L' 'M4M' 'M4N' 'M4P' 'M4R' 'M4S'
 'M4T' 'M4V' 'M4W' 'M4X' 'M4Y' 'M5A' 'M5B' 'M5C' 'M5E' 'M5G' 'M5H' 'M5J'
 'M5K' 'M5L' 'M5M' 'M5N' 'M5P' 'M5R' 'M5S' 'M5T' 'M5V' 'M5W' 'M5X' 'M6A'
 'M6B' 'M6C' 'M6E' 'M6G' 'M6H' 'M6J' 'M6K' 'M6L' 'M6M' 'M6N' 'M6P' 'M6R'
 'M6S' 'M7A' 'M7R' 'M7Y' 'M8V' 'M8W' 'M8X' 'M8Y' 'M8Z' 'M9A' 'M9B' 'M9C'
 'M9L' 'M9M' 'M9N' 'M9P' 'M9R' 'M9V' 'M9W']


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"[Malvern, Rouge]"
1,M1C,Scarborough,"[Highland Creek, Port Union, Rouge Hill]"
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]"
3,M1G,Scarborough,[Woburn]
4,M1H,Scarborough,[Cedarbrae]
5,M1J,Scarborough,[Scarborough Village]
6,M1K,Scarborough,"[East Birchmount Park, Ionview, Kennedy Park]"
7,M1L,Scarborough,"[Clairlea, Golden Mile, Oakridge]"
8,M1M,Scarborough,"[Cliffcrest, Cliffside, Scarborough Village West]"
9,M1N,Scarborough,"[Birch Cliff, Cliffside West]"


In [6]:
# In the last cell of your notebook, use the .shape method 
# to print the number of rows of your dataframe.

print(dfNew.shape)

(103, 3)


### PART 2: Add Latitude & Longitude columns to the dataframe ###

In [7]:
# Use the Geocoder package or the csv file to create the dataframe
# http://cocl.us/Geospatial_data

# read in CSV with Latitude & Longitude for each Postal Code
dfGeo = pd.read_csv('http://cocl.us/Geospatial_data')
print(dfGeo.head(10))

  Postal Code   Latitude  Longitude
0         M1B  43.806686 -79.194353
1         M1C  43.784535 -79.160497
2         M1E  43.763573 -79.188711
3         M1G  43.770992 -79.216917
4         M1H  43.773136 -79.239476
5         M1J  43.744734 -79.239476
6         M1K  43.727929 -79.262029
7         M1L  43.711112 -79.284577
8         M1M  43.716316 -79.239476
9         M1N  43.692657 -79.264848


In [8]:
# Add Latitudes & Longitudes for each Postal Code from Geocoder table
# into my dataframe as Latitude & Longitude columns

lPC = dfNew['PostalCode'].tolist()
lLatitude = dfGeo.loc[dfGeo['Postal Code'] == lPC,'Latitude'].tolist()
lLongitude = dfGeo.loc[dfGeo['Postal Code'] == lPC,'Longitude'].tolist()
dfNew['Latitude'] = lLatitude
dfNew['Longitude'] = lLongitude
print(dfNew.head(10))

  PostalCode      Borough                                       Neighborhood  \
0        M1B  Scarborough                                   [Malvern, Rouge]   
1        M1C  Scarborough           [Highland Creek, Port Union, Rouge Hill]   
2        M1E  Scarborough                [Guildwood, Morningside, West Hill]   
3        M1G  Scarborough                                           [Woburn]   
4        M1H  Scarborough                                        [Cedarbrae]   
5        M1J  Scarborough                              [Scarborough Village]   
6        M1K  Scarborough      [East Birchmount Park, Ionview, Kennedy Park]   
7        M1L  Scarborough                  [Clairlea, Golden Mile, Oakridge]   
8        M1M  Scarborough  [Cliffcrest, Cliffside, Scarborough Village West]   
9        M1N  Scarborough                      [Birch Cliff, Cliffside West]   

    Latitude  Longitude  
0  43.806686 -79.194353  
1  43.784535 -79.160497  
2  43.763573 -79.188711  
3  43.770992 -7

### PART 3: Explore and Cluster Neighborhoods in Toronto ###

In [9]:
#  You can decide to work with only boroughs that contain the word Toronto

# Create list of Toronto Boroughs
lBoroughs = np.sort(dfNew['Borough'].unique())
lToronto = ([i for i,item in enumerate(lBoroughs) if "Toronto" in item])
lBT = lBoroughs[lToronto].tolist()
print('List of Toronto boroughs:')
print(lBT)
dfToronto = dfNew[dfNew['Borough'].isin(lBT)].copy(deep=True)
print('Toronto data:')
print(dfToronto)

List of Toronto boroughs:
['Central Toronto', 'Downtown Toronto', 'East Toronto', 'West Toronto']
Toronto data:
   PostalCode           Borough  \
37        M4E      East Toronto   
41        M4K      East Toronto   
42        M4L      East Toronto   
43        M4M      East Toronto   
44        M4N   Central Toronto   
45        M4P   Central Toronto   
46        M4R   Central Toronto   
47        M4S   Central Toronto   
48        M4T   Central Toronto   
49        M4V   Central Toronto   
50        M4W  Downtown Toronto   
51        M4X  Downtown Toronto   
52        M4Y  Downtown Toronto   
53        M5A  Downtown Toronto   
54        M5B  Downtown Toronto   
55        M5C  Downtown Toronto   
56        M5E  Downtown Toronto   
57        M5G  Downtown Toronto   
58        M5H  Downtown Toronto   
59        M5J  Downtown Toronto   
60        M5K  Downtown Toronto   
61        M5L  Downtown Toronto   
63        M5N   Central Toronto   
64        M5P   Central Toronto   
65        M5R

In [10]:
# Use geopy to get Latitude & Longitude of Toronto for map
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="TO_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

# create map of Toronto using latitude and longitude values
mapToronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, borough, neighborhood in zip(dfToronto['Latitude'], dfToronto['Longitude'], dfToronto['Borough'], dfToronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(mapToronto)  
    
mapToronto

The geograpical coordinates of Toronto are 43.653963, -79.387207.


In [11]:
# replicate the same analysis we did to the New York City data

# Foursquare credentials
CLIENT_ID = 'XO2ABQ5J2EXXKQJTQNMEHEQVMSPBRE1AWOWMD0AOVDL2YG1S' # your Foursquare ID
CLIENT_SECRET = '22IN2LGH2IWXTMTKDEU4PQRAIEVOEM2V5HMVFC2WI5IWNLRZ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']
    

# I didn't use the same variable name as the example in an earlier section, so define it here: LIMIT=100
# Note: this function shouldn't be using LIMIT without including it as a parameter, imho.
LIMIT = 100

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)


Your credentails:
CLIENT_ID: XO2ABQ5J2EXXKQJTQNMEHEQVMSPBRE1AWOWMD0AOVDL2YG1S
CLIENT_SECRET:22IN2LGH2IWXTMTKDEU4PQRAIEVOEM2V5HMVFC2WI5IWNLRZ


In [12]:
# test Foursquare calls

# first Postal Code in list
# print(dfToronto.loc[37, 'Latitude'])        # example borough is index 37 (first in Toronto list)
latPC = dfToronto.loc[37, 'Latitude']         # postal code's latitude
longPC = dfToronto.loc[37, 'Longitude']       # postal code's longitude
namePC = dfToronto.loc[37, 'PostalCode']      # postal code
print('Latitude and longitude values of postal code {} are {}, {}.'.format(namePC, latPC, longPC))

# setup a GET call to foursquare
nMax = 100
iDistance = 500
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, latPC, longPC, iDistance, nMax)
print(url)
results = requests.get(url).json()
# results         # will show the JSON as returned by foursquare (long)

# test with single postal code
venues = results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues) # flatten JSON
# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]
# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
print(nearby_venues.head())

Latitude and longitude values of postal code M4E are 43.67635739999999, -79.2930312.
https://api.foursquare.com/v2/venues/explore?client_id=XO2ABQ5J2EXXKQJTQNMEHEQVMSPBRE1AWOWMD0AOVDL2YG1S&client_secret=22IN2LGH2IWXTMTKDEU4PQRAIEVOEM2V5HMVFC2WI5IWNLRZ&v=20180605&ll=43.67635739999999,-79.2930312&radius=500&limit=100
                                 name         categories        lat        lng
0                   Glen Manor Ravine              Trail  43.676821 -79.293942
1  The Big Carrot Natural Food Market  Health Food Store  43.678879 -79.297734
2                 Grover Pub and Grub                Pub  43.679181 -79.297215
3                       Upper Beaches       Neighborhood  43.680563 -79.292869
4                          Dip 'n Sip        Coffee Shop  43.678897 -79.297745


In [13]:
# venue categories per borough for all boroughs in list (Toronto):

venuesToronto = getNearbyVenues(names=dfToronto['PostalCode'],
                                   latitudes=dfToronto['Latitude'],
                                   longitudes=dfToronto['Longitude'],
                                   radius=500
                                  )
print(venuesToronto.shape)
print(venuesToronto.head(10))
print(venuesToronto.groupby('Neighborhood').count())
print('There are {} unique categories.'.format(len(venuesToronto['Venue Category'].unique())))

# one hot encoding
onehotToronto = pd.get_dummies(venuesToronto[['Venue Category']], prefix="", prefix_sep="")
# print(onehotToronto.head())

# add neighborhood column back to dataframe
onehotToronto['PostalCode'] = venuesToronto['Neighborhood'] 
# print(onehotToronto.head())

# move neighborhood column to the first column
fixed_columns = [onehotToronto.columns[-1]] + list(onehotToronto.columns[:-1])
onehotToronto = onehotToronto[fixed_columns]

# print(onehotToronto.head())
# print(onehotToronto.shape)
groupedToronto = onehotToronto.groupby('PostalCode').mean().reset_index()
print(groupedToronto.head(10))
print(groupedToronto.shape)

M4E
M4K
M4L
M4M
M4N
M4P
M4R
M4S
M4T
M4V
M4W
M4X
M4Y
M5A
M5B
M5C
M5E
M5G
M5H
M5J
M5K
M5L
M5N
M5P
M5R
M5S
M5T
M5V
M5W
M5X
M6G
M6H
M6J
M6K
M6P
M6R
M6S
M7Y
(1680, 7)
  Neighborhood  Neighborhood Latitude  Neighborhood Longitude  \
0          M4E              43.676357              -79.293031   
1          M4E              43.676357              -79.293031   
2          M4E              43.676357              -79.293031   
3          M4E              43.676357              -79.293031   
4          M4E              43.676357              -79.293031   
5          M4K              43.679557              -79.352188   
6          M4K              43.679557              -79.352188   
7          M4K              43.679557              -79.352188   
8          M4K              43.679557              -79.352188   
9          M4K              43.679557              -79.352188   

                                Venue  Venue Latitude  Venue Longitude  \
0                   Glen Manor Ravine       43.6

In [14]:
# summarize most common venues per Postal Code

# define function
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

# use function
num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['PostalCode']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['PostalCode'] = groupedToronto['PostalCode']
for ind in np.arange(groupedToronto.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(groupedToronto.iloc[ind, :], num_top_venues)
print(neighborhoods_venues_sorted.head(10))

  PostalCode 1st Most Common Venue 2nd Most Common Venue  \
0        M4E     Health Food Store           Coffee Shop   
1        M4K      Greek Restaurant           Coffee Shop   
2        M4L               Brewery           Coffee Shop   
3        M4M                  Café           Coffee Shop   
4        M4N                  Park           Swim School   
5        M4P                 Hotel          Dance Studio   
6        M4R        Clothing Store   Sporting Goods Shop   
7        M4S        Sandwich Place          Dessert Shop   
8        M4T            Playground            Restaurant   
9        M4V                   Pub           Coffee Shop   

  3rd Most Common Venue 4th Most Common Venue   5th Most Common Venue  \
0                 Trail                   Pub            Neighborhood   
1        Ice Cream Shop    Italian Restaurant  Furniture / Home Store   
2    Italian Restaurant          Intersection        Sushi Restaurant   
3                Bakery    Italian Restaurant  

In [15]:
# perform clustering analysis

# set number of clusters
kclusters = 5          # arbitrarily choose # of clusters

toronto_grouped_clustering = groupedToronto.drop('PostalCode', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

mergedToronto = dfToronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
mergedToronto = mergedToronto.join(neighborhoods_venues_sorted.set_index('PostalCode'), on='PostalCode')

# show a sample of the most common venues per Postal Code, with cluster numbers assigned
mergedToronto.head()


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,M4E,East Toronto,[The Beaches],43.676357,-79.293031,0,Health Food Store,Coffee Shop,Trail,Pub,Neighborhood,Ethiopian Restaurant,Event Space,Electronics Store,Falafel Restaurant,Dim Sum Restaurant
41,M4K,East Toronto,"[Riverdale, The Danforth West]",43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Ice Cream Shop,Italian Restaurant,Furniture / Home Store,Yoga Studio,Brewery,Bubble Tea Shop,Caribbean Restaurant,Restaurant
42,M4L,East Toronto,"[India Bazaar, The Beaches West]",43.668999,-79.315572,0,Brewery,Coffee Shop,Italian Restaurant,Intersection,Sushi Restaurant,Sandwich Place,Ice Cream Shop,Fish & Chips Shop,Movie Theater,Pub
43,M4M,East Toronto,[Studio District],43.659526,-79.340923,0,Café,Coffee Shop,Bakery,Italian Restaurant,American Restaurant,Yoga Studio,Fish Market,Bookstore,Latin American Restaurant,Brewery
44,M4N,Central Toronto,[Lawrence Park],43.72802,-79.38879,4,Park,Swim School,Bus Line,Yoga Studio,Dog Run,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant


In [16]:
# create cluster map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(mergedToronto['Latitude'], mergedToronto['Longitude'], mergedToronto['Neighborhood'], mergedToronto['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Observations ####
Most of the areas (by Postal Code) in the Toronto area are very similar, with a few different selections of venues in the outlying areas to the North.  It is possible to speculate about the nature of the Postal Code areas by the top few venue types.  Perhaps urban areas generally have mostly restaurants, and residential areas have other types of items, such as parks.  I tried using k = 4, 5, 7 clusters, and the results were similar for each.  The map above shows clustering assuming k=5 clusters.