# Explore and Cluster Neighborhoods in Toronto

### 1) Get data 'PostalCode', 'Borough', and 'Neighborhoods' in Toronto

In [1]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

Import pandas module

In [2]:
import pandas as pd

Get Wikipedia webpage with pandas

In [3]:
url_toronto = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
page = pd.read_html(url_toronto)

Get table into dataframe

In [4]:
# get dataframe from website above
df = page[0]
print(type(df))
print(df.shape)
df.head()

<class 'pandas.core.frame.DataFrame'>
(180, 3)


Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


Rename columns

In [5]:
# rename columns
df.rename(columns={'Postal Code': 'PostalCode', 'Neighbourhood': 'Neighborhood'}, inplace=True)
df = df.astype(str)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


Delete all rows with no 'Borough' assigned

In [6]:
df = df.drop(df[df['Borough'] == 'Not assigned'].index, axis=0).reset_index(drop=True)
print(df.shape)
df.head()

(103, 3)


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


If data has 'Borough' but not 'Neighborhood' name neighborhood as borough

In [7]:
df.loc[(df.Borough != 'Not assigned') & (df.Neighborhood == 'Not assigned'), 'Neighborhood'] = df['Borough']
print(df.shape)
df.head()

(103, 3)


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


Combining 'Neighborhoods' when equal 'PostalCode'

In [8]:
indexCodes = 0
while indexCodes < df.shape[0]:
    indexSame = df[df['PostalCode'] == df.iloc[indexCodes,0]].index.values
    indexSame = indexSame[1:]
    newStr = df.iloc[indexCodes,2]
    for idx in indexSame:
        newStr += ', ' + df.iloc[idx,2]
    df.iloc[indexCodes,2] = newStr
    df = df.drop(indexSame, axis=0).reset_index(drop=True)
    indexCodes += 1

Print shape of the dataframe

In [9]:
print(df.shape)

(103, 3)


### 2) Get 'Latitude', and 'Longitude' coordinates for each 'postal code' in Toronto

Get data from file 'Geospatial_Coordinates.csv'

In [10]:
file = 'Geospatial_Coordinates.csv'
df_coord = pd.read_csv(file)
df_coord.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Add the 'Latitude' and 'Longitude' columns to data frame with 'PostalCod', 'Borough', and 'Neighborhood'

In [11]:
#neighborhoods = pd.DataFrame(columns=column_names)
df_latlng = pd.DataFrame(columns=['Latitude', 'Longitude'])

for code in df['PostalCode']:
    temp = df_coord[df_coord['Postal Code'] == code]
    df_latlng = df_latlng.append({'Latitude': temp.iloc[0,1], 'Longitude': temp.iloc[0,2]}, ignore_index=True)
    
df = pd.concat([df, df_latlng], axis=1, sort=False)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### 3) Plotting "PostalCodes" in Toronto then clustering the "PostalCodes" around the University based on the venues.

Import some libraries

In [12]:
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

##### Plot markers for each "Borough" in Toronto

In [13]:
# get latitude and longitude from central Toronto
central_toronto = df[df['PostalCode'] == 'M4V']
latitude = central_toronto['Latitude'].values[0].astype(float)
longitude = central_toronto['Longitude'].values[0].astype(float)

# create map object
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers
for lat, lng, postCode in zip(df['Latitude'], df['Longitude'], df['PostalCode']):
    label = 'Postal Code is: {}'.format(postCode)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
    
map_toronto

Group 'Postal Codes' by 'Boroughs'

In [14]:
# groupe boroughs
df_grouped = df.groupby(df['Borough']).count()
df_grouped

Unnamed: 0_level_0,PostalCode,Neighborhood,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Central Toronto,9,9,9,9
Downtown Toronto,19,19,19,19
East Toronto,5,5,5,5
East York,5,5,5,5
Etobicoke,12,12,12,12
Mississauga,1,1,1,1
North York,24,24,24,24
Scarborough,17,17,17,17
West Toronto,6,6,6,6
York,5,5,5,5


##### Simple clustering of "PostalCodes" by into each "Borough"

In [15]:
import numpy as np
# get latitude and longitude from central Toronto
central_toronto = df[df['PostalCode'] == 'M4V']
latitude = central_toronto['Latitude'].values[0].astype(float)
longitude = central_toronto['Longitude'].values[0].astype(float)

# create map object
map_toronto_grouped_codes = folium.Map(location=[latitude, longitude], zoom_start=11)

# groupe boroughs
df_grouped = df.groupby(df['Borough']).count()
# Boroughs
boroughs = df_grouped.index

# set color scheme for the clusters
x = np.arange(df_grouped.shape[0])
color_array = cm.rainbow(np.linspace(0,1,df_grouped.shape[0]))
rainbow = [colors.rgb2hex(i) for i in color_array]

# add markers
for idx in range(df_grouped.shape[0]):
    df_temp = df[df['Borough'] == boroughs[idx]]
    for lat, lng, postalCode in zip(df_temp['Latitude'], df_temp['Longitude'], df_temp['PostalCode']):
        label = folium.Popup('Postal Code {} in Borough {}'.format(postalCode, boroughs[idx]), parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color=rainbow[idx],
            fill=True,
            fill_opacity=0.7).add_to(map_toronto_grouped_codes)
        
map_toronto_grouped_codes

##### Now we will search for all the "PostalCodes" within a radius 'R' around University of Toronto. Then, the "PostalCodes" will be clustered by the top 5 most venues in each "PostalCode"

In [16]:
# get lat long coordinates for the university
from geopy.geocoders import Nominatim

# get the latitude and longitude for the University of Toronto
geolocator = Nominatim(user_agent='toronto_explorer')
location = geolocator.geocode('University of Toronto')
latitude = location.latitude
longitude = location.longitude
print('University of Toronto geographial coordinates are {} latitude & {} longitude'.format(latitude, longitude))

University of Toronto geographial coordinates are 43.663461999999996 latitude & -79.39775965337452 longitude


get postalcodes that are a radius from the univesity

In [17]:
from geopy import distance

# search distance around the university
RADIUS = 4 #km
university = (latitude, longitude)
cols = ['PostalCode', 'Borough', 'Neighborhood', 'Latitude', 'Longitude']
df_univ = pd.DataFrame(columns=cols)

# build new data frame of "PostalCodes" around the University
for code, bor, neigh, lat, lng in zip(df['PostalCode'], df['Borough'], df['Neighborhood'], df['Latitude'], df['Longitude']):
    if distance.distance(university, (lat,lng)).km <= RADIUS:
        df_univ = df_univ.append({'PostalCode':code, 'Borough':bor, 'Neighborhood':neigh, 'Latitude':lat, 'Longitude':lng}, ignore_index=True)

# sort dataframe based on "PostalCode"
df_univ = df_univ.sort_values('PostalCode', axis=0).reset_index(drop=True)
df_univ

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
1,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049
2,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
3,M4X,Downtown Toronto,"St. James Town, Cabbagetown",43.667967,-79.367675
4,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
5,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
6,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
7,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
8,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
9,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383


Make map showing the "PostalCodes" within a RADIUS "R" from the University

In [18]:
# create map object
map_postalCode_university = folium.Map(location=[university[0], university[1]], zoom_start=12)

# plot a circle with radius 4km from the University
folium.Circle(
    [university[0], university[1]],
    radius=RADIUS*1000,
    color='black',
    fill=True,
    fill_opacity=0.15).add_to(map_postalCode_university)

# Plot a centerpoint for the University
label = folium.Popup('University of Toronto', parse_html=True)
folium.CircleMarker(
    [university[0], university[1]],
    radius=2.5,
    popup=label,
    color='black',
    fill=True,
    fill_opacity=1).add_to(map_postalCode_university)

# add markers for PostalCodes
for lat, lng, postalCode in zip(df_univ['Latitude'], df_univ['Longitude'], df_univ['PostalCode']):
    label = folium.Popup('Postal Code {}'.format(postalCode), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_opacity=0.7).add_to(map_postalCode_university)
        
map_postalCode_university

Get credentials for foursquares API

In [19]:
import requests
from pandas.io.json import json_normalize

creds = pd.read_csv('FoursquaresCredentials.csv')
# foursquare credentials and version
CLIENT_ID = creds.iloc[0,1]
CLIENT_SECRET = creds.iloc[0,2]
VERSION = creds.iloc[0,3]
LIMIT = 100 # A default Foursquare API limit value

Create a dataframe with all the venues within 1000 meters of the "PostalCode" centers

In [20]:
# new dataframe columns
nearbyVenuesCols = ['PostalCode', 'Borough', 'Neighborhood', 'Name', 'Latitude', 'Longitude', 'Category']
df_venues = pd.DataFrame(columns=nearbyVenuesCols)

# get all venues with in 1km of each PostalCode
for code, bor, neigh, lat, lng in zip(df_univ['PostalCode'], df_univ['Borough'], df_univ['Neighborhood'], df_univ['Latitude'], df_univ['Longitude']):
    url = 'http://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        lng,
        RADIUS*1000/4,
        LIMIT)  
    results = requests.get(url).json()['response']['groups'][0]['items']
    for idx in range(len(results)):
        df_venues = df_venues.append({'PostalCode':code,
                                      'Borough':bor, 
                                      'Neighborhood':neigh, 
                                      'Name':results[idx]['venue']['name'],
                                      'Latitude':results[idx]['venue']['location']['lat'],
                                      'Longitude':results[idx]['venue']['location']['lng'],
                                      'Category':results[idx]['venue']['categories'][0]['name']}, ignore_index=True)

print(df_venues.shape)
df_venues.head()

(2223, 7)


Unnamed: 0,PostalCode,Borough,Neighborhood,Name,Latitude,Longitude,Category
0,M4T,Central Toronto,"Moore Park, Summerhill East",Summerhill Market,43.686265,-79.375458,Grocery Store
1,M4T,Central Toronto,"Moore Park, Summerhill East",9bars,43.68866,-79.39194,Café
2,M4T,Central Toronto,"Moore Park, Summerhill East",The Bagel House,43.687374,-79.393696,Bagel Shop
3,M4T,Central Toronto,"Moore Park, Summerhill East",David A. Balfour Park,43.685629,-79.388078,Park
4,M4T,Central Toronto,"Moore Park, Summerhill East",Capocaccia Café,43.685915,-79.393305,Italian Restaurant


Create dataframe with all "Catagories" as onehot

In [21]:
# dummy categories for venues
df_venues_onehot = pd.get_dummies(df_venues[['Category']], prefix="", prefix_sep="")

# add zone information back to dataframe
df_venues_onehot.insert(loc=0, column='Name', value=df_venues['Name'])
df_venues_onehot.insert(loc=0, column='Count', value=np.ones(df_venues_onehot.shape[0]).astype(int))
df_venues_onehot.insert(loc=0, column='PostalCode', value=df_venues['PostalCode'])

print(df_venues_onehot.shape)
df_venues_onehot.head()

(2223, 237)


Unnamed: 0,PostalCode,Count,Name,Accessories Store,Airport,American Restaurant,Animal Shelter,Aquarium,Art Gallery,Art Museum,...,Trail,Train Station,Udon Restaurant,University,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Yoga Studio
0,M4T,1,Summerhill Market,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M4T,1,9bars,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M4T,1,The Bagel House,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M4T,1,David A. Balfour Park,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M4T,1,Capocaccia Café,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Groupe the venues by their "PostalCode"

In [22]:
# group by postalcode
df_venues_onehot_grouped = df_venues_onehot.groupby('PostalCode').mean().reset_index()

# add number of venues for each "PostalCode" to the dataframe
df_venues_onehot_grouped_count = df_venues_onehot.groupby('PostalCode')['Count'].sum().reset_index()
df_venues_onehot_grouped['Count'] = df_venues_onehot_grouped_count['Count']

# Sort data by the "PostalCode"
df_venues_onehot_grouped = df_venues_onehot_grouped.sort_values('PostalCode', axis=0).reset_index(drop=True)
df_venues_onehot_grouped

Unnamed: 0,PostalCode,Count,Accessories Store,Airport,American Restaurant,Animal Shelter,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,...,Trail,Train Station,Udon Restaurant,University,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Yoga Studio
0,M4T,64,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,...,0.015625,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.015625
1,M4V,77,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.012987,0.0,0.012987,0.0,0.012987,0.012987
2,M4W,22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M4X,33,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M4Y,100,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,...,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.02
5,M5A,100,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01
6,M5B,100,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01
7,M5C,100,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0
8,M5E,100,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,...,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0
9,M5G,100,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.01,...,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.01,0.02


Build dataframe with top 5 venues for each "PostalCode"

In [23]:
# dataframe to be split into clusters
dfCols = ['1st Most Common', '2nd Most Common', '3rd Most Common', '4th Most Common', '5th Most Common']
df_venues_common = pd.DataFrame(columns=dfCols)
for idx in range(df_venues_onehot_grouped.shape[0]):
    cols = df_venues_onehot_grouped.columns[2:]
    data = df_venues_onehot_grouped.iloc[idx,2:].sort_values(axis=0, ascending=False)
    df_venues_common = df_venues_common.append({dfCols[0]:data.index[0],
                                                dfCols[1]:data.index[1],
                                                dfCols[2]:data.index[2],
                                                dfCols[3]:data.index[3],
                                                dfCols[4]:data.index[4]}, ignore_index=True)

df_venues_common

Unnamed: 0,1st Most Common,2nd Most Common,3rd Most Common,4th Most Common,5th Most Common
0,Grocery Store,Coffee Shop,Italian Restaurant,Gym,Restaurant
1,Coffee Shop,Sushi Restaurant,Thai Restaurant,Italian Restaurant,Restaurant
2,Park,Coffee Shop,Grocery Store,Convenience Store,Bank
3,Diner,Café,Gastropub,Japanese Restaurant,Park
4,Coffee Shop,Japanese Restaurant,Gay Bar,Café,Park
5,Coffee Shop,Pub,Theater,Park,Café
6,Coffee Shop,Gastropub,Japanese Restaurant,Hotel,Italian Restaurant
7,Coffee Shop,Café,Restaurant,Gastropub,Hotel
8,Coffee Shop,Café,Hotel,Japanese Restaurant,Park
9,Coffee Shop,Hotel,Café,Park,Sushi Restaurant


Use k-means to assign each venue to a cluster 

In [24]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

kclusters = 4
df_toronto_clustered = df_venues_onehot_grouped.drop('PostalCode', axis=1)
df_toronto_clustered = df_toronto_clustered.drop('Count', axis=1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(df_toronto_clustered)
df_cluster = pd.DataFrame({'Cluster':kmeans.labels_})

# add culumns to the dataframe
df_univ['Cluster'] =  df_cluster['Cluster']     
df_univ['Venue Count'] =  df_venues_onehot_grouped['Count']     
df_univ['1st Most Common'] =  df_venues_common['1st Most Common']     
df_univ['2nd Most Common'] =  df_venues_common['2nd Most Common']     
df_univ['3rd Most Common'] =  df_venues_common['3rd Most Common']     
df_univ['4th Most Common'] =  df_venues_common['4th Most Common']     
df_univ['5th Most Common'] =  df_venues_common['5th Most Common']     

df_univ

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster,Venue Count,1st Most Common,2nd Most Common,3rd Most Common,4th Most Common,5th Most Common
0,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,3,64,Grocery Store,Coffee Shop,Italian Restaurant,Gym,Restaurant
1,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,3,77,Coffee Shop,Sushi Restaurant,Thai Restaurant,Italian Restaurant,Restaurant
2,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,3,22,Park,Coffee Shop,Grocery Store,Convenience Store,Bank
3,M4X,Downtown Toronto,"St. James Town, Cabbagetown",43.667967,-79.367675,0,33,Diner,Café,Gastropub,Japanese Restaurant,Park
4,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,0,100,Coffee Shop,Japanese Restaurant,Gay Bar,Café,Park
5,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,3,100,Coffee Shop,Pub,Theater,Park,Café
6,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,100,Coffee Shop,Gastropub,Japanese Restaurant,Hotel,Italian Restaurant
7,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,100,Coffee Shop,Café,Restaurant,Gastropub,Hotel
8,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,100,Coffee Shop,Café,Hotel,Japanese Restaurant,Park
9,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,100,Coffee Shop,Hotel,Café,Park,Sushi Restaurant


Create map with the "PostalCodes" clustered based on the venues

In [25]:
# create map object
map_postalCode_venues = folium.Map(location=[university[0], university[1]], zoom_start=12)

# set color scheme for the clusters
#x = np.arange(kclusters)
color_array = cm.rainbow(np.linspace(0,1,kclusters))
rainbow = [colors.rgb2hex(i) for i in color_array]

# add circles of the search radius for each postalcode
for lat, lng, clust, cnt, code, one, two, three, four, five in zip(
            df_univ['Latitude'],
            df_univ['Longitude'],
            df_univ['Cluster'],
            df_univ['Venue Count'],
            df_univ['PostalCode'],
            df_univ['1st Most Common'],
            df_univ['2nd Most Common'],
            df_univ['3rd Most Common'],
            df_univ['4th Most Common'],
            df_univ['5th Most Common']):
    folium.Circle(
        [lat, lng],
        radius=1000,
        color=rainbow[clust],
        fill=True,
        fill_opacity=0.15).add_to(map_postalCode_venues)

# add markers for the postalcodes
for lat, lng, clust, cnt, code, one, two, three, four, five in zip(
            df_univ['Latitude'],
            df_univ['Longitude'],
            df_univ['Cluster'],
            df_univ['Venue Count'],
            df_univ['PostalCode'],
            df_univ['1st Most Common'],
            df_univ['2nd Most Common'],
            df_univ['3rd Most Common'],
            df_univ['4th Most Common'],
            df_univ['5th Most Common']):
    label = folium.Popup('There are {} venues around the Postal Code {} with 5 most common venus being {}, {}, {}, {} & {}.'.format(
                        cnt, code, one, two, three, four, five), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=rainbow[clust],
        fill=True,
        fill_opacity=0.7).add_to(map_postalCode_venues)
        
map_postalCode_venues

##### Inspect clusters

In [26]:
# cluster 1
df_univ[df_univ['Cluster'] == 0]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster,Venue Count,1st Most Common,2nd Most Common,3rd Most Common,4th Most Common,5th Most Common
3,M4X,Downtown Toronto,"St. James Town, Cabbagetown",43.667967,-79.367675,0,33,Diner,Café,Gastropub,Japanese Restaurant,Park
4,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,0,100,Coffee Shop,Japanese Restaurant,Gay Bar,Café,Park
6,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,100,Coffee Shop,Gastropub,Japanese Restaurant,Hotel,Italian Restaurant
7,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,100,Coffee Shop,Café,Restaurant,Gastropub,Hotel
8,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,100,Coffee Shop,Café,Hotel,Japanese Restaurant,Park
9,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,100,Coffee Shop,Hotel,Café,Park,Sushi Restaurant
10,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,0,100,Coffee Shop,Café,Hotel,Theater,Italian Restaurant
11,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752,0,100,Coffee Shop,Hotel,Café,Restaurant,Park
12,M5K,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576,0,100,Hotel,Coffee Shop,Café,Japanese Restaurant,Restaurant
13,M5L,Downtown Toronto,"Commerce Court, Victoria Hotel",43.648198,-79.379817,0,100,Coffee Shop,Hotel,Japanese Restaurant,Restaurant,Café


In [27]:
# cluster 2
df_univ[df_univ['Cluster'] == 1]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster,Venue Count,1st Most Common,2nd Most Common,3rd Most Common,4th Most Common,5th Most Common
15,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678,1,100,Italian Restaurant,Café,Vegetarian / Vegan Restaurant,Restaurant,Gym
16,M5S,Downtown Toronto,"University of Toronto, Harbord",43.662696,-79.400049,1,100,Café,Bakery,Vegetarian / Vegan Restaurant,Coffee Shop,Bar
17,M5T,Downtown Toronto,"Kensington Market, Chinatown, Grange Park",43.653206,-79.400049,1,100,Café,Bar,Vegetarian / Vegan Restaurant,Coffee Shop,Art Gallery
21,M6G,Downtown Toronto,Christie,43.669542,-79.422564,1,100,Korean Restaurant,Café,Coffee Shop,Grocery Store,Cocktail Bar
23,M6J,West Toronto,"Little Portugal, Trinity",43.647927,-79.41975,1,100,Café,Restaurant,Bar,Coffee Shop,Bakery
24,M6K,West Toronto,"Brockton, Parkdale Village, Exhibition Place",43.636847,-79.428191,1,100,Café,Coffee Shop,Bar,Bakery,Furniture / Home Store


In [28]:
# cluster 3
df_univ[df_univ['Cluster'] == 2]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster,Venue Count,1st Most Common,2nd Most Common,3rd Most Common,4th Most Common,5th Most Common
18,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442,2,14,Café,Coffee Shop,Harbor / Marina,Track,Dog Run


In [29]:
# cluster 4
df_univ[df_univ['Cluster'] == 3]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster,Venue Count,1st Most Common,2nd Most Common,3rd Most Common,4th Most Common,5th Most Common
0,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,3,64,Grocery Store,Coffee Shop,Italian Restaurant,Gym,Restaurant
1,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,3,77,Coffee Shop,Sushi Restaurant,Thai Restaurant,Italian Restaurant,Restaurant
2,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,3,22,Park,Coffee Shop,Grocery Store,Convenience Store,Bank
5,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,3,100,Coffee Shop,Pub,Theater,Park,Café
14,M5P,Central Toronto,"Forest Hill North & West, Forest Hill Road Park",43.696948,-79.411307,3,49,Park,Café,Coffee Shop,Bank,Italian Restaurant
22,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,3,64,Café,Coffee Shop,Park,Sushi Restaurant,Italian Restaurant
25,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,3,100,Coffee Shop,Park,Thai Restaurant,Italian Restaurant,Café


Cluster 1: many Japanese Restaurants and Hotels

Cluster 2: many Vegetarian / Vegan Restaurants and bars

Cluster 3: Harbor / Marina	and Track

Cluster 4: Italian Restaurant, Thai Restaurant, and Restraunts