## Segmenting and Clustering Neighborhoods in Toronto with Geo Data

In [1]:
import pandas as pd
import numpy as np

from pandas import Series,DataFrame

import requests
from bs4 import BeautifulSoup

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering librar

In [2]:
url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
url_get = requests.get(url)
html = BeautifulSoup(url_get.text,"html.parser")
table = html.find_all('table', class_="wikitable sortable")
df = pd.read_html(str(table))[0]
df = df[df.Borough != "Not assigned"]

df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [3]:
import geocoder
#g = geocoder.google('Mountain View, CA') #google does not work
g = geocoder.arcgis('Mountain View, CA')
g = g.latlng
print(g)

[37.389670000000024, -122.08159999999998]


In [4]:
import geocoder

def get_latlon(postal_code):
    lat_lng_coords = None
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [5]:
for idx, row in df.iterrows():
    g = geocoder.arcgis('{}, Toronto, Ontario'.format(df.loc[idx,'Postal code']))   
    lat_lng_coords = g.latlng
    lat = lat_lng_coords[0]
    lng = lat_lng_coords[1]
    df.loc[idx,'Latitude'] = lat
    df.loc[idx,'Longitude'] = lng      
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.752935,-79.335641
3,M4A,North York,Victoria Village,43.728102,-79.31189
4,M5A,Downtown Toronto,Regent Park / Harbourfront,43.650964,-79.353041
5,M6A,North York,Lawrence Manor / Lawrence Heights,43.723265,-79.451211
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.66179,-79.38939


In order to define an instance of the geocoder, we need to define a user_agent. We will name our agent toronto_explorer, as shown below.

In [6]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [7]:
# create map of Tronto using latitude and longitude values
map_toronto = folium.Map(location=[lat, lng], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#Let's focus on the neighborhood of Toronto

In [8]:
toronto_data = df[df['Borough'].str.contains('Toronto',case=False)].reset_index(drop=True)

toronto_data.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,Regent Park / Harbourfront,43.650964,-79.353041
1,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.66179,-79.38939
2,M5B,Downtown Toronto,Garden District / Ryerson,43.657491,-79.377529
3,M5C,Downtown Toronto,St. James Town,43.651734,-79.375554
4,M4E,East Toronto,The Beaches,43.678148,-79.295349


In [9]:
# create map of Tronto using latitude and longitude values
map_toronto = folium.Map(location=[lat, lng], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Let's utilize the Foursquare API to explore the neighborhoods and segment them

In [10]:
CLIENT_ID = 'UECSPXWJJBZGG1GUPTYOCAL1124V0LYILPX1WYTYLNFRAYKX' # your Foursquare ID
CLIENT_SECRET = 'NN4J1EZZW5IRDSQDPPOJU2DMT5GJU2UWMP0IJG4Q4P1UU2UK' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [11]:
toronto_data.loc[0, 'Neighborhood']

'Regent Park / Harbourfront'

In [12]:
neighborhood_latitude = toronto_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = toronto_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = toronto_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Regent Park / Harbourfront are 43.65096410900003, -79.35304116399999.


let's get the top 100 venues that are in Regent Park / Harbourfront within a radius of 500 meters

In [13]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=UECSPXWJJBZGG1GUPTYOCAL1124V0LYILPX1WYTYLNFRAYKX&client_secret=NN4J1EZZW5IRDSQDPPOJU2DMT5GJU2UWMP0IJG4Q4P1UU2UK&v=20180605&ll=43.65096410900003,-79.35304116399999&radius=500&limit=100'

In [14]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5eaf7c1c3907e7001bc6f394'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Downtown Toronto',
  'headerFullLocation': 'Downtown Toronto, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 24,
  'suggestedBounds': {'ne': {'lat': 43.65546411350003,
    'lng': -79.34683350482591},
   'sw': {'lat': 43.646464104500026, 'lng': -79.35924882317407}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '57e0745a498ea809dbf75f68',
       'name': 'Souk Tabule',
       'location': {'address': '494 Front St E',
        'crossStreet': 'at Bayview',
        'lat': 43.65375556880743,
        'lng': -79.35439006096168,
        'labeledLatLngs

In [15]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [16]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Souk Tabule,Mediterranean Restaurant,43.653756,-79.35439
1,Young Centre for the Performing Arts,Performing Arts Venue,43.650825,-79.357593
2,SOMA chocolatemaker,Chocolate Shop,43.650622,-79.358127
3,BATLgrounds,Athletics & Sports,43.647088,-79.351306
4,Cluny Bistro & Boulangerie,French Restaurant,43.650565,-79.357843


In [17]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

24 venues were returned by Foursquare.


# Explore Neighborhoods in Toronto

Let's create a function to repeat the same process to all the neighborhoods

In [18]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [19]:
toronto_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

Regent Park / Harbourfront
Queen's Park / Ontario Provincial Government
Garden District / Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond / Adelaide / King
Dufferin / Dovercourt Village
Harbourfront East / Union Station / Toronto Islands
Little Portugal / Trinity
The Danforth West / Riverdale
Toronto Dominion Centre / Design Exchange
Brockton / Parkdale Village / Exhibition Place
India Bazaar / The Beaches West
Commerce Court / Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West
High Park / The Junction South
North Toronto West
The Annex / North Midtown / Yorkville
Parkdale / Roncesvalles
Davisville
University of Toronto / Harbord
Runnymede / Swansea
Moore Park / Summerhill East
Kensington Market / Chinatown / Grange Park
Summerhill West / Rathnelly / South Hill / Forest Hill SE / Deer Park
CN Tower / King and Spadina / Railway Lands / Harbourfront West / Bathurst Quay / South Niagara / Island airport
Rosed

In [20]:
print(toronto_venues.shape)
toronto_venues.head()

(1598, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Regent Park / Harbourfront,43.650964,-79.353041,Souk Tabule,43.653756,-79.35439,Mediterranean Restaurant
1,Regent Park / Harbourfront,43.650964,-79.353041,Young Centre for the Performing Arts,43.650825,-79.357593,Performing Arts Venue
2,Regent Park / Harbourfront,43.650964,-79.353041,SOMA chocolatemaker,43.650622,-79.358127,Chocolate Shop
3,Regent Park / Harbourfront,43.650964,-79.353041,BATLgrounds,43.647088,-79.351306,Athletics & Sports
4,Regent Park / Harbourfront,43.650964,-79.353041,Cluny Bistro & Boulangerie,43.650565,-79.357843,French Restaurant


In [21]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,66,66,66,66,66,66
Brockton / Parkdale Village / Exhibition Place,43,43,43,43,43,43
Business reply mail Processing Centre,100,100,100,100,100,100
CN Tower / King and Spadina / Railway Lands / Harbourfront West / Bathurst Quay / South Niagara / Island airport,64,64,64,64,64,64
Central Bay Street,54,54,54,54,54,54
Christie,12,12,12,12,12,12
Church and Wellesley,86,86,86,86,86,86
Commerce Court / Victoria Hotel,100,100,100,100,100,100
Davisville,26,26,26,26,26,26
Davisville North,7,7,7,7,7,7


In [22]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 221 uniques categories.


# Analyze Each Neighborhood

In [23]:
# one hot encoding
#The get_dummies() function is used to convert categorical variable into dummy/indicator variables.
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
cols = list(toronto_onehot)
# move the column to head of list using index, pop and insert
cols.insert(0, cols.pop(cols.index('Neighborhood')))
# use loc to reorder
toronto_onehot = toronto_onehot.loc[:, cols]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,...,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,Regent Park / Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Regent Park / Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Regent Park / Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Regent Park / Harbourfront,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Regent Park / Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


 Let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [24]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,...,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.015152,0.0,...,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.015152
1,Brockton / Parkdale Village / Exhibition Place,0.023256,0.0,0.023256,0.0,0.023256,0.0,0.0,0.0,0.0,...,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Business reply mail Processing Centre,0.0,0.02,0.01,0.0,0.0,0.03,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0
3,CN Tower / King and Spadina / Railway Lands / ...,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.018519,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.018519,0.018519,0.0,0.0,0.0
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.083333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.0,0.011628,0.0,0.0,0.011628,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011628
7,Commerce Court / Victoria Hotel,0.0,0.04,0.01,0.0,0.0,0.01,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01
8,Davisville,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
toronto_grouped.shape

(38, 221)

Let's print each neighborhood along with the top 5 most common venues

In [26]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
            venue  freq
0     Coffee Shop  0.09
1    Cocktail Bar  0.05
2            Café  0.03
3  Breakfast Spot  0.03
4      Restaurant  0.03


----Brockton / Parkdale Village / Exhibition Place----
                    venue  freq
0             Coffee Shop  0.09
1                    Café  0.07
2               Gift Shop  0.05
3  Thrift / Vintage Store  0.05
4                   Diner  0.02


----Business reply mail Processing Centre----
                 venue  freq
0          Coffee Shop  0.07
1                Hotel  0.05
2  Japanese Restaurant  0.04
3                 Café  0.04
4     Asian Restaurant  0.03


----CN Tower / King and Spadina / Railway Lands / Harbourfront West / Bathurst Quay / South Niagara / Island airport----
               venue  freq
0        Coffee Shop  0.06
1               Café  0.06
2               Park  0.05
3  French Restaurant  0.05
4         Restaurant  0.05


----Central Bay Street----
                       venue  freq
0               

In [27]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [28]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Breakfast Spot,Beer Bar,Seafood Restaurant,Lounge,Bakery,Café,Restaurant,Hotel
1,Brockton / Parkdale Village / Exhibition Place,Coffee Shop,Café,Gift Shop,Thrift / Vintage Store,Accessories Store,Chiropractor,Boutique,Brewery,Italian Restaurant,Japanese Restaurant
2,Business reply mail Processing Centre,Coffee Shop,Hotel,Café,Japanese Restaurant,Restaurant,Asian Restaurant,Bookstore,Sandwich Place,Salon / Barbershop,Steakhouse
3,CN Tower / King and Spadina / Railway Lands / ...,Café,Coffee Shop,Restaurant,French Restaurant,Park,Bar,Italian Restaurant,Japanese Restaurant,Lounge,Speakeasy
4,Central Bay Street,Coffee Shop,Café,Middle Eastern Restaurant,Breakfast Spot,Restaurant,Sandwich Place,Bubble Tea Shop,Italian Restaurant,Plaza,Clothing Store


# Cluster Neighborhoods

Run k-means to cluster the neighborhood into 5 clusters.

In [29]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [30]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,Regent Park / Harbourfront,43.650964,-79.353041,3.0,Pub,Café,Athletics & Sports,Mediterranean Restaurant,Thai Restaurant,Seafood Restaurant,Mexican Restaurant,Food Truck,French Restaurant,Chocolate Shop
1,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.66179,-79.38939,3.0,Coffee Shop,Sushi Restaurant,Café,Creperie,Pharmacy,Diner,Discount Store,Distribution Center,College Auditorium,Italian Restaurant
2,M5B,Downtown Toronto,Garden District / Ryerson,43.657491,-79.377529,3.0,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Sandwich Place,Café,Restaurant,Cosmetics Shop,Italian Restaurant,Hotel,Bar
3,M5C,Downtown Toronto,St. James Town,43.651734,-79.375554,3.0,Coffee Shop,Café,Cosmetics Shop,American Restaurant,Gastropub,Seafood Restaurant,Cocktail Bar,Hotel,Clothing Store,Restaurant
4,M4E,East Toronto,The Beaches,43.678148,-79.295349,3.0,Health Food Store,Trail,Pub,Donut Shop,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm


In [31]:
toronto_merged.shape

(39, 16)

Finally, let's visualize the resulting clusters

In [32]:
toronto_merged.isna().any()

Postal code               False
Borough                   False
Neighborhood              False
Latitude                  False
Longitude                 False
Cluster Labels             True
1st Most Common Venue      True
2nd Most Common Venue      True
3rd Most Common Venue      True
4th Most Common Venue      True
5th Most Common Venue      True
6th Most Common Venue      True
7th Most Common Venue      True
8th Most Common Venue      True
9th Most Common Venue      True
10th Most Common Venue     True
dtype: bool

In [33]:
# Find which columns have NaN
toronto_merged.columns[toronto_merged.isna().any()].tolist()

['Cluster Labels',
 '1st Most Common Venue',
 '2nd Most Common Venue',
 '3rd Most Common Venue',
 '4th Most Common Venue',
 '5th Most Common Venue',
 '6th Most Common Venue',
 '7th Most Common Venue',
 '8th Most Common Venue',
 '9th Most Common Venue',
 '10th Most Common Venue']

In [34]:
#Find the columns containing at least one NaN value:
toronto_merged.loc[:, toronto_merged.isna().any()]

Unnamed: 0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,3.0,Pub,Café,Athletics & Sports,Mediterranean Restaurant,Thai Restaurant,Seafood Restaurant,Mexican Restaurant,Food Truck,French Restaurant,Chocolate Shop
1,3.0,Coffee Shop,Sushi Restaurant,Café,Creperie,Pharmacy,Diner,Discount Store,Distribution Center,College Auditorium,Italian Restaurant
2,3.0,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Sandwich Place,Café,Restaurant,Cosmetics Shop,Italian Restaurant,Hotel,Bar
3,3.0,Coffee Shop,Café,Cosmetics Shop,American Restaurant,Gastropub,Seafood Restaurant,Cocktail Bar,Hotel,Clothing Store,Restaurant
4,3.0,Health Food Store,Trail,Pub,Donut Shop,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm
5,3.0,Coffee Shop,Cocktail Bar,Breakfast Spot,Beer Bar,Seafood Restaurant,Lounge,Bakery,Café,Restaurant,Hotel
6,3.0,Coffee Shop,Café,Middle Eastern Restaurant,Breakfast Spot,Restaurant,Sandwich Place,Bubble Tea Shop,Italian Restaurant,Plaza,Clothing Store
7,3.0,Grocery Store,Café,Baby Store,Candy Store,Athletics & Sports,Coffee Shop,Park,Playground,Fish Market,Fish & Chips Shop
8,3.0,Coffee Shop,Café,Clothing Store,Restaurant,Sushi Restaurant,Thai Restaurant,Deli / Bodega,Salad Place,Hotel,Gym
9,3.0,Furniture / Home Store,Park,Pharmacy,Grocery Store,Liquor Store,Brazilian Restaurant,Pool,Bank,Bakery,Café


In [35]:
#Delete the row with NaN values
toronto_merged = toronto_merged.drop([19], axis=0)

In [36]:
toronto_merged.shape

(38, 16)

In [37]:
toronto_merged

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,Regent Park / Harbourfront,43.650964,-79.353041,3.0,Pub,Café,Athletics & Sports,Mediterranean Restaurant,Thai Restaurant,Seafood Restaurant,Mexican Restaurant,Food Truck,French Restaurant,Chocolate Shop
1,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.66179,-79.38939,3.0,Coffee Shop,Sushi Restaurant,Café,Creperie,Pharmacy,Diner,Discount Store,Distribution Center,College Auditorium,Italian Restaurant
2,M5B,Downtown Toronto,Garden District / Ryerson,43.657491,-79.377529,3.0,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Sandwich Place,Café,Restaurant,Cosmetics Shop,Italian Restaurant,Hotel,Bar
3,M5C,Downtown Toronto,St. James Town,43.651734,-79.375554,3.0,Coffee Shop,Café,Cosmetics Shop,American Restaurant,Gastropub,Seafood Restaurant,Cocktail Bar,Hotel,Clothing Store,Restaurant
4,M4E,East Toronto,The Beaches,43.678148,-79.295349,3.0,Health Food Store,Trail,Pub,Donut Shop,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm
5,M5E,Downtown Toronto,Berczy Park,43.645196,-79.373855,3.0,Coffee Shop,Cocktail Bar,Breakfast Spot,Beer Bar,Seafood Restaurant,Lounge,Bakery,Café,Restaurant,Hotel
6,M5G,Downtown Toronto,Central Bay Street,43.656072,-79.385653,3.0,Coffee Shop,Café,Middle Eastern Restaurant,Breakfast Spot,Restaurant,Sandwich Place,Bubble Tea Shop,Italian Restaurant,Plaza,Clothing Store
7,M6G,Downtown Toronto,Christie,43.668602,-79.420387,3.0,Grocery Store,Café,Baby Store,Candy Store,Athletics & Sports,Coffee Shop,Park,Playground,Fish Market,Fish & Chips Shop
8,M5H,Downtown Toronto,Richmond / Adelaide / King,43.650542,-79.384116,3.0,Coffee Shop,Café,Clothing Store,Restaurant,Sushi Restaurant,Thai Restaurant,Deli / Bodega,Salad Place,Hotel,Gym
9,M6H,West Toronto,Dufferin / Dovercourt Village,43.66491,-79.438664,3.0,Furniture / Home Store,Park,Pharmacy,Grocery Store,Liquor Store,Brazilian Restaurant,Pool,Bank,Bakery,Café


In [38]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        #color=rainbow[cluster-1], # this cause an error in Python3
        color=rainbow[int(cluster)-1],
        fill=True,
        #fill_color=rainbow[cluster-1], # this cause an error in Python3
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine Clusters

In [39]:
toronto_merged

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,Regent Park / Harbourfront,43.650964,-79.353041,3.0,Pub,Café,Athletics & Sports,Mediterranean Restaurant,Thai Restaurant,Seafood Restaurant,Mexican Restaurant,Food Truck,French Restaurant,Chocolate Shop
1,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.66179,-79.38939,3.0,Coffee Shop,Sushi Restaurant,Café,Creperie,Pharmacy,Diner,Discount Store,Distribution Center,College Auditorium,Italian Restaurant
2,M5B,Downtown Toronto,Garden District / Ryerson,43.657491,-79.377529,3.0,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Sandwich Place,Café,Restaurant,Cosmetics Shop,Italian Restaurant,Hotel,Bar
3,M5C,Downtown Toronto,St. James Town,43.651734,-79.375554,3.0,Coffee Shop,Café,Cosmetics Shop,American Restaurant,Gastropub,Seafood Restaurant,Cocktail Bar,Hotel,Clothing Store,Restaurant
4,M4E,East Toronto,The Beaches,43.678148,-79.295349,3.0,Health Food Store,Trail,Pub,Donut Shop,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm
5,M5E,Downtown Toronto,Berczy Park,43.645196,-79.373855,3.0,Coffee Shop,Cocktail Bar,Breakfast Spot,Beer Bar,Seafood Restaurant,Lounge,Bakery,Café,Restaurant,Hotel
6,M5G,Downtown Toronto,Central Bay Street,43.656072,-79.385653,3.0,Coffee Shop,Café,Middle Eastern Restaurant,Breakfast Spot,Restaurant,Sandwich Place,Bubble Tea Shop,Italian Restaurant,Plaza,Clothing Store
7,M6G,Downtown Toronto,Christie,43.668602,-79.420387,3.0,Grocery Store,Café,Baby Store,Candy Store,Athletics & Sports,Coffee Shop,Park,Playground,Fish Market,Fish & Chips Shop
8,M5H,Downtown Toronto,Richmond / Adelaide / King,43.650542,-79.384116,3.0,Coffee Shop,Café,Clothing Store,Restaurant,Sushi Restaurant,Thai Restaurant,Deli / Bodega,Salad Place,Hotel,Gym
9,M6H,West Toronto,Dufferin / Dovercourt Village,43.66491,-79.438664,3.0,Furniture / Home Store,Park,Pharmacy,Grocery Store,Liquor Store,Brazilian Restaurant,Pool,Bank,Bakery,Café


Let's examine each cluster from 1 to 5

Cluster 1

In [40]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,West Toronto,0.0,Convenience Store,Park,Gas Station,Sandwich Place,Metro Station,Tennis Court,Eastern European Restaurant,Fish Market,Fish & Chips Shop,Fast Food Restaurant
29,Central Toronto,0.0,Convenience Store,Gym,Park,Donut Shop,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm


Cluster 2

In [41]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,Central Toronto,1.0,Playground,Gym Pool,Park,Garden,Yoga Studio,Eastern European Restaurant,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
33,Downtown Toronto,1.0,Playground,Park,Grocery Store,Candy Store,Eastern European Restaurant,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market


Cluster 3

In [42]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,Central Toronto,2.0,Furniture / Home Store,Park,Yoga Studio,Eastern European Restaurant,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm


Cluster 4

In [43]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,3.0,Pub,Café,Athletics & Sports,Mediterranean Restaurant,Thai Restaurant,Seafood Restaurant,Mexican Restaurant,Food Truck,French Restaurant,Chocolate Shop
1,Downtown Toronto,3.0,Coffee Shop,Sushi Restaurant,Café,Creperie,Pharmacy,Diner,Discount Store,Distribution Center,College Auditorium,Italian Restaurant
2,Downtown Toronto,3.0,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Sandwich Place,Café,Restaurant,Cosmetics Shop,Italian Restaurant,Hotel,Bar
3,Downtown Toronto,3.0,Coffee Shop,Café,Cosmetics Shop,American Restaurant,Gastropub,Seafood Restaurant,Cocktail Bar,Hotel,Clothing Store,Restaurant
4,East Toronto,3.0,Health Food Store,Trail,Pub,Donut Shop,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm
5,Downtown Toronto,3.0,Coffee Shop,Cocktail Bar,Breakfast Spot,Beer Bar,Seafood Restaurant,Lounge,Bakery,Café,Restaurant,Hotel
6,Downtown Toronto,3.0,Coffee Shop,Café,Middle Eastern Restaurant,Breakfast Spot,Restaurant,Sandwich Place,Bubble Tea Shop,Italian Restaurant,Plaza,Clothing Store
7,Downtown Toronto,3.0,Grocery Store,Café,Baby Store,Candy Store,Athletics & Sports,Coffee Shop,Park,Playground,Fish Market,Fish & Chips Shop
8,Downtown Toronto,3.0,Coffee Shop,Café,Clothing Store,Restaurant,Sushi Restaurant,Thai Restaurant,Deli / Bodega,Salad Place,Hotel,Gym
9,West Toronto,3.0,Furniture / Home Store,Park,Pharmacy,Grocery Store,Liquor Store,Brazilian Restaurant,Pool,Bank,Bakery,Café


Cluster 5

In [44]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Central Toronto,4.0,Construction & Landscaping,Bus Line,Swim School,Yoga Studio,Electronics Store,Flower Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
