# Segmenting and Clustering Neighborhoods in Toronto

## Load data by web scraping

In [1]:
!pip install folium

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes


In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import json
from pandas.io.json import json_normalize
from geopy.geocoders import Nominatim
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

Source of data comes from wikipedia  
This data contains a list of postal codes in Canada where the first letter is M.  
This Postal codes are located within the city of Toronto in the province of Ontario.

In [3]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
data  = requests.get(url).text

In [4]:
soup = BeautifulSoup(data,"html5lib")

**Put data into a dataframe**

In [5]:
table_contents=[]
table=soup.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

postalM = pd.DataFrame(table_contents)
postalM['Borough'] = postalM['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                                 'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                                 'EtobicokeNorthwest':'Etobicoke Northwest',
                                                 'East YorkEast Toronto':'East York/East Toronto',
                                                 'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

In [6]:
row, col =  postalM.shape
print('This data has {} rows and {} columns.'.format(row, col))

This data has 103 rows and 3 columns.


We start by importing the required libraries.  
Then, we retrieve the url that will be scraped into the variable and creating a Beautifulsoup object.  
We extract the table ignoring the 'Not assigned' in the borough column.  
Finally we place the table in the pandas dataframe.

## Adds the latitude and the longitude coordinates of each neighborhood.

In [7]:
postalM.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


**Information of the latitude and the longitude coordinate is obtained from geospatial dataset.**

In [8]:
# GeoSpatial Dataset

load = False
while load == False: # sometimes an error when trying to load
    try:
        geospatial = pd.read_csv('https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv')
        load = True
    except:
        load = False
geospatial.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [9]:
postalM = postalM.join(geospatial.set_index('Postal Code'), on='PostalCode')
postalM

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto Business,Enclave of M4L,43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


In [10]:
print('This dataframe has {} borough and {} neighborhood.'.format(len(postalM.Borough.unique()), postalM.shape[0]))

This dataframe has 15 borough and 103 neighborhood.


# Explore

In [11]:
postalM.Borough.value_counts()

North York                24
Scarborough               17
Downtown Toronto          17
Etobicoke                 11
Central Toronto            9
West Toronto               6
York                       5
East Toronto               4
East York                  4
Etobicoke Northwest        1
East York/East Toronto     1
Downtown Toronto Stn A     1
East Toronto Business      1
Queen's Park               1
Mississauga                1
Name: Borough, dtype: int64

Furthermore, I will only focus on segmenting and clustering North York, which has the largest number of neighborhoods.

In [12]:
northyork = postalM[postalM.Borough == 'North York'].reset_index(drop=True)
display(northyork.head(),
        'The dataframe has {} rows and {} columns'.format(northyork.shape[0], northyork.shape[1]))

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
3,M3B,North York,Don Mills North,43.745906,-79.352188
4,M6B,North York,Glencairn,43.709577,-79.445073


'The dataframe has 24 rows and 5 columns'

**Get the top 100 venues in each neighborhood within a radius of 500 meters.**  
Source: Foursquare

In [13]:
# The code was removed by Watson Studio for sharing.

In [14]:
# I use the function used in the notebook lab "Segmenting and Clustering Neighborhoods in New York City".

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [15]:
northyork_venues = getNearbyVenues(names = northyork['Neighborhood'],
                            latitudes = northyork['Latitude'],
                            longitudes = northyork['Longitude'])

Parkwoods
Victoria Village
Lawrence Manor, Lawrence Heights
Don Mills North
Glencairn
Don Mills South
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Fairview, Henry Farm, Oriole
Northwood Park, York University
Bayview Village
Downsview East
York Mills, Silver Hills
Downsview West
North Park, Maple Leaf Park, Upwood Park
Humber Summit
Willowdale, Newtonbrook
Downsview Central
Bedford Park, Lawrence Manor East
Humberlea, Emery
Willowdale South
Downsview Northwest
York Mills West
Willowdale West


**Check the results and find out how many venues each neighborhood and how many categories do we get.**

In [16]:
northyork_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.332140,Park
1,Parkwoods,43.753259,-79.329656,Towns On The Ravine,43.754754,-79.332552,Hotel
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
...,...,...,...,...,...,...,...
233,Willowdale West,43.782736,-79.442259,Tov-Li,43.784214,-79.446098,Pizza Place
234,Willowdale West,43.782736,-79.442259,Shoppers Drug Mart,43.784847,-79.446028,Pharmacy
235,Willowdale West,43.782736,-79.442259,Dollarama,43.784670,-79.446670,Discount Store
236,Willowdale West,43.782736,-79.442259,Tim Hortons,43.780940,-79.444231,Coffee Shop


In [17]:
display(northyork_venues.groupby('Neighborhood').count())
row, col = northyork_venues.groupby('Neighborhood').count().shape
print('There are {} venues each neighborhood'.format(row))

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Wilson Heights, Downsview North",21,21,21,21,21,21
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",22,22,22,22,22,22
Don Mills North,5,5,5,5,5,5
Don Mills South,20,20,20,20,20,20
Downsview Central,3,3,3,3,3,3
Downsview East,2,2,2,2,2,2
Downsview Northwest,5,5,5,5,5,5
Downsview West,5,5,5,5,5,5
"Fairview, Henry Farm, Oriole",62,62,62,62,62,62


There are 23 venues each neighborhood


In [18]:
print('There are {} categories.'.format(len(northyork_venues['Venue Category'].unique())))

There are 101 categories.


# Analyze 

In [19]:
# one hot encoding
northyork_onehot = pd.get_dummies(northyork_venues[['Venue Category']], prefix="", prefix_sep="")

northyork_onehot['Neighborhood'] = northyork_venues['Neighborhood'] 

fixed_columns = [northyork_onehot.columns[-1]] + list(northyork_onehot.columns[:-1])
northyork_onehot = northyork_onehot[fixed_columns]

display(northyork_onehot.head(),
        'The dataframe has {} rows and {} columns'.format(northyork_onehot.shape[0], northyork_onehot.shape[1]))

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,...,Spa,Sporting Goods Shop,Supermarket,Supplement Shop,Sushi Restaurant,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


'The dataframe has 238 rows and 102 columns'

**Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category.**

In [20]:
northyork_grouped = northyork_onehot.groupby('Neighborhood').mean().reset_index()
northyork_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,...,Spa,Sporting Goods Shop,Supermarket,Supplement Shop,Sushi Restaurant,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant
0,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,...,0.0,0.0,0.047619,0.0,0.047619,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.045455,0.045455,0.0,0.0,0.0,0.0
3,Don Mills North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Don Mills South,0.0,0.0,0.0,0.05,0.0,0.05,0.0,0.0,0.0,...,0.0,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Downsview Central,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Downsview East,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Downsview Northwest,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Downsview West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Fairview, Henry Farm, Oriole",0.0,0.0,0.016129,0.0,0.0,0.016129,0.0,0.032258,0.032258,...,0.016129,0.032258,0.0,0.016129,0.0,0.0,0.016129,0.016129,0.016129,0.0


**Find out the top 5 most common venues in each neighborhood**

In [21]:
num_top_venues = 5

for hood in northyork_grouped['Neighborhood']:
    print(hood)
    print((len(hood) + 3) * '-')
    temp = northyork_grouped[northyork_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

Bathurst Manor, Wilson Heights, Downsview North
--------------------------------------------------
                       venue  freq
0                Coffee Shop  0.10
1                       Bank  0.10
2                Gas Station  0.05
3              Shopping Mall  0.05
4  Middle Eastern Restaurant  0.05


Bayview Village
------------------
                 venue  freq
0   Chinese Restaurant  0.25
1                 Café  0.25
2                 Bank  0.25
3  Japanese Restaurant  0.25
4    Accessories Store  0.00


Bedford Park, Lawrence Manor East
------------------------------------
                     venue  freq
0           Sandwich Place  0.09
1              Coffee Shop  0.09
2       Italian Restaurant  0.09
3         Greek Restaurant  0.05
4  Comfort Food Restaurant  0.05


Don Mills North
------------------
                  venue  freq
0                   Gym   0.2
1  Caribbean Restaurant   0.2
2                  Café   0.2
3   Japanese Restaurant   0.2
4          Dessert Sho

**Put into the dataframe**

In [22]:
# function to sort the venues in descending order
# I took this function from the notebook lab "Segmenting and Clustering Neighborhoods in New York City".

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [23]:
#create the new dataframe: the top 10 venues for each neighborhood.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = northyork_grouped['Neighborhood']

for ind in np.arange(northyork_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(northyork_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Gas Station,Sandwich Place,Pharmacy,Pizza Place,Mobile Phone Shop,Deli / Bodega,Middle Eastern Restaurant,Ice Cream Shop
1,Bayview Village,Chinese Restaurant,Café,Bank,Japanese Restaurant,Dim Sum Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
2,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Sandwich Place,Fast Food Restaurant,Restaurant,Juice Bar,Café,Butcher,Indian Restaurant,Comfort Food Restaurant
3,Don Mills North,Japanese Restaurant,Caribbean Restaurant,Café,Gym,Dessert Shop,Vietnamese Restaurant,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
4,Don Mills South,Gym,Coffee Shop,Restaurant,Sandwich Place,Italian Restaurant,Dim Sum Restaurant,Bike Shop,Beer Store,Discount Store,Clothing Store


# Cluster Neighborhoods

Run _k_-means to cluster the neighborhood into 5 clusters.

In [24]:
# set number of clusters
kclusters = 5

northyork_grouped_clustering = northyork_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(northyork_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 1, 0, 1, 0, 4, 2, 0, 0, 0], dtype=int32)

In [25]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

northyork_merged = northyork

# add latitude and longitude for each neighborhood
northyork_merged = northyork_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

northyork_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,0.0,Park,Food & Drink Shop,Hotel,Vietnamese Restaurant,Dessert Shop,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping
1,M4A,North York,Victoria Village,43.725882,-79.315572,0.0,French Restaurant,Coffee Shop,Hockey Arena,Portuguese Restaurant,Dessert Shop,Chocolate Shop,Clothing Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0.0,Clothing Store,Furniture / Home Store,Vietnamese Restaurant,Miscellaneous Shop,Arts & Crafts Store,Boutique,Carpet Store,Coffee Shop,Accessories Store,Spa
3,M3B,North York,Don Mills North,43.745906,-79.352188,1.0,Japanese Restaurant,Caribbean Restaurant,Café,Gym,Dessert Shop,Vietnamese Restaurant,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
4,M6B,North York,Glencairn,43.709577,-79.445073,0.0,Park,Asian Restaurant,Bakery,Pizza Place,Japanese Restaurant,Vietnamese Restaurant,Dessert Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant


A data type of 'Cluster Labels' changes to float. It looks like some value is missing.

**Drop if any values are missing or not found venue information near the neighborhood from the API**

In [26]:
northyork_merged[northyork_merged.isnull().any(axis=1)]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,M2L,North York,"York Mills, Silver Hills",43.75749,-79.374714,,,,,,,,,,,


In [27]:
northyork_merged.dropna(inplace=True)
northyork_merged.reset_index(drop=True)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,0.0,Park,Food & Drink Shop,Hotel,Vietnamese Restaurant,Dessert Shop,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping
1,M4A,North York,Victoria Village,43.725882,-79.315572,0.0,French Restaurant,Coffee Shop,Hockey Arena,Portuguese Restaurant,Dessert Shop,Chocolate Shop,Clothing Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0.0,Clothing Store,Furniture / Home Store,Vietnamese Restaurant,Miscellaneous Shop,Arts & Crafts Store,Boutique,Carpet Store,Coffee Shop,Accessories Store,Spa
3,M3B,North York,Don Mills North,43.745906,-79.352188,1.0,Japanese Restaurant,Caribbean Restaurant,Café,Gym,Dessert Shop,Vietnamese Restaurant,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
4,M6B,North York,Glencairn,43.709577,-79.445073,0.0,Park,Asian Restaurant,Bakery,Pizza Place,Japanese Restaurant,Vietnamese Restaurant,Dessert Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant
5,M3C,North York,Don Mills South,43.7259,-79.340923,0.0,Gym,Coffee Shop,Restaurant,Sandwich Place,Italian Restaurant,Dim Sum Restaurant,Bike Shop,Beer Store,Discount Store,Clothing Store
6,M2H,North York,Hillcrest Village,43.803762,-79.363452,0.0,Mediterranean Restaurant,Golf Course,Fast Food Restaurant,Dog Run,Pool,Vietnamese Restaurant,Department Store,Clothing Store,Coffee Shop,Comfort Food Restaurant
7,M3H,North York,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259,0.0,Coffee Shop,Bank,Gas Station,Sandwich Place,Pharmacy,Pizza Place,Mobile Phone Shop,Deli / Bodega,Middle Eastern Restaurant,Ice Cream Shop
8,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556,0.0,Clothing Store,Coffee Shop,Fast Food Restaurant,Japanese Restaurant,Restaurant,Juice Bar,Bank,Sporting Goods Shop,Bakery,Liquor Store
9,M3J,North York,"Northwood Park, York University",43.76798,-79.487262,0.0,Furniture / Home Store,Bar,Caribbean Restaurant,Massage Studio,Coffee Shop,Metro Station,Food & Drink Shop,Deli / Bodega,Clothing Store,French Restaurant


In [28]:
# change data type of 'Cluster Labels' back to integer
northyork_merged['Cluster Labels'] = northyork_merged['Cluster Labels'].astype(int)

**Visualize the Results.**

In [29]:
# create map
address = 'North York'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(northyork_merged['Latitude'], northyork_merged['Longitude'], northyork_merged['Neighborhood'], northyork_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine Clusters

## Cluster 1

In [30]:
northyork_merged.loc[northyork_merged['Cluster Labels'] == 0, northyork_merged.columns[[1] + list(range(5, northyork_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,0,Park,Food & Drink Shop,Hotel,Vietnamese Restaurant,Dessert Shop,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping
1,North York,0,French Restaurant,Coffee Shop,Hockey Arena,Portuguese Restaurant,Dessert Shop,Chocolate Shop,Clothing Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
2,North York,0,Clothing Store,Furniture / Home Store,Vietnamese Restaurant,Miscellaneous Shop,Arts & Crafts Store,Boutique,Carpet Store,Coffee Shop,Accessories Store,Spa
4,North York,0,Park,Asian Restaurant,Bakery,Pizza Place,Japanese Restaurant,Vietnamese Restaurant,Dessert Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant
5,North York,0,Gym,Coffee Shop,Restaurant,Sandwich Place,Italian Restaurant,Dim Sum Restaurant,Bike Shop,Beer Store,Discount Store,Clothing Store
6,North York,0,Mediterranean Restaurant,Golf Course,Fast Food Restaurant,Dog Run,Pool,Vietnamese Restaurant,Department Store,Clothing Store,Coffee Shop,Comfort Food Restaurant
7,North York,0,Coffee Shop,Bank,Gas Station,Sandwich Place,Pharmacy,Pizza Place,Mobile Phone Shop,Deli / Bodega,Middle Eastern Restaurant,Ice Cream Shop
8,North York,0,Clothing Store,Coffee Shop,Fast Food Restaurant,Japanese Restaurant,Restaurant,Juice Bar,Bank,Sporting Goods Shop,Bakery,Liquor Store
9,North York,0,Furniture / Home Store,Bar,Caribbean Restaurant,Massage Studio,Coffee Shop,Metro Station,Food & Drink Shop,Deli / Bodega,Clothing Store,French Restaurant
13,North York,0,Grocery Store,Park,Bank,Shopping Mall,Vietnamese Restaurant,Dim Sum Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping


## Cluster 2

In [31]:
northyork_merged.loc[northyork_merged['Cluster Labels'] == 1, northyork_merged.columns[[1] + list(range(5, northyork_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,North York,1,Japanese Restaurant,Caribbean Restaurant,Café,Gym,Dessert Shop,Vietnamese Restaurant,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
10,North York,1,Chinese Restaurant,Café,Bank,Japanese Restaurant,Dim Sum Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store


## Cluster 3

In [32]:
northyork_merged.loc[northyork_merged['Cluster Labels'] == 2, northyork_merged.columns[[1] + list(range(5, northyork_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,North York,2,Airport,Park,Vietnamese Restaurant,Dim Sum Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
22,North York,2,Park,Convenience Store,Vietnamese Restaurant,Carpet Store,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop


## Cluster 4

In [33]:
northyork_merged.loc[northyork_merged['Cluster Labels'] == 3, northyork_merged.columns[[1] + list(range(5, northyork_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,North York,3,Park,Vietnamese Restaurant,Carpet Store,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop


## Cluster 5

In [34]:
northyork_merged.loc[northyork_merged['Cluster Labels'] == 4, northyork_merged.columns[[1] + list(range(5, northyork_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,North York,4,Food Truck,Business Service,Baseball Field,Vietnamese Restaurant,Dim Sum Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
