# **Segmenting and Clustering Neighborhoods in Toronto**

# **First Part**

In [67]:
#import nessecary package
import pandas as pd
import numpy as np
import requests
import lxml.html as lh
import geocoder
from geopy.geocoders import Nominatim
import folium
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

In [68]:
#url for data extarct
link = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [69]:
#get the data from url
page = requests.get(link)

In [70]:
#extract table from html
doc = lh.fromstring(page.content)
tr_elements = doc.xpath('//tr')

In [71]:
#extract columns list
col = []
for i in range(len(tr_elements[0])):
    col.append(tr_elements[0][i].text_content().strip('\n'))
col

['Postcode', 'Borough', 'Neighbourhood']

In [72]:
#extract table data
data = []
for i in range(1,len(tr_elements)):
    l = []
    if len(tr_elements[i])!=3:
        break
    for o in range(len(tr_elements[i])):
        l.append(tr_elements[i][o].text_content().strip('\n'))
    data.append(l)

In [73]:
#create dataframe
df = pd.DataFrame(data,columns=col)
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


In [74]:
#remove row that borough are not assigned
df = df[df.Borough != 'Not assigned']
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [75]:
#join row that have the same postcode
df = df.groupby(['Postcode','Borough'], sort=False).agg( ','.join)
df.reset_index(inplace=True)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Not assigned


In [76]:
df[df.iloc[:,2] == 'Not assigned'] #check if there are any not assigned values

Unnamed: 0,Postcode,Borough,Neighbourhood
4,M7A,Queen's Park,Not assigned


In [77]:
#assign name to Neighbourhood that are not assigned
for i in range(1,len(df['Neighbourhood'])):
    if df.iloc[i][2] == 'Not assigned':
        df.iloc[i][2] = df.iloc[i][1]

In [78]:
df[df.iloc[:,2] == 'Not assigned'] #check again if there are any not assigned values

Unnamed: 0,Postcode,Borough,Neighbourhood


In [79]:
#shape of data
df.shape

(103, 3)

# **Second Part**

In [80]:
#load latitude and longitude data
geo_data = pd.read_csv('http://cocl.us/Geospatial_data')
geo_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [81]:
#merge latitude and longitude data to original dataframe
df_merged = df.merge(geo_data,how='outer',left_on='Postcode', right_on='Postal Code')

In [82]:
#drop duplicate postal code column
df_merged = df_merged.drop('Postal Code',axis=1)
df_merged

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.654260,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson,Garden District",43.657162,-79.378937


# **Third Part**

In [83]:
#find latitude and longitude of toronto
address = 'Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [84]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_merged['Latitude'], df_merged['Longitude'], df_merged['Borough'], df_merged['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [85]:
#slice only data from North York
north_york = df_merged[df_merged['Borough'] == 'North York'].reset_index(drop=True)
north_york.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
3,M3B,North York,Don Mills North,43.745906,-79.352188
4,M6B,North York,Glencairn,43.709577,-79.445073


In [86]:
#find latitude and longitude of North York
address = 'North York, Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of North York are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of North York are 43.7708175, -79.4132998.


In [87]:
# create map of North York using latitude and longitude values
map_north_york = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(north_york['Latitude'], north_york['Longitude'], north_york['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_north_york)  
    
map_north_york

In [88]:
#Define argument that need for collect data from foursquare api
CLIENT_ID = 'AXAN1LJCFHFD0V2TP14TXXZN4ABNLAOBAXH2QOPXUREMUA3D' # your Foursquare ID
CLIENT_SECRET = 'XHCUXSLLMYO0BG1TNVTU21IMJUYEHLHNCITKBIEWNK0VGZK4' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100
radius = 500
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: AXAN1LJCFHFD0V2TP14TXXZN4ABNLAOBAXH2QOPXUREMUA3D
CLIENT_SECRET:XHCUXSLLMYO0BG1TNVTU21IMJUYEHLHNCITKBIEWNK0VGZK4


In [89]:
#Define function for get venue list and categories
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
    nearby_venues = pd.DataFrame([item for x in venues_list for item in x])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [90]:
#Use function that we define earlier
north_york_venues = getNearbyVenues(names=north_york['Neighbourhood'],
                                   latitudes=north_york['Latitude'],
                                   longitudes=north_york['Longitude']
                                  )

Parkwoods
Victoria Village
Lawrence Heights,Lawrence Manor
Don Mills North
Glencairn
Flemingdon Park,Don Mills South
Hillcrest Village
Bathurst Manor,Downsview North,Wilson Heights
Fairview,Henry Farm,Oriole
Northwood Park,York University
Bayview Village
CFB Toronto,Downsview East
Silver Hills,York Mills
Downsview West
Downsview,North Park,Upwood Park
Humber Summit
Newtonbrook,Willowdale
Downsview Central
Bedford Park,Lawrence Manor East
Emery,Humberlea
Willowdale South
Downsview Northwest
York Mills West
Willowdale West


In [91]:
#shape of north york venues dataframe
print(north_york_venues.shape)

(234, 7)


In [92]:
print('There are {} uniques categories.'.format(len(north_york_venues['Venue Category'].unique())))

There are 106 uniques categories.


In [93]:
# one hot encoding
north_york_onehot = pd.get_dummies(north_york_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
north_york_onehot['Neighbourhood'] = north_york_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [north_york_onehot.columns[-1]] + north_york_onehot.columns[:-1].tolist()
north_york_onehot = north_york_onehot[fixed_columns]

north_york_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,...,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [94]:
#shape of north york after one hot encoding
north_york_onehot.shape

(234, 107)

In [95]:
#groupby neighbourhood and use mean function
north_york_grouped = north_york_onehot.groupby('Neighbourhood').mean().reset_index()
north_york_grouped

Unnamed: 0,Neighbourhood,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,...,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,"Bathurst Manor,Downsview North,Wilson Heights",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,...,0.055556,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park,Lawrence Manor East",0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,...,0.041667,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CFB Toronto,Downsview East",0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Don Mills North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Downsview Central,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Downsview Northwest,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Downsview West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Downsview,North Park,Upwood Park",0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Emery,Humberlea",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [96]:
north_york_grouped.shape

(23, 107)

In [97]:
#show top venue categories
num_top_venues = 10

for hood in north_york_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = north_york_grouped[north_york_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor,Downsview North,Wilson Heights----
                  venue  freq
0           Coffee Shop  0.11
1           Supermarket  0.06
2         Deli / Bodega  0.06
3             Pet Store  0.06
4              Pharmacy  0.06
5           Pizza Place  0.06
6  Fast Food Restaurant  0.06
7            Restaurant  0.06
8        Sandwich Place  0.06
9         Shopping Mall  0.06


----Bayview Village----
                 venue  freq
0   Chinese Restaurant  0.25
1                 Café  0.25
2                 Bank  0.25
3  Japanese Restaurant  0.25
4    Accessories Store  0.00
5        Luggage Store  0.00
6            Pet Store  0.00
7                 Park  0.00
8        Movie Theater  0.00
9   Miscellaneous Shop  0.00


----Bedford Park,Lawrence Manor East----
                     venue  freq
0       Italian Restaurant  0.08
1     Fast Food Restaurant  0.08
2              Coffee Shop  0.08
3  Comfort Food Restaurant  0.04
4           Sandwich Place  0.04
5           Hardware Store  0.

                venue  freq
0            Pharmacy  0.25
1         Coffee Shop  0.25
2         Pizza Place  0.25
3       Grocery Store  0.25
4        Hockey Arena  0.00
5       Luggage Store  0.00
6           Pet Store  0.00
7                Park  0.00
8       Movie Theater  0.00
9  Miscellaneous Shop  0.00


----York Mills West----
                       venue  freq
0                       Park   0.5
1                       Bank   0.5
2          Accessories Store   0.0
3               Liquor Store   0.0
4                  Pet Store   0.0
5              Movie Theater   0.0
6         Miscellaneous Shop   0.0
7  Middle Eastern Restaurant   0.0
8              Metro Station   0.0
9   Mediterranean Restaurant   0.0




In [98]:
#define function to return most common venues
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [99]:
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = north_york_grouped['Neighbourhood']

for ind in np.arange(north_york_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(north_york_grouped.iloc[ind, :], num_top_venues)


neighborhoods_venues_sorted

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor,Downsview North,Wilson Heights",Coffee Shop,Frozen Yogurt Shop,Supermarket,Pet Store,Pharmacy,Pizza Place,Deli / Bodega,Bridal Shop,Restaurant,Diner
1,Bayview Village,Chinese Restaurant,Café,Bank,Japanese Restaurant,Electronics Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Deli / Bodega
2,"Bedford Park,Lawrence Manor East",Fast Food Restaurant,Italian Restaurant,Coffee Shop,Grocery Store,Sandwich Place,Greek Restaurant,Hardware Store,Indian Restaurant,Juice Bar,Liquor Store
3,"CFB Toronto,Downsview East",Airport,Bus Stop,Park,Women's Store,Dog Run,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Deli / Bodega
4,Don Mills North,Japanese Restaurant,Gym / Fitness Center,Caribbean Restaurant,Café,Baseball Field,Basketball Court,Empanada Restaurant,Construction & Landscaping,Cosmetics Shop,Deli / Bodega
5,Downsview Central,Business Service,Food Truck,Baseball Field,Women's Store,Electronics Store,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Deli / Bodega,Department Store
6,Downsview Northwest,Grocery Store,Gym / Fitness Center,Coffee Shop,Athletics & Sports,Electronics Store,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Deli / Bodega,Department Store
7,Downsview West,Grocery Store,Bank,Shopping Mall,Electronics Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Deli / Bodega,Department Store
8,"Downsview,North Park,Upwood Park",Construction & Landscaping,Bakery,Basketball Court,Park,Women's Store,Electronics Store,Coffee Shop,Comfort Food Restaurant,Cosmetics Shop,Deli / Bodega
9,"Emery,Humberlea",Furniture / Home Store,Baseball Field,Women's Store,Electronics Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Deli / Bodega,Department Store


In [100]:
# set number of clusters
kclusters = 5

north_york_grouped_clustering = north_york_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(north_york_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 0, 1, 1, 4, 4, 0, 2])

In [101]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

north_york_merged = north_york

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
north_york_merged = north_york_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')


In [102]:
#results dataframe
north_york_merged.dropna(axis=0,inplace=True)
north_york_merged.reset_index(inplace=True)
north_york_merged

Unnamed: 0,index,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,M3A,North York,Parkwoods,43.753259,-79.329656,0.0,Park,Bus Stop,Food & Drink Shop,Fast Food Restaurant,Discount Store,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop
1,1,M4A,North York,Victoria Village,43.725882,-79.315572,1.0,Coffee Shop,Hockey Arena,Portuguese Restaurant,Intersection,Women's Store,Dog Run,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Deli / Bodega
2,2,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763,1.0,Clothing Store,Accessories Store,Boutique,Furniture / Home Store,Event Space,Coffee Shop,Miscellaneous Shop,Shoe Store,Gift Shop,Vietnamese Restaurant
3,3,M3B,North York,Don Mills North,43.745906,-79.352188,1.0,Japanese Restaurant,Gym / Fitness Center,Caribbean Restaurant,Café,Baseball Field,Basketball Court,Empanada Restaurant,Construction & Landscaping,Cosmetics Shop,Deli / Bodega
4,4,M6B,North York,Glencairn,43.709577,-79.445073,0.0,Bakery,Pub,Japanese Restaurant,Park,Women's Store,Discount Store,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping
5,5,M3C,North York,"Flemingdon Park,Don Mills South",43.7259,-79.340923,1.0,Gym,Asian Restaurant,Coffee Shop,Beer Store,Grocery Store,Sandwich Place,Fast Food Restaurant,Italian Restaurant,Japanese Restaurant,Discount Store
6,6,M2H,North York,Hillcrest Village,43.803762,-79.363452,1.0,Golf Course,Dog Run,Pool,Athletics & Sports,Mediterranean Restaurant,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Deli / Bodega
7,7,M3H,North York,"Bathurst Manor,Downsview North,Wilson Heights",43.754328,-79.442259,1.0,Coffee Shop,Frozen Yogurt Shop,Supermarket,Pet Store,Pharmacy,Pizza Place,Deli / Bodega,Bridal Shop,Restaurant,Diner
8,8,M2J,North York,"Fairview,Henry Farm,Oriole",43.778517,-79.346556,1.0,Clothing Store,Fast Food Restaurant,Coffee Shop,Food Court,Toy / Game Store,Bus Station,Restaurant,Bakery,Jewelry Store,Burger Joint
9,9,M3J,North York,"Northwood Park,York University",43.76798,-79.487262,1.0,Coffee Shop,Miscellaneous Shop,Furniture / Home Store,Massage Studio,Bar,Dog Run,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Deli / Bodega


In [103]:
#change type of cluster columns from float to int
north_york_merged['Cluster Labels'] = north_york_merged['Cluster Labels'].astype(int)

In [104]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(north_york_merged['Latitude'], north_york_merged['Longitude'], north_york_merged['Neighbourhood'], north_york_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [105]:
#First cluster
north_york_merged.loc[north_york_merged['Cluster Labels'] == 0,north_york_merged.columns[[3,7,8,9,10,11]]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Parkwoods,Park,Bus Stop,Food & Drink Shop,Fast Food Restaurant,Discount Store
4,Glencairn,Bakery,Pub,Japanese Restaurant,Park,Women's Store
11,"CFB Toronto,Downsview East",Airport,Bus Stop,Park,Women's Store,Dog Run
14,"Downsview,North Park,Upwood Park",Construction & Landscaping,Bakery,Basketball Court,Park,Women's Store
21,York Mills West,Park,Bank,Electronics Store,Clothing Store,Coffee Shop


In [106]:
#Second cluster
north_york_merged.loc[north_york_merged['Cluster Labels'] == 1,north_york_merged.columns[[3,7,8,9,10,11]]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,Victoria Village,Coffee Shop,Hockey Arena,Portuguese Restaurant,Intersection,Women's Store
2,"Lawrence Heights,Lawrence Manor",Clothing Store,Accessories Store,Boutique,Furniture / Home Store,Event Space
3,Don Mills North,Japanese Restaurant,Gym / Fitness Center,Caribbean Restaurant,Café,Baseball Field
5,"Flemingdon Park,Don Mills South",Gym,Asian Restaurant,Coffee Shop,Beer Store,Grocery Store
6,Hillcrest Village,Golf Course,Dog Run,Pool,Athletics & Sports,Mediterranean Restaurant
7,"Bathurst Manor,Downsview North,Wilson Heights",Coffee Shop,Frozen Yogurt Shop,Supermarket,Pet Store,Pharmacy
8,"Fairview,Henry Farm,Oriole",Clothing Store,Fast Food Restaurant,Coffee Shop,Food Court,Toy / Game Store
9,"Northwood Park,York University",Coffee Shop,Miscellaneous Shop,Furniture / Home Store,Massage Studio,Bar
10,Bayview Village,Chinese Restaurant,Café,Bank,Japanese Restaurant,Electronics Store
12,"Silver Hills,York Mills",Cafeteria,Martial Arts Dojo,Women's Store,Dog Run,Coffee Shop


In [107]:
#Third cluster
north_york_merged.loc[north_york_merged['Cluster Labels'] == 2,north_york_merged.columns[[3,7,8,9,10,11]]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
18,"Emery,Humberlea",Furniture / Home Store,Baseball Field,Women's Store,Electronics Store,Coffee Shop


In [110]:
#Fourth cluster
north_york_merged.loc[north_york_merged['Cluster Labels'] == 3,north_york_merged.columns[[3,7,8,9,10,11]]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
15,Humber Summit,Pizza Place,Empanada Restaurant,Women's Store,Dog Run,Clothing Store


In [111]:
#Five cluster
north_york_merged.loc[north_york_merged['Cluster Labels'] == 4,north_york_merged.columns[[3,7,8,9,10,11]]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
13,Downsview West,Grocery Store,Bank,Shopping Mall,Electronics Store,Coffee Shop
20,Downsview Northwest,Grocery Store,Gym / Fitness Center,Coffee Shop,Athletics & Sports,Electronics Store
22,Willowdale West,Grocery Store,Pharmacy,Pizza Place,Coffee Shop,Caribbean Restaurant
