# Toronto Neighborhood Clustering

In [1]:
import requests
import pandas as pd
import numpy as np
#!pip install geocoder
import geocoder
from geopy.geocoders import Nominatim
import folium
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

## Q1

In [2]:
# Request and check if 200 (able to connect)
data = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
data

<Response [200]>

In [3]:
lst = pd.read_html(data.text)
lst

[               0                 1  \
 0    Postal Code           Borough   
 1            M1A      Not assigned   
 2            M2A      Not assigned   
 3            M3A        North York   
 4            M4A        North York   
 5            M5A  Downtown Toronto   
 6            M6A        North York   
 7            M7A  Downtown Toronto   
 8            M8A      Not assigned   
 9            M9A         Etobicoke   
 10           M1B       Scarborough   
 11           M2B      Not assigned   
 12           M3B        North York   
 13           M4B         East York   
 14           M5B  Downtown Toronto   
 15           M6B        North York   
 16           M7B      Not assigned   
 17           M8B      Not assigned   
 18           M9B         Etobicoke   
 19           M1C       Scarborough   
 20           M2C      Not assigned   
 21           M3C        North York   
 22           M4C         East York   
 23           M5C  Downtown Toronto   
 24           M6C        

In [4]:
# Get table of interest - first list element to df
df = lst[0]
df.head()

#Rename columns as first row
df = df.rename(columns=df.iloc[0]).drop(df.index[0])

print(df.shape)
df.head()

(180, 3)


Unnamed: 0,Postal Code,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [5]:
# Dropping samples 'Not assigned'
df = df[df['Borough'] != 'Not assigned']
print(df.shape)
df['Borough'].value_counts(dropna=False)

(103, 3)


North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
East York            5
East Toronto         5
York                 5
Mississauga          1
Name: Borough, dtype: int64

In [6]:
df.isnull().sum()

Postal Code      0
Borough          0
Neighbourhood    0
dtype: int64

In [7]:
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park, Harbourfront"
6,M6A,North York,"Lawrence Manor, Lawrence Heights"
7,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
9,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
10,M1B,Scarborough,"Malvern, Rouge"
12,M3B,North York,Don Mills
13,M4B,East York,"Parkview Hill, Woodbine Gardens"
14,M5B,Downtown Toronto,"Garden District, Ryerson"


In [8]:
#Grouping by postal code , reset index
df = df.groupby(['Postal Code']).head()
df.reset_index(inplace=True, drop=True)
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [9]:
# Neighborhoods as 'Not assigned'
df.Neighbourhood.str.count('Not assigned').sum()

0

### Result Q1

In [10]:
df.shape

(103, 3)

## Q2

In [11]:
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [12]:
# Function to get coordinates with geocoder

def get_latlng(postal):
    lat_lng_coords = []
    geo = geocoder.arcgis('{}, Toronto, Ontario'.format(postal))
    coords = geo.latlng
    return coords

In [13]:
# Saving coordinates per postal code
postal_codes = df['Postal Code'].tolist()    
latlon = [get_latlng(postal) for postal in postal_codes]
latlon

[[43.75245000000007, -79.32990999999998],
 [43.73057000000006, -79.31305999999995],
 [43.65512000000007, -79.36263999999994],
 [43.72327000000007, -79.45041999999995],
 [43.66253000000006, -79.39187999999996],
 [43.662630000000036, -79.52830999999998],
 [43.811390000000074, -79.19661999999994],
 [43.74923000000007, -79.36185999999998],
 [43.70718000000005, -79.31191999999999],
 [43.65739000000008, -79.37803999999994],
 [43.70687000000004, -79.44811999999996],
 [43.65034000000003, -79.55361999999997],
 [43.78574000000003, -79.15874999999994],
 [43.72168000000005, -79.34351999999996],
 [43.68970000000007, -79.30681999999996],
 [43.65215000000006, -79.37586999999996],
 [43.69211000000007, -79.43035999999995],
 [43.64857000000006, -79.57824999999997],
 [43.765750000000025, -79.17469999999997],
 [43.67709000000008, -79.29546999999997],
 [43.64536000000004, -79.37305999999995],
 [43.68784000000005, -79.45045999999996],
 [43.76812000000007, -79.21760999999998],
 [43.709020000000066, -79.36348

In [14]:
# coord df
df_coords = pd.DataFrame(latlon, columns=['Lat', 'Lon'])
df_coords

Unnamed: 0,Lat,Lon
0,43.75245,-79.32991
1,43.73057,-79.31306
2,43.65512,-79.36264
3,43.72327,-79.45042
4,43.66253,-79.39188
5,43.66263,-79.52831
6,43.81139,-79.19662
7,43.74923,-79.36186
8,43.70718,-79.31192
9,43.65739,-79.37804


In [15]:
# Coordinates to main df
df['Latitude'] = df_coords['Lat']
df['Longitude'] = df_coords['Lon']
df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.66263,-79.52831
6,M1B,Scarborough,"Malvern, Rouge",43.81139,-79.19662
7,M3B,North York,Don Mills,43.74923,-79.36186
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.70718,-79.31192
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.65739,-79.37804


In [16]:
# Check
df[df['Postal Code'] == 'M9C']

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
17,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.64857,-79.57825


## Q3 - Borough with 'Toronto' - cluster by venues

In [17]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df['Borough'].unique()),
        df.shape[0]
    )
)

The dataframe has 10 boroughs and 103 neighborhoods.


In [18]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent='toronto_explorer')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [19]:
# Toronto Map
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Markers
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
# Show map
map_toronto


Foursquare API Credentials :

In [20]:
CLIENT_ID = 'HB51MKBUMPRZKFXO3BEAZRKHSSQPM4R4P5OHHFDRRZBLXC0W' # your Foursquare ID
CLIENT_SECRET = 'PCYFYUQ3QA4OT5PXTE2MA4YQS0KNDMVVBSLNOAR5HKLO5HLU' # your Foursquare Secret
VERSION = '20200605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: HB51MKBUMPRZKFXO3BEAZRKHSSQPM4R4P5OHHFDRRZBLXC0W
CLIENT_SECRET:PCYFYUQ3QA4OT5PXTE2MA4YQS0KNDMVVBSLNOAR5HKLO5HLU


In [24]:
# Function to get nearby venues - radius = 500 m
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius
            )
            
        # make the GET request
        results = requests.get(url).json()['response']['groups'][0]['items']
        
        # return relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Category']
    
    return(nearby_venues)

In [25]:
# Toronto venues to df
toronto_venues = getNearbyVenues(names=df['Neighbourhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

In [26]:
print(toronto_venues.shape)
toronto_venues.head()

(1367, 5)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Category
0,Parkwoods,43.75245,-79.32991,Brookbanks Park,Park
1,Parkwoods,43.75245,-79.32991,Variety Store,Food & Drink Shop
2,Victoria Village,43.73057,-79.31306,Wigmore Park,Park
3,Victoria Village,43.73057,-79.31306,Memories of Africa,Grocery Store
4,"Regent Park, Harbourfront",43.65512,-79.36264,Roselle Desserts,Bakery


In [27]:
# Venues per neighborhood
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Agincourt,13,13,13,13
"Alderwood, Long Branch",4,4,4,4
"Bathurst Manor, Wilson Heights, Downsview North",2,2,2,2
Bayview Village,4,4,4,4
"Bedford Park, Lawrence Manor East",21,21,21,21
Berczy Park,30,30,30,30
"Birch Cliff, Cliffside West",4,4,4,4
"Brockton, Parkdale Village, Exhibition Place",30,30,30,30
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",30,30,30,30
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",30,30,30,30


In [28]:
# Unique categories from venues
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 223 uniques categories.


## Analysis per Neighbourhood


In [29]:
# one hot encoding for venue categories
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to one-hot dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

print(toronto_onehot.shape)
toronto_onehot.head()

(1367, 224)


Unnamed: 0,Neighbourhood,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
# Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighbourhood,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
#Top 5 most common venues per neighborhood

num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq'] # columns names of temp df
    temp = temp.iloc[1:] # filling with data
    temp['freq'] = temp['freq'].astype(float) # dtype to float
    temp = temp.round({'freq': 2}) # round data to 2 decimal
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues)) #sorting by descending
    print('\n')

----Agincourt----
              venue  freq
0  Department Store  0.08
1     Grocery Store  0.08
2     Shopping Mall  0.08
3   Bubble Tea Shop  0.08
4       Supermarket  0.08


----Alderwood, Long Branch----
                   venue  freq
0      Convenience Store  0.25
1  Performing Arts Venue  0.25
2                    Gym  0.25
3                    Pub  0.25
4          Movie Theater  0.00


----Bathurst Manor, Wilson Heights, Downsview North----
                 venue  freq
0          Men's Store   0.5
1     Business Service   0.5
2  American Restaurant   0.0
3            Pet Store   0.0
4  Martial Arts School   0.0


----Bayview Village----
                        venue  freq
0                       Trail  0.50
1  Construction & Landscaping  0.25
2                        Park  0.25
3         American Restaurant  0.00
4               Movie Theater  0.00


----Bedford Park, Lawrence Manor East----
                venue  freq
0  Italian Restaurant  0.10
1      Sandwich Place  0.10
2    

In [32]:
# function to sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:] # data selection
    row_categories_sorted = row_categories.sort_values(ascending=False) #sorting,descending
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [33]:
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Agincourt,Grocery Store,Hong Kong Restaurant,Skating Rink,Bubble Tea Shop,Supermarket
1,"Alderwood, Long Branch",Performing Arts Venue,Gym,Convenience Store,Pub,Doctor's Office
2,"Bathurst Manor, Wilson Heights, Downsview North",Men's Store,Business Service,Yoga Studio,Convenience Store,Field
3,Bayview Village,Trail,Park,Construction & Landscaping,Cosmetics Shop,Creperie
4,"Bedford Park, Lawrence Manor East",Sandwich Place,Italian Restaurant,Coffee Shop,Thai Restaurant,Liquor Store


# Clustering - KMean

In [34]:
# number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 2, 0, 1, 1, 1, 1, 1, 1])

In [35]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head() 

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M3A,North York,Parkwoods,43.75245,-79.32991,0.0,Park,Food & Drink Shop,Yoga Studio,Dog Run,Fast Food Restaurant
1,M4A,North York,Victoria Village,43.73057,-79.31306,0.0,Park,Grocery Store,Yoga Studio,Doctor's Office,Fast Food Restaurant
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264,1.0,Coffee Shop,Breakfast Spot,Yoga Studio,Thai Restaurant,Italian Restaurant
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042,1.0,Clothing Store,Cosmetics Shop,Women's Store,Food Court,Restaurant
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188,1.0,Coffee Shop,Sandwich Place,Fried Chicken Joint,Falafel Restaurant,Café


In [36]:
toronto_merged.head() 

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M3A,North York,Parkwoods,43.75245,-79.32991,0.0,Park,Food & Drink Shop,Yoga Studio,Dog Run,Fast Food Restaurant
1,M4A,North York,Victoria Village,43.73057,-79.31306,0.0,Park,Grocery Store,Yoga Studio,Doctor's Office,Fast Food Restaurant
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264,1.0,Coffee Shop,Breakfast Spot,Yoga Studio,Thai Restaurant,Italian Restaurant
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042,1.0,Clothing Store,Cosmetics Shop,Women's Store,Food Court,Restaurant
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188,1.0,Coffee Shop,Sandwich Place,Fried Chicken Joint,Falafel Restaurant,Café


In [42]:
toronto_merged.isnull().sum()

Postal Code              0
Borough                  0
Neighbourhood            0
Latitude                 0
Longitude                0
Cluster Labels           2
1st Most Common Venue    2
2nd Most Common Venue    2
3rd Most Common Venue    2
4th Most Common Venue    2
5th Most Common Venue    2
dtype: int64

In [44]:
toronto_merged.dropna(subset=['Cluster Labels'], inplace=True)
toronto_merged.isnull().sum()

Postal Code              0
Borough                  0
Neighbourhood            0
Latitude                 0
Longitude                0
Cluster Labels           0
1st Most Common Venue    0
2nd Most Common Venue    0
3rd Most Common Venue    0
4th Most Common Venue    0
5th Most Common Venue    0
dtype: int64

In [46]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup('Cluster ' + str(int(cluster) +1) + '\n' + str(poi) , parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)]
        ).add_to(map_clusters)
        
map_clusters

# Toronto neighbourhoods clusted by venue categories

In [54]:
# Cluster 1
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,North York,0.0,Park,Food & Drink Shop,Yoga Studio,Dog Run,Fast Food Restaurant
1,North York,0.0,Park,Grocery Store,Yoga Studio,Doctor's Office,Fast Food Restaurant
16,York,0.0,Park,Hockey Arena,Trail,Field,Grocery Store
18,Scarborough,0.0,Construction & Landscaping,Gym / Fitness Center,Park,Doctor's Office,Fast Food Restaurant
22,Scarborough,0.0,Park,Korean BBQ Restaurant,Coffee Shop,Business Service,Dog Run
27,North York,0.0,Park,Bus Stop,Residential Building (Apartment / Condo),Doctor's Office,Farmers Market
32,Scarborough,0.0,Spa,Restaurant,Grocery Store,Park,Indian Restaurant
35,East York,0.0,Park,Playground,Intersection,Doctor's Office,Fast Food Restaurant
39,North York,0.0,Trail,Park,Construction & Landscaping,Cosmetics Shop,Creperie
49,North York,0.0,Park,Bakery,Basketball Court,Field,Fast Food Restaurant


In [55]:
# Cluster 2
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,Downtown Toronto,1.0,Coffee Shop,Breakfast Spot,Yoga Studio,Thai Restaurant,Italian Restaurant
3,North York,1.0,Clothing Store,Cosmetics Shop,Women's Store,Food Court,Restaurant
4,Downtown Toronto,1.0,Coffee Shop,Sandwich Place,Fried Chicken Joint,Falafel Restaurant,Café
5,Etobicoke,1.0,Pharmacy,Park,Shopping Mall,Café,Bank
7,North York,1.0,Intersection,Coffee Shop,Soccer Field,Spa,Supermarket
8,East York,1.0,Pizza Place,Pharmacy,Intersection,Rock Climbing Spot,Bank
9,Downtown Toronto,1.0,Café,Ramen Restaurant,Coffee Shop,Theater,Lounge
10,North York,1.0,Grocery Store,Pizza Place,Gas Station,Pub,Mediterranean Restaurant
11,Etobicoke,1.0,Pizza Place,Tea Room,Sandwich Place,Chinese Restaurant,Yoga Studio
12,Scarborough,1.0,Home Service,Moving Target,Bar,Yoga Studio,Donut Shop


In [56]:
# Cluster 3
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
28,North York,2.0,Men's Store,Business Service,Yoga Studio,Convenience Store,Field


In [57]:
# Cluster 4
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
45,North York,3.0,Park,Yoga Studio,Dog Run,Field,Fast Food Restaurant
68,Central Toronto,3.0,Park,Yoga Studio,Dog Run,Field,Fast Food Restaurant


In [None]:
# Cluster 5
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]