# In this notebook, we segment the Toronto neighborhoods using K-means clustering and show the clusters on a map.

Install folium

In [44]:
# Shell escape: install folium from the conda-forge channel; --yes answers the confirmation prompt automatically.
!conda install -c conda-forge folium --yes

Solving environment: done

# All requested packages already installed.



Create the dataframe with Toronto neighborhood location information.

In [45]:
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Fetch the Wikipedia page that lists the "M" postal codes.
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# stops us from parsing an HTTP error page as if it were the postcode table.
wiki_response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
wiki_response.raise_for_status()
wiki_page = wiki_response.text

# Keep only the first <table> element of the page and let pandas parse it.
# The markup is wrapped in StringIO because passing literal HTML text to
# read_html is deprecated in recent pandas releases; file-like input works everywhere.
extract_table = BeautifulSoup(wiki_page, 'lxml')
output_table = str(extract_table.table)
temp_df = pd.read_html(StringIO(output_table))
neighbor_df = temp_df[0]

# Discard the rows whose borough is 'Not assigned'.
temp_df1 = neighbor_df[neighbor_df.Borough != 'Not assigned']

# Collapse to one row per (Postcode, Borough); the remaining text column
# (the neighbourhood names) is joined with ', '. sort=False keeps the page order.
temp_df2 = (
    temp_df1
    .groupby(['Postcode', 'Borough'], sort=False)
    .agg(', '.join)
    .reset_index()
)

# A row whose neighbourhood is 'Not assigned' takes the name of its borough instead.
temp_df2['Neighbourhood'] = np.where(
    temp_df2['Neighbourhood'] == 'Not assigned',
    temp_df2['Borough'],
    temp_df2['Neighbourhood'],
)

# Load the coordinates file and align its key column name with the scraped table.
location_info = (
    pd.read_csv('https://cocl.us/Geospatial_data', header=0)
    .rename(columns={'Postal Code': 'Postcode'})
)

# Inner join on the postcode: every remaining row has both names and coordinates.
final_df = pd.merge(temp_df2, location_info, on='Postcode')

final_df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Queen's Park,Queen's Park,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


We create another dataframe that keeps only the rows whose "Borough" name contains "Toronto".

In [46]:
# Boolean mask: True where the borough name contains the literal text 'Toronto'
# (regex=False means plain substring matching, not a pattern).
is_toronto_borough = final_df['Borough'].str.contains('Toronto', regex=False)

# Keep just the matching rows; the original row index is preserved.
only_toronto_borough = final_df.loc[is_toronto_borough]
only_toronto_borough.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In the following code block we apply K-means clustering with 5 clusters to a new dataframe, "five_clusters". Since Postcode, Borough, and Neighbourhood are not numeric, we leave them out of the new dataframe; the latitude and longitude alone are sufficient for clustering purposes. After clustering, we insert the cluster labels into the original dataframe "only_toronto_borough".

In [47]:
from sklearn.cluster import KMeans
# Number of clusters to fit.
k = 5

# Cluster on the coordinates only. Selecting the two numeric columns by name
# (instead of dropping the text columns with a positional axis argument, which
# pandas >= 2.0 rejects) also keeps a previously inserted 'Cluster Labels'
# column from leaking into the features when this cell is re-run.
five_clusters = only_toronto_borough[['Latitude', 'Longitude']]

# random_state fixes the initialisation so the labels are reproducible.
# n_init=10 is the historical scikit-learn default, stated explicitly so the
# result does not change on releases where the default became 'auto'.
kmeans = KMeans(n_clusters=k, random_state=0, n_init=10).fit(five_clusters)

# insert() raises ValueError if the column already exists, so remove any labels
# left by an earlier run first. drop() returns a new frame, which also means the
# insert below never writes into a slice of final_df.
only_toronto_borough = only_toronto_borough.drop(columns='Cluster Labels', errors='ignore')
only_toronto_borough.insert(0, 'Cluster Labels', kmeans.labels_)

We view the five clusters on the map

In [48]:
import folium
import matplotlib.colors as colors
import matplotlib.cm as cm
# create the base map at a fixed centre point and zoom level
map_clusters = folium.Map(location=[43.651070, -79.347015], zoom_start=12)

# set color scheme for the clusters: k evenly spaced samples of the rainbow
# colormap, converted to hex strings so folium can use them
colors_array = cm.rainbow(np.linspace(0, 1, k))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add one circle marker per row, coloured by its cluster label
for lat, lon, cluster in zip(
        only_toronto_borough['Latitude'],
        only_toronto_borough['Longitude'],
        only_toronto_borough['Cluster Labels']):
    label = folium.Popup(' Cluster ' + str(cluster), parse_html=True)
    # Labels run from 0 to k-1, so they index the palette directly. The previous
    # `cluster-1` only worked because index -1 wraps around to the last colour.
    marker_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

# last expression of the cell: renders the map
map_clusters