# Clustering Neighborhoods and Plotting them on a map

## Installing and Importing required libraries

In [44]:
# Library installation
# Uncomment the line(s) if library(ies) is/are missing

#import sys
#!conda install -c conda-forge --yes --prefix {sys.prefix} matplotlib
#!conda install -c conda-forge --yes --prefix {sys.prefix} scikit-learn
#!conda install -c conda-forge --yes --prefix {sys.prefix} folium
#!conda install -c conda-forge --yes --prefix {sys.prefix} pywget
#!conda install -c conda-forge --yes --prefix {sys.prefix} geopy

# Importing the libraries
import pandas as pd # library for data analysis
import pathlib # Checking local filesystem
#import matplotlib.cm as cm
#import matplotlib.colors as colors
import folium # map rendering library
from sklearn.cluster import KMeans # k-means clustering model (unsupervised; not KNN)
from IPython.display import display # rich display of several objects from one cell

print('Libraries imported.')

Libraries imported.


<p><b>Note:</b> if you do not use Anaconda, use <code>!pip install</code> to install the Python packages instead.</p>

## Loading & wrangling the data

<p>'GeoCoordinates.csv' was generated by the GeoCoordinates.ipynb notebook. </p>

In [16]:
# Read only column positions 1-5 of the CSV; position 0 is skipped.
wanted_positions = [1, 2, 3, 4, 5]
neighborhoods = pd.read_csv('GeoCoordinates.csv', usecols=wanted_positions)

# Give the five imported columns their working names.
neighborhoods.columns = [
    'PostalCode',
    'Borough',
    'Neighborhood',
    'Latitude',
    'Longitude',
]
neighborhoods

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


<p>After inspecting the CSV file, I noticed unnecessary duplicate columns generated by to_csv() in the previous notebook, so I only imported the relevant columns.</p>

### We are only looking for Boroughs in Toronto

In [17]:
# Keep only the rows whose borough name mentions "toronto" (case-insensitive).
# na=False makes a missing Borough count as "no match"; without it the mask
# would contain NaN and pandas refuses to index with a non-boolean mask.
# reset_index(drop=True) returns a new frame with a fresh 0..n-1 index, so
# `toronto` is an independent DataFrame rather than a slice of
# `neighborhoods` that is then mutated in place.
toronto = (
    neighborhoods[neighborhoods['Borough'].str.contains('toronto', case=False, na=False)]
    .reset_index(drop=True)
)
toronto.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


### Let's do some plotting

<p>Let's see where these neighborhoods are in Toronto</p>

In [18]:
# Centre point for the Toronto map view
latitude, longitude = 43.679563, -79.377529

# Base map centred on the coordinates above
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# Appearance shared by every neighborhood marker
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per row, with a "<neighborhood>, <borough>" popup
for lat, lng, borough, neighborhood in zip(
    toronto['Latitude'],
    toronto['Longitude'],
    toronto['Borough'],
    toronto['Neighborhood'],
):
    popup = folium.Popup(f'{neighborhood}, {borough}', parse_html=True)
    marker = folium.CircleMarker([lat, lng], popup=popup, **marker_style)
    marker.add_to(map_toronto)

map_toronto

## Clustering the neighbourhoods

<p>How many different boroughs are there in Toronto?</p>

In [34]:
toronto['Borough'].value_counts()

Downtown Toronto    19
Central Toronto      9
West Toronto         6
East Toronto         5
Name: Borough, dtype: int64

<h3>K-Means Clustering Model</h3>
<p>With 4 boroughs, we ask k-means for 4 clusters and check how well they line up with the boroughs</p>

In [19]:
# Number of clusters to request; set to the number of boroughs within Toronto
kclusters = 4

# Cluster on geographic position only
toronto_coo = toronto[['Latitude', 'Longitude']]

# Run k-means clustering.
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' (a single run with k-means++ init), so leaving it unset lets the
# labels depend on the installed version. random_state fixes the seed.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(toronto_coo)

# Cluster label assigned to each row of toronto_coo, in row order
kmeans.labels_

array([3, 3, 3, 3, 0, 3, 3, 1, 3, 1, 3, 1, 0, 3, 1, 0, 3, 0, 2, 2, 2, 2,
       1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 3, 3, 3, 3, 3, 3, 0])

In [20]:
## Add the clusters to neighborhoods
# DataFrame.insert raises ValueError if the column already exists, which made
# this cell fail when re-run. Dropping any previous 'Cluster Labels' column
# first (errors='ignore' makes that a no-op on the first run) keeps the cell
# idempotent; drop() also returns a new frame, so the insert below never
# touches a slice of another DataFrame.
toronto = toronto.drop(columns='Cluster Labels', errors='ignore')
toronto.insert(5, 'Cluster Labels', kmeans.labels_)

<p> Let's see how boroughs and neighborhoods fall in each category </p>

In [45]:
# Show the rows of each cluster as its own table.
# Iterate over kclusters (the value passed to KMeans) instead of a literal 4,
# so the tables stay complete if the number of clusters is changed.
for cluster_id in range(kclusters):
    display(toronto.loc[toronto['Cluster Labels'] == cluster_id])

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,0
12,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0
15,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572,0
17,M4M,East Toronto,Studio District,43.659526,-79.340923,0
38,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,0


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564,1
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,1
11,M6J,West Toronto,"Little Portugal, Trinity",43.647927,-79.41975,1
14,M6K,West Toronto,"Brockton, Parkdale Village, Exhibition Place",43.636847,-79.428191,1
22,M6P,West Toronto,"High Park, The Junction South",43.661608,-79.464763,1
25,M6R,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325,1
28,M6S,West Toronto,"Runnymede, Swansea",43.651571,-79.48445,1


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels
18,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,2
19,M5N,Central Toronto,Roselawn,43.711695,-79.416936,2
20,M4P,Central Toronto,Davisville North,43.712751,-79.390197,2
21,M5P,Central Toronto,"Forest Hill North & West, Forest Hill Road Park",43.696948,-79.411307,2
23,M4R,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678,2
26,M4S,Central Toronto,Davisville,43.704324,-79.38879,2
29,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,2
31,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,2


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,3
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,3
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,3
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,3
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,3
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,3
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,3
10,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752,3
13,M5K,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576,3
16,M5L,Downtown Toronto,"Commerce Court, Victoria Hotel",43.648198,-79.379817,3


<p>K-means worked rather effectively: only 2 neighborhoods were miscategorized (placed in a cluster dominated by a different borough) — "Christie" and "The Annex, North Midtown, Yorkville".</p>

## Mapping the boroughs with different colours


In [46]:
# Centre point for the Toronto map view
latitude, longitude = 43.679563, -79.377529

# Marker colour for each cluster label
colors = dict(enumerate(['red', 'green', 'blue', 'yellow', 'purple']))

# Base map centred on the coordinates above
map_toronto_clustered = folium.Map(location=[latitude, longitude], zoom_start=12)

# One circle marker per row, coloured by its cluster label,
# with a "<neighborhood>, <borough>" popup
for lat, lng, borough, neighborhood, cat in zip(
    toronto['Latitude'],
    toronto['Longitude'],
    toronto['Borough'],
    toronto['Neighborhood'],
    toronto['Cluster Labels'],
):
    cluster_color = colors[cat]
    popup = folium.Popup(f'{neighborhood}, {borough}', parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto_clustered)

map_toronto_clustered

<h2>That's it!</h2>