# Clustering and Analyzing Toronto Neighbourhood Data

### This final notebook is used to cluster and analyze the scraped Toronto neighbourhood data

#### Import all necessary libraries

In [1]:
import pandas as pd
import folium
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors
from geopy.geocoders import Nominatim

#### Import Data and Prepare for Clustering

In [2]:
# Read the geocoded postal-code table and discard the stray 'Unnamed: 0'
# column carried by the CSV (presumably a previously saved index — TODO confirm).
TO_geo_data = pd.read_csv('TO_Geo_Data.csv').drop(columns=['Unnamed: 0'])
TO_geo_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


In [4]:
# Keep only the rows whose Borough name contains "Toronto", renumber them from
# zero, then discard the rows labelled 9, 34 and 38 after that renumbering.
# NOTE(review): the three hard-coded labels are only meaningful immediately
# after the reset_index on this exact CSV; presumably they are boroughs outside
# the four Toronto districts — confirm. Re-run from the load cell rather than
# re-running this cell alone, since it overwrites its own input.
is_toronto = TO_geo_data['Borough'].str.contains('Toronto')
TO_geo_data = (
    TO_geo_data[is_toronto]
    .reset_index(drop=True)
    .drop(index=[9, 34, 38])
)

#### Cluster and Visualize Data 

In [5]:
# Tally how many of the remaining rows fall under each distinct Borough value.
TO_geo_data['Borough'].value_counts()

Downtown Toronto    17
Central Toronto      9
West Toronto         6
East Toronto         4
Name: Borough, dtype: int64

In [6]:
# Give every row an integer label (1-4) derived from its borough; the map cell
# uses this label to pick a marker colour.
# An explicit dict with Series.map keeps the column numeric without relying on
# the implicit downcasting of Series.replace (deprecated in recent pandas).
BOROUGH_LABELS = {
    'Downtown Toronto': 1,
    'Central Toronto': 2,
    'West Toronto': 3,
    'East Toronto': 4,
}
borough_labels = TO_geo_data['Borough'].map(BOROUGH_LABELS)

# Series.map yields NaN for any borough missing from the dict. Fail loudly here
# instead of letting a non-numeric label break the colour lookup later on.
unmapped = TO_geo_data.loc[borough_labels.isna(), 'Borough'].unique()
if len(unmapped):
    raise ValueError(f'No label defined for boroughs: {list(unmapped)}')

TO_geo_data['Label'] = borough_labels.astype('int64')
TO_geo_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Label
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,1
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,1
3,M4E,East Toronto,The Beaches,43.676357,-79.293031,4
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,1


In [12]:
# Look up the coordinates of Toronto with the Nominatim geocoder; `lat` and
# `long` are used as the centre of the folium map.
address = 'Toronto'
geo_locator = Nominatim(user_agent="toronto_explorer")
loc = geo_locator.geocode(address)

# geocode() returns None when the service finds no match; stop with a clear
# message instead of an AttributeError on `loc.latitude`.
if loc is None:
    raise RuntimeError(f'Nominatim returned no result for address {address!r}')

lat = loc.latitude
long = loc.longitude
print(f'Toronto Coodinates are: {lat}, {long}.')

Toronto Coodinates are: 43.6534817, -79.3839347.


In [15]:
kclusters = 4  # one colour per borough label (1-4) stored in the 'Label' column

# Base map centred on the geocoded Toronto coordinates.
toronto_map = folium.Map(location=[lat, long], zoom_start=13)

# One evenly spaced rainbow colour per label, converted to hex strings.
color_array = cm.rainbow(np.linspace(0, 1, kclusters))
color_rainbow = [colors.rgb2hex(rgba) for rgba in color_array]

# Add one circle marker per neighbourhood row, coloured by its label.
# The loop variables are deliberately NOT named `lat`/`long`: reusing those
# names would overwrite the map centre and break a re-run of this cell.
rows = zip(
    TO_geo_data['Borough'],
    TO_geo_data['Latitude'],
    TO_geo_data['Longitude'],
    TO_geo_data['Label'],
)
for borough, nbhd_lat, nbhd_lon, cluster in rows:
    # The popup shows this row's borough, not the repr of the whole column.
    label = folium.Popup(f'{borough} Cluster {cluster}', parse_html=True)
    marker_color = color_rainbow[cluster - 1]  # labels start at 1, list at 0
    folium.CircleMarker(
        [nbhd_lat, nbhd_lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7,
    ).add_to(toronto_map)

In [17]:
# Evaluate the map as the cell's last expression so the notebook renders it.
toronto_map