### Segmenting and Clustering Neighborhoods in Toronto Part 2 
This notebook uses the data from Part 1 (saved to a CSV file) to cluster the neighborhoods and visualize them on a map.
Toni Krowisz

In [39]:
import folium # plotting library
import pandas as pd 

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

from geopy.geocoders import Nominatim
import numpy as np

In [2]:
# Read the neighborhood table saved by Part 1 (the path carries no file extension)
toronto_df = pd.read_csv(filepath_or_buffer='toronto_neighborhoods')
toronto_df

Unnamed: 0,Postal_Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.752935,-79.335641
1,M4A,North York,Victoria Village,43.728102,-79.311890
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.650964,-79.353041
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.723265,-79.451211
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.661790,-79.389390
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653340,-79.509766
99,M4Y,Downtown Toronto,Church and Wellesley,43.666659,-79.381472
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.648700,-79.385450
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.632798,-79.493017


In [28]:
# Inspect the row index of the loaded frame
toronto_df.index

RangeIndex(start=0, stop=103, step=1)

##### Check Boroughs with the most neighborhoods (Postal codes)

In [5]:
# Count the non-null entries of every column per borough, then rank the
# boroughs by their 'Postal_Code' count, largest first.
toronto_groups = toronto_df.groupby('Borough').count()
toronto_groups.sort_values('Postal_Code', ascending=False)

Unnamed: 0_level_0,Postal_Code,Neighborhood,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
North York,24,24,24,24
Downtown Toronto,19,19,19,19
Scarborough,17,17,17,17
Etobicoke,12,12,12,12
Central Toronto,9,9,9,9
West Toronto,6,6,6,6
East Toronto,5,5,5,5
East York,5,5,5,5
York,5,5,5,5
Mississauga,1,1,1,1


In [9]:
# (number of boroughs, number of counted columns)
toronto_groups.shape

(10, 4)

In [25]:
# Keep only the coordinate columns as clustering features. Selecting them by
# name (instead of dropping the text columns) guarantees that no other column
# toronto_df may carry ends up in the feature matrix.
toronto_cluster_prep = toronto_df[['Latitude', 'Longitude']]
toronto_cluster_prep

Unnamed: 0,Latitude,Longitude
0,43.752935,-79.335641
1,43.728102,-79.311890
2,43.650964,-79.353041
3,43.723265,-79.451211
4,43.661790,-79.389390
...,...,...
98,43.653340,-79.509766
99,43.666659,-79.381472
100,43.648700,-79.385450
101,43.632798,-79.493017


##### Get coordinates for Toronto, ON for the map

In [33]:
address = 'Toronto, ON'

# Look the address up through the Nominatim geocoding service
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when no match is found; fail with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
t_latitude = location.latitude
t_longitude = location.longitude
print('The geograpical coordinates of Toronto, Ontario are {}, {}.'.format(t_latitude, t_longitude))

The geograpical coordinates of Toronto, Ontario are 43.6534817, -79.3839347.


#### Create a map of the current neighborhoods with the Borough as the popup label

In [35]:
# Centre of the map: the geocoded Toronto coordinates
tor_lat, tor_long = t_latitude, t_longitude

# Base map focused on Toronto
tor_map = folium.Map(location=[tor_lat, tor_long], zoom_start=10)

# Draw one blue circle marker per row; the popup shows the row's Borough
for lat, lng, label in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Borough']):
    marker = folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
    )
    marker.add_to(tor_map)

# Display the map as the cell output
tor_map

##### Cluster the neighborhoods. The number of clusters is set to 10, since there are 10 Boroughs.


In [36]:
# number of clusters (one per borough)
kclusters = 10

# Run k-means on the coordinate features. n_init is passed explicitly because
# scikit-learn's default for it changed between releases; pinning it together
# with random_state keeps the labels reproducible across versions.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(toronto_cluster_prep)

# check cluster labels generated for the first ten rows
kmeans.labels_[:10]

array([8, 5, 0, 7, 0, 1, 2, 8, 5, 0])

In [37]:
# Add the clustering labels on a copy, so toronto_df itself is left untouched
# and this cell can be re-run: insert() raises ValueError when the column
# already exists in the frame.
toronto_cluster_labels = toronto_df.copy()
toronto_cluster_labels.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_cluster_labels.head()  # 'Cluster Labels' is now the first column

Unnamed: 0,Cluster Labels,Postal_Code,Borough,Neighborhood,Latitude,Longitude
0,8,M3A,North York,Parkwoods,43.752935,-79.335641
1,5,M4A,North York,Victoria Village,43.728102,-79.31189
2,0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.650964,-79.353041
3,7,M6A,North York,"Lawrence Manor, Lawrence Heights",43.723265,-79.451211
4,0,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66179,-79.38939


#### Create the clustered map



In [46]:
# create map centred on the Toronto coordinates obtained in the geocoding cell
toronto_map_clusters = folium.Map(location=[t_latitude, t_longitude], zoom_start=10)

# set color scheme for the clusters: one hex colour per cluster, sampled
# evenly from the rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add one circle marker per neighborhood, coloured by its cluster
poi_names = toronto_cluster_labels['Neighborhood'] + ' ' + toronto_cluster_labels['Borough']
for lat, lon, poi, cluster in zip(
        toronto_cluster_labels['Latitude'],
        toronto_cluster_labels['Longitude'],
        poi_names,
        toronto_cluster_labels['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # k-means labels run from 0 to kclusters - 1, so they index the palette directly
    cluster_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(toronto_map_clusters)

toronto_map_clusters