Part 3: Exploring and Clustering Neighborhoods in Toronto

In [1]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
import pandas as pd
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim

Collecting package metadata (current_repodata.json): done
Solving environment: - 
The environment is inconsistent, please check the package plan carefully
The following packages are causing the inconsistency:

  - defaults/noarch::ibm-wsrt-py37main-main==custom=2020
  - defaults/noarch::ibm-wsrt-py37main-keep==0.0.0=2020
  - conda-forge/linux-64::pytorch==1.8.0=cpu_py37hafa7651_0
done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: - 
The environment is inconsistent, please check the package plan carefully
The following packages are causing the inconsistency:

  - defaults/noarch::ibm-wsrt-py37main-main==custom=2020
  - defaults/noarch::ibm-wsrt-py37main-keep==0.0.0=2020
  - conda-forge/linux-64::pytorch==1.8.0=cpu_py37hafa7651_0
done

# All requested packages already installed.



In [2]:
# Read the neighbourhood table from 'Project Part2.csv'
# (presumably the file saved at the end of the previous project part).
df_toronto1 = pd.read_csv('Project Part2.csv')

In [3]:
# Show the relevant data as stated in the instructions: keep only the rows
# whose Borough name contains "Toronto", then renumber the rows from 0.
# na=False counts a missing Borough as "no match" instead of putting NaN in the
# mask (which boolean indexing rejects); regex=False makes this a plain
# substring test rather than a regular-expression search.
is_toronto = df_toronto1['Borough'].str.contains('Toronto', na=False, regex=False)
df_toronto2 = df_toronto1[is_toronto]
df_toronto3 = df_toronto2.reset_index(drop=True)
df_toronto3

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M4E,East Toronto,The Beaches,43.676357,-79.293031
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
5,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
6,M6G,Downtown Toronto,Christie,43.669542,-79.422564
7,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
8,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259
9,M4J,East York/East Toronto,The Danforth East,43.685347,-79.338106


In [4]:
# Size of the Toronto-only frame as (rows, columns).
df_toronto3.shape

(39, 5)

In [5]:
# Number of rows for each distinct Borough value.
df_toronto3['Borough'].value_counts()

Downtown Toronto          17
Central Toronto            9
West Toronto               6
East Toronto               4
East Toronto Business      1
East York/East Toronto     1
Downtown Toronto Stn A     1
Name: Borough, dtype: int64

In [13]:
# Create a new column 'Label' holding an integer code for each Borough.
# An explicit mapping keeps every name next to its code. The codes are
# arbitrary identifiers: their numeric order carries no meaning.
borough_to_label = {
    'Downtown Toronto': 1,
    'Central Toronto': 2,
    'West Toronto': 3,
    'East Toronto': 4,
    'East Toronto Business': 5,
    'East York/East Toronto': 6,
    'Downtown Toronto Stn A': 7,
}
df_toronto3['Label'] = df_toronto3['Borough'].map(borough_to_label)

# Series.map yields NaN for any value absent from the mapping; fail here with
# the offending names rather than passing unlabeled rows on to later cells.
unmapped = df_toronto3.loc[df_toronto3['Label'].isna(), 'Borough'].unique()
assert len(unmapped) == 0, f'Boroughs without a label: {list(unmapped)}'

df_toronto3['Label'] = df_toronto3['Label'].astype(int)
df_toronto3.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Label
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,1
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,1
3,M4E,East Toronto,The Beaches,43.676357,-79.293031,4
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,1


In [14]:
# Use the Nominatim geocoder to look up the coordinates of Toronto.
address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; stop with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError(f'Could not geocode {address!r}.')

latitude = location.latitude
longitude = location.longitude
print(f'The geograpical coordinates of Toronto are {latitude}, {longitude}.')

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


In [15]:
# Create the base map; the centre coordinates and zoom level are fixed by hand.
map_toronto = folium.Map(location=[43.651070, -79.347015], zoom_start=10)

# Add one circle marker per row of df_toronto3. Every marker uses the same
# blue styling (no per-cluster colouring happens here); clicking a marker
# opens a "<neighborhood>, <borough>" popup.
for lat, lng, borough, neighborhood in zip(
    df_toronto3['Latitude'],
    df_toronto3['Longitude'],
    df_toronto3['Borough'],
    df_toronto3['Neighborhood'],
):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html=True makes the Popup treat the label as text, not raw HTML.
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)
map_toronto

In [16]:
# k-means to cluster the neighborhoods.
k = 5

# Features are every column that is not an identifier or name. 'Cluster Labels'
# is listed (with errors='ignore') so that re-running this cell does not feed
# the previous run's result back in as a feature.
# NOTE(review): the remaining features include the arbitrary integer 'Label'
# codes, whose range is far larger than the spread of Latitude/Longitude, so
# they dominate the distance metric -- confirm this is intended.
toronto_clustering = df_toronto3.drop(
    columns=['PostalCode', 'Borough', 'Neighborhood', 'Cluster Labels'],
    errors='ignore',
)

# n_init is pinned to 10 (the long-standing default) so the result does not
# depend on which scikit-learn version is installed.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(toronto_clustering)

# Put the cluster assignment in the first column. Any column left over from an
# earlier run is dropped first, because DataFrame.insert raises on duplicates.
df_toronto3 = df_toronto3.drop(columns='Cluster Labels', errors='ignore')
df_toronto3.insert(0, 'Cluster Labels', kmeans.labels_)
df_toronto3

Unnamed: 0,Cluster Labels,PostalCode,Borough,Neighborhood,Latitude,Longitude,Label
0,0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1
1,0,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,1
2,0,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,1
3,1,M4E,East Toronto,The Beaches,43.676357,-79.293031,4
4,0,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,1
5,0,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,1
6,0,M6G,Downtown Toronto,Christie,43.669542,-79.422564,1
7,0,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,1
8,4,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,3
9,2,M4J,East York/East Toronto,The Danforth East,43.685347,-79.338106,6


In [18]:
import numpy as np 

In [21]:
# Create the base map; same hand-picked centre and zoom as the first map.
map_clusters = folium.Map(location=[43.651070, -79.347015], zoom_start=10)

# Colour scheme: k hex colours evenly spaced along matplotlib's rainbow
# colormap, one per cluster.
rainbow = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, k))]

# Add one marker per row, coloured by its cluster. Cluster labels run from
# 0 to k-1, so they index the palette directly.
for lat, lon, cluster in zip(
    df_toronto3['Latitude'],
    df_toronto3['Longitude'],
    df_toronto3['Cluster Labels'],
):
    label = folium.Popup(' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7,
    ).add_to(map_clusters)

map_clusters