# Segmenting and Clustering Neighborhoods in Toronto - Coursera - Week 3 - Part 3

## Exploring and clustering the neighborhoods in Toronto

### Installing required libraries

In [None]:
!pip install geopy
!pip install folium

In [1]:
from geopy.geocoders import Nominatim
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import pandas as pd
import numpy as np

### Load the dataset

In [2]:
# Read the neighborhood table that part 2 of this project saved as a CSV file,
# then preview its first five rows.
df_main = pd.read_csv(filepath_or_buffer='Clustering_part2.csv')
df_main.head(n=5)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


### Working on Toronto's boroughs

In [3]:
# Keep only the rows whose borough name contains 'Toronto', then renumber the
# surviving rows from 0. `df` (the filtered frame with the original index) is
# kept because a later cell reads it.
toronto_mask = df_main['Borough'].str.contains('Toronto')
df = df_main.loc[toronto_mask]
df_Toronto = df.reset_index(drop=True)
df_Toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M4E,East Toronto,The Beaches,43.676357,-79.293031
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [4]:
# (rows, columns) of the Toronto-only frame.
df_Toronto.shape

(39, 5)

In [5]:
# Count the distinct postal codes in the Toronto subset. If this equals the
# number of rows reported above, no postal code appears twice.
unique_postal_codes = set(df['PostalCode'])
len(unique_postal_codes)

39

In [6]:
# Count the rows for each distinct borough value; every distinct value is a
# candidate cluster label, so this also exposes inconsistent borough names.
df_Toronto['Borough'].value_counts()

Downtown Toronto                                                17
Central Toronto                                                  9
West Toronto                                                     6
East Toronto                                                     4
Downtown TorontoStn A PO Boxes25 The Esplanade                   1
East TorontoBusiness reply mail Processing Centre969 Eastern     1
East YorkEast Toronto                                            1
Name: Borough, dtype: int64

### Rename the malformed borough names so they match the existing boroughs

In [7]:
# Map each malformed borough string (borough name fused with address text or
# with a second borough name) onto the canonical borough it belongs to.
borough_fixes = {
    'East TorontoBusiness reply mail Processing Centre969 Eastern': 'East Toronto',
    'East YorkEast Toronto': 'East Toronto',
    'Downtown TorontoStn A PO Boxes25 The Esplanade': 'Downtown Toronto',
}

# Assign the result back to the column instead of calling
# `df_Toronto['Borough'].replace(..., inplace=True)`: an inplace call on a
# column selection is chained assignment, which does not update the parent
# frame under pandas Copy-on-Write and raises a warning in pandas 2.x.
df_Toronto['Borough'] = df_Toronto['Borough'].replace(borough_fixes)


In [8]:
# Count the rows per borough again; each distinct borough value becomes one
# cluster label in the next step.
df_Toronto['Borough'].value_counts()

Downtown Toronto    18
Central Toronto      9
East Toronto         6
West Toronto         6
Name: Borough, dtype: int64

##### We have 4 clusters

In [9]:

# Integer cluster label for each borough. Labels start at 1; the map cell
# below uses `label - 1` as an index into its colour list.
borough_to_label = {
    'Downtown Toronto': 1,
    'Central Toronto': 2,
    'West Toronto': 3,
    'East Toronto': 4,
}

# Use an explicit `map` rather than `replace` with integer values: `replace`
# on an object column relies on pandas silently downcasting the result to int,
# which is deprecated, and it would leave any unknown borough as a string.
labels = df_Toronto['Borough'].map(borough_to_label)

# Fail here, with the offending names, instead of later inside the map loop.
unmapped = df_Toronto.loc[labels.isna(), 'Borough'].unique()
if len(unmapped) > 0:
    raise ValueError(f'No cluster label defined for borough(s): {list(unmapped)}')

df_Toronto['Label'] = labels.astype('int64')
df_Toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Label
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,1
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,1
3,M4E,East Toronto,The Beaches,43.676357,-79.293031,4
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,1


### Exploring the coordinates of Toronto

In [10]:
# Look up the latitude/longitude of Toronto with the Nominatim geocoder; the
# coordinates are used to centre the folium map below.
address = 'Toronto'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; stop with a clear
# message instead of an AttributeError on `.latitude`.
if location is None:
    raise ValueError(f'Nominatim returned no result for address {address!r}')

latitude = location.latitude
longitude = location.longitude
print(f'The geograpical coordinate of Toronto are lat: {latitude} and long: {longitude}')

The geograpical coordinate of Toronto are lat: 43.6534817 and long: -79.3839347


In [11]:
# The number of clusters is the number of distinct borough labels.
k_clusters = len(df_Toronto['Label'].unique())

# Initialise the map, centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# One hex colour per cluster, evenly spaced along matplotlib's rainbow colormap.
colors_array = cm.rainbow(np.linspace(0, 1, k_clusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# Add one circle marker per postal-code row. The popup shows the borough of
# that row (not the whole Borough column) together with its cluster label.
for lat, lon, borough, cluster in zip(
        df_Toronto['Latitude'],
        df_Toronto['Longitude'],
        df_Toronto['Borough'],
        df_Toronto['Label']):
    label = folium.Popup(str(borough) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels are 1-based, the colour list is 0-based.
    marker_color = rainbow[cluster - 1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_toronto)

#### Displaying the 4 clusters of Toronto:

In [12]:
# Bare expression as the last line of the cell so the notebook renders the map.
map_toronto