# Segmenting and Clustering Neighborhoods in Toronto

In [1]:
import pandas as pd
import numpy as np

import folium

In [2]:
# Wikipedia page holding the table of postal codes to analyse
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Parse the HTML tables found on the page, reading 'Not assigned' cells as NaN,
# and keep the first table
tables = pd.read_html(url, na_values='Not assigned')
df = tables[0]

In [3]:
# Preview the first five rows of the freshly parsed table
df.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Drop rows including NaN in Borough

In [4]:
# Keep only the rows whose Borough is known (not NaN)
has_borough = df['Borough'].notna()
df = df[has_borough]
df.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Assign Borough value to NaN Neighbourhood

In [5]:
# Show the rows, if any, that still have no Neighbourhood value
missing_neighbourhood = df['Neighbourhood'].isna()
df[missing_neighbourhood]

Unnamed: 0,Postal Code,Borough,Neighbourhood


There are no NaN values in Neighbourhood, so nothing needs to be assigned.

In [6]:
# Renumber the rows from 0 (the old index is discarded), then show (rows, columns)
df = df.reset_index(drop=True)
df.shape

(103, 3)

## Add Latitude and Longitude to the Dataframe

In [7]:
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from functools import partial

# Nominatim client; the user agent identifies this notebook to the service.
geolocator = Nominatim(user_agent="toronto_segmenting_clustering")

# Throttled geocoder: the public Nominatim server allows at most one request per
# second, so wait at least 1 s between calls (the previous 0.001 s delay was
# effectively no throttling and risks the client being blocked).
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

### Make a copy of the Dataframe

In [8]:
# Work on an independent copy (df_ll) so the geocoding columns never touch df
df_ll = df.copy(deep=True)

### Apply geolocator to column Borough to get location

In [9]:
# Many postal codes share the same borough, so ask the geocoder once per distinct
# borough and reuse the answer, instead of sending one request per row.
borough_locations = {
    borough: geocode(borough, country_codes='ca', addressdetails=False)
    for borough in df_ll['Borough'].unique()
}

# Give every row the cached result of its borough (whatever geocode returned for it,
# which the next step treats as possibly empty).
df_ll['location'] = df_ll['Borough'].apply(lambda borough: borough_locations[borough])

### Get Latitude, Longitude and Altitude of the location

In [10]:
def _point_or_none(loc):
    """Return the geocoding result's point as a list, or None when there is no result."""
    if not loc:
        return None
    return list(loc.point)


df_ll['point'] = df_ll['location'].apply(_point_or_none)

df_ll.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighbourhood,location,point
0,M3A,North York,Parkwoods,"(North York, Toronto, Golden Horseshoe, Ontari...","[43.7543263, -79.44911696639593, 0.0]"
1,M4A,North York,Victoria Village,"(North York, Toronto, Golden Horseshoe, Ontari...","[43.7543263, -79.44911696639593, 0.0]"
2,M5A,Downtown Toronto,"Regent Park, Harbourfront","(CF Toronto Eaton Centre, 220, Yonge Street, D...","[43.6541737, -79.38081164513409, 0.0]"
3,M6A,North York,"Lawrence Manor, Lawrence Heights","(North York, Toronto, Golden Horseshoe, Ontari...","[43.7543263, -79.44911696639593, 0.0]"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government","(CF Toronto Eaton Centre, 220, Yonge Street, D...","[43.6541737, -79.38081164513409, 0.0]"


### Separate columns Latitude, Longitude and Altitude out of column point

In [11]:
# 'point' holds None for rows whose geocoding failed; a list mixing lists and None
# cannot be expanded into three columns, so substitute a NaN triple for those rows.
missing_point = [np.nan, np.nan, np.nan]
point_rows = [
    point if isinstance(point, list) else missing_point
    for point in df_ll['point']
]

# Expand each 3-element point into its own columns, aligned on df_ll's index.
df_ll[['Latitude', 'Longitude', 'Altitude']] = pd.DataFrame(point_rows, index=df_ll.index)

df_ll.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,location,point,Latitude,Longitude,Altitude
0,M3A,North York,Parkwoods,"(North York, Toronto, Golden Horseshoe, Ontari...","[43.7543263, -79.44911696639593, 0.0]",43.754326,-79.449117,0.0
1,M4A,North York,Victoria Village,"(North York, Toronto, Golden Horseshoe, Ontari...","[43.7543263, -79.44911696639593, 0.0]",43.754326,-79.449117,0.0
2,M5A,Downtown Toronto,"Regent Park, Harbourfront","(CF Toronto Eaton Centre, 220, Yonge Street, D...","[43.6541737, -79.38081164513409, 0.0]",43.654174,-79.380812,0.0
3,M6A,North York,"Lawrence Manor, Lawrence Heights","(North York, Toronto, Golden Horseshoe, Ontari...","[43.7543263, -79.44911696639593, 0.0]",43.754326,-79.449117,0.0
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government","(CF Toronto Eaton Centre, 220, Yonge Street, D...","[43.6541737, -79.38081164513409, 0.0]",43.654174,-79.380812,0.0


### Drop columns location, point and Altitude

In [12]:
# The intermediate columns are no longer needed once Latitude/Longitude exist
helper_columns = ['location', 'point', 'Altitude']
df_ll = df_ll.drop(helper_columns, axis='columns')

df_ll.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.754326,-79.449117
1,M4A,North York,Victoria Village,43.754326,-79.449117
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654174,-79.380812
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.754326,-79.449117
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.654174,-79.380812


## Explore and cluster the neighborhoods in Toronto
### Show Boroughs containing Toronto

In [13]:
# Boolean mask of the rows whose Borough name contains 'Toronto'
in_toronto = df_ll['Borough'].str.contains('Toronto')

# Select those rows, renumber them from 0 and keep an independent copy
df_toronto = df_ll[in_toronto].reset_index(drop=True).copy()

df_toronto.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654174,-79.380812
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.654174,-79.380812
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.654174,-79.380812
3,M5C,Downtown Toronto,St. James Town,43.654174,-79.380812
4,M4E,East Toronto,The Beaches,43.626122,-79.395035


## Get coordinates of Toronto

In [14]:
# Geocode the city itself and unpack its coordinates
location = geolocator.geocode('Toronto')
latitude, longitude = location.latitude, location.longitude

message = 'The coordinates of Toronto are: {}, {}'
print(message.format(latitude, longitude))

The coordinates of Toronto are: 43.6534817, -79.3839347


### Create Map of Toronto

Geocoding by borough name with geopy's Nominatim geocoder is not precise enough: every postal code in the same borough receives identical coordinates. Have a look at the map.

In [15]:
# Base map centred on the Toronto coordinates obtained above
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Appearance shared by every marker
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False
)

# One circle per row of df_ll, with the neighbourhood name as its popup
for lat, lng, label in zip(df_ll['Latitude'], df_ll['Longitude'], df_ll['Neighbourhood']):
    marker = folium.CircleMarker(
        [lat, lng],
        popup=folium.Popup(label, parse_html=True),
        **marker_style
    )
    marker.add_to(map_toronto)

map_toronto

### So I decided to use the downloaded Geospatial_Coordinates.csv

In [16]:
# Load the downloaded coordinates file (path is relative to the working directory)
geo_csv_path = 'Geospatial_Coordinates.csv'
df_gc = pd.read_csv(geo_csv_path)

In [17]:
# Preview the first five rows of the coordinates table
df_gc.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [18]:
# Fresh base map centred on the Toronto coordinates
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Columns of df_gc that supply each marker's position and popup text
marker_columns = ('Latitude', 'Longitude', 'Postal Code')

# Walk the three columns in parallel and drop one circle per postal code
for lat, lng, label in zip(*(df_gc[name] for name in marker_columns)):
    label = folium.Popup(label, parse_html=True)
    circle = folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    circle.add_to(map_toronto)

map_toronto