## Segmenting and Clustering Neighbourhoods in Toronto

In [1]:
import pandas as pd
import numpy as np
import geocoder 

In [2]:
!pip install geocoder



In [3]:
# Wikipedia page "List of postal codes of Canada: M"
link = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

In [4]:
# parse the HTML tables found on the page into a list of DataFrames

tables = pd.read_html(io=link)

In [5]:
# the first table parsed from the page is used as the postal-code table
canada_post_codes = tables[0]

## Data cleaning

In [6]:
# keep only the rows that have an assigned borough,
# i.e. filter out every row whose 'Borough' is 'Not assigned'

clean_boroughs = canada_post_codes[canada_post_codes['Borough'] != 'Not assigned']

In [7]:
# renumber the rows from 0 and discard the old index

clean_boroughs = clean_boroughs.reset_index(drop=True)

In [8]:
# preview the first rows only instead of rendering the whole frame
clean_boroughs.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [9]:
# sanity check: every row must carry a distinct postal code
# (True when the number of rows equals the number of unique codes)

clean_boroughs.shape[0] == clean_boroughs['Postal Code'].nunique()

True

In [10]:
# count the rows whose neighbourhood is still 'Not assigned'

print((clean_boroughs['Neighbourhood'] == 'Not assigned').sum())

0


In [11]:
# dimensions (rows, columns) of the cleaned table
clean_boroughs.shape

(103, 3)

## Getting the latitude and the longitude coordinates for each neighbourhood

In [12]:
# load the CSV that lists a latitude and longitude for each postal code

coordinates = pd.read_csv(filepath_or_buffer='Coursera_Capstone/Geospatial_Coordinates.csv')
coordinates.head(5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [13]:
# attach the coordinates to clean_boroughs, matching rows on 'Postal Code';
# the left join keeps every row of clean_boroughs

df2 = pd.merge(clean_boroughs, coordinates, on='Postal Code', how='left')

In [14]:
# dimensions (rows, columns) after the merge
df2.shape

(103, 5)

In [15]:
# preview the first rows of the merged table
df2.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


## Analysis of Toronto neighbourhoods 

In [16]:
# import libraries 

import folium

import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json 

!pip install geopy
from geopy.geocoders import Nominatim

import requests #
from pandas.io.json import json_normalize 



In [17]:
# load my Foursquare credentials and query settings

import os

# Secrets are read from the environment so they are never stored in the notebook
# or in its saved output. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET
# before starting Jupyter.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # my Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # my Foursquare Secret
VERSION = '20180604'  # sent as the `v` query parameter
LIMIT = 100  # sent as the `limit` query parameter
RADIUS = 500  # sent as the `radius` query parameter

if CLIENT_ID and CLIENT_SECRET:
    # confirm the credentials were found without echoing their values
    print('Foursquare credentials loaded from the environment.')
else:
    print('WARNING: FOURSQUARE_CLIENT_ID and/or FOURSQUARE_CLIENT_SECRET is not set; '
          'Foursquare requests will not be authenticated.')

Your credentials:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [18]:
# find the geographical coordinates for Toronto

address = "Toronto, ON"

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on the attribute access below
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinates of Toronto city are {}, {}.'.format(latitude, longitude))

The geographical coordinates of Toronto city are 43.6534817, -79.3839347.


In [19]:
# build the base map, centred on the coordinates geocoded for Toronto

map_toronto = folium.Map(
    location=[latitude, longitude],
    zoom_start=10,
)
map_toronto

In [20]:
# add one circle marker to the Toronto map for every row of df2

for row in df2.itertuples(index=False):
    # the popup text reads "<neighbourhood>, <borough>"
    label = folium.Popup(
        '{}, {}'.format(row.Neighbourhood, row.Borough),
        parse_html=True,
    )
    marker = folium.CircleMarker(
        [row.Latitude, row.Longitude],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.6,
        parse_html=False,
    )
    marker.add_to(map_toronto)

map_toronto

In [21]:
# keep only the boroughs whose name contains the word "Toronto"

toronto_table = df2[df2['Borough'].str.contains("Toronto")].reset_index(drop=True)
# preview the first rows only: display.max_rows is set to None in this notebook,
# so a bare `toronto_table` would render the entire frame
toronto_table.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


In [22]:
# map only the rows of toronto_table (boroughs whose name contains "Toronto")
#
# map_toronto already carries a marker for every row of df2, so adding the
# toronto_table rows to it would only stack duplicates on top of existing
# markers. They are drawn on a fresh map instead; map_toronto is left untouched.

map_toronto_boroughs = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, lng, borough, neighbourhood in zip(
        toronto_table['Latitude'],
        toronto_table['Longitude'],
        toronto_table['Borough'],
        toronto_table['Neighbourhood']):
    # the popup text reads "<neighbourhood>, <borough>"
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.6,
        parse_html=False).add_to(map_toronto_boroughs)

map_toronto_boroughs

In [23]:
# name of the neighbourhood stored in the first row of toronto_table

main_neighbourhood = toronto_table.at[0, 'Neighbourhood']
main_neighbourhood

'Regent Park, Harbourfront'

In [24]:
# use the Foursquare API to explore venues around The Beaches (43.68, -79.29)
# and keep the parsed JSON response

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    43.68, 
   -79.29, 
    RADIUS, 
    LIMIT)

# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces an HTTP error here rather than as a confusing
# KeyError when the payload is unpacked in a later cell.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

In [25]:
# function that extracts the category name of a venue

def get_category_type(row):
    """Return the name of the first category of a venue, or None if it has none.

    Parameters
    ----------
    row : mapping-like (e.g. a dict or a pandas Series)
        Holds the category list under 'categories' or, failing that, under
        'venue.categories'. Each category is a mapping with a 'name' key.

    Returns
    -------
    The 'name' of the first listed category, or None when the category list
    is empty or None.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present in ``row``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # only a missing key triggers the fallback; any other error propagates
        categories_list = row['venue.categories']

    if not categories_list:
        return None
    return categories_list[0]['name']

In [27]:
# normalise the file and create a new dataframe
venues = results['response']['groups'][0]['items']
# pd.json_normalize is the supported entry point; the pandas.io.json alias is deprecated
nearby_venues = pd.json_normalize(venues)

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
# .copy() so the column assignment below works on an independent frame
nearby_venues = nearby_venues.loc[:, filtered_columns].copy()

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns: keep only the part after the last dot ('venue.location.lat' -> 'lat')
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,The Beech Tree,Gastropub,43.680493,-79.288846
1,Beaches Bake Shop,Bakery,43.680363,-79.289692
2,The Feathers Pub,Pub,43.680501,-79.287522
3,Glen Manor Ravine,Trail,43.676821,-79.293942
4,The Real Jerk Beaches,Caribbean Restaurant,43.680781,-79.285727


In [28]:
# report how many venue rows are in nearby_venues
print('{} venues were returned by Foursquare.'.format(len(nearby_venues)))

18 venues were returned by Foursquare.


In [29]:
# create a map showing venues

# single definition of the map centre (the same point used in the Foursquare query)
beaches_location = [43.68, -79.29]
venues_map = folium.Map(location=beaches_location, zoom_start=15)

# add a blue circle marker to represent The Beaches
# (folium.CircleMarker is the same top-level name the other map cells use)
folium.CircleMarker(
    beaches_location,
    radius=10,
    color='blue',
    popup='The Beaches',
    fill=True,
    fill_color='blue',
    fill_opacity=0.6
).add_to(venues_map)

# add all venues as red circle markers, using the venue category as popup text
for lat, lng, label in zip(nearby_venues['lat'], nearby_venues['lng'], nearby_venues['categories']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='red',
        popup=label,
        fill=True,
        fill_color='red',
        fill_opacity=0.6
    ).add_to(venues_map)
venues_map