Neighborhood Clustering in Toronto Project

In [113]:
# Import Libraries
import numpy as np
import pandas as pd
import folium
from geopy.geocoders import Nominatim




In [114]:
# View max columns and rows
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [115]:
# Download data
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
df_toronto = pd.read_html(url, header = 0)
print(df_toronto)

[    Postal Code           Borough  \
0           M1A      Not assigned   
1           M2A      Not assigned   
2           M3A        North York   
3           M4A        North York   
4           M5A  Downtown Toronto   
5           M6A        North York   
6           M7A  Downtown Toronto   
7           M8A      Not assigned   
8           M9A         Etobicoke   
9           M1B       Scarborough   
10          M2B      Not assigned   
11          M3B        North York   
12          M4B         East York   
13          M5B  Downtown Toronto   
14          M6B        North York   
15          M7B      Not assigned   
16          M8B      Not assigned   
17          M9B         Etobicoke   
18          M1C       Scarborough   
19          M2C      Not assigned   
20          M3C        North York   
21          M4C         East York   
22          M5C  Downtown Toronto   
23          M6C              York   
24          M7C      Not assigned   
25          M8C      Not assigned   


In [116]:
# Select the first table
df_table = df_toronto[0]
print(df_table.head())
print(df_table.shape)

  Postal Code           Borough              Neighbourhood
0         M1A      Not assigned               Not assigned
1         M2A      Not assigned               Not assigned
2         M3A        North York                  Parkwoods
3         M4A        North York           Victoria Village
4         M5A  Downtown Toronto  Regent Park, Harbourfront
(180, 3)


In [117]:
# Filter all rows and remove all Boroughs with 'Not assigned' designations
df_filtered = df_table[df_table['Borough'] != 'Not assigned']
print(df_filtered.head())
print(df_filtered.shape)

  Postal Code           Borough                                Neighbourhood
2         M3A        North York                                    Parkwoods
3         M4A        North York                             Victoria Village
4         M5A  Downtown Toronto                    Regent Park, Harbourfront
5         M6A        North York             Lawrence Manor, Lawrence Heights
6         M7A  Downtown Toronto  Queen's Park, Ontario Provincial Government
(103, 3)


In [118]:
# Reset index
df_filtered.reset_index(inplace = True)
df_filtered = df_filtered.drop('index', axis = 1)
print(df_filtered)


    Postal Code           Borough  \
0           M3A        North York   
1           M4A        North York   
2           M5A  Downtown Toronto   
3           M6A        North York   
4           M7A  Downtown Toronto   
5           M9A         Etobicoke   
6           M1B       Scarborough   
7           M3B        North York   
8           M4B         East York   
9           M5B  Downtown Toronto   
10          M6B        North York   
11          M9B         Etobicoke   
12          M1C       Scarborough   
13          M3C        North York   
14          M4C         East York   
15          M5C  Downtown Toronto   
16          M6C              York   
17          M9C         Etobicoke   
18          M1E       Scarborough   
19          M4E      East Toronto   
20          M5E  Downtown Toronto   
21          M6E              York   
22          M1G       Scarborough   
23          M4G         East York   
24          M5G  Downtown Toronto   
25          M6G  Downtown Toronto   
2

In [119]:
# Print number of rows of dataframe
print(df_filtered.shape)

(103, 3)


In [120]:
# Download geodata
df_geodata = pd.read_csv('http://cocl.us/Geospatial_data')
print(df_geodata.head())
print(df_geodata.shape)

  Postal Code   Latitude  Longitude
0         M1B  43.806686 -79.194353
1         M1C  43.784535 -79.160497
2         M1E  43.763573 -79.188711
3         M1G  43.770992 -79.216917
4         M1H  43.773136 -79.239476
(103, 3)


In [121]:
# Add latitude and longitude data
df_final = df_filtered.merge(df_geodata, on = 'Postal Code', how = 'inner')
print(df_final)

    Postal Code           Borough  \
0           M3A        North York   
1           M4A        North York   
2           M5A  Downtown Toronto   
3           M6A        North York   
4           M7A  Downtown Toronto   
5           M9A         Etobicoke   
6           M1B       Scarborough   
7           M3B        North York   
8           M4B         East York   
9           M5B  Downtown Toronto   
10          M6B        North York   
11          M9B         Etobicoke   
12          M1C       Scarborough   
13          M3C        North York   
14          M4C         East York   
15          M5C  Downtown Toronto   
16          M6C              York   
17          M9C         Etobicoke   
18          M1E       Scarborough   
19          M4E      East Toronto   
20          M5E  Downtown Toronto   
21          M6E              York   
22          M1G       Scarborough   
23          M4G         East York   
24          M5G  Downtown Toronto   
25          M6G  Downtown Toronto   
2

In [122]:
# Convert the address to its latitude and longitude
address = '27 Kings College Cir, Toronto, Canada'
geolocator = Nominatim(user_agent = 'foursquare_agent')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude


In [123]:
# Create map
toronto_map = folium.Map(location = [latitude, longitude], zoom_start = 11)

In [124]:
# Add the Canadian neighborhoods as blue circle markers
for Latitude, Longitude in zip(df_final.Latitude, df_final.Longitude):
	folium.CircleMarker(
		[Latitude, Longitude],
		radius = 2,
		color = 'blue',
		fill = True,
		fill_color = 'blue',
		fill_opacity = 0.6
	).add_to(toronto_map)


In [125]:
# Display map
toronto_map