Importing useful libraries for this task:

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

Using requests to download the page content into a variable and Beautiful Soup to parse that content:

In [2]:
# Download the Wikipedia page that lists the Canadian postal codes starting with "M".
# The timeout keeps this cell from hanging indefinitely on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of letting an error page be
# parsed by the following cells.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
html_data = response.content

In [3]:
# Build the parse tree from the downloaded bytes using Python's built-in HTML parser.
soup = BeautifulSoup(markup=html_data, features='html.parser')

Looping over the table cells to extract the required content into a list, then creating a dataframe from that list:

In [4]:
# Walk every <td> of the first table on the page and keep one record
# (PostalCode, Borough, Neighborhood) per assigned postal code.
table_contents = []
table = soup.find('table')
for row in table.find_all('td'):  # find_all is the current name of the deprecated findAll alias
    # The borough/neighborhood text is read from the cell's <span>; a cell without
    # one has nothing to extract, and 'Not assigned' marks an unused postal code.
    if row.span is None or row.span.text == 'Not assigned':
        continue

    # The span text is handled as "Borough(Neighborhood / Neighborhood ...)".
    parts = row.span.text.split('(')
    # A span with no "(" has no neighborhood list; record an empty string
    # instead of failing with IndexError.
    raw_neighborhood = parts[1] if len(parts) > 1 else ''
    neighborhood = (
        raw_neighborhood
        .strip(')')
        .replace(' /', ',')   # " /" separators become commas
        .replace(')', ' ')
        .strip(' ')
    )

    table_contents.append({
        'PostalCode': row.p.text[:3],  # first three characters of the cell's <p> text
        'Borough': parts[0],
        'Neighborhood': neighborhood,
    })

In [5]:
# Borough strings that came out of the scrape with several labels fused together,
# mapped to the cleaned-up value that replaces each of them.
borough_fixes = {
    'Downtown TorontoStn A PO Boxes25 The Esplanade': 'Downtown Toronto Stn A',
    'East TorontoBusiness reply mail Processing Centre969 Eastern': 'East Toronto Business',
    'EtobicokeNorthwest': 'Etobicoke Northwest',
    'East YorkEast Toronto': 'East York/East Toronto',
    'MississaugaCanada Post Gateway Processing Centre': 'Mississauga',
}

# Build the frame from the scraped records and normalise the Borough column.
df = pd.DataFrame(table_contents).assign(
    Borough=lambda frame: frame['Borough'].replace(borough_fixes)
)

Reading the geocode CSV, renaming the Postal Code Column and merging with the Neighborhoods DF:

In [6]:
# Load the latitude/longitude table and rename its key column to 'PostalCode'
# so it matches the column name used by the scraped dataframe.
geocode_url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv'
geocode = pd.read_csv(geocode_url).rename(columns={"Postal Code": "PostalCode"})


In [7]:
# Attach coordinates to the scraped records. how='right' keeps every row of
# `geocode`, so a postal code missing from `df` appears with NaN in the
# Borough and Neighborhood columns.
df_geocode = df.merge(geocode, how='right', on='PostalCode')
df_geocode

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


In [8]:
import numpy as np
import random
#!pip install geopy
from geopy.geocoders import Nominatim
from IPython.display import Image 
from IPython.core.display import HTML 
from pandas.io.json import json_normalize
#!pip install folium==0.5.0
import folium

As suggested, let's work with boroughs that contain the word Toronto:

In [10]:
# Keep only the boroughs whose name contains "Toronto".
# na=False treats a missing Borough (possible after the right merge) as "no match";
# without it the mask would contain NaN and boolean indexing would raise.
# .copy() makes the result an independent frame so later column additions do not
# trigger SettingWithCopyWarning against df_geocode.
is_toronto = df_geocode['Borough'].str.contains('Toronto', na=False)
df_toronto = df_geocode[is_toronto].copy()
df_toronto

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
40,M4J,East York/East Toronto,The Danforth East,43.685347,-79.338106
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
45,M4P,Central Toronto,Davisville North,43.712751,-79.390197
46,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
47,M4S,Central Toronto,Davisville,43.704324,-79.38879
48,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316


Searching for Toronto's coordinates using Google and plotting the map using Folium:

lat: 43.651070

lon: -79.347015


In [20]:
# Coordinates used as the centre of the Toronto map.
lat = 43.651070
lon = -79.347015

# Base map centred on those coordinates.
map_toronto = folium.Map(location=[lat, lon], zoom_start=10)

# Red circle marking the centre point, with a 'Toronto' popup.
center_marker = folium.CircleMarker(
    location=[lat, lon],
    radius=10,
    popup='Toronto',
    color='red',
    fill=True,
    fill_color='red',
    fill_opacity=0.6,
)
center_marker.add_to(map_toronto)
map_toronto

Adding the neighborhoods to the plotted map:

In [27]:
# Add one circle marker per neighborhood row to the existing Toronto map.
# The coordinates are bound to point_lat/point_lon rather than lat/lon so that
# the map-centre values stored in `lat` and `lon` are not overwritten by the loop.
for point_lat, point_lon, borough, neighborhood in zip(
    df_toronto['Latitude'],
    df_toronto['Longitude'],
    df_toronto['Borough'],
    df_toronto['Neighborhood'],
):
    label = '{}, {}'.format(neighborhood, borough)
    folium.CircleMarker(
        [point_lat, point_lon],
        radius=5,
        popup=label,
        fill=True,
        color='blue',
        fill_color='yellow',
        fill_opacity=0.5
        ).add_to(map_toronto)
map_toronto

Using K-means to create neighborhood clusters based on latitude and longitude.

k = 5 was chosen as there are 5 different boroughs.

In [29]:
from sklearn.cluster import KMeans

In [33]:
# Number of clusters requested from K-means.
k = 5

# Work on an independent copy: adding the label column must not modify
# df_toronto, and re-running this cell must not fail because the column
# already exists on the source frame.
df_toronto2 = df_toronto.copy()

# Cluster on the coordinates only. Selecting the two columns by name replaces
# drop([...], 1), whose positional axis argument is rejected by pandas 2.x.
borough_clusters = df_toronto2[['Latitude', 'Longitude']]

# n_init is pinned so the result does not depend on the scikit-learn version's
# default; random_state fixes the centroid initialisation.
kmeans = KMeans(n_clusters=k, random_state=0, n_init=10).fit(borough_clusters)

# Put the cluster label of each row in the first column.
df_toronto2.insert(0, 'Cluster Labels', kmeans.labels_)
df_toronto2.head()

Unnamed: 0,Cluster Labels,PostalCode,Borough,Neighborhood,Latitude,Longitude
37,4,M4E,East Toronto,The Beaches,43.676357,-79.293031
40,4,M4J,East York/East Toronto,The Danforth East,43.685347,-79.338106
41,4,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,4,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
43,4,M4M,East Toronto,Studio District,43.659526,-79.340923


Creating a dictionary that maps each cluster label to a color and generating a new map identifying each cluster with a unique color:

In [47]:
# Marker color for each cluster label, keyed by the integer label 0-4.
colors = dict(enumerate(['red', 'blue', 'green', 'yellow', 'purple']))


{0: 'red', 1: 'blue', 2: 'green', 3: 'yellow', 4: 'purple'}

In [51]:
# Centre the cluster map on Toronto with explicit coordinates. The names `lat`
# and `lon` are reused as loop variables elsewhere in the notebook, so their
# value at this point is not guaranteed to be the city centre.
toronto_center = [43.651070, -79.347015]
map_toronto_cluster = folium.Map(location=toronto_center, zoom_start=10)

# One marker per neighborhood, colored by its cluster label. Borough is read
# from the same row as the other fields so that each popup shows that row's
# borough rather than a value left over from an earlier cell.
for point_lat, point_lon, borough, neighborhood, clusters in zip(
    df_toronto2['Latitude'],
    df_toronto2['Longitude'],
    df_toronto2['Borough'],
    df_toronto2['Neighborhood'],
    df_toronto2['Cluster Labels'],
):
    label = '{}, {}, Cluster {}'.format(neighborhood, borough, clusters)
    folium.CircleMarker(
        [point_lat, point_lon],
        radius=5,
        popup=label,
        fill=True,
        color=colors[clusters],
        fill_color=colors[clusters],
        fill_opacity=0.5
        ).add_to(map_toronto_cluster)

map_toronto_cluster