In [1]:
import pandas as pd
import numpy as np
import requests
import geocoder

# Part 1

Load the data from the Wikipedia page using pandas' `read_html` function.

In [2]:
# read_html returns one DataFrame per <table> found on the page; the
# postal-code table is the first one.
tables = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
df = tables[0]
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


Remove all rows whose borough is `Not assigned`.

In [3]:
# Flag the placeholder rows (borough starts with "Not ") and keep the rest.
is_placeholder = df['Borough'].str.startswith('Not ')
df = df[~is_placeholder]
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


Confirm that no rows remain with `Not assigned` as the borough.

In [4]:
# Sanity check: this selection should come back empty now that the
# placeholder boroughs have been filtered out.
still_unassigned = df['Borough'].str.startswith('Not ')
df[still_unassigned]

Unnamed: 0,Postal Code,Borough,Neighbourhood


For every neighbourhood whose value is `Not assigned`, set it to the name of its borough.

In [5]:
# Fill placeholder neighbourhoods with the borough name instead of relying on
# a manual "no rows matched" inspection, so the step still holds if the source
# table changes. Series.mask swaps in the borough wherever the flag is True,
# and assign builds a new frame, so re-running the cell is harmless.
needs_fill = df['Neighbourhood'].str.startswith('Not ', na=False)
df = df.assign(Neighbourhood=df['Neighbourhood'].mask(needs_fill, df['Borough']))

# Display whatever is still unassigned -- expected to be an empty frame.
df.loc[df['Neighbourhood'].str.startswith('Not ', na=False)]

Unnamed: 0,Postal Code,Borough,Neighbourhood


Print the dataframe's shape.

In [6]:
# (rows, columns) remaining after the unassigned boroughs were dropped.
df.shape

(103, 3)

# Part 2

In [7]:
# Re-display the cleaned frame as the starting point for Part 2.
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


Load the coordinates from a CSV file, due to the failure to get latitude and longitude from geocoder.

In [8]:
# NOTE: disabled -- the geocoder lookup below failed to return coordinates, so
# the latitude/longitude values are read from a CSV file in the next cell.
# If this is ever re-enabled, cap the number of retries: the `while` loop has
# no exit other than a successful lookup.
#
# df['Latitude'] = np.nan
# df['Longitude'] = np.nan

# for postal_code in df['Postal Code']:
#     # initialize your variable to None
#     lat_lng_coords = None

#     # loop until you get the coordinates
#     print(f'Extracting {postal_code}')
#     while(lat_lng_coords is None):
#         g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
#         lat_lng_coords = g.latlng
        
#     df.loc[df['Postal Code'] == postal_code, ['Latitude', 'Longitude']] = lat_lng_coords[0], lat_lng_coords[1]

In [9]:
# Attach latitude/longitude to each postal code from the local CSV file.
coords = pd.read_csv('Geospatial_Coordinates.csv')

# Drop any coordinate columns left over from a previous run of this cell so
# that re-running it does not merge them in twice and produce suffixed
# duplicates (e.g. Latitude_x / Latitude_y).
stale_columns = coords.columns.difference(['Postal Code'])
df = df.drop(columns=stale_columns, errors='ignore').merge(coords, on='Postal Code')

In [10]:
# Preview the frame with the merged Latitude / Longitude columns.
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


# Part 3

In [12]:
import folium

Visualise datapoints

In [13]:
# Base map centred on Toronto.
m = folium.Map(location=[43.6532, -79.3832], zoom_start=10, height=500, width=1000)

# Add one circle marker per row of the coordinates table.
for lat, lng in zip(df['Latitude'], df['Longitude']):
    folium.CircleMarker([lat, lng], radius=5).add_to(m)

m