In [1]:
import pandas as pd
import numpy as np
import requests

# Read the table from the Wikipedia site

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
data = pd.read_html(url, flavor = 'bs4')

# Define a function to arrange the dataset

In [3]:
def str_extract(x):
    if 'Not assigned' in x:
        postal_code = str(x)[:3]
        borough = neighbor = np.nan
    else:
        postal_code = str(x)[:3]
        bor_neig = str(x)[3:].split('(')
        borough = bor_neig[0]
        neighbor = bor_neig[1].replace(')', '').replace(' /', ',')
        if neighbor == '': neighbor = borough
        
    return [postal_code, borough, neighbor]

# Filter and create a structured dataset

In [4]:
data_stacked = data[0].applymap(str_extract).stack()
new_list = []
for x in data_stacked:
    new_list.append(x)
dataset = pd.DataFrame(new_list, columns = ['PostalCode', 'Borough', 'Neighborhood'])

In [5]:
dataset.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Queen's Park,Ontario Provincial Government
7,M8A,,
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,"Malvern, Rouge"


In [6]:
dataset.shape

(180, 3)

# Read the GeoSpatial Dataset

In [7]:
loc_url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv'
location = pd.read_csv(loc_url, names = ['PostalCode', 'Latitude', 'Longitude'])

# Execute a inner join in both datasets

In [8]:
teste = pd.merge(dataset, location, on = 'PostalCode')
teste.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7532586,-79.3296565
1,M4A,North York,Victoria Village,43.7258823,-79.3155716
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6542599,-79.3606359
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.4647633
4,M7A,Queen's Park,Ontario Provincial Government,43.6623015,-79.3894938
5,M9A,Etobicoke,Islington Avenue,43.6678556,-79.5322424
6,M1B,Scarborough,"Malvern, Rouge",43.8066863,-79.1943534
7,M3B,North York,Don MillsNorth,43.7459058,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.7063972,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6571618,-79.3789371
