In [1]:
# import all the libraries
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [2]:
# download the HTML source of the wiki page
# a timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of letting a later
# cell fail while parsing an error page
response = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
response.raise_for_status()
source = response.text

In [3]:
# parse the downloaded page source into a BeautifulSoup tree using the lxml parser
soup = BeautifulSoup(source, features="lxml")

In [4]:
# walk down body -> div.mw-body -> div#bodyContent.mw-body-content and take the
# <tbody> of the first <table> found inside it
table = (
    soup.find("body")
    .find("div", attrs={"class": "mw-body"})
    .find("div", attrs={"class": "mw-body-content", "id": "bodyContent"})
    .find("table")
    .find("tbody")
)

In [5]:
# collect the text of every <td> cell, one inner list per <tr> row of the table
# (a row that has no <td> cells contributes an empty list)
all_postcodes = [
    [cell.text for cell in row.find_all("td")]
    for row in table.find_all("tr")
]

In [6]:
# column names for the dataframe, in the order the cells appear in each table row
labels = [
    "Postcode",
    "Borough",
    "Neighbourhood",
]

In [7]:
# build the dataframe from the scraped rows, then drop every row that
# contains a missing value in any column
df = pd.DataFrame.from_records(
    data=all_postcodes,
    columns=labels,
).dropna(axis=0, how="any")

In [8]:
# keep only the rows whose Borough is something other than "Not assigned"
df = df.loc[df["Borough"].ne("Not assigned")]

In [9]:
# remove the trailing "\n" from the Neighbourhood column
# rstrip("\n") only removes newline characters that are actually present, so
# re-running this cell (or a value that has no trailing newline) never cuts
# off a real character the way an unconditional slice(0, -1) does
# assign() returns a new dataframe instead of writing into the filtered slice
df = df.assign(Neighbourhood=df["Neighbourhood"].str.rstrip("\n"))

In [10]:
# where Neighbourhood is "Not assigned", fall back to the Borough value;
# every other row keeps its existing Neighbourhood
df["Neighbourhood"] = np.where(
    df["Neighbourhood"].eq("Not assigned"), df["Borough"], df["Neighbourhood"]
)

In [11]:
# discard rows that exactly repeat an earlier row, keeping the first occurrence
df = df.loc[~df.duplicated(keep="first")]

In [12]:
# one row per (Postcode, Borough) pair, with that pair's Neighbourhood values
# joined into a single comma-separated string
grouped_df = (
    df.groupby(["Postcode", "Borough"])["Neighbourhood"]
    .apply(", ".join)
    .reset_index()
)

In [13]:
# display the (rows, columns) shape of the grouped dataframe: one row per
# Postcode/Borough group and the three columns Postcode, Borough, Neighbourhood
grouped_df.shape

(103, 3)