# Import libraries

In [2]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

# Scrape the Wikipedia page

In [3]:
# Download the Wikipedia article and parse it into a BeautifulSoup tree.
WIKI_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# An explicit timeout keeps the cell from hanging forever on a stalled
# connection; raise_for_status() stops here on a 4xx/5xx response instead of
# letting an error page be parsed as if it were the article.
response = requests.get(WIKI_URL, timeout=30)
response.raise_for_status()

wiki_page = response.text
soup = BeautifulSoup(wiki_page, 'html.parser')

# Find the table on the Wikipedia page and extract its columns

In [4]:
# Take the first <table> in the parsed page and split its data rows into
# three parallel lists: postal code, borough and neighborhood.
table = soup.find('table')
if table is None:
    raise ValueError('No <table> element found in the downloaded page')

PostalCode = []
Borough = []
Neighborhood = []

# Parse row by row rather than stepping through a flat list of cells in
# threes: a row with an unexpected number of cells is then reported
# immediately instead of shifting every following value into the wrong column.
for row in table.find_all('tr'):
    cells = [cell.text.strip() for cell in row.find_all('td')]
    if not cells:
        # Rows without any <td> (e.g. a header row made of <th> cells)
        # carry no data.
        continue
    if len(cells) != 3:
        raise ValueError(
            'Expected 3 cells per table row, found %d: %r' % (len(cells), cells)
        )
    code, borough, neighborhood = cells
    PostalCode.append(code)
    Borough.append(borough)
    Neighborhood.append(neighborhood)

# Build dataframe from table

In [5]:
# Assemble the three parallel lists into a dataframe, one named column each.
# Building from a column dict avoids the row-major frame + transpose detour,
# and pandas raises if the lists differ in length instead of padding the
# shorter ones with missing values.
df_postalcodes = pd.DataFrame({
    'PostalCode': PostalCode,
    'Borough': Borough,
    'Neighborhood': Neighborhood,
})

# Cleanse the data: drop rows whose borough is "Not assigned", then replace any remaining "Not assigned" neighborhood with the name of its borough

In [7]:
# Remove every row whose borough is 'Not assigned'; the frame is modified in place.
borough_missing = df_postalcodes['Borough'] == 'Not assigned'
df_postalcodes.drop(index=df_postalcodes.loc[borough_missing].index, inplace=True)

# For the rows that remain, a 'Not assigned' neighborhood takes the value of
# the borough on the same row (the assignment aligns on the row index).
neighborhood_missing = df_postalcodes['Neighborhood'] == 'Not assigned'
df_postalcodes.loc[neighborhood_missing, 'Neighborhood'] = df_postalcodes['Borough']

# Group the data by Postal Code and Borough, joining the neighborhoods of each group with commas

In [8]:
# Collapse the rows that share a postal code and borough into a single row
# whose neighborhood is the comma-separated list of the group's neighborhoods.
neighborhoods_by_area = df_postalcodes.groupby(['PostalCode', 'Borough'])['Neighborhood']

# reset_index() turns the two group keys back into ordinary columns, so the
# result already carries the columns PostalCode, Borough and Neighborhood.
df_postalcodes_grouped = neighborhoods_by_area.agg(', '.join).reset_index()

# Show the shape (number of rows, number of columns) of the grouped dataframe

In [10]:
# Last expression of the cell, so the (rows, columns) tuple is shown as its output.
df_postalcodes_grouped.shape

(103, 3)