In [1]:
# import libraries 
import requests
import pandas as pd
import numpy as np

In [2]:
# URL for the wikipedia page 
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
        
# get the page HTML and get the list of the row for the table 
canada =  requests.get(url).text

from bs4 import BeautifulSoup
soup = BeautifulSoup(canada,'lxml')

postal_codes_table = soup.find('table',{'class':'wikitable sortable'})
row_list = postal_codes_table.find_all('tr')

# First row in the table is the header, so extract that separately
header_row = row_list.pop(0)
header_th = header_row.find_all('th')
header = [el.text for el in header_th]

table_dict = {x:[] for x in header}

# Now for the rest of the table...
for row in row_list:
    row_td = row.find_all('td')
    for el,td in zip(header,row_td):
        table_dict[el].append(td.text)

In [3]:
# convert the table into a dataframe 
# The dataframe will consist of three columns: PostalCode, Borough, and Neighborhood
table = pd.DataFrame(table_dict)
table = table.replace('\n','', regex=True)
table.columns = ['Postcode','Borough','Neighbourhood']
table.columns

Index(['Postcode', 'Borough', 'Neighbourhood'], dtype='object')

In [4]:
# Drop Not assigned Borough 
# Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
table = table[table.Borough != 'Not assigned'] 
table

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [5]:
table['Neighbourhood'] = np.where(table['Neighbourhood'] == 'Not assigned', table.Borough.value)


AttributeError: 'Series' object has no attribute 'value'

In [None]:
# Combine neighbourhood with the same postal code and seperate using comma(,) 
table  = table.groupby(['Postcode','Borough'], sort = False).agg(lambda x: ', '.join(x))
table = table.sort_values(by=['Postcode', 'Borough'])
table

In [None]:
table.shape 