### Import Necessary Libraries & Packages

In [1]:
import numpy as np # For vector data
import pandas as pd # For data analysis
import requests # For web requests
from bs4 import BeautifulSoup # For Web-Scraping

### Web-Scraping to Fetch the Desired Data

In [2]:
# Fetch the Wikipedia page that lists the 'M' (Toronto) postal codes.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
page = requests.get(url, timeout=30)
# Fail loudly on an HTTP error (4xx/5xx) instead of silently parsing an
# error page into an empty or garbage table further down.
page.raise_for_status()
soup = BeautifulSoup(page.text, 'html.parser')

# Column names: the stripped text of every <th> inside the first <tr>
# found in the document.
headers = [th.text.strip() for th in soup.find('tr').find_all('th')]

# Scrape the Toronto FSA data: one list of stripped <td> texts per <tr> of
# the first <tbody> in the document.
# NOTE: rows without any <td> cell yield an empty list. The first entry is
# such a blank row (presumably the header row, which only holds <th> cells);
# it is kept here on purpose because the DataFrame cell drops it with
# fsa_data[1:].
fsa_data = [
    [td.text.strip() for td in row.find_all('td')]
    for row in soup.find('tbody').find_all('tr')
]

### Transform the fetched data into a pandas DataFrame with 3 columns

In [3]:
# Build the frame from the scraped rows, labelling the columns with the
# scraped header texts.
toronto_df = pd.DataFrame(
    data=fsa_data[1:],  # the first scraped entry is blank, so skip it
    columns=headers,
)
toronto_df.head(n=5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Drop the rows which do not have any Borough value

In [4]:
# Keep only the rows whose Borough is not the 'Not assigned' placeholder,
# then renumber the index from 0 so the removed rows leave no gaps.
toronto_df = (
    toronto_df
    .loc[toronto_df['Borough'].ne('Not assigned')]
    .reset_index(drop=True)
)
toronto_df.head(n=5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


### Concatenate the Neighbourhoods for each Postcode

In [5]:
# Collapse the frame to one row per (Postcode, Borough) pair, joining that
# pair's Neighbourhood values with commas. sort=False keeps the groups in
# order of first appearance rather than sorting them by key.
df_aggr = (
    toronto_df
    .groupby(['Postcode', 'Borough'], sort=False)['Neighbourhood']
    .agg(','.join)
    .reset_index()  # turn the group keys back into ordinary columns
)
df_aggr.head(n=5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Not assigned


### Replace 'Not assigned' Neighbourhood with the corresponding Borough

In [6]:
# Boolean mask of the rows whose Neighbourhood is still the placeholder text.
unassigned = df_aggr['Neighbourhood'] == 'Not assigned'
# Overwrite those cells with the Borough of the same row; the right-hand
# Series is matched to the selected rows by index label.
df_aggr.loc[unassigned, 'Neighbourhood'] = df_aggr.loc[unassigned, 'Borough']
df_aggr.head(n=5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


### Print the Shape of the DataFrame

In [7]:
# Show the (rows, columns) dimensions of the aggregated DataFrame.
df_aggr.shape

(103, 3)