In [169]:
from io import StringIO

import pandas as pd
import requests

### Using pandas to obtain the table from the Wikipedia article

In [170]:
# Download the Wikipedia "List of postal codes of Canada: M" page and parse
# the first HTML table on it into a DataFrame.
response = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,  # never hang the kernel indefinitely on a stalled connection
)
# Fail loudly on an HTTP error instead of trying to parse an error page.
response.raise_for_status()
# NOTE: despite its name, `website_url` holds the page's HTML text, not a URL.
website_url = response.text
# Passing a literal HTML string to read_html is deprecated (pandas >= 2.1);
# wrap it in a file-like object instead.
df = pd.read_html(StringIO(website_url))[0]

### The table is saved in `df`

In [171]:
# Preview the first five rows of the freshly parsed table.
df.head(n=5)

Unnamed: 0,0,1,2
0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


### Renaming the dataframe column names

In [172]:
# Promote the first row (which holds the scraped header text) to column labels.
# A plain list is assigned so the columns Index does not inherit the row
# Series' name (0), which would otherwise show up as a stray "0" axis label.
df.columns = df.iloc[0].tolist()

In [173]:
# Remove the row labelled 0, whose values were used as the column labels.
# Rebind instead of mutating in place so the step reads as an explicit
# transformation and stays chainable.
df = df.drop(index=0)

In [174]:
# Check that the columns are now labelled Postcode / Borough / Neighbourhood.
df.head(n=5)

Unnamed: 0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront


### Dropping all the rows that do not have the Borough value assigned

In [175]:
# Collect the index labels of every row whose Borough is "Not assigned".
indexList = df.index[df["Borough"].eq("Not assigned")]

In [176]:
# Discard the rows collected in `indexList` (Borough == "Not assigned").
# Rebind instead of mutating in place so the step reads as an explicit
# transformation and stays chainable.
df = df.drop(index=indexList)

### Replacing "Not assigned" Neighbourhood values with the row's Borough name

In [177]:
# List the rows whose Neighbourhood is still "Not assigned".
df.loc[df["Neighbourhood"].eq("Not assigned")]

Unnamed: 0,Postcode,Borough,Neighbourhood
10,M9A,Queen's Park,Not assigned


In [178]:
# For every row whose Neighbourhood is "Not assigned", copy the Borough name
# into Neighbourhood.
# A single .loc[rows, column] assignment writes to `df` itself; the chained
# form df.loc[10]["Neighbourhood"] = ... may write to a temporary copy (and
# never updates `df` under pandas Copy-on-Write). Using a mask also avoids
# hard-coding the row label 10.
neighbourhood_missing = df["Neighbourhood"] == "Not assigned"
df.loc[neighbourhood_missing, "Neighbourhood"] = df.loc[neighbourhood_missing, "Borough"]

In [179]:
# Inspect the row labelled 10 to confirm its Neighbourhood value.
df.loc[10, :]

0
Postcode                  M9A
Borough          Queen's Park
Neighbourhood    Queen's Park
Name: 10, dtype: object

In [180]:
# (rows, columns) of the frame at this point in the cleaning.
df.shape

(210, 3)

In [181]:
# Preview the first five remaining rows.
df.head(n=5)

Unnamed: 0,Postcode,Borough,Neighbourhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor


### Combining rows with the same Postcode values using group by 

In [183]:
# Collapse rows that share a (Postcode, Borough) pair into a single row whose
# remaining column(s) are the comma-joined values of the group.
df = (
    df
    .groupby(by=["Postcode", "Borough"])
    .agg(",".join)
)
df.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighbourhood
Postcode,Borough,Unnamed: 2_level_1
M1B,Scarborough,"Rouge,Malvern"
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
M1E,Scarborough,"Guildwood,Morningside,West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


### Converting the multi-index dataframe to a single index

In [192]:
# Move both group-key index levels back into ordinary columns.
df = df.reset_index(level=["Postcode", "Borough"])
df.head(n=5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Checking the shape of the dataframe

In [193]:
# (rows, columns) of the final frame: one row per (Postcode, Borough) group.
df.shape

(103, 3)