### Import the necessary libraries

In [173]:
import requests 
import pandas as pd 
import numpy as np
from bs4 import BeautifulSoup

### Scrape the URL and get the table data

In [174]:
URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Fail fast on network/HTTP problems instead of silently parsing an error page.
response = requests.get(URL, timeout=30)
response.raise_for_status()
res = response.text
soup = BeautifulSoup(res, 'lxml')

table = soup.find('table', class_='wikitable sortable')
if table is None:
    # soup.find returns None when nothing matches; report that explicitly
    # rather than failing with an AttributeError on None.
    raise ValueError("No 'wikitable sortable' table found at {}".format(URL))

# Each entry is a (postcode, borough, neighbourhood) tuple of cell texts.
df_list = []
# Skip the first <tr> (presumably the header row, which has no <td> cells).
for items in table.find_all('tr')[1:]:
    data = items.find_all('td')
    if len(data) < 3:
        # A row without three <td> cells has nothing usable; skip it instead of
        # re-appending the values left over from the previous row.
        continue
    postcode, borough, neighbourhood = (cell.get_text().strip() for cell in data[:3])
    df_list.append((postcode, borough, neighbourhood))

print(df_list[0:3])

[('M1A', 'Not assigned', 'Not assigned'), ('M2A', 'Not assigned', 'Not assigned'), ('M3A', 'North York', 'Parkwoods')]


### Convert the above list data into a pandas DataFrame

In [175]:
# Turn the scraped (postcode, borough, neighbourhood) tuples into a labelled table.
df_data = pd.DataFrame(
    data=df_list,
    columns=['Postalcode', 'Borough', 'Neighbourhood'],
)

In [176]:
# Preview the first rows to confirm the three columns were populated.
df_data.head()

Unnamed: 0,Postalcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Check the shape of the DataFrame

In [177]:
# Dimensions of the scraped table as (rows, columns).
df_data.shape

(289, 3)

### Get the count of Column Borough = Not assigned

In [178]:
# Number of rows whose borough is the 'Not assigned' placeholder.
(df_data['Borough'] == 'Not assigned').sum()

77

### Get the count of Column Neighbourhood = Not assigned

In [179]:
# Number of rows whose neighbourhood is the 'Not assigned' placeholder.
(df_data['Neighbourhood'] == 'Not assigned').sum()

78

### Remove the rows where Borough = Not assigned 

In [180]:
# Keep only the postal codes that have an assigned borough. The explicit copy()
# makes the result an independent frame, so later column assignments on it do
# not raise SettingWithCopyWarning.
df_data = df_data.loc[df_data['Borough'] != 'Not assigned'].copy()

### Shape of the DataFrame after removing rows, where Borough column equal to Not assigned values

In [181]:
# (rows, columns) once the 'Not assigned' boroughs have been filtered out.
df_data.shape

(212, 3)

### Get the rows where Neighbourhood is still "Not assigned"

In [182]:
# How many of the remaining rows still carry a 'Not assigned' neighbourhood.
(df_data['Neighbourhood'] == 'Not assigned').sum()

1

In [183]:
# Show the rows whose neighbourhood is the 'Not assigned' placeholder.
df_data[df_data['Neighbourhood'].eq('Not assigned')]

Unnamed: 0,Postalcode,Borough,Neighbourhood
8,M7A,Queen's Park,Not assigned


### Copy the value of Borough column to the Neighbourhood column, if Neighbourhood == Not assigned 

In [184]:
# Fall back to the borough name only where the neighbourhood is 'Not assigned';
# every other row keeps its own neighbourhood. assign() builds a new frame, so
# nothing is written into a filtered slice of the earlier data.
df_data = df_data.assign(
    Neighbourhood=np.where(
        df_data['Neighbourhood'] == 'Not assigned',
        df_data['Borough'],
        df_data['Neighbourhood'],
    )
)

In [185]:
# Count the 'Not assigned' neighbourhoods left after the borough fallback.
(df_data['Neighbourhood'] == 'Not assigned').sum()

0

### Merge the Neighbourhoods for the same Postal Code 

In [186]:
# One row per (postal code, borough) pair; the neighbourhoods of each pair are
# joined into a single comma-separated string.
df_data = (
    df_data
    .groupby(['Postalcode', 'Borough'])['Neighbourhood']
    .agg(', '.join)
    .reset_index()
)

## Final shape of the DataFrame

In [188]:
# (rows, columns) after the neighbourhoods were merged per postal code.
df_data.shape

(103, 3)