In [None]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

### Load the wiki page and parse out the table with ```wikitable``` CSS class

In [None]:
# Download the Wikipedia page. A timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() stops us from parsing an HTTP
# error page as if it were the article.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'html.parser')

# Use the first table carrying the "wikitable" CSS class; fail with a clear
# message if the page contains no such table.
table = soup.find("table", class_="wikitable")
if table is None:
    raise ValueError("No table with CSS class 'wikitable' was found on the page")

### Parse the table rows and columns and convert them into an array

In [23]:
# Collect the stripped text of every table row that has exactly three <td>
# cells; rows with any other number of <td> cells are skipped.
records = [
    (cells[0].text.strip(), cells[1].text.strip(), cells[2].text.strip())
    for cells in (tr.find_all('td') for tr in table.find_all('tr'))
    if len(cells) == 3
]
data_array = np.asarray(records)

289

### Build the data frame using the array with the specified column titles

In [24]:
# Wrap the scraped array in a DataFrame, naming its three columns.
df = pd.DataFrame(
    data=data_array,
    columns=['PostalCode', 'Borough', 'Neighborhood'],
)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Drop the rows whose ```Borough``` is _Not assigned_

In [25]:
# Keep only rows whose Borough is not the 'Not assigned' placeholder;
# the surviving rows keep their original index labels.
df = df.loc[df['Borough'] != 'Not assigned']

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


### Replace _Not assigned_ values in the ```Neighborhood``` column with the value from the ```Borough``` column

In [26]:
# Where the neighbourhood is the 'Not assigned' placeholder, substitute the
# borough name from the same row; all other values are left unchanged.
df['Neighborhood'] = df['Neighborhood'].mask(
    df['Neighborhood'] == 'Not assigned',
    df['Borough'],
)

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


### Finally, group the rows by ```PostalCode``` and ```Borough```, joining the ```Neighborhood``` values of each group with commas

In [27]:
# Collapse to one row per (PostalCode, Borough) pair, joining that group's
# neighbourhood names into a single ', '-separated string.
df = (
    df.groupby(['PostalCode', 'Borough'])['Neighborhood']
    .agg(', '.join)
    .reset_index()
)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Print the shape of data frame

In [29]:
# Display the grouped frame's dimensions as a (rows, columns) tuple.
df.shape

(103, 3)