In [1]:
# Importing pandas and numpy
import pandas as pd
import numpy as np

In [58]:
# Importing Beautiful Soup
from bs4 import BeautifulSoup

In [59]:
# Importing requests
import requests

In [62]:
# Download the Wikipedia "List of postal codes of Canada: M" page.
# The timeout stops the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() makes an HTTP error fail right here instead of
# surfacing later as a confusing parsing problem.
response = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
response.raise_for_status()
source = response.text

# The document is HTML, so parse it with an HTML parser rather than the XML
# one; 'html.parser' ships with Python and needs no extra package.
soup = BeautifulSoup(source, 'html.parser')

# The data is read from the first <table> element in the document.
table = soup.find('table')
if table is None:
    # Fail with a clear message instead of an AttributeError in a later cell.
    raise ValueError("No <table> element found in the downloaded page")

In [63]:
# Start from an empty frame that only declares the three columns
# each scraped table row will be stored under.
df = pd.DataFrame(
    columns=[
        'Postal Code',
        'Borough',
        'Neighborhood',
    ],
)

In [65]:
# Putting the data into the data frame.
# The stripped text of every <td> in a table row is gathered into a list; rows
# that do not contain exactly three <td> cells are skipped.
# The frame is rebuilt from the collected rows in a single step instead of
# appending with df.loc[len(df)]: this keeps the cell idempotent (re-running it
# does not duplicate rows) and avoids growing the frame one row at a time.
rows = []
for tr_cell in table.find_all('tr'):
    row_data = [td_cell.text.strip() for td_cell in tr_cell.find_all('td')]
    if len(row_data) == 3:
        rows.append(row_data)

# Reuse the column labels already declared on `df` so they are defined once.
df = pd.DataFrame(rows, columns=list(df.columns))

In [67]:
# Preview the first five scraped rows
df.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [71]:
# Keep only the rows whose Borough is something other than "Not assigned".
# The original row labels are kept (the index is not reset).
df = df.loc[df['Borough'].ne("Not assigned")]
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [75]:
# For each postal code, concatenate all of its Neighborhood values into one
# comma-separated string, then turn the grouped result back into a regular
# frame whose joined column is named 'JoinedNeighborhood'.
df2 = (
    df.groupby('Postal Code')['Neighborhood']
    .apply(', '.join)
    .reset_index()
    .rename(columns={'Neighborhood': 'JoinedNeighborhood'})
)
df2.head()

Unnamed: 0,Postal Code,JoinedNeighborhood
0,M1B,"Malvern, Rouge"
1,M1C,"Rouge Hill, Port Union, Highland Creek"
2,M1E,"Guildwood, Morningside, West Hill"
3,M1G,Woburn
4,M1H,Cedarbrae


In [77]:
# Combine the filtered rows with the per-postal-code joined neighborhoods:
# merge on the postal code, discard the original per-row Neighborhood column,
# collapse rows that are now identical, and give the joined column its final
# name 'Neighborhood'.
df3 = (
    pd.merge(df, df2, on="Postal Code")
    .drop(columns=['Neighborhood'])
    .drop_duplicates()
    .rename(columns={'JoinedNeighborhood': 'Neighborhood'})
)
df3.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [78]:
# Show the (rows, columns) dimensions of the merged frame
df3.shape

(103, 3)