In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup as bs

### Part I Scrape data from Wikipedia

In [2]:
# Wikipedia article "List of postal codes of Canada: M" -- the page scraped below
url = (
    'https://en.wikipedia.org/wiki/'
    'List_of_postal_codes_of_Canada:_M'
)

In [3]:
# download the page and parse its HTML with BeautifulSoup (lxml parser)
# timeout: without one, requests can wait indefinitely on an unresponsive server
response = requests.get(url, timeout=30)
# stop here on an HTTP error status instead of silently parsing an error page
response.raise_for_status()
soup = bs(response.text, 'lxml')

In [4]:
# locate the data table: the first element matching class "wikitable sortable"
table = soup.find(attrs={'class': 'wikitable sortable'})

In [5]:
# get table header: stripped text of every <th> cell in the table's first row
# (find_all is the Beautiful Soup 4 method name; findAll is its deprecated alias)
header_row = table.find_all('tr')[0]
header = [th.get_text().strip() for th in header_row.find_all('th')]

In [6]:
# get table content: one list of stripped <td> texts per row, skipping the
# first (header) row
# (find_all is the Beautiful Soup 4 method name; findAll is its deprecated alias)
content = []
for row in table.find_all('tr')[1:]:
    cells = [td.get_text().strip() for td in row.find_all('td')]
    # a row without any <td> cell carries no data, so it is not kept as a record
    if cells:
        content.append(cells)

In [7]:
# build the pandas DataFrame: one row per scraped table row, with the
# scraped header texts as column names
df = pd.DataFrame(content, columns=header)

In [8]:
# keep only the rows whose Borough is something other than 'Not assigned'
has_borough = df['Borough'].ne('Not assigned')
df = df.loc[has_borough]

In [9]:
# reindex dataframe: the row filter in the previous cell may have left gaps in
# the row labels, so replace them with a fresh 0..n-1 index; drop=True discards
# the old labels instead of keeping them as a new column
df = df.reset_index(drop=True)

In [10]:
# if a cell has a borough but a 'Not assigned' neighbourhood, then the
# neighbourhood will be the same as the borough.
# One vectorized .loc assignment replaces a per-row df['Neighbourhood'][i] = ...
# loop: that chained indexing raises SettingWithCopyWarning and, with pandas
# copy-on-write enabled, never writes back to df at all.
unassigned = df['Neighbourhood'] == 'Not assigned'
df.loc[unassigned, 'Neighbourhood'] = df.loc[unassigned, 'Borough']

In [11]:
# collapse all rows sharing a Postcode/Borough pair into a single row whose
# Neighbourhood is the comma-separated list of that group's neighbourhoods
df = (
    df.groupby(['Postcode', 'Borough'], as_index=False)['Neighbourhood']
      .agg(','.join)
)

In [12]:
# preview the first five rows of the grouped table
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [13]:
# (number of rows, number of columns) of the grouped table
df.shape

(103, 3)

### Part II Add coordinates

In [14]:
# read coordinates data from a CSV file in the working directory; the merge
# cell that follows uses its 'Postal Code', 'Latitude' and 'Longitude' columns
geo = pd.read_csv('Geospatial_Coordinates.csv')

In [26]:
# add coordinates data to dataframe
# Drop any Latitude/Longitude columns left by an earlier run of this cell first:
# merging a second time would otherwise produce suffixed duplicates
# (Latitude_x, Latitude_y, ...). On a first run the drop is a no-op.
df = df.drop(['Latitude', 'Longitude'], axis=1, errors='ignore')
# left join: every row of df is kept, matched to geo on the postal code
df = df.merge(
    geo[['Postal Code', 'Latitude', 'Longitude']],
    left_on=['Postcode'],
    right_on=['Postal Code'],
    how='left',
)
# the join key from geo duplicates 'Postcode', so remove it
df = df.drop(['Postal Code'], axis=1)

In [29]:
# preview the first five rows, now including the Latitude and Longitude columns
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
