In [9]:
#import relevant libraries
import requests
import pandas as pd

In [10]:
from bs4 import BeautifulSoup

In [11]:
#downloads webpage; the timeout keeps the cell from hanging forever on an unresponsive server
response = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", timeout=30)

#raises an HTTPError on a 4xx/5xx response so an error page is never parsed as if it were the article
response.raise_for_status()

#keeps the decoded HTML body for parsing
page = response.text

In [12]:
#parses the downloaded HTML into a BeautifulSoup tree using the "html.parser" backend
soup = BeautifulSoup(markup=page, features="html.parser")

In [13]:
#creates table_contents list (one dictionary per assigned postal code)
table_contents=[]

#scans webpage for the first table
table=soup.find('table')

#fails with a clear message when the page has no table, instead of an AttributeError on None further down
if table is None:
    raise ValueError("No <table> element found in the downloaded page")

#for each <td> tag (find_all is the current name of the legacy findAll alias)
for row in table.find_all('td'):

    #skips cells that do not contain both the <p> and <span> tags read below
    if row.span is None or row.p is None:
        continue

    #text of the <span>, holding the borough followed by the bracketed neighborhoods
    span_text = row.span.text

    #skips the cells with a borough that is not assigned
    if span_text=='Not assigned':
        continue

    #splits the text at every left/opening bracket
    parts = span_text.split('(')

    #takes the text after the first left/opening bracket; stays empty when the cell has no bracket
    neighborhood = parts[1] if len(parts) > 1 else ''

    #drops the closing bracket(s), replaces ' /' with a comma between neighborhoods and trims spaces
    neighborhood = neighborhood.strip(')').replace(' /',',').replace(')',' ').strip(' ')

    #creates a dictionary called cell with 3 keys (PostalCode, Borough, and Neighborhood)
    cell = {
        #first 3 characters of the <p> text
        'PostalCode': row.p.text[:3],
        #text before the first left/opening bracket
        'Borough': parts[0],
        'Neighborhood': neighborhood,
    }

    #appends the cell to the table_contents list
    table_contents.append(cell)

#converts table_contents to a pandas dataframe; the explicit columns keep the frame usable even when no cell was parsed
df=pd.DataFrame(table_contents, columns=['PostalCode','Borough','Neighborhood'])

#cleans borough names for display (exact-match replacement of the run-together names)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})


In [14]:
#displays the postal code, borough and neighborhood dataframe
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto Business,Enclave of M4L
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [15]:
#shows the (rows, columns) dimensions of df
df.shape

(103, 3)

In [18]:
#address of the CSV file holding the geospatial coordinates, split across lines for readability
URL = (
    "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/"
    "IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/"
    "Geospatial_Coordinates.csv"
)

In [19]:
#reads the geospatial coordinates CSV from URL into a dataframe
df2=pd.read_csv(filepath_or_buffer=URL)

In [20]:
#displays the geospatial coordinates dataframe
df2

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [27]:

#joins the borough table with the coordinates on postal code; the inner join keeps only codes found in both frames, and both key columns stay in the result
df3=df.merge(df2, how='inner', left_on='PostalCode', right_on='Postal Code')

In [28]:
#displays the merged dataframe
df3

Unnamed: 0,PostalCode,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,M3A,North York,Parkwoods,M3A,43.753259,-79.329656
1,M4A,North York,Victoria Village,M4A,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",M5A,43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",M6A,43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,M7A,43.662301,-79.389494
...,...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",M8X,43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,M4Y,43.665860,-79.383160
100,M7Y,East Toronto Business,Enclave of M4L,M7Y,43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",M8Y,43.636258,-79.498509
