### Preparing and formatting dataframe

In [1]:
#!pip install bs4
from urllib.request import urlopen
from bs4 import BeautifulSoup

# Download the Wikipedia list of Canadian postal codes starting with "M" and
# parse the HTML with the lxml backend. The `with` block closes the HTTP
# response as soon as its body has been read, instead of leaving the
# connection open for the lifetime of the kernel.
with urlopen("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M") as html:
    bs_src = BeautifulSoup(html.read(),'lxml')

import pandas as pd
import numpy as np

# Locate the postal-code table on the parsed page by its CSS classes.
xml_table = bs_src.find('table',{'class':'wikitable sortable'})
if xml_table is None:
    # Fail with a readable message rather than an AttributeError on None.
    raise ValueError("No 'wikitable sortable' table found on the page")

# Column names come from the <th> cells of the table.
headers = [th.text.strip() for th in xml_table.find_all('th')]

# Collect every data row first and build the DataFrame once; concatenating
# inside the loop copies the whole frame on each iteration.
rows = xml_table.find_all('tr')
records = []
for row in rows:
    cells = [td.text.strip() for td in row.find_all('td')]
    # Rows without <td> cells (e.g. the header row) or with a cell count that
    # does not match the headers are skipped; the original dropped the
    # short rows afterwards via dropna().
    if len(cells) == len(headers):
        records.append(cells)

table = pd.DataFrame(records, columns=headers)

# Discard postal codes that have no borough assigned.
table = table[table.Borough != 'Not assigned'].reset_index(drop=True)

# Where a neighbourhood is not assigned, fall back to the borough name.
# A single .loc assignment replaces the chained `table.Neighbourhood.iloc[j] = ...`
# writes, which are not guaranteed to reach the underlying frame.
unnamed = table['Neighbourhood'] == 'Not assigned'
table.loc[unnamed, 'Neighbourhood'] = table.loc[unnamed, 'Borough']

# One row per (Postcode, Borough); neighbourhoods sharing a postal code are
# joined into a single comma-separated string.
table = table.groupby(['Postcode','Borough'],as_index=False).agg(', '.join)
table.head(50)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


### Acquiring latitudes and longitudes 

In [2]:
# Read the remote CSV that maps each postal code to its coordinates
# (columns: 'Postal Code', 'Latitude', 'Longitude') and preview the first rows.
lat_lng_df = pd.read_csv(
    'https://cocl.us/Geospatial_data',
)
lat_lng_df.head(5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Appending information to table

In [3]:
# Attach coordinates to each postal code by key lookup, so the result does not
# depend on the row order of lat_lng_df.
#
# The lookup is indexed by 'Postal Code'; keeping only the first occurrence of
# each code mirrors the original loop, which stopped at the first match.
coords = lat_lng_df.drop_duplicates(subset='Postal Code').set_index('Postal Code')

# Series.map replaces the nested loops and the chained
# `table.Latitude.iloc[j] = ...` writes. The columns come out as floats, and a
# postal code missing from lat_lng_df yields NaN rather than the string '0',
# which could be mistaken for a real coordinate.
table['Latitude'] = table['Postcode'].map(coords['Latitude'])
table['Longitude'] = table['Postcode'].map(coords['Longitude'])
table.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.8067,-79.1944
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.7845,-79.1605
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.7636,-79.1887
3,M1G,Scarborough,Woburn,43.771,-79.2169
4,M1H,Scarborough,Cedarbrae,43.7731,-79.2395
