### 1) Importing the Necessary Libraries

In [1]:
import io
import urllib.request

import bs4
import numpy as np
import pandas as pd

### 2) Using urllib to read the html and using Beautiful Soup to parse it

In [2]:
# Download the Wikipedia list of Canadian postal codes beginning with "M"
# and parse the markup into a BeautifulSoup tree.
url = 'http://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# The context manager closes the HTTP response as soon as the body is read,
# instead of leaving the connection open until garbage collection.
with urllib.request.urlopen(url) as response:
    html = response.read()

soup = bs4.BeautifulSoup(html, 'html.parser')

### 3) Filtering table tags and getting the html for the first table, then reading this html into a DataFrame df

In [3]:
# Collect every <table> element in the parsed page and keep the first one.
# Calling find_all explicitly is the long form of calling the soup object.
tags = soup.find_all('table')
torontoTable = tags[0]

In [4]:
# pandas.read_html returns a list of DataFrames, one per <table> it finds.
# torontoTable is a single table element, so the first entry is the one we
# want; indexing immediately keeps `df` from ever being bound to a list.
# The markup is wrapped in StringIO because handing literal HTML text
# straight to read_html is deprecated in recent pandas releases.
df = pd.read_html(io.StringIO(str(torontoTable)))[0]
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### 4) Processing the DataFrame to exclude rows with unassigned boroughs

In [5]:
# Treat the 'Not assigned' placeholder as missing, then drop only the rows
# whose Borough is missing. Limiting dropna to the Borough column keeps rows
# that merely have a NaN in some other column.
df = (
    df.assign(Borough=df['Borough'].replace({'Not assigned': np.nan}))
    .dropna(subset=['Borough'])
    .reset_index(drop=True)  # renumber from 0 without keeping the old index
)
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Here I have printed the number of rows.

In [6]:
# Report how many rows the DataFrame currently holds.
print('There are {} rows in the DataFrame'.format(len(df)))

There are 103 rows in the DataFrame


________________________________________________________________________________________________________________________________________

### 1) Initializing the Latitude and Longitude columns so that they can be filled in the next step

In [10]:
# Create both coordinate columns up front, filled with a 0.0 placeholder
# that the next step overwrites.
for coord_column in ('Latitude', 'Longitude'):
    df[coord_column] = 0.0

### 2) Using a CSV file of geospatial data to add latitude and longitude values

In [11]:
# Load the postal-code -> coordinates lookup table, keyed by postal code.
temp = pd.read_csv('https://cocl.us/Geospatial_data').set_index('Postal Code')

postal_codes = df['Postal Code']

# Fail loudly (as a per-row lookup would) if any postal code has no entry in
# the lookup table, rather than silently leaving NaN coordinates behind.
unknown_codes = postal_codes[~postal_codes.isin(temp.index)]
assert unknown_codes.empty, f'No coordinates for: {unknown_codes.tolist()}'

# Fill both columns in one vectorized pass. Series.map matches on the postal
# code value itself, so it does not rely on df having a 0..n-1 index and
# avoids the scalar-only `.at` accessor entirely.
df['Latitude'] = postal_codes.map(temp['Latitude'])
df['Longitude'] = postal_codes.map(temp['Longitude'])

df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
