#### Read the Toronto postal codes from the Wikipedia page using the BeautifulSoup library

In [1]:
import requests
from bs4 import BeautifulSoup

# Download the Wikipedia page that lists the Canadian postal codes starting with "M".
# The timeout keeps the cell from hanging indefinitely on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of silently parsing an error page.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()

# NOTE: despite its name, `url` holds the page's HTML text, not the address.
# The name is kept so that any other cell referring to it keeps working.
url = response.text

# Parse the HTML with the lxml parser; later cells read the table from `soup`.
soup = BeautifulSoup(url,'lxml')

#### Read the table from the soup object and convert it into a data frame

In [20]:
import pandas as pd

# Locate the postal-code table by its CSS class string.
torontoTable = soup.find('table', {'class':'wikitable sortable'})
if torontoTable is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the next line.
    raise ValueError("No table with class 'wikitable sortable' was found in the downloaded page")

rows = torontoTable.find_all('tr')

# Collect the stripped text of every <td> cell, one list per table row.
# Rows without any <td> cells (e.g. a header row made of <th>) are skipped.
allData = []
for row in rows:
    cells = row.find_all('td')
    if cells:
        allData.append([cell.text.strip() for cell in cells])

# Each collected row is expected to have exactly three cells, matching these columns.
df = pd.DataFrame(allData, columns=['PostalCode', 'Borough', 'Neighbourhood'])
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


#### Clean the data: where the neighbourhood is 'Not assigned', use the borough name instead; drop rows whose borough is 'Not assigned'; then combine all neighbourhoods that share a postal code into a single row

In [21]:
# Rows that have no neighbourhood get the borough name as their neighbourhood.
# A single .loc assignment is used instead of chained indexing
# (df['Neighbourhood'][...] = ...), which raises SettingWithCopyWarning and
# does not modify df at all when pandas Copy-on-Write is enabled.
neighbourNA = df.index[df['Neighbourhood'] == 'Not assigned']
df.loc[neighbourNA, 'Neighbourhood'] = df.loc[neighbourNA, 'Borough']

# Rows without a borough carry no usable information, so remove them from df.
boroughNA = df.index[df['Borough'] == 'Not assigned']
df.drop(boroughNA, inplace=True)

# Join the neighbourhoods of each (postal code, borough) pair into one
# comma-separated string, in their order of appearance.
combined = df.groupby(['PostalCode', 'Borough'])['Neighbourhood'].apply(', '.join)

# Turn the grouped Series back into a flat frame with PostalCode and Borough as columns.
df2 = combined.reset_index()
df2.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


#### Read the CSV file that maps each postal code to its latitude and longitude

In [None]:
# Load the geospatial lookup table from its download address.
# NOTE: the variable name `csv` is kept because later cells refer to it.
geospatial_source = 'http://cocl.us/Geospatial_data'
csv = pd.read_csv(geospatial_source)
csv.head()

#### Rename the postal code column of the CSV data

In [26]:
# Rename the 'Postal Code' column of the geospatial frame to 'LLPostalCode'
# (modifies `csv` in place).
column_renames = {'Postal Code': 'LLPostalCode'}
csv.rename(columns=column_renames, inplace=True)

#### Merge the two data frames and drop the extra postal code column

In [32]:
# Attach latitude/longitude to each postal code by matching on the code itself.
# A column-wise pd.concat would pair rows purely by index position, which is
# only correct if both frames happen to have the same order and length;
# merging on the key guarantees each row gets its own coordinates.
result = df2.merge(csv, left_on='PostalCode', right_on='LLPostalCode', how='inner')

# The key from the geospatial frame duplicates 'PostalCode', so drop it.
result = result.drop(columns=['LLPostalCode'])
result.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848
