Import all required libraries

In [1]:
from bs4 import BeautifulSoup
import requests

In [2]:
import pandas as pd # library for data analsysis

Use requests to get the source of the webpage

In [15]:
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(source, 'lxml')
    

Create a dataframe for holding the data with 3 columns

In [16]:
# define the dataframe columns
column_names = ['PostalCode', 'Borough','Neighborhood'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

Iterate through all rows and columns in the first table from the webpage, and save the data to dataframe

In [17]:
l = []
for tr in soup.table.find_all('tr'):
    td = tr.find_all('td')
    row = [tr.text.rstrip() for tr in td]
    l.append(row)
neighborhoods = pd.DataFrame(l, columns=column_names)

Remove rows where Borough was not assigned

In [18]:
neighborhoods = neighborhoods[neighborhoods.Borough != 'Not assigned']

Remove the first row, since it contains the column headers

In [19]:
neighborhoods = neighborhoods[1:]

For neighborhoods that were not assigned, replace the name with borough name

In [20]:
neighborhoods["Neighborhood"].replace("Not assigned", neighborhoods.Borough, inplace=True)

In [21]:
neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M5A,Downtown Toronto,Regent Park
7,M6A,North York,Lawrence Heights


Group based on the postal code and merge the neighborhoods separating with commas

In [22]:
neighborhoods = neighborhoods.groupby(['PostalCode','Borough'])['Neighborhood'].apply(', '.join).reset_index()


Check the dimensions of the resulting data frame

In [23]:
neighborhoods.shape

(103, 3)

In [24]:
filename = "http://cocl.us/Geospatial_data"
headers = ["PostalCode","Latitude", "Longitude"]
df = pd.read_csv(filename, names = headers)

In [25]:
neighborhood_full = pd.merge(neighborhoods, df, on='PostalCode')

neighborhood_full.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.8066863,-79.1943534
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.7845351,-79.1604971
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.7635726,-79.1887115
3,M1G,Scarborough,Woburn,43.7709921,-79.2169174
4,M1H,Scarborough,Cedarbrae,43.773136,-79.2394761
