### Importing and installing necessary packages

In [34]:
import pandas as pd
import numpy as np
import json
from urllib import request
!pip install bs4
from bs4 import BeautifulSoup



### Scraping the table from the URL with Beautiful Soup

In [35]:

# Download the Wikipedia page and parse its HTML into `soup`.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# The context manager closes the HTTP response even if parsing raises.
with request.urlopen(url) as response:
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns), so the parse tree can
    # differ from one machine to the next.
    soup = BeautifulSoup(response, 'html.parser')

In [36]:
# First <table> element whose class attribute is exactly 'wikitable sortable'.
table = soup.find('table', class_='wikitable sortable')

In [37]:
# Gather the text of every <td> cell, one inner list per <tr> row.
table_data = []
for table_row in table.find_all("tr"):
    cell_texts = [cell.text for cell in table_row.find_all("td")]
    table_data.append(cell_texts)
# Discard the first row (presumably the header row, which has no <td> cells).
del table_data[0]

### Converting the table to a DataFrame, assigning column names, and removing the trailing newline from each cell

In [38]:
# Build the frame from the scraped rows and name its three columns up front.
canada_frame = pd.DataFrame(
    table_data,
    columns=['PostalCode', 'Borough', 'Neighborhood'],
)
print(np.shape(canada_frame))

# Strip surrounding whitespace (the scraped cell text carries a trailing
# newline) from every column.  This is vectorised and, unlike slicing off the
# last character, never truncates a value that has no trailing newline.
for column in canada_frame.columns:
    canada_frame[column] = canada_frame[column].str.strip()

canada_frame

(180, 3)


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


### Removing rows with an unassigned borough

In [39]:
# Index labels of the rows whose borough is the placeholder 'Not assigned'.
is_unassigned = canada_frame['Borough'].eq('Not assigned')
indexes = canada_frame.loc[is_unassigned].index
print(indexes.shape)

# Drop those rows in place, then renumber the remaining rows from 0.
canada_frame.drop(index=indexes, inplace=True)
canada_frame.reset_index(drop=True, inplace=True)
print(canada_frame.shape)

(77,)
(103, 3)


In [40]:
# Display any remaining rows whose neighborhood is still 'Not assigned'.
canada_frame.loc[canada_frame['Neighborhood'].eq('Not assigned')]

Unnamed: 0,PostalCode,Borough,Neighborhood


### All unassigned neighborhoods appear to have been removed along with the unassigned boroughs: every row with an unassigned neighborhood also had an unassigned borough

In [41]:
# Where the neighborhood is 'Not assigned', fall back to the borough name.
# A boolean mask with .loc selects rows by label, so this stays correct even
# if the frame's index is not a fresh 0..n-1 range (the row-wise iloc version
# silently relied on that).
neighborhood_missing = canada_frame['Neighborhood'] == 'Not assigned'
canada_frame.loc[neighborhood_missing, 'Neighborhood'] = canada_frame.loc[neighborhood_missing, 'Borough']

canada_frame

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [42]:
# Report the (rows, columns) shape of the cleaned frame.
print(canada_frame.shape)

(103, 3)


In [43]:
# Save the cleaned frame to CSV; with to_csv's defaults the row index is
# written as an unnamed first column.
canada_frame.to_csv(path_or_buf='torontoneighborhood.csv')