# Neighbourhoods in Toronto

In [106]:
import pandas as pd
import numpy as np
import requests


In [107]:
from bs4 import BeautifulSoup

# Wikipedia page used as the data source for this notebook.
WIKI_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# A timeout keeps the cell from hanging indefinitely on a stalled connection;
# raise_for_status() stops here on an HTTP error instead of parsing an error page.
response = requests.get(WIKI_URL, timeout=30)
response.raise_for_status()
website_text = response.text

# The downloaded document is HTML, so use an HTML parser rather than 'xml'.
soup = BeautifulSoup(website_text, 'html.parser')


In [108]:
# Locate the table whose class attribute is exactly "wikitable sortable".
table = soup.find('table', {'class': 'wikitable sortable'})
if table is None:
    # find() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the find_all() call below.
    raise ValueError("No table with class 'wikitable sortable' was found in the downloaded page")

# All <tr> elements contained in that table.
table_rows = table.find_all('tr')


In [109]:
# One inner list per table row, holding the whitespace-stripped text of each
# <td> cell; a row without any <td> cells contributes an empty list.
data = [
    [cell.text.strip() for cell in tr.find_all('td')]
    for tr in table_rows
]

In [110]:
# Build the frame from the scraped rows, then keep only the rows whose
# PostalCode is present (this filters out the bad rows).
df = (
    pd.DataFrame(data, columns=['PostalCode', 'Borough', 'Neighbourhood'])
    .loc[lambda frame: frame['PostalCode'].notna()]
)

In [111]:
# Preview the first five rows (the default for head()) of the scraped table.
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront


In [112]:
# Keep only the rows whose Borough is not the "Not assigned" placeholder.
# A boolean mask simply returns every row when no such placeholder exists,
# whereas dropping by index label would raise KeyError in that case.
# Borough is listed first in the column order, and the index is renumbered from 0.
df2 = (
    df.loc[df["Borough"] != "Not assigned", ["Borough", "PostalCode", "Neighbourhood"]]
    .reset_index(drop=True)
)
df2.head(10)

Unnamed: 0,Borough,PostalCode,Neighbourhood
0,North York,M3A,Parkwoods
1,North York,M4A,Victoria Village
2,Downtown Toronto,M5A,Harbourfront
3,Downtown Toronto,M5A,Regent Park
4,North York,M6A,Lawrence Heights
5,North York,M6A,Lawrence Manor
6,Queen's Park,M7A,Not assigned
7,Etobicoke,M9A,Islington Avenue
8,Scarborough,M1B,Rouge
9,Scarborough,M1B,Malvern


In [113]:
# Collapse to one row per (PostalCode, Borough), gathering that pair's
# neighbourhood names into a list.
df3 = (
    df2.groupby(['PostalCode', 'Borough'])['Neighbourhood']
    .apply(list)
    .to_frame()
    .reset_index()
)

# Turn each list of names into a single comma-separated string.
df3['Neighbourhood'] = df3['Neighbourhood'].map(
    lambda names: ','.join(map(str, names))
)

# Where the joined value is exactly 'Not assigned', use the Borough name instead.
is_unassigned = df3['Neighbourhood'] == 'Not assigned'
df3['Neighbourhood'] = df3['Neighbourhood'].mask(is_unassigned, df3['Borough'])

df3.head(100)


Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


In [114]:
# Report the (rows, columns) shape of the grouped frame df3.
print('The shape of the dataframe is:', df3.shape)

The shape of the dataframe is: (103, 3)
