# Neighbourhoods in Toronto

In [2]:
import pandas as pd
import numpy as np
import requests


In [3]:
from bs4 import BeautifulSoup

# Download the Wikipedia page listing the "M" postal codes.
# A timeout stops the cell from hanging forever, and raise_for_status()
# surfaces HTTP errors here instead of letting an error page be parsed.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
website_text = response.text

# The document is HTML, so use an HTML parser rather than the XML one.
soup = BeautifulSoup(website_text, 'html.parser')


In [4]:
# Locate the table whose class attribute is 'wikitable sortable' and collect
# all of its <tr> rows.
table = soup.find('table', {'class': 'wikitable sortable'})
if table is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError(
        "No table with class 'wikitable sortable' was found in the downloaded page"
    )
table_rows = table.find_all('tr')


In [5]:
# Build one list of whitespace-stripped cell texts per table row.
# A row containing no <td> elements contributes an empty list.
data = [
    [cell.text.strip() for cell in tr.find_all('td')]
    for tr in table_rows
]

In [6]:
# Turn the scraped rows into a three-column frame, then keep only the rows
# that actually have a postal code (rows with a missing PostalCode are bad rows).
column_names = ['PostalCode', 'Borough', 'Neighbourhood']
df = pd.DataFrame(data, columns=column_names)
has_postal_code = df['PostalCode'].notna()
df = df.loc[has_postal_code]

In [7]:
# Preview the first rows of the scraped table after bad rows were filtered out.
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront


In [8]:
# Keep only the rows whose borough is assigned.
# A boolean mask is used instead of set_index(...).drop("Not assigned") because
# drop raises KeyError when no row carries that label; the mask simply keeps
# everything in that case.
borough_assigned = df['Borough'] != 'Not assigned'

# Put Borough first and renumber the rows from 0, matching the layout produced
# by the set_index/reset_index round trip.
ordered_columns = ['Borough'] + [col for col in df.columns if col != 'Borough']
df2 = df.loc[borough_assigned, ordered_columns].reset_index(drop=True)
df2.head(10)

Unnamed: 0,Borough,PostalCode,Neighbourhood
0,North York,M3A,Parkwoods
1,North York,M4A,Victoria Village
2,Downtown Toronto,M5A,Harbourfront
3,Downtown Toronto,M5A,Regent Park
4,North York,M6A,Lawrence Heights
5,North York,M6A,Lawrence Manor
6,Queen's Park,M7A,Not assigned
7,Etobicoke,M9A,Islington Avenue
8,Scarborough,M1B,Rouge
9,Scarborough,M1B,Malvern


In [13]:
# Collapse df2 to one row per (PostalCode, Borough) pair, joining the
# neighbourhood names of each group into a single comma-separated string.
neighbourhoods_joined = (
    df2.groupby(['PostalCode', 'Borough'])['Neighbourhood']
    .agg(lambda names: ','.join(map(str, names)))
)
df3 = neighbourhoods_joined.reset_index()

# Where the joined neighbourhood is exactly 'Not assigned', use the borough
# name as the neighbourhood instead.
is_unassigned = df3['Neighbourhood'] == 'Not assigned'
df3['Neighbourhood'] = df3['Neighbourhood'].mask(is_unassigned, df3['Borough'])
df3.head()


Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [10]:
# Report the (rows, columns) shape of the grouped frame df3.
print('The shape of the dataframe is:', df3.shape)

The shape of the dataframe is: (103, 3)


# Task 2: Add latitude and longitude to each postal code

In [11]:
# Load the geospatial CSV (postal code, latitude, longitude) straight from its URL.
geospatial_url = 'https://cocl.us/Geospatial_data'
df4 = pd.read_csv(geospatial_url)

In [12]:
# Preview the first rows of the geospatial table as loaded.
df4.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [23]:
# Work on an independent copy: plain assignment would only create a second
# name for the same object, so columns added to df5 would also appear on df3.
df5 = df3.copy()

In [34]:
# Swap spaces for underscores in every column label of df4
# (for example 'Postal Code' becomes 'Postal_Code').
df4.columns = df4.columns.str.replace(' ', '_', regex=False)
df4.head()

Unnamed: 0,Postal_Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [43]:
# Add empty coordinate columns to df5.
# A scalar assignment fills every row of df5; building the placeholder from
# `df` produced a Series carrying df's index, which does not line up with
# df5's rows. NaN is used instead of '' so the columns stay numeric.
df5['Latitude'] = np.nan
df5['Longitude'] = np.nan
df5.head()


Unnamed: 0,PostalCode,Borough,Age,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,True,"Rouge,Malvern",,
1,M1C,Scarborough,True,"Highland Creek,Rouge Hill,Port Union",,
2,M1E,Scarborough,True,"Guildwood,Morningside,West Hill",,
3,M1G,Scarborough,True,Woburn,,
4,M1H,Scarborough,True,Cedarbrae,,


In [44]:
# Fill in the coordinates by looking each postal code up in df4.
# Comparing df5.PostalCode == df4.Postal_Code row by row only works when both
# frames have the same length and ordering; a lookup keyed on the postal code
# is independent of row order.
# drop_duplicates guarantees the unique index that Series.map requires.
coords_by_code = df4.drop_duplicates('Postal_Code').set_index('Postal_Code')

# Postal codes missing from df4 end up as NaN.
df5['Latitude'] = df5['PostalCode'].map(coords_by_code['Latitude'])
df5['Longitude'] = df5['PostalCode'].map(coords_by_code['Longitude'])
df5.head()

Unnamed: 0,PostalCode,Borough,Age,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,True,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,True,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,True,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,True,Woburn,43.770992,-79.216917
4,M1H,Scarborough,True,Cedarbrae,43.773136,-79.239476


In [48]:
# Show the first rows of df5 again; this repeats the preview from the previous cell.
df5.head()

Unnamed: 0,PostalCode,Borough,Age,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,True,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,True,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,True,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,True,Woburn,43.770992,-79.216917
4,M1H,Scarborough,True,Cedarbrae,43.773136,-79.239476
