### Neighborhoods in Toronto

Import libraries

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

Download the Wikipedia page that lists the postal codes and neighborhoods of Toronto

In [2]:
# Fetch the Wikipedia page. The timeout keeps the cell from hanging forever on
# a stalled connection, and raise_for_status() stops the notebook here on an
# HTTP error instead of letting an error page be parsed further down.
page = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
page.raise_for_status()

Parse the page's HTML and find the table that holds the postal codes

In [3]:
# Build a parse tree from the raw response bytes using Python's built-in parser.
soup = BeautifulSoup(markup=page.content, features='html.parser')

In [4]:
# Locate the first <table> whose class attribute is exactly "wikitable sortable".
wiki_table = soup.find('table', {'class': 'wikitable sortable'})

# find() returns None when nothing matches; fail here with a clear message
# rather than with an AttributeError when the table is iterated later.
if wiki_table is None:
    raise ValueError("No table with class 'wikitable sortable' found on the page")

In [5]:
# Three independent, initially empty columns that the scraping loop fills.
postal_code, borough, neighborhood = [], [], []

Store the values of the HTML table in lists, skipping any row whose borough is not assigned

In [6]:
# Start from empty lists so that re-running this cell rebuilds the data
# instead of appending a second copy of every row.
postal_code = []
borough = []
neighborhood = []

for row in wiki_table.find_all('tr'):
    cells = row.find_all('td')
    if len(cells) != 3:  # only rows with exactly three data cells are records
        continue

    # get_text() gathers every text node in the cell (plain text and text
    # inside nested tags) and yields '' for an empty cell; split/join then
    # trims the ends and collapses newlines and repeated spaces. This avoids
    # truncated values and a None result for cells that are empty.
    code, boro, hood = (' '.join(cell.get_text().split()) for cell in cells)

    if boro == 'Not assigned':  # ignore postal codes without a borough
        continue

    # NOTE(review): a neighborhood cell reading 'Not assigned' is kept as-is;
    # confirm whether such rows should fall back to the borough name.
    postal_code.append(code)
    borough.append(boro)
    neighborhood.append(hood)

Convert the lists into a pandas dataframe

In [8]:
# Pair the three parallel lists up into (postal code, borough, neighborhood) tuples.
rows = [*zip(postal_code, borough, neighborhood)]

In [9]:
# One DataFrame row per scraped tuple, with a named column for each field.
wiki = pd.DataFrame(
    data=rows,
    columns=['PostalCode', 'Borough', 'Neighborhood'],
)

Combine the neighborhoods that share the same postal code and borough into one comma-separated row

In [10]:
# Collapse the table to one row per (PostalCode, Borough) pair, joining the
# neighborhood names of each group into a single ', '-separated string.
df = (
    wiki
    .groupby(by=['PostalCode', 'Borough'])['Neighborhood']
    .apply(lambda names: ', '.join(names))
    .reset_index()
)
df.head(n=11)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


Display the dataframe shape

In [11]:
# Dimensions of the grouped frame as a (rows, columns) tuple.
df.shape

(103, 3)

Load the CSV file containing the latitude and longitude of each postal code

In [20]:
# Read the coordinates file, resolved relative to the notebook's working directory.
geo = pd.read_csv(filepath_or_buffer='Geospatial_Coordinates.csv')

Merge the neighborhood and coordinate dataframes on the postal code

In [32]:
# Join the coordinates onto the neighborhoods by postal code, then discard the
# right-hand key column, which duplicates 'PostalCode' after the join.
nt = (
    df
    .merge(geo, left_on='PostalCode', right_on='Postal Code')
    .drop(columns=['Postal Code'])
)

In [37]:
# Preview the first rows of the merged frame.
nt.head(n=11)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848
