# Toronto Neighbourhoods and Postal Codes

In [None]:
import time
from io import StringIO

import pandas as pd
import requests  # this module helps us to download a web page
from bs4 import BeautifulSoup # this module helps in web scrapping.

In [2]:
#!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

This step defines a function to geolocate an address or postal code

In [3]:
import geocoder 

In [4]:
def geolocate(address, max_attempts=10, retry_delay=1.0):
    """Return the (latitude, longitude) of an address using the ArcGIS geocoder.

    The lookup is retried when the geocoder yields no coordinates, but only a
    bounded number of times, so an address that cannot be resolved (or an
    unreachable service) raises instead of looping forever.

    Parameters
    ----------
    address : str
        Free-form address or postal code passed to ``geocoder.arcgis``.
    max_attempts : int, optional
        Maximum number of lookups before giving up (must be >= 1).
    retry_delay : float, optional
        Seconds to wait between failed attempts; 0 disables the pause.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` taken from the result's ``latlng``.

    Raises
    ------
    ValueError
        If ``max_attempts`` is smaller than 1.
    RuntimeError
        If no coordinates were obtained after ``max_attempts`` lookups.
    """
    if max_attempts < 1:
        raise ValueError('max_attempts must be at least 1')

    for attempt in range(1, max_attempts + 1):
        lat_lng_coords = geocoder.arcgis(address).latlng
        # Treat both None and an empty sequence as "no result" so that the
        # indexing below can never fail on an empty answer.
        if lat_lng_coords:
            latitude = lat_lng_coords[0]
            longitude = lat_lng_coords[1]
            return latitude, longitude
        # Pause before retrying so a failing service is not hammered.
        if attempt < max_attempts and retry_delay > 0:
            time.sleep(retry_delay)

    raise RuntimeError(
        'Could not geolocate {!r} after {} attempts'.format(address, max_attempts)
    )

URL from which Toronto's postal codes are taken

In [5]:
# Wikipedia page listing the Canadian postal codes that start with "M".
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

In [6]:
# Download the page. The timeout keeps the cell from hanging indefinitely on a
# stalled connection, and raise_for_status() stops on an HTTP error instead of
# silently handing an error page to the parser.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.text

In [7]:
# Parse the downloaded HTML with the html5lib parser.
# NOTE(review): `soup` is not referenced by any later cell visible here; the
# tables are extracted with pandas.read_html instead.
soup = BeautifulSoup(data,"html5lib")  # create a soup object using the variable 'data'

Using the read_html method to extract the tables from the URL

In [8]:
# Parse the tables out of the HTML already downloaded into `data` rather than
# fetching the same URL a second time. The text is wrapped in StringIO because
# read_html expects a URL, path or file-like object.
dataframe_list = pd.read_html(StringIO(data), flavor='bs4')

## Toronto's Postal Code table

In [9]:
# Take the first table parsed from the page and display it.
postal_codes = dataframe_list[0]
postal_codes

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,M1ANot assigned,M2ANot assigned,M3ANorth York(Parkwoods),M4ANorth York(Victoria Village),M5ADowntown Toronto(Regent Park / Harbourfront),M6ANorth York(Lawrence Manor / Lawrence Heights),M7AQueen's Park(Ontario Provincial Government),M8ANot assigned,M9AEtobicoke(Islington Avenue)
1,M1BScarborough(Malvern / Rouge),M2BNot assigned,M3BNorth York(Don Mills)North,M4BEast York(Parkview Hill / Woodbine Gardens),"M5BDowntown Toronto(Garden District, Ryerson)",M6BNorth York(Glencairn),M7BNot assigned,M8BNot assigned,M9BEtobicoke(West Deane Park / Princess Garden...
2,M1CScarborough(Rouge Hill / Port Union / Highl...,M2CNot assigned,M3CNorth York(Don Mills)South(Flemingdon Park),M4CEast York(Woodbine Heights),M5CDowntown Toronto(St. James Town),M6CYork(Humewood-Cedarvale),M7CNot assigned,M8CNot assigned,M9CEtobicoke(Eringate / Bloordale Gardens / Ol...
3,M1EScarborough(Guildwood / Morningside / West ...,M2ENot assigned,M3ENot assigned,M4EEast Toronto(The Beaches),M5EDowntown Toronto(Berczy Park),M6EYork(Caledonia-Fairbanks),M7ENot assigned,M8ENot assigned,M9ENot assigned
4,M1GScarborough(Woburn),M2GNot assigned,M3GNot assigned,M4GEast York(Leaside),M5GDowntown Toronto(Central Bay Street),M6GDowntown Toronto(Christie),M7GNot assigned,M8GNot assigned,M9GNot assigned
5,M1HScarborough(Cedarbrae),M2HNorth York(Hillcrest Village),M3HNorth York(Bathurst Manor / Wilson Heights ...,M4HEast York(Thorncliffe Park),M5HDowntown Toronto(Richmond / Adelaide / King),M6HWest Toronto(Dufferin / Dovercourt Village),M7HNot assigned,M8HNot assigned,M9HNot assigned
6,M1JScarborough(Scarborough Village),M2JNorth York(Fairview / Henry Farm / Oriole),M3JNorth York(Northwood Park / York University),M4JEast YorkEast Toronto(The Danforth East),M5JDowntown Toronto(Harbourfront East / Union ...,M6JWest Toronto(Little Portugal / Trinity),M7JNot assigned,M8JNot assigned,M9JNot assigned
7,M1KScarborough(Kennedy Park / Ionview / East B...,M2KNorth York(Bayview Village),M3KNorth York(Downsview)East (CFB Toronto),M4KEast Toronto(The Danforth West / Riverdale),M5KDowntown Toronto(Toronto Dominion Centre / ...,M6KWest Toronto(Brockton / Parkdale Village / ...,M7KNot assigned,M8KNot assigned,M9KNot assigned
8,M1LScarborough(Golden Mile / Clairlea / Oakridge),M2LNorth York(York Mills / Silver Hills),M3LNorth York(Downsview)West,M4LEast Toronto(India Bazaar / The Beaches West),M5LDowntown Toronto(Commerce Court / Victoria ...,M6LNorth York(North Park / Maple Leaf Park / U...,M7LNot assigned,M8LNot assigned,M9LNorth York(Humber Summit)
9,M1MScarborough(Cliffside / Cliffcrest / Scarbo...,M2MNorth York(Willowdale / Newtonbrook),M3MNorth York(Downsview)Central,M4MEast Toronto(Studio District),M5MNorth York(Bedford Park / Lawrence Manor East),M6MYork(Del Ray / Mount Dennis / Keelsdale and...,M7MNot assigned,M8MNot assigned,M9MNorth York(Humberlea / Emery)


Exploring the data of Postal Code to select Boroughs and Neighbourhoods

In [10]:
# (rows, columns) of the raw postal-code table.
postal_codes.shape

(20, 9)

In [11]:
# Characters 3-16 of the first cell, i.e. the text that follows the
# 3-character postal code; used to see how an unassigned code is labelled.
postal_codes.iloc[0,0][3:17]

'Not assigned'

In [12]:
# Empty frame with the three target columns, displayed to confirm its layout.
Toronto = pd.DataFrame(columns = ['postal_code', 'borough', 'neighbourhoods']) # Create Dataframe for data
Toronto

Unnamed: 0,postal_code,borough,neighbourhoods


The following code extracts the borough and neighbourhood information from the postal code table, and changes the separator between neighbourhoods from "/" to ",".

In [13]:
def _split_region(region):
    """Split one cell of the postal-code table into its three parts.

    A cell looks like ``'M5ADowntown Toronto(Regent Park / Harbourfront)'``:
    the first three characters are the postal code, the text up to the first
    ``'('`` is the borough, and everything after it (with parentheses removed
    and ``'/'`` replaced by ``','``) is the neighbourhood list.

    Returns
    -------
    tuple
        ``(postal_code, borough, neighbourhoods)`` as strings.
    """
    postal_code = region[0:3]
    # partition() splits at the first '(' only; a cell without parentheses
    # yields the whole remainder as borough and an empty neighbourhood string.
    borough, _, remainder = region[3:].partition('(')
    neighbourhoods = remainder.replace('(', '').replace(')', '').replace('/', ',')
    return postal_code, borough, neighbourhoods


# Flatten the wide table (one postal code per cell) into one record per cell
# and build the frame in a single step, instead of growing it cell by cell.
# Non-string cells (e.g. NaN padding) carry no postal code and are skipped.
records = [
    _split_region(region)
    for _, row in postal_codes.iterrows()
    for region in row
    if isinstance(region, str)
]
Toronto = pd.DataFrame(records, columns=['postal_code', 'borough', 'neighbourhoods'])

## Segmenting and cleaning the data to identify values to be corrected or eliminated

In [14]:
# Count rows per borough value to spot unassigned codes and malformed names.
Toronto.borough.value_counts()

Not assigned                                                    77
North York                                                      24
Scarborough                                                     17
Downtown Toronto                                                17
Etobicoke                                                       11
Central Toronto                                                  9
West Toronto                                                     6
York                                                             5
East Toronto                                                     4
East York                                                        4
East TorontoBusiness reply mail Processing Centre969 Eastern     1
Queen's Park                                                     1
Downtown TorontoStn A PO Boxes25 The Esplanade                   1
East YorkEast Toronto                                            1
MississaugaCanada Post Gateway Processing Centre              

In [15]:
# Count occurrences of each postal code to check for duplicates.
Toronto.postal_code.value_counts()

M3G    1
M7P    1
M4C    1
M4T    1
M8T    1
      ..
M9R    1
M2T    1
M3X    1
M5A    1
M9Z    1
Name: postal_code, Length: 180, dtype: int64

Remove the rows whose borough is 'Not assigned' from the dataframe

In [16]:
# Keep only the rows whose borough is something other than 'Not assigned'.
assigned_mask = Toronto['borough'] != 'Not assigned'
Toronto = Toronto[assigned_mask]

In [17]:
# Renumber the rows from 0 after the filter; drop=True discards the old index
# instead of keeping it as a column.
Toronto = Toronto.reset_index(drop=True)

In [18]:
# Display the filtered frame.
Toronto

Unnamed: 0,postal_code,borough,neighbourhoods
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East TorontoBusiness reply mail Processing Cen...,Enclave of M4L
101,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,..."


The boroughs below have invalid names; the next cell replaces them with the corrected names.

In [19]:
# Map each malformed borough string to its corrected name.
borough_fixes = {
    'EtobicokeNorthwest': 'Etobicoke Northwest',
    'East YorkEast Toronto': 'East York',
    'MississaugaCanada Post Gateway Processing Centre': 'Mississauga',
    'East TorontoBusiness reply mail Processing Centre969 Eastern': 'East Toronto',
    'Downtown TorontoStn A PO Boxes25 The Esplanade': 'Downtown Toronto',
}
# Assign the result back to the column. Calling replace(..., inplace=True) on
# Toronto['borough'] mutates an intermediate object, which does not update the
# frame when pandas Copy-on-Write is active.
Toronto['borough'] = Toronto['borough'].replace(borough_fixes)

In [20]:
# Re-count rows per borough to verify the corrected names.
Toronto.borough.value_counts()

North York             24
Downtown Toronto       18
Scarborough            17
Etobicoke              11
Central Toronto         9
West Toronto            6
East Toronto            5
York                    5
East York               5
Queen's Park            1
Etobicoke Northwest     1
Mississauga             1
Name: borough, dtype: int64

In [21]:
# Display the frame after the borough names were corrected.
Toronto

Unnamed: 0,postal_code,borough,neighbourhoods
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,Enclave of M4L
101,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,..."


In [22]:
# (rows, columns) of the cleaned frame.
Toronto.shape

(103, 3)

## Geolocate the postal codes to get their latitude and longitude

In [23]:
# Geocode every postal code once, then split the resulting
# (latitude, longitude) pairs into the two new columns.
addresses = Toronto['postal_code'].tolist()
locations = [geolocate(code +', Toronto , Ontario') for code in addresses]
lat = [location[0] for location in locations]
lon = [location[1] for location in locations]
Toronto['latitude']  = lat
Toronto['longitude'] = lon

In [24]:
# The 30 rows with the smallest longitude values (ascending sort).
Toronto.sort_values('longitude').head(30)

Unnamed: 0,postal_code,borough,neighbourhoods,latitude,longitude
89,M9V,Etobicoke,"South Steeles , Silverstone , Humbergate , Jam...",43.74453,-79.58624
94,M9W,Etobicoke Northwest,"Clairville , Humberwood , Woodbine Downs , Wes...",43.71174,-79.57941
17,M9C,Etobicoke,"Eringate , Bloordale Gardens , Old Burnhamthor...",43.64857,-79.57825
77,M9R,Etobicoke,"Kingsview Village , St. Phillips , Martin Grov...",43.68681,-79.55728
50,M9L,North York,Humber Summit,43.75948,-79.55707
11,M9B,Etobicoke,"West Deane Park , Princess Gardens , Martin Gr...",43.65034,-79.55362
93,M8W,Etobicoke,"Alderwood , Long Branch",43.60124,-79.53879
57,M9M,North York,"Humberlea , Emery",43.73367,-79.53769
70,M9P,Etobicoke,Westmount,43.6963,-79.52926
5,M9A,Etobicoke,Islington Avenue,43.66263,-79.52831


In [25]:
# Rows per borough after the latitude/longitude columns were added.
Toronto.borough.value_counts()

North York             24
Downtown Toronto       18
Scarborough            17
Etobicoke              11
Central Toronto         9
West Toronto            6
East Toronto            5
York                    5
East York               5
Queen's Park            1
Etobicoke Northwest     1
Mississauga             1
Name: borough, dtype: int64

## MAP of Toronto with Boroughs/Postal Codes

In [26]:
# Geocode the city itself; the coordinates are used to centre the map.
city = 'Toronto, Ontario'

coords = geolocate(city)
latitude_tor, longitude_tor = coords[0], coords[1]

In [27]:
# Report the coordinates that were returned for `city`.
message = 'The geograpical coordinate of {} are {}, {}.'
print(message.format(city, latitude_tor, longitude_tor))

The geograpical coordinate of Toronto, Ontario are 43.648690000000045, -79.38543999999996.


In [28]:
# Base map positioned at the coordinates geocoded for the city.
map_toronto = folium.Map(location=[latitude_tor, longitude_tor], zoom_start=10)

In [29]:
# Add one circle marker per postal code, with a popup naming its
# neighbourhoods and borough.
marker_rows = zip(Toronto['latitude'], Toronto['longitude'], Toronto['borough'], Toronto['neighbourhoods'])
for lat, lng, borough, neighborhood in marker_rows:
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

In [30]:
# Render the map with the markers added above.
map_toronto

In [31]:
# Display the final frame, including the latitude and longitude columns.
Toronto

Unnamed: 0,postal_code,borough,neighbourhoods,latitude,longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65512,-79.36264
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.72327,-79.45042
4,M7A,Queen's Park,Ontario Provincial Government,43.66253,-79.39188
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North",43.65319,-79.51113
99,M4Y,Downtown Toronto,Church and Wellesley,43.66659,-79.38133
100,M7Y,East Toronto,Enclave of M4L,43.64869,-79.38544
101,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,...",43.63278,-79.48945


In [32]:
# (rows, columns) of the final frame.
Toronto.shape

(103, 5)