# List of postal codes of Canada: M

In [1]:
import requests
from bs4 import BeautifulSoup
import re
import numpy as np

In [2]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response=requests.get(url)
page=response.content
page=BeautifulSoup(page,'html.parser')

### Obtain the data from wikipedia

In [3]:
data1=page.find_all('table',class_="wikitable sortable")
data2=[i.get_text() for i in data1]
data2=[i.split('\n\n\n') for i in data2]
data2=re.sub("\'", "", str(data2), flags=re.I)
data2=re.sub("\"", "", str(data2), flags=re.I)
data2=str(data2).split(',')
data2

['[[\\n\\nPostcode\\nBorough\\nNeighbourhood',
 ' M1A\\nNot assigned\\nNot assigned',
 ' M2A\\nNot assigned\\nNot assigned',
 ' M3A\\nNorth York\\nParkwoods',
 ' M4A\\nNorth York\\nVictoria Village',
 ' M5A\\nDowntown Toronto\\nHarbourfront',
 ' M5A\\nDowntown Toronto\\nRegent Park',
 ' M6A\\nNorth York\\nLawrence Heights',
 ' M6A\\nNorth York\\nLawrence Manor',
 ' M7A\\nQueens Park\\nNot assigned',
 ' M8A\\nNot assigned\\nNot assigned',
 ' M9A\\nEtobicoke\\nIslington Avenue',
 ' M1B\\nScarborough\\nRouge',
 ' M1B\\nScarborough\\nMalvern',
 ' M2B\\nNot assigned\\nNot assigned',
 ' M3B\\nNorth York\\nDon Mills North',
 ' M4B\\nEast York\\nWoodbine Gardens',
 ' M4B\\nEast York\\nParkview Hill',
 ' M5B\\nDowntown Toronto\\nRyerson',
 ' M5B\\nDowntown Toronto\\nGarden District',
 ' M6B\\nNorth York\\nGlencairn',
 ' M7B\\nNot assigned\\nNot assigned',
 ' M8B\\nNot assigned\\nNot assigned',
 ' M9B\\nEtobicoke\\nCloverdale',
 ' M9B\\nEtobicoke\\nIslington',
 ' M9B\\nEtobicoke\\nMartin Grove',

### Get separate lists for postal codes, borough and neighborhood

In [129]:
data=[i.split('\\n') for i in data2]
PostalCode=[]
for i in data:
    PostalCode.append(i[0])
PostalCode=PostalCode[1:]
Borough=[]
for i in data:
    Borough.append(i[1])
Borough=Borough[1:]
Neighborhood=[]
for i in data:
    Neighborhood.append(i[2])

### Tranform the data into a pandas dataframe

In [44]:
import pandas as pd
table=list(zip(PostalCode,Borough,Neighborhood))
df = pd.DataFrame(table, columns=['PostalCode','Borough','Neighborhood'])
df['PostalCode'] = df['PostalCode'].str.lstrip() #remove white space  at the beginning  of PostalCode
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Postcode
1,M2A,Not assigned,Not assigned
2,M3A,North York,Not assigned
3,M4A,North York,Parkwoods
4,M5A,Downtown Toronto,Victoria Village


In [45]:
df=df[df.Borough != 'Not assigned']   #drop not assigned 
df['Neighborhood'].replace(['Not assigned'], np.nan ,inplace=True) #replace not assigned with nun
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,
3,M4A,North York,Parkwoods
4,M5A,Downtown Toronto,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M6A,North York,Regent Park
7,M6A,North York,Lawrence Heights
8,M7A,Queens Park,Lawrence Manor
10,M9A,Etobicoke,
11,M1B,Scarborough,Islington Avenue
12,M1B,Scarborough,Rouge


In [46]:
 #fill null values of the neighborhood column with the corresponding values of borough column
df['Neighborhood'] = df['Neighborhood'].fillna(df['Neighborhood']).fillna(df['Borough'])
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,North York
3,M4A,North York,Parkwoods
4,M5A,Downtown Toronto,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M6A,North York,Regent Park
7,M6A,North York,Lawrence Heights
8,M7A,Queens Park,Lawrence Manor
10,M9A,Etobicoke,Etobicoke
11,M1B,Scarborough,Islington Avenue
12,M1B,Scarborough,Rouge


In [128]:
# Combine two rows of Neighborhood column into one row grouping by Postcode, separated with a comma, the first value of Borough
# was taken as it is the same for the same postcodes
df = df.groupby('PostalCode').agg({'Borough':'first', 
                             'Neighborhood': ', '.join}).reset_index()
df.head(10)
import csv
df.to_csv('Postal codes of Canada.csv')

# Get the latitude and the longitude coordinates of each neighborhood

In [48]:
geo=pd.read_csv('Geospatial_Coordinates.csv')
geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [120]:
# Merge two DataFrames
geo.rename(columns={'Postal Code':'PostalCode'}, inplace=True) ## replace the name of a column
final=pd.merge(df,geo, on='PostalCode')
final.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Islington Avenue, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"West Deane Park, Highland Creek, Rouge Hill",43.784535,-79.160497
2,M1E,Scarborough,"Old Burnhamthorpe, Guildwood, Morningside",43.763573,-79.188711
3,M1G,Scarborough,Scarborough,43.770992,-79.216917
4,M1H,Scarborough,Scarborough,43.773136,-79.239476
5,M1J,Scarborough,Scarborough,43.744734,-79.239476
6,M1K,Scarborough,"Scarborough, East Birchmount Park, Ionview",43.727929,-79.262029
7,M1L,Scarborough,"Scarborough, Clairlea, Golden Mile",43.711112,-79.284577
8,M1M,Scarborough,"Humber Summit, Cliffcrest, Cliffside",43.716316,-79.239476
9,M1N,Scarborough,"Humberlea, Birch Cliff",43.692657,-79.264848


## Create a map of Toronto with neighborhoods superimposed on top

In [27]:
import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans
#!pip install folium
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\FASTcomputer\Anaconda3

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          90 KB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.49-py_0
  geopy              conda-forge/noarch::geopy-1.20.0-py_0



Downloading and Extracting Packages

geographiclib-1.49   | 32 KB     |            |   0% 
geographiclib-1.49   | 32 KB     | ####9      |  50% 
geographiclib-1.49   | 32 KB 

In [37]:
address=final['Borough']
geolocator = Nominatim(user_agent="canada")
for i in address:
    location = geolocator.geocode(i)
latitude = location.latitude
longitude = location.longitude
#print(latitude, longitude)
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

43.67145915 -79.5524920661167


In [38]:
# add markers to map
for lat, lng, borough, neighborhood in zip(final['Latitude'], final['Longitude'], final['Borough'],final['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto