In [1]:
!pip install beautifulsoup4
from bs4 import BeautifulSoup


[33mYou are using pip version 18.0, however version 18.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
import numpy as np 
import pandas as pd
import requests

In [3]:
# Wikipedia page listing the Canadian postal codes that begin with "M".
wikilink = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

In [4]:
# Download the page. The timeout stops the cell from hanging forever on a
# stalled connection, and raise_for_status() surfaces an HTTP error here
# instead of handing an error page to the parser.
rawwiki = requests.get(wikilink, timeout=30)
rawwiki.raise_for_status()

In [5]:
# Parse the downloaded HTML text with the lxml parser.
page_html = rawwiki.text
soup = BeautifulSoup(page_html, 'lxml')


In [6]:
# Locate the <table> element whose class attribute is "wikitable sortable".
wikitable = soup.find('table', attrs={'class': 'wikitable sortable'})

In [7]:
# Collect the stripped text of every <td> cell, one list per table row.
table_rows = wikitable.find_all('tr')

res = []
for tr in table_rows:
    td = tr.find_all('td')
    # Keep blank cells as '' rather than dropping them: dropping one would
    # shift the remaining values of that row into the wrong columns.
    row = [cell.text.strip() for cell in td]
    # Rows without any <td>, or with only blank cells, carry no data.
    if any(row):
        res.append(row)


df = pd.DataFrame(res, columns=["PostalCode", "Borough", "Neighborhood"])
df.head(15)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


In [8]:
# Keep only the rows whose Borough is something other than 'Not assigned'.
has_borough = df['Borough'] != 'Not assigned'
df = df[has_borough]
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [9]:
# Collapse rows sharing a postal code and borough into a single row whose
# remaining column holds the grouped values joined with ', '.
# sort=False keeps the groups in their order of first appearance.
group_keys = ['PostalCode', 'Borough']
grouped = df.groupby(group_keys, as_index=False, sort=False)
df = grouped.agg(lambda names: ', '.join(names))
df.head(15)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Not assigned
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


In [10]:
# Where the neighborhood is 'Not assigned', fall back to the borough name.
# A boolean mask with .loc does this row by row; Series.replace() with a
# scalar to_replace and a Series as value is not a supported form for a
# Series (recent pandas versions reject it).
unassigned = df['Neighborhood'] == 'Not assigned'
df.loc[unassigned, 'Neighborhood'] = df.loc[unassigned, 'Borough']
df.head(12)


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


In [11]:
# Display the (rows, columns) shape of the cleaned table.
df.shape

(103, 3)

In [12]:
# Load the coordinates table from the CSV file (path relative to the
# working directory).
geocodes_path = 'Geospatial_Coordinates.csv'
geocodes = pd.read_csv(geocodes_path)

In [13]:
# Preview the first four rows of the coordinates table.
geocodes.head(4)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917


In [14]:
# Relabel the three columns positionally so the key column is named
# 'PostalCode', the same name used in df.
short_names = ['PostalCode', 'Lat', 'Long']
geocodes.columns = short_names
geocodes.head(2)

Unnamed: 0,PostalCode,Lat,Long
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497


In [15]:
# Attach Lat/Long to each postal code. The inner join keeps only the
# postal codes that appear in both frames.
df = pd.merge(df, geocodes, on='PostalCode', how='inner')

df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Lat,Long
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


In [16]:
!pip install folium
import folium 
from folium import plugins

[33mYou are using pip version 18.0, however version 18.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [17]:
# Base map that the heatmap layer is added to later.
map_center = [43.6, -79]
wmap = folium.Map(location=map_center, zoom_start=9)

In [18]:
# Put one marker on a fresh map for every postal code location.
map_osm = folium.Map(location=[43.6, -79], zoom_start=9)
for lat, lng in zip(df['Lat'], df['Long']):
    folium.Marker(location=[lat, lng]).add_to(map_osm)
map_osm

In [19]:
# Convert to an (n, 2) ndarray of [Lat, Long] pairs for the heatmap.
# .values replaces DataFrame.as_matrix(), which is deprecated and has been
# removed from newer pandas releases.
torontohoods = df[['Lat', 'Long']].values

# Plot a heatmap of the postal codes to see where their concentration is
# greatest. Based on the heatmap, the highest concentration of postal codes
# is located in the center of the city.

# add_child is the current name of the deprecated add_children method.
wmap.add_child(plugins.HeatMap(torontohoods, radius=15))
wmap

  from ipykernel import kernelapp as app
