In [10]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
import numpy as np
import io

In [11]:
# Download the Wikipedia page that lists the Canadian postal codes starting with "M".
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page below.
response.raise_for_status()
source = response.text

In [12]:
# Build a navigable parse tree from the downloaded HTML using Python's built-in parser.
soup = BeautifulSoup(markup=source, features='html.parser')

In [17]:
# Read the column headings from the first table carrying the 'sortable' class.
table = soup.find_all('table', class_='sortable')
th = table[0].find_all('th')
headings = []
for header_cell in th:
    headings.append(header_cell.text.strip())
# Force a fixed label for the first column, whatever the page calls it.
headings[0] = 'Postal code'
headings

['Postal code', 'Borough', 'Neighbourhood']

In [20]:
# Gather the stripped text of the first three <td> cells of every table row.
# Rows without any <td> cell (e.g. the header row) are skipped.
cells_per_row = (tr.find_all('td') for tr in table[0].find_all('tr'))
tb_rows = [
    [cell.text.strip() for cell in cells[:3]]
    for cells in cells_per_row
    if cells
]
tb_rows

[['M1A', 'Not assigned', 'Not assigned'],
 ['M2A', 'Not assigned', 'Not assigned'],
 ['M3A', 'North York', 'Parkwoods'],
 ['M4A', 'North York', 'Victoria Village'],
 ['M5A', 'Downtown Toronto', 'Harbourfront'],
 ['M5A', 'Downtown Toronto', 'Regent Park'],
 ['M6A', 'North York', 'Lawrence Heights'],
 ['M6A', 'North York', 'Lawrence Manor'],
 ['M7A', "Queen's Park", 'Not assigned'],
 ['M8A', 'Not assigned', 'Not assigned'],
 ['M9A', 'Etobicoke', 'Islington Avenue'],
 ['M1B', 'Scarborough', 'Rouge'],
 ['M1B', 'Scarborough', 'Malvern'],
 ['M2B', 'Not assigned', 'Not assigned'],
 ['M3B', 'North York', 'Don Mills North'],
 ['M4B', 'East York', 'Woodbine Gardens'],
 ['M4B', 'East York', 'Parkview Hill'],
 ['M5B', 'Downtown Toronto', 'Ryerson'],
 ['M5B', 'Downtown Toronto', 'Garden District'],
 ['M6B', 'North York', 'Glencairn'],
 ['M7B', 'Not assigned', 'Not assigned'],
 ['M8B', 'Not assigned', 'Not assigned'],
 ['M9B', 'Etobicoke', 'Cloverdale'],
 ['M9B', 'Etobicoke', 'Islington'],
 ['M9B', 

In [21]:
# Turn the scraped rows into a DataFrame labelled with the scraped headings.
Postal_Codes = pd.DataFrame(data=tb_rows, columns=headings)
Postal_Codes.head(n=5)

Unnamed: 0,Postal code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [22]:
# Treat the 'Not assigned' placeholder as missing data, then drop every row
# that has no borough.
# `np.nan` is used because the `np.NaN` alias no longer exists in NumPy 2.0.
# The result is rebound rather than mutated in place, so the steps read as one pipeline.
Postal_Codes = (
    Postal_Codes
    .replace(to_replace='Not assigned', value=np.nan)
    .reset_index(drop=True)
    .dropna(axis=0, subset=['Borough'])
)

In [23]:
# Preview the first rows of the frame after rows without a borough were dropped.
Postal_Codes.head()

Unnamed: 0,Postal code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [24]:
# Boolean mask of the rows that have no neighbourhood name.
nbn = Postal_Codes['Neighbourhood'].isna()
print (Postal_Codes.loc[nbn, 'Borough'])
# Use the borough name as the neighbourhood for those rows.
# A single .loc[row_mask, column] assignment writes into the frame itself; the
# chained form (frame[column].loc[mask] = ...) assigns to an intermediate Series,
# which pandas warns about and which leaves the frame unchanged under Copy-on-Write.
Postal_Codes.loc[nbn, 'Neighbourhood'] = Postal_Codes.loc[nbn, 'Borough']

8    Queen's Park
Name: Borough, dtype: object


In [27]:
# Collapse rows sharing a postal code and borough into a single row whose
# neighbourhood names are joined into one comma-separated string.
Postal_Codes = (
    Postal_Codes
    .groupby(['Postal code', 'Borough'])['Neighbourhood']
    .agg(', '.join)
    .reset_index()
)
Postal_Codes.shape

(103, 3)

In [28]:
import json
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

Solving environment: ...working... done

# All requested packages already installed.



In [29]:
# Download the CSV that maps each postal code to a latitude/longitude pair.
gurl = "https://cocl.us/Geospatial_data/Geospatial_Coordinates.csv"
# A timeout keeps the cell from hanging forever on a stalled connection.
geo_response = requests.get(gurl, timeout=30)
# Stop on an HTTP error rather than handing an error page to read_csv.
geo_response.raise_for_status()
file = geo_response.content
gf = pd.read_csv(io.StringIO(file.decode('utf-8')))
gf.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [31]:
# Relabel the coordinate columns so the key is spelled 'Postal code', then
# attach latitude/longitude to each postal code (default inner join on the key).
gf.columns = ['Postal code', 'Latitude', 'Longitude']
gt = Postal_Codes.merge(gf, on='Postal code')
gt.head(11)

Unnamed: 0,Postal code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [34]:
ad = 'Toronto'

# Nominatim's usage policy asks for an application-specific user agent, and
# geopy 2.x refuses to run with the default one, so name the application explicitly.
geolocator = Nominatim(user_agent='toronto_neighbourhood_explorer')
location = geolocator.geocode(ad)
# geocode() returns None when nothing matches; report that instead of failing
# later with an AttributeError on None.
if location is None:
    raise ValueError('Could not geocode {!r}'.format(ad))
lat = location.latitude
lgt = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(lat, lgt))



The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [36]:
# Keep only the rows whose borough name contains "Toronto" (case-insensitive)
# and renumber them from zero.
is_toronto_borough = gt['Borough'].str.contains("Toronto", case=False)
Toronto = gt.loc[is_toronto_borough].reset_index(drop=True)

print(Toronto.shape)
Toronto.head()

(38, 5)


Unnamed: 0,Postal code,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [37]:
# Geocode the point the map will be centred on.
# An explicit user_agent is passed because geopy 2.x rejects the default one.
location = Nominatim(user_agent='toronto_neighbourhood_explorer').geocode('Toronto, Canada')
# geocode() returns None when nothing matches; fail here with a clear message.
if location is None:
    raise ValueError('Could not geocode Toronto, Canada')



In [52]:
# Create the base map centred on the geocoded location.
map_center = [location.latitude, location.longitude]
map_toronto = folium.Map(location=map_center, zoom_start=12)

In [53]:
# Add one circle marker per row of the Toronto frame, using the row's
# neighbourhood text as the popup, then display the map.
marker_rows = zip(Toronto['Latitude'], Toronto['Longitude'], Toronto['Neighbourhood'])
for lat, lng, neighborhood in marker_rows:
    popup = folium.Popup('{}'.format(neighborhood), parse_html=True)
    marker = folium.CircleMarker(
        location=[lat, lng],
        radius=7,
        popup=popup,
        color='pink',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.9,
    )
    marker.add_to(map_toronto)
map_toronto