In [56]:
import pandas as pd
import numpy as np

import csv

from bs4 import BeautifulSoup
import requests

import folium # map rendering library

# from urllib.request import urlopen as uReq

---

## Get html code accessible for manipulation

In [46]:
# Download the Wikipedia page listing Canadian postal codes that start with "M".
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops the notebook here on an HTTP error instead of
# letting an error page be parsed as if it were the article.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
source = response.text

# Parse the raw HTML into a navigable tree (uses the lxml parser backend).
soup = BeautifulSoup(source, 'lxml')

In [47]:
# Grab the first <table> element in the parsed page; attribute access on the
# soup is shorthand for find('table').
table = soup.table

---

## Write to csv file

In [48]:
# Dump the table's cells to a CSV file, three consecutive <td> cells per row
# (postal code, borough, neighbourhood).
# The file is written as UTF-8 explicitly so it matches the default encoding
# pd.read_csv uses when the file is read back, regardless of platform.
with open('canadian_postal_codes_scrape.csv', 'w', newline='', encoding='utf-8') as f:

    csv_file = csv.writer(f)
    csv_file.writerow(['Postal Code', 'Borough', 'Neighbourhood'])

    # Raw text of every data cell, in document order. The text is written
    # unmodified (trailing newlines included).
    tag = table.find_all('td')
    cell_text = [cell.text for cell in tag]

    # Group the flat cell list into consecutive triples. zip() stops at the
    # shortest slice, so an incomplete trailing group is ignored, and the last
    # complete row IS written (the previous counter-based loop only emitted a
    # row once it had seen the first cell of the following row, silently
    # dropping the final row).
    for postal, borough, neighbourhood in zip(cell_text[0::3], cell_text[1::3], cell_text[2::3]):
        csv_file.writerow([postal, borough, neighbourhood])
        

In [49]:
# Load the scraped CSV back into a DataFrame and display it.
pc = pd.read_csv('canadian_postal_codes_scrape.csv')
pc

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned\n
1,M2A,Not assigned,Not assigned\n
2,M3A,North York,Parkwoods\n
3,M4A,North York,Victoria Village\n
4,M5A,Downtown Toronto,Harbourfront\n
5,M6A,North York,Lawrence Heights\n
6,M6A,North York,Lawrence Manor\n
7,M7A,Downtown Toronto,Queen's Park\n
8,M8A,Not assigned,Not assigned\n
9,M9A,Queen's Park,Not assigned\n


---

## Cleaning the data and deleting unwanted rows

In [50]:
# Strip the newline characters from the Neighbourhood text, then keep only
# the rows whose Borough is something other than 'Not assigned'.
pc = (
    pc
    .assign(Neighbourhood=pc['Neighbourhood'].str.replace('\n', ''))
    .loc[lambda frame: frame['Borough'] != 'Not assigned']
)
pc

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Queen's Park,Not assigned
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,Malvern
13,M3B,North York,Don Mills North


---
### Create a dictionary that has updated values for the Neighbourhood column.
### Then update the dataframe to replace its Neighbourhood values with those of the dictionary.

In [51]:
# Create a dictionary mapping each postal code to a comma-separated string of
# every neighbourhood seen for it so far, and record, row by row, the value
# the Neighbourhood column should take.
#
# The values are collected in a list and written back with a single explicit
# column assignment. Assigning through `pc.iloc[i]['Neighbourhood'] = ...`
# is chained indexing: it writes to a temporary row Series and is not
# guaranteed to update `pc` at all.
pCodes = {}
combined = []
for code, hood in zip(pc['Postal Code'], pc['Neighbourhood']):
    if code in pCodes:
        # Repeated postal code: append this neighbourhood to the running list.
        pCodes[code] = pCodes[code] + ', ' + hood
    else:
        # First time this postal code is seen.
        pCodes[code] = hood
    combined.append(pCodes[code])

# Each row receives the list accumulated up to and including that row, so the
# first row of a repeated postal code keeps its single neighbourhood.
# NOTE(review): to give every row the complete list instead, use
# pc['Postal Code'].map(pCodes) here.
pc = pc.assign(Neighbourhood=combined)

---
## Giving un-named Neighbourhoods the same name as their respective borough

In [52]:
# Rows whose Neighbourhood is the placeholder 'Not assigned' take the name of
# their Borough instead.
# This is done as one vectorised column replacement: assigning through
# `pc.iloc[i]['Neighbourhood'] = ...` is chained indexing and is not
# guaranteed to modify `pc`. The per-row debug print has been removed.
unnamed = pc['Neighbourhood'] == 'Not assigned'
pc = pc.assign(Neighbourhood=pc['Neighbourhood'].mask(unnamed, pc['Borough']))
pc

Not assigned


Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,"Lawrence Heights, Lawrence Manor"
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Queen's Park,Queen's Park
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,"Rouge, Malvern"
13,M3B,North York,Don Mills North


In [53]:
# Show the dimensions of the cleaned frame as (rows, columns).
pc.shape

(210, 3)

---
## Read in latitude / longitude data from another .csv file.  Merge multiple dataframes on their shared column.

In [54]:
# Load the latitude / longitude lookup table from its CSV file.
gs = pd.read_csv('Geospatial_Coordinates.csv')

# Attach the coordinates to each row by joining on the shared 'Postal Code'
# column (default inner join: only postal codes present in both frames remain).
pc = pc.merge(gs, on='Postal Code')
pc

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.654260,-79.360636
3,M6A,North York,Lawrence Heights,43.718518,-79.464763
4,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
5,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
6,M9A,Queen's Park,Queen's Park,43.667856,-79.532242
7,M1B,Scarborough,Rouge,43.806686,-79.194353
8,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
9,M3B,North York,Don Mills North,43.745906,-79.352188


---
#### Create a map of Toronto with neighborhoods superimposed on top.

In [90]:
# Create the base map of Toronto from fixed latitude and longitude values.
map_toronto = folium.Map(location=[43.7, -79.5], zoom_start=10)

# Add one circle marker per row of pc, with a popup reading
# "<neighbourhood>, <borough>".
for lat, lng, borough, neighbourhood in zip(pc['Latitude'], pc['Longitude'], pc['Borough'], pc['Neighbourhood']):
    label = folium.Popup('{}, {}'.format(neighbourhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

map_toronto