# Welcome to the notebook

#### Let's start by importing the libraries

In [19]:
import pandas as pd
import warnings
import numpy as np
import folium # plotting library (maps)

from bs4 import BeautifulSoup

warnings.filterwarnings('ignore')

#### Then, let's get a copy of the Wikipedia article

In [2]:
import urllib.request

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Download the article and keep a local copy so the parsing cells can work
# from the file on disk. The context manager closes the HTTP response even
# if reading or decoding fails.
with urllib.request.urlopen(url) as req:
    article = req.read().decode('utf-8')

# Write with an explicit encoding: the platform default may not be able to
# represent every character in the page.
with open('postal_codes.html', 'w', encoding='utf-8') as code:
    code.write(article)


#### Now, let's begin reading and parsing

In [3]:
# Read the cached copy of the article and parse it. The `with` block closes
# the file handle deterministically instead of leaving it to the garbage
# collector, and the explicit encoding avoids depending on the platform default.
with open('postal_codes.html', encoding='utf-8') as html_file:
    article = html_file.read()
soup = BeautifulSoup(article, 'html.parser')

In [4]:
# Render the parsed document as indented HTML text and print it for inspection.
pretty_html = soup.prettify()
print(pretty_html)

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of postal codes of Canada: M - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"XptG7wpAMNAAAUaw2@sAAAES","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":951325562,"wgRevisionId":951325562,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Communications in Ontario","Postal codes in Canada","Toronto","Ontario

In [5]:
# Collect every <table> element carrying the "sortable" CSS class;
# the table we want is picked out of this list in a later cell.
tables = soup.find_all('table', attrs={'class': 'sortable'})
tables

[<table class="wikitable sortable">
 <tbody><tr>
 <th>Postal code
 </th>
 <th>Borough
 </th>
 <th>Neighborhood
 </th></tr>
 <tr>
 <td>M1A
 </td>
 <td>Not assigned
 </td>
 <td>
 </td></tr>
 <tr>
 <td>M2A
 </td>
 <td>Not assigned
 </td>
 <td>
 </td></tr>
 <tr>
 <td>M3A
 </td>
 <td>North York
 </td>
 <td>Parkwoods
 </td></tr>
 <tr>
 <td>M4A
 </td>
 <td>North York
 </td>
 <td>Victoria Village
 </td></tr>
 <tr>
 <td>M5A
 </td>
 <td>Downtown Toronto
 </td>
 <td>Regent Park / Harbourfront
 </td></tr>
 <tr>
 <td>M6A
 </td>
 <td>North York
 </td>
 <td>Lawrence Manor / Lawrence Heights
 </td></tr>
 <tr>
 <td>M7A
 </td>
 <td>Downtown Toronto
 </td>
 <td>Queen's Park / Ontario Provincial Government
 </td></tr>
 <tr>
 <td>M8A
 </td>
 <td>Not assigned
 </td>
 <td>
 </td></tr>
 <tr>
 <td>M9A
 </td>
 <td>Etobicoke
 </td>
 <td>Islington Avenue
 </td></tr>
 <tr>
 <td>M1B
 </td>
 <td>Scarborough
 </td>
 <td>Malvern / Rouge
 </td></tr>
 <tr>
 <td>M2B
 </td>
 <td>Not assigned
 </td>
 <td>
 </td></tr>
 <tr>

#### Getting the correct table

In [6]:
# Search through the tables for the one with the headings we want.
# The page capitalises its first header as "Postal code", so the headings are
# compared case-insensitively, and only as many headings as we expect are
# compared. The `else` branch runs only when no table matched, so a changed
# page layout fails loudly instead of silently using whichever table came last.
wanted_headings = ['postal code', 'borough', 'neighborhood']
for table in tables:
    ths = table.find_all('th')
    headings = [th.text.strip() for th in ths]
    if [h.lower() for h in headings[:len(wanted_headings)]] == wanted_headings:
        break
else:
    raise ValueError(
        'No sortable table with headings {} was found in the article'.format(wanted_headings)
    )

#### Parsing the table and writing the .csv file:

In [7]:
# Extract the three columns we want and write them to a comma-delimited
# text file (fields separated by ", ").
with open('data.csv', 'w') as data:
    data.write(', '.join(['postal_code', 'borough', 'neighborhood']) + '\n')
    for tr in table.find_all('tr'):
        cells = [td.text.strip() for td in tr.find_all('td')[:3]]
        # Header rows (only <th> cells) and short rows carry no usable data.
        if len(cells) < 3:
            continue
        pcode, borough, neighborhood = cells
        # A comma inside the neighborhood would be read back as an extra
        # column separator, so it is replaced with ' in'.
        neighborhood = neighborhood.replace(',', ' in')
        # Rows without a neighborhood are skipped.
        if not neighborhood:
            continue
        data.write(', '.join([pcode, borough, neighborhood]) + '\n')

#### Verifying the database

In [8]:
# Load the file written above. Fields are separated by a comma with optional
# surrounding whitespace, hence the regex separator and the Python engine.
# `on_bad_lines='skip'` is the replacement for the removed
# `error_bad_lines=False`: rows with too many fields are dropped, not raised.
df = pd.read_csv('data.csv', sep=r'\s*,\s*', header=0, encoding='ascii', engine='python', on_bad_lines='skip')
df.head()

Unnamed: 0,postal_code,borough,neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [9]:
# Show the last five rows to check that the end of the table was parsed too.
df.tail(5)

Unnamed: 0,postal_code,borough,neighborhood
98,M8X,Etobicoke,The Kingsway / Montgomery Road / Old Mill North
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,Business reply mail Processing CentrE
101,M8Y,Etobicoke,Old Mill South / King's Mill Park / Sunnylea /...
102,M8Z,Etobicoke,Mimico NW / The Queensway West / South of Bloo...


#### Adding Latitude and Longitude to the DataFrame

In [10]:
#!pip3 install geocoder

In [11]:
import geocoder

# Quick check of the geocoder package: look up a well-known place name and
# print the latitude/longitude it reports.
g = geocoder.google('Mountain View, CA')
probe_latlng = g.latlng
print(probe_latlng)

None


#### Well, that did not work: the geocoder returned no coordinates. Since this package does not seem to be working, let's use the coordinates data provided instead

In [12]:
# Load the provided table of postal codes with their latitude and longitude.
coord = pd.read_csv('Geospatial_Coordinates.csv')
coord.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [13]:
# Assemble the final DataFrame: one row per postal code in `df`, extended with
# the matching latitude/longitude from `coord`.
# A single merge replaces the former row-by-row DataFrame.append loop, which
# was quadratic and relies on a method that was removed in pandas 2.0.
coord_lookup = (
    coord[['Postal Code', 'Latitude', 'Longitude']]
    # The loop used the first matching coordinate row per code; do the same.
    .drop_duplicates(subset='Postal Code')
    .rename(columns={'Postal Code': 'postal_code',
                     'Latitude': 'latitude',
                     'Longitude': 'longitude'})
)

# A left merge keeps every row of `df` in its original order and yields a
# fresh 0..n-1 index; `indicator=True` records which rows found no match.
merged = df[['postal_code', 'borough', 'neighborhood']].merge(
    coord_lookup, on='postal_code', how='left', indicator=True
)

# The loop crashed with an opaque IndexError when a postal code had no
# coordinates; keep failing, but say which codes are affected.
missing_codes = merged.loc[merged['_merge'] == 'left_only', 'postal_code'].tolist()
if missing_codes:
    raise ValueError('No coordinates found for postal codes: {}'.format(missing_codes))

df_final = merged.drop(columns='_merge')

df_final.head(10)

Unnamed: 0,postal_code,borough,neighborhood,latitude,longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
3,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,Parkview Hill / Woodbine Gardens,43.706397,-79.309937
9,M5B,Downtown Toronto,Garden District in Ryerson,43.657162,-79.378937


In [18]:
# Give the columns presentation-friendly names. Labels are assigned
# positionally, so the order must match the existing column order.
presentation_names = ['Postal Code', 'Borough', 'Neighborhood', 'Latitude', 'Longitude']
df_final.columns = presentation_names
df_final.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
3,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494


#### Adding a map and marking the Boroughs with 'Toronto' in the name

In [64]:
# Coordinates used to centre the map on Toronto.
toronto_lat, toronto_long = 43.6529, -79.3849

# Draw the base map; markers are added to it in a later cell.
map_tor = folium.Map(
    location=[toronto_lat, toronto_long],
    zoom_start=11,
)
map_tor

##### Preparing a DataFrame with the data from the Boroughs with 'Toronto' in the name

In [62]:
# Keep only the rows whose borough name contains 'Toronto', with the three
# columns needed for plotting. A boolean mask replaces the former
# DataFrame.append loop (removed in pandas 2.0), whose manual counter also
# silently relied on df_final having a default 0..n-1 index.
# `regex=False` matches the literal substring, like the `in` test did;
# `na=False` treats a missing borough as "no match" instead of raising.
is_toronto = df_final['Borough'].str.contains('Toronto', regex=False, na=False)
tor_bor = (
    df_final.loc[is_toronto, ['Borough', 'Latitude', 'Longitude']]
    .reset_index(drop=True)
)

tor_bor.head()

Unnamed: 0,Borough,Latitude,Longitude
0,Downtown Toronto,43.65426,-79.360636
1,Downtown Toronto,43.662301,-79.389494
2,Downtown Toronto,43.657162,-79.378937
3,Downtown Toronto,43.651494,-79.375418
4,East Toronto,43.676357,-79.293031


##### Adding the markers to the map

In [65]:
# Add one blue circle marker per row of tor_bor, using the borough name as
# the popup text. The appearance shared by all markers is defined once.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='blue',
    fill_opacity=0.6,
)
for lat, lng, label in zip(tor_bor['Latitude'], tor_bor['Longitude'], tor_bor['Borough']):
    marker = folium.features.CircleMarker([lat, lng], popup=label, **marker_style)
    marker.add_to(map_tor)

# display map
map_tor

# Aaaaaaand that's it! Thanks for visiting!