# Assignment

In [1]:
from bs4 import BeautifulSoup
import requests
from pandas.io.json import json_normalize 
import pandas as pd

## Request the html and Use BeautifulSoup to extract the content

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
results = requests.get(url)
soup = BeautifulSoup(results.content)

## Extract the table and its headings

In [3]:
post_code_list = soup.find("table", attrs={"class": "wikitable sortable"})
post_code_list_data = post_code_list.tbody.find_all("tr")  

# Get all the headings of the table
headings = []
for th in post_code_list_data[0].find_all("th"):
    # remove any newlines and extra spaces from left and right
    headings.append(th.text.replace('\n', ' ').strip())

print(headings)

['Postal Code', 'Borough', 'Neighbourhood']


## Generate the table on the web

In [4]:
data = []

for row in post_code_list_data[1:]:
    t_row = {}
    for td, th in zip(row.find_all('td'), headings):
        t_row[th] = td.text.replace('\n', '').strip()
    data.append(t_row)
post_table = pd.DataFrame(data)
post_table

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


## Drop the lines with Borough not assigned

In [5]:
new_table = post_table.loc[post_table['Borough']!='Not assigned',:]
new_table

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [6]:
# It looks there is no repeated Postal Code line in the table
len(pd.unique(new_table['Postal Code']))

103

## Assign the not-assigned neighborhood with the borough, clean table and generate the final table and it's shape 

In [7]:
for neighbor, i in enumerate(new_table['Neighbourhood']):
    if neighbor == 'Not assigned':
        new_table.loc[i,'Neighbourhood'] = new_table.loc[i,'Borough']
new_table.reset_index(drop=True, inplace=True)
print('The number of rows of my dataframe is',new_table.shape[0])
new_table

The number of rows of my dataframe is 103


Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


# Merge lat and lon Data

In [8]:
!pip install geocoder

Collecting geocoder
  Downloading geocoder-1.38.1-py2.py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 5.6 MB/s eta 0:00:011
Collecting ratelim
  Downloading ratelim-0.1.6-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [9]:
import geocoder

In [83]:
lat = []
lng = []
for i in range(new_table.shape[0]):
    postal_code = new_table.loc[i,'Postal Code']
    Borough = new_table.loc[i,'Borough']
    address = '{}, Toronto, ON {}'.format(Borough, postal_code)
    g = geocoder.arcgis(address)
    lat.append(g.lat)
    lng.append(g.lng)

[43.75245000000007,
 43.73057000000006,
 43.65820000000008,
 43.72327000000007,
 43.66253000000006,
 43.662630000000036,
 43.811390000000074,
 43.74923000000007,
 43.70718000000005,
 43.66139000000004,
 43.70687000000004,
 43.65034000000003,
 43.78574000000003,
 43.72168000000005,
 43.68970000000007,
 43.65215000000006,
 43.69211000000007,
 43.64857000000006,
 43.765750000000025,
 43.68786003845179,
 43.64536000000004,
 43.68784000000005,
 43.76812000000007,
 43.709020000000066,
 43.65486000000004,
 43.668690000000026,
 43.76944000000003,
 43.80225000000007,
 43.75788000000006,
 43.70142000000004,
 43.64970000000005,
 43.665050000000065,
 43.74446000000006,
 43.780970000000025,
 43.76476000000008,
 43.68811000000005,
 43.64285000000007,
 43.64848000000006,
 43.725820000000056,
 43.781120000000044,
 43.73384000000004,
 43.68375000000003,
 43.64710000000008,
 43.64918110016615,
 43.71289000000007,
 43.756980000000055,
 43.720710000000054,
 43.667970000000025,
 43.64840000000004,
 43.7138

In [84]:
new_table['Latitude'] = lat
new_table['Longitude'] = lng

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


In [85]:
new_table

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65820,-79.36842
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.65319,-79.51113
99,M4Y,Downtown Toronto,Church and Wellesley,43.66659,-79.38133
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.64869,-79.38544
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.63278,-79.48945
