# Part 1

In [18]:
#Import Packages

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import folium
#! pip install BeautifulSoup
from bs4 import BeautifulSoup
import requests

In [19]:
import warnings
warnings.filterwarnings("ignore")

In [20]:
#Extract data from wikipedia
website_url = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
soup = BeautifulSoup(website_url,"lxml")
My_table = soup.find("table",{"class":"wikitable sortable"})
#My_table

In [21]:
table_data = My_table.tbody.find_all("tr")
table_data[0]


<tr>
<th>Postcode</th>
<th>Borough</th>
<th>Neighbourhood
</th></tr>

In [22]:
#Extract column headers
column = [x.text.strip() for x in table_data[0].find_all("th")]
column

['Postcode', 'Borough', 'Neighbourhood']

In [23]:
data_table = pd.DataFrame(columns = column)

In [24]:
#Create basic data table
rows = []
for i in range(1,len(table_data)-1):
    rows = [x.text.strip() for x in table_data[i].find_all("td")]
    data_table.loc[i] = rows

data_table.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront


In [25]:
data_table.reset_index(inplace = True)
data_table.head()

Unnamed: 0,index,Postcode,Borough,Neighbourhood
0,1,M1A,Not assigned,Not assigned
1,2,M2A,Not assigned,Not assigned
2,3,M3A,North York,Parkwoods
3,4,M4A,North York,Victoria Village
4,5,M5A,Downtown Toronto,Harbourfront


In [26]:
data_table = data_table[data_table["Borough"] != "Not assigned"]
data_table["Neighbourhood"][data_table["Neighbourhood"] == "Not assigned"] = data_table["Borough"]

In [27]:
# Get postcodes with more than 1 neighbourhood

abc = data_table.pivot_table(index = "Postcode", values = "Neighbourhood", aggfunc = "count")
postcode_more = abc.index[abc["Neighbourhood"] > 1]
postcode_more

Index(['M1B', 'M1C', 'M1E', 'M1K', 'M1L', 'M1M', 'M1N', 'M1P', 'M1R', 'M1T',
       'M1V', 'M2J', 'M2L', 'M2M', 'M3C', 'M3H', 'M3J', 'M3K', 'M4B', 'M4K',
       'M4L', 'M4T', 'M4V', 'M4X', 'M5B', 'M5H', 'M5J', 'M5K', 'M5L', 'M5M',
       'M5P', 'M5R', 'M5S', 'M5T', 'M5V', 'M5X', 'M6A', 'M6H', 'M6J', 'M6K',
       'M6L', 'M6M', 'M6N', 'M6P', 'M6R', 'M6S', 'M8V', 'M8W', 'M8X', 'M8Y',
       'M8Z', 'M9B', 'M9C', 'M9M', 'M9R', 'M9V'],
      dtype='object', name='Postcode')

In [28]:
#Get postcodes with 1 neighbourhood
postcode_one = abc.index[abc["Neighbourhood"] == 1]
postcode_one

Index(['M1G', 'M1H', 'M1J', 'M1S', 'M1W', 'M1X', 'M2H', 'M2K', 'M2N', 'M2P',
       'M2R', 'M3A', 'M3B', 'M3L', 'M3M', 'M3N', 'M4A', 'M4C', 'M4E', 'M4G',
       'M4H', 'M4J', 'M4M', 'M4N', 'M4P', 'M4R', 'M4S', 'M4W', 'M4Y', 'M5A',
       'M5C', 'M5E', 'M5G', 'M5N', 'M5W', 'M6B', 'M6C', 'M6E', 'M6G', 'M7A',
       'M7R', 'M7Y', 'M9A', 'M9L', 'M9N', 'M9P', 'M9W'],
      dtype='object', name='Postcode')

In [29]:
#Table with more than 1 Neighbourhood
t1 = data_table.loc[data_table.Postcode.isin(postcode_more),:].sort_values(by='Postcode').reset_index(drop=True)
t1
#Table with 1 Neighbourhood
t2 = data_table.loc[data_table.Postcode.isin(postcode_one),:].sort_values(by='Postcode').reset_index(drop=True)
t2

Unnamed: 0,index,Postcode,Borough,Neighbourhood
0,53,M1G,Scarborough,Woburn
1,62,M1H,Scarborough,Cedarbrae
2,76,M1J,Scarborough,Scarborough Village
3,180,M1S,Scarborough,Agincourt
4,236,M1W,Scarborough,L'Amoreaux West
5,246,M1X,Scarborough,Upper Rouge
6,63,M2H,North York,Hillcrest Village
7,94,M2K,North York,Bayview Village
8,142,M2N,North York,Willowdale South
9,154,M2P,North York,York Mills West


In [30]:
#Neighbours List
codes = []
boroughs = []
neighbours = []

for code in postcode_more:
    
    table = t1.loc[t1.Postcode == code, :] # split 't1' to specific table by 'postcode'
    
    code = np.unique(table.Postcode) # extract unique 'postcode' in the column
    codes.append(code[0])
    
    borough = np.unique(table.Borough) # extract unique 'borough' in the column
    boroughs.append(borough[0])
    
    neighbour = table.Neighbourhood.to_list() # extract all 'neighbourhood' & convert to list format
    neighbour = ', '.join(neighbour) # use .join() method to combine each 'neighbourhood' with ", "
    neighbours.append(neighbour)

neighbours

['Rouge, Malvern',
 'Port Union, Rouge Hill, Highland Creek',
 'Guildwood, West Hill, Morningside',
 'East Birchmount Park, Ionview, Kennedy Park',
 'Clairlea, Golden Mile, Oakridge',
 'Scarborough Village West, Cliffside, Cliffcrest',
 'Cliffside West, Birch Cliff',
 'Dorset Park, Scarborough Town Centre, Wexford Heights',
 'Wexford, Maryvale',
 "Tam O'Shanter, Sullivan, Clarks Corners",
 "Agincourt North, Steeles East, L'Amoreaux East, Milliken",
 'Henry Farm, Fairview, Oriole',
 'York Mills, Silver Hills',
 'Newtonbrook, Willowdale',
 'Flemingdon Park, Don Mills South',
 'Wilson Heights, Downsview North, Bathurst Manor',
 'Northwood Park, York University',
 'CFB Toronto, Downsview East',
 'Woodbine Gardens, Parkview Hill',
 'Riverdale, The Danforth West',
 'India Bazaar, The Beaches West',
 'Summerhill East, Moore Park',
 'Forest Hill SE, Rathnelly, Summerhill West, Deer Park, South Hill',
 'St. James Town, Cabbagetown',
 'Garden District, Ryerson',
 'Richmond, King, Adelaide',
 'To

In [31]:
# a new dataframe for 'Postcode' with > 1 'Neighbourhood'

table_new = pd.DataFrame({'Postcode': codes, 'Borough': boroughs, 'Neighbourhood': neighbours})
table_new.head()


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Port Union, Rouge Hill, Highland Creek"
2,M1E,Scarborough,"Guildwood, West Hill, Morningside"
3,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
4,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"


In [32]:
# combine 'tbl_new' with 't2' (table with single neighbourhood for each postcode) with concat()

new_data = pd.concat([table_new, t2], axis=0).sort_values(by="Postcode").reset_index(drop=True)
new_data.drop("index", axis = 1, inplace = True)
new_data.head()

Unnamed: 0,Borough,Neighbourhood,Postcode
0,Scarborough,"Rouge, Malvern",M1B
1,Scarborough,"Port Union, Rouge Hill, Highland Creek",M1C
2,Scarborough,"Guildwood, West Hill, Morningside",M1E
3,Scarborough,Woburn,M1G
4,Scarborough,Cedarbrae,M1H


In [33]:
new_data.shape

(103, 3)

# Part 2

In [35]:
# import library for Geocoding
! pip install geopy
! pip install geopandas
import geopy
import geopandas

Collecting geopy
  Downloading https://files.pythonhosted.org/packages/53/fc/3d1b47e8e82ea12c25203929efb1b964918a77067a874b2c7631e2ec35ec/geopy-1.21.0-py2.py3-none-any.whl (104kB)
Collecting geographiclib<2,>=1.49 (from geopy)
  Downloading https://files.pythonhosted.org/packages/8b/62/26ec95a98ba64299163199e95ad1b0e34ad3f4e176e221c40245f211e425/geographiclib-1.50-py3-none-any.whl
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.50 geopy-1.21.0


In [37]:
# note: not requirement of assignment

geo_lat = {}
geo_lon = {}

geo = geopy.Nominatim(user_agent="Detector", timeout=20)

for x in np.unique(new_data.Borough):
    
    loc = geo.geocode("{}, Toronto, Ontario".format(x))
    
    geo_lat[x] = loc.latitude
    geo_lon[x] = loc.longitude

In [38]:
# Checking latitudes and longitudes
geo_lat

{'Central Toronto': 43.6449033,
 'Downtown Toronto': 43.6563221,
 'East Toronto': 43.626243,
 'East York': 43.699971000000005,
 'Etobicoke': 43.671459150000004,
 'Mississauga': 43.6668555,
 'North York': 43.7543263,
 'Scarborough': 43.773077,
 'West Toronto': 43.6449033,
 'York': 43.67910515}

In [39]:
# Checking latitudes and longitudes
geo_lon

{'Central Toronto': -79.3818364,
 'Downtown Toronto': -79.3809161,
 'East Toronto': -79.396962,
 'East York': -79.33251996261595,
 'Etobicoke': -79.55249206611668,
 'Mississauga': -79.5879563,
 'North York': -79.44911696639593,
 'Scarborough': -79.257774,
 'West Toronto': -79.3818364,
 'York': -79.49118414007154}

In [43]:
# instruction given by assignment: to extract latitude & longitutde based on "Postcode"
# API failed to search for geo-location for some postcodes.
# will use csv file provided in coursera link instead

geo = geopy.Nominatim(user_agent="Detector", timeout=50)

for x, y in zip(new_data.Postcode[0:5], new_data.Borough[0:5]):
    
    loc = geo.geocode("{}, {}, Toronto, Canada".format(x, y))
    print("{}, {}: latitude {}, longitude {}".format(x, y, loc.latitude, loc.longitude))

M1B, Scarborough: latitude 43.773077, longitude -79.257774
M1C, Scarborough: latitude 43.773077, longitude -79.257774


AttributeError: 'NoneType' object has no attribute 'latitude'

In [44]:
# Link provided by coursera link
geo_data = pd.read_csv("http://cocl.us/Geospatial_data")
geo_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [46]:


geo_df = geo_data.loc[geo_data["Postal Code"].isin(new_data.Postcode.values), :]
print(geo_df.shape)

geo_table = new_data.merge(geo_df, left_on="Postcode", right_on="Postal Code")
geo_table.head()


(103, 3)


Unnamed: 0,Borough,Neighbourhood,Postcode,Postal Code,Latitude,Longitude
0,Scarborough,"Rouge, Malvern",M1B,M1B,43.806686,-79.194353
1,Scarborough,"Port Union, Rouge Hill, Highland Creek",M1C,M1C,43.784535,-79.160497
2,Scarborough,"Guildwood, West Hill, Morningside",M1E,M1E,43.763573,-79.188711
3,Scarborough,Woburn,M1G,M1G,43.770992,-79.216917
4,Scarborough,Cedarbrae,M1H,M1H,43.773136,-79.239476


In [51]:
geo_table = geo_table[["Postcode", "Borough", "Neighbourhood", "Latitude", "Longitude"]]

In [52]:
geo_table

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Port Union, Rouge Hill, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, West Hill, Morningside",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Scarborough Village West, Cliffside, Cliffcrest",43.716316,-79.239476
9,M1N,Scarborough,"Cliffside West, Birch Cliff",43.692657,-79.264848
