In [2]:
import folium

In [3]:
#importing necessary packages
import requests 
from bs4 import BeautifulSoup

# Wikipedia page listing the Canadian postal codes that start with "M".
WIKI_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# The timeout stops the notebook from hanging on a stalled connection, and
# raise_for_status() fails loudly on an HTTP error instead of letting an
# error page be parsed as if it were the article.
response = requests.get(WIKI_URL, timeout=30)
response.raise_for_status()

# NOTE: despite its name, `url` holds the page's HTML text, not the address.
url = response.text

# Parse the HTML with BeautifulSoup using the lxml parser
soup = BeautifulSoup(url, 'lxml')

In [4]:
# Display the parsed document to inspect what was fetched.
# NOTE: this renders the whole page's HTML, so the output is very long.
soup

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
<head>
<meta charset="utf-8"/>
<title>List of postal codes of Canada: M - Wikipedia</title>
<script>document.documentElement.className = document.documentElement.className.replace( /(^|\s)client-nojs(\s|$)/, "$1client-js$2" );</script>
<script>(window.RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":867606113,"wgRevisionId":867606113,"wgArticleId":539066,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Communications in Ontario","Postal codes in Canada","Toronto","Ontario-related lists"],"wgBreakFrames":false,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","w

In [5]:
# The first <tr> of the sortable wikitable carries the column headings.
Header = soup.find('table', class_='wikitable sortable').find('tr').get_text()
# Show the heading text
print(Header)


Postcode
Borough
Neighbourhood



In [6]:
# Collect every <tr> element (heading row included) of the sortable wikitable.
allRow = (
    soup
    .find('table', class_='wikitable sortable')
    .find_all('tr')
)

In [7]:
# One accumulator list per table column: postcode, borough, neighbourhood.
tmp_post, tmp_borough, tmp_neighbourhood = [], [], []

In [8]:
# Parse every table row into the three column lists.
# The lists are reset first so that re-running this cell does not append the
# same rows a second time.
tmp_post = []
tmp_borough = []
tmp_neighbourhood = []

for row in allRow:
    # Read each <th>/<td> cell on its own instead of splitting the row text on
    # newlines, so a line break inside a cell cannot change the field count.
    # The heading row is deliberately kept: it becomes the first record, as before.
    cells = [cell.get_text().strip()
             for cell in row.find_all(['th', 'td'], recursive=False)]
    if not cells:
        # A row without any cells carries no data.
        continue
    if len(cells) != 3:
        raise ValueError(
            'Expected 3 cells (Postcode, Borough, Neighbourhood) but found {}: {!r}'
            .format(len(cells), cells))
    a, b, c = cells
    tmp_post.append(a)
    tmp_borough.append(b)
    tmp_neighbourhood.append(c)

In [9]:
#import packages
import pandas as pd

# Assemble the three parallel lists into one DataFrame, one column per list.
df_merge = pd.DataFrame(dict(
    Postcode=tmp_post,
    Borough=tmp_borough,
    Neighbourhood=tmp_neighbourhood,
))

In [10]:
# Preview the first five rows; row 0 is the scraped heading row, not data.
df_merge.head(5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


In [11]:
# Remove the heading row that was scraped together with the data.
# Filtering on the value (instead of slicing off the first position) keeps the
# cell safe to re-run: a second run no longer removes a real postal-code row.
df_merge = df_merge[df_merge['Postcode'] != 'Postcode']
print(df_merge.head(5))
print(df_merge.shape)

  Postcode           Borough     Neighbourhood
1      M1A      Not assigned      Not assigned
2      M2A      Not assigned      Not assigned
3      M3A        North York         Parkwoods
4      M4A        North York  Victoria Village
5      M5A  Downtown Toronto      Harbourfront
(289, 3)


In [12]:
# Keep only the rows whose Borough is something other than 'Not assigned'.
df_merge_1 = df_merge.loc[df_merge['Borough'].ne('Not assigned')]

print(df_merge_1.head())
print(df_merge_1.shape)

  Postcode           Borough     Neighbourhood
3      M3A        North York         Parkwoods
4      M4A        North York  Victoria Village
5      M5A  Downtown Toronto      Harbourfront
6      M5A  Downtown Toronto       Regent Park
7      M6A        North York  Lawrence Heights
(212, 3)


In [13]:
# One row per (Postcode, Borough); neighbourhoods sharing a postcode are
# joined into a single comma-separated string.
df_merge_2 = (
    df_merge_1
    .groupby(['Postcode', 'Borough'])['Neighbourhood']
    .agg(','.join)
    .reset_index()
)

In [14]:
# Inspect the grouped frame: its first five rows and its (rows, columns) shape.
print(df_merge_2.head(5))
print(df_merge_2.shape)

  Postcode      Borough                         Neighbourhood
0      M1B  Scarborough                         Rouge,Malvern
1      M1C  Scarborough  Highland Creek,Rouge Hill,Port Union
2      M1E  Scarborough       Guildwood,Morningside,West Hill
3      M1G  Scarborough                                Woburn
4      M1H  Scarborough                             Cedarbrae
(103, 3)


In [15]:
#importing package
import numpy as np


# Where a postcode has a borough but its neighbourhood is 'Not assigned',
# use the borough name as the neighbourhood.
df_merge_2['Neighbourhood'] = df_merge_2['Neighbourhood'].mask(
    df_merge_2['Neighbourhood'].eq('Not assigned'),
    df_merge_2['Borough'],
)

# Continue with an independent copy of the cleaned frame.
df_merged = df_merge_2.copy()
print('Final Shape of the dataset: ', df_merged.shape)

Final Shape of the dataset:  (103, 3)


In [16]:
#print the first 12 rows of the final working data frame
df_merged.head(12)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


In [17]:
# Count how many rows (postal codes) fall in each borough.
df_merged['Borough'].value_counts()

North York          24
Downtown Toronto    18
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
York                 5
East York            5
East Toronto         5
Mississauga          1
Queen's Park         1
Name: Borough, dtype: int64

In [18]:
# Summarise the frame: index range, column dtypes and non-null counts.
df_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103 entries, 0 to 102
Data columns (total 3 columns):
Postcode         103 non-null object
Borough          103 non-null object
Neighbourhood    103 non-null object
dtypes: object(3)
memory usage: 2.5+ KB


In [19]:
# Show the table ordered alphabetically by Neighbourhood.
# Display only: the sorted result is not assigned, so df_merged is unchanged.
df_merged.sort_values('Neighbourhood')

Unnamed: 0,Postcode,Borough,Neighbourhood
58,M5H,Downtown Toronto,"Adelaide,King,Richmond"
12,M1S,Scarborough,Agincourt
14,M1V,Scarborough,"Agincourt North,L'Amoreaux East,Milliken,Steel..."
101,M9V,Etobicoke,"Albion Gardens,Beaumond Heights,Humbergate,Jam..."
89,M8W,Etobicoke,"Alderwood,Long Branch"
28,M3H,North York,"Bathurst Manor,Downsview North,Wilson Heights"
19,M2K,North York,Bayview Village
62,M5M,North York,"Bedford Park,Lawrence Manor East"
56,M5E,Downtown Toronto,Berczy Park
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


## Adding Latitude & Longitude from Geospatial Data

In [20]:
# Load the postal-code coordinates from a local CSV file.
# NOTE: this absolute path is specific to one machine; point it at your own
# copy of Geospatial_Coordinates.csv before running.
# A raw string keeps every backslash literal. The earlier literal contained
# the invalid escape "\D", which newer Python versions warn about.
path = r'C:\Users\drrak\OneDrive\Desktop\Geospatial_Coordinates.csv'
df = pd.read_csv(path)
df.shape

(103, 3)

In [21]:
# Preview the first five rows of the coordinates table.
df.head(5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [22]:
# Attach the coordinates to the neighbourhood table by matching postal codes.
# A keyed merge is used instead of a side-by-side concat: concat pairs rows
# purely by position, so any difference in row order or row count between the
# two tables would silently attach the wrong coordinates.
df_combined = df_merged.merge(
    df,
    left_on='Postcode',
    right_on='Postal Code',
    how='inner',
    validate='one_to_one',  # each postal code must appear once on both sides
)

# Fail loudly if some postal code has no coordinates instead of dropping it quietly.
if len(df_combined) != len(df_merged):
    raise ValueError(
        'Coordinates missing for {} postal code(s)'
        .format(len(df_merged) - len(df_combined)))

df_combined.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",M1C,43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476


In [23]:
# Drop 'Postcode', which duplicates 'Postal Code'.
# errors='ignore' lets this cell be re-run after the column is already gone
# instead of raising a KeyError.
df_combined = df_combined.drop(['Postcode'], axis=1, errors='ignore')

In [24]:
# Preview the first five rows of the combined table.
df_combined.head(5)

Unnamed: 0,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,Scarborough,"Rouge,Malvern",M1B,43.806686,-79.194353
1,Scarborough,"Highland Creek,Rouge Hill,Port Union",M1C,43.784535,-79.160497
2,Scarborough,"Guildwood,Morningside,West Hill",M1E,43.763573,-79.188711
3,Scarborough,Woburn,M1G,43.770992,-79.216917
4,Scarborough,Cedarbrae,M1H,43.773136,-79.239476


In [25]:
#list the column names of the combined table, in their current order
df_combined.columns

Index(['Borough', 'Neighbourhood', 'Postal Code', 'Latitude', 'Longitude'], dtype='object')

In [26]:
#reorder the columns (postal code first) to match the assignment screenshot
df_combined = df_combined[['Postal Code', 'Borough', 'Neighbourhood', 'Latitude', 'Longitude']]

In [27]:
#preview the reordered table: postal code, borough, neighbourhood, latitude, longitude
df_combined.head(5)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Exploring maps with 'West Toronto' from Borough column

In [28]:
#count the rows (postal codes) per borough in the combined table
df_combined['Borough'].value_counts()

North York          24
Downtown Toronto    18
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
York                 5
East York            5
East Toronto         5
Mississauga          1
Queen's Park         1
Name: Borough, dtype: int64

In [31]:
#importing necessary packages
import folium
from geopy.geocoders import Nominatim

In [32]:
#Converting West Toronto Canada location into Latitude and Longitude
address = 'West Toronto, Canada'

# Nominatim expects callers to identify themselves with a user agent;
# constructing it without one is deprecated and rejected by newer geopy releases.
geolocator = Nominatim(user_agent='toronto_postcode_explorer')
location = geolocator.geocode(address)

# geocode() returns None when nothing matches; report that clearly instead of
# failing later with an AttributeError on `location.latitude`.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of West Toronto, Canada are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of West Toronto, Canada are 43.653963, -79.387207.


In [35]:
# Build a folium map centred on the geocoded latitude/longitude, then display it.
map_w_toronto = folium.Map(
    zoom_start=10,
    location=[latitude, longitude],
)
map_w_toronto

In [39]:
# Add one circle marker per row of df_combined, labelled with its neighbourhood name(s).
for lat, lng, label in zip(df_combined['Latitude'], df_combined['Longitude'], df_combined['Neighbourhood']):
    # parse_html=True escapes the label text, so names containing apostrophes
    # (e.g. "L'Amoreaux East") cannot break the generated map markup.
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_w_toronto)

# Display the map with the markers added.
map_w_toronto