In [1]:
from io import StringIO

from bs4 import BeautifulSoup
import folium
from geopy.geocoders import Nominatim
import pandas as pd
import requests

# Open the saved page in binary mode so BeautifulSoup works out the document's
# encoding itself instead of depending on the platform's default text encoding.
with open('List of postal codes of Canada_ M - Wikipedia.htm', 'rb') as html_file:
    soup = BeautifulSoup(html_file, 'lxml')

# The first <table> element on the page is taken as the postal-code table.
table = soup.find_all('table')[0]
# read_html returns a list of DataFrames. The markup is wrapped in StringIO
# because passing a literal HTML string is deprecated in recent pandas releases.
df = pd.read_html(StringIO(str(table)))
borough_df = df[0]
borough_df.describe()

Unnamed: 0,Postcode,Borough,Neighbourhood
count,289,289,289
unique,180,12,210
top,M9V,Not assigned,Not assigned
freq,8,77,78


<b><i>Get names of indexes for which Borough has value 'Not assigned'</i></b>

In [2]:
# Index labels of every row whose Borough is the 'Not assigned' placeholder.
indexNames = borough_df.loc[borough_df['Borough'].eq('Not assigned')].index

print(indexNames)

Int64Index([  0,   1,   9,  13,  20,  21,  30,  36,  37,  45,  46,  50,  51,
             52,  54,  55,  59,  60,  61,  73,  74,  75,  88,  89,  90, 104,
            105, 106, 120, 121, 136, 137, 148, 149, 155, 161, 162, 167, 175,
            181, 182, 188, 189, 190, 194, 195, 201, 202, 203, 204, 209, 210,
            223, 224, 238, 239, 242, 243, 248, 249, 254, 255, 259, 260, 261,
            262, 264, 265, 275, 276, 277, 278, 279, 280, 281, 282, 288],
           dtype='int64')


<b><i>Drop the rows with these index labels from the DataFrame</i></b>

In [3]:
# Rebind to a new frame instead of dropping in place, so the frame parsed in
# the first cell is not silently mutated. errors='ignore' makes re-running this
# cell a no-op rather than a KeyError once the labels are already gone.
borough_df = borough_df.drop(indexNames, errors='ignore')
borough_df.describe()

Unnamed: 0,Postcode,Borough,Neighbourhood
count,212,212,212
unique,103,11,210
top,M8Y,Etobicoke,Runnymede
freq,8,45,2


<b><i>Replace 'Not assigned' Neighbourhood values with the Borough value from the same row</i></b>

In [4]:
# Rows that have a borough but no named neighbourhood.
neighbourhood_missing = borough_df['Neighbourhood'] == 'Not assigned'
# Copy the borough name across with one label-aligned .loc assignment.
# The previous form passed a Series as the `value` of Series.replace (not a
# documented usage) and mutated the column in place through attribute access,
# which does not update the frame when pandas copy-on-write is enabled.
borough_df.loc[neighbourhood_missing, 'Neighbourhood'] = borough_df.loc[neighbourhood_missing, 'Borough']

<b><i>Group and sort by postcode adding commas to Neighbourhood</i></b>

In [5]:
# Collapse the rows of each (Postcode, Borough) pair into one comma-separated
# Neighbourhood string.
pd_series = (
    borough_df
    .groupby(["Postcode", "Borough"])['Neighbourhood']
    .apply(','.join)
)
# Turn the grouped Series back into a flat frame ordered by postcode.
borough_df = (
    pd_series
    .to_frame()
    .reset_index()
    .sort_values(by='Postcode')
)

<b><i>Merge borough and coordinate dataframes, drop extra columns</i></b>

In [6]:
# Coordinates table; it is joined below on its 'Postal Code' column.
geo_df = pd.read_csv("Geospatial_Coordinates.csv")

# Inner-join on the postal code, then discard the duplicated key column that
# comes from the right-hand frame.
toronto_data = (
    borough_df
    .merge(geo_df, how='inner', left_on='Postcode', right_on='Postal Code')
    .drop(columns='Postal Code')
)
# Positional rename of the five remaining columns.
toronto_data.columns = ['PostalCode', 'Borough', 'Neighborhood', 'Latitude', 'Longitude']
toronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


<b><i>Get Toronto latitude and longitude</i></b>

In [8]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


<b><i>Create and show a folium map, highlighting in red the neighbourhoods whose name contains 'Toronto'</i></b>

In [14]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# One circle marker per row of toronto_data. Markers whose neighbourhood string
# contains 'Toronto' are drawn in red, all others in blue; the colour is the
# only thing that differs, so the marker is built in a single place.
for lat, lng, neighborhood in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighborhood']):
    marker_color = 'red' if 'Toronto' in neighborhood else 'blue'
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(neighborhood, parse_html=True),
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)

# Last expression of the cell: renders the map inline.
map_toronto