### Install Packages

In [73]:
import sys

# Optional one-time setup: uncomment the lines below to pip-install the
# third-party packages using the same interpreter that runs this notebook
# (sys.executable).
# !{sys.executable} -m pip install tabulate
# !{sys.executable} -m pip install requests
# !{sys.executable} -m pip install beautifulsoup4
# !{sys.executable} -m pip install geocoder
# !{sys.executable} -m pip install folium
# !{sys.executable} -m pip install geopy

### Import Libraries

In [74]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tabulate import tabulate
import matplotlib.pyplot as plt
import warnings
import geocoder
import folium
from geopy.geocoders import Nominatim 

plt.rcParams["figure.figsize"] = [15,8]
warnings.filterwarnings("ignore")
%matplotlib inline

### Parse and Clean Data

In [75]:
# Download the Wikipedia page and parse its first <table> into a DataFrame.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
res = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", timeout=30)
res.raise_for_status()
soup = BeautifulSoup(res.content,'lxml')
table = soup.find_all('table')[0]
df = pd.read_html(str(table))[0]

# drop Borough == Not assigned
# .copy() makes the filtered frame independent of the parsed table, so the
# column assignment below is not a chained assignment on a slice.
df = df[df['Borough'] != 'Not assigned'].copy()

# fill Neighbourhood == Not assigned with Borough
# (vectorised: wherever the condition holds, take the value from Borough)
df['Neighbourhood'] = df['Neighbourhood'].mask(df['Neighbourhood'] == 'Not assigned', df['Borough'])

# aggregate Neighbourhood with same Borough
# Exact duplicate rows are removed first; the remaining neighbourhood names of
# each (Postcode, Borough) pair are joined into one comma-separated string.
df = df.drop_duplicates().groupby(['Postcode','Borough'])['Neighbourhood'].agg(', '.join).reset_index()

In [76]:
# Print the first five rows, then the (rows, columns) shape on its own line.
print(df.head(), df.shape, sep='\n')

  Postcode      Borough                           Neighbourhood
0      M1B  Scarborough                          Rouge, Malvern
1      M1C  Scarborough  Highland Creek, Rouge Hill, Port Union
2      M1E  Scarborough       Guildwood, Morningside, West Hill
3      M1G  Scarborough                                  Woburn
4      M1H  Scarborough                               Cedarbrae
(103, 3)


### Retrieve Geo Data and Merge with df

In [77]:
# Read the geospatial CSV straight from its URL; it is joined to df on its
# 'Postal Code' column in the next step.
df_geo = pd.read_csv(filepath_or_buffer='http://cocl.us/Geospatial_data')

In [78]:
# Left-join df_geo onto df by postal code (every row of df is kept), then
# discard the redundant key column that came from df_geo.
df_clean = (
    df.merge(df_geo, how='left', left_on='Postcode', right_on='Postal Code')
      .drop(columns=['Postal Code'])
)

In [79]:
# Preview the first five rows of the merged frame.
df_clean.head(5)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Visualization

In [80]:
address = 'Toronto'

# Look up the coordinates of `address` through the Nominatim geocoder.
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when no match is found; fail with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [81]:
# Keep only the rows whose borough name contains "Toronto"; missing borough
# values count as non-matches.
df_Toronto = df_clean[df_clean['Borough'].str.contains('Toronto', na=False)]

# Base map centred on the coordinates geocoded above.
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row, with a "<neighbourhood>, <borough>" popup.
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighbourhood']
for lat, lng, borough, neighborhood in df_Toronto[marker_columns].itertuples(index=False, name=None):
    popup = folium.Popup(f'{neighborhood}, {borough}', parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_newyork)

# Last expression of the cell: renders the map in the notebook.
map_newyork