Import required libraries

In [5]:
import pandas as pd
import requests
from bs4 import BeautifulSoup


Fetch the HTML of the Wikipedia page, then create a BeautifulSoup object to parse it

In [6]:
# Download the Wikipedia "List of postal codes of Canada: M" page as text.
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces HTTP errors here instead of letting an error
# page be handed to the parser further down.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
page = response.text

In [7]:
# Parse the downloaded HTML text using the lxml parser.
soup = BeautifulSoup(markup=page, features='lxml')

1 - Find the table with class wikitable in the parsed HTML

2 - Find all the tr tags

3 - Extract relevant values within each tr/td tag, append into lists

4 - Create a dataframe based on lists created

5 - Exclude rows where Borough = 'Not assigned'

6 - Set Neighbourhood = Borough where Neighbourhood is not assigned

7 - Combine rows with the same postcode & borough, then combine Neighbourhoods

In [14]:
## 1 - Find the table with class wikitable in the parsed HTML
toronto_table = soup.find('table', {'class': 'wikitable sortable'})
if toronto_table is None:
    # Fail with a clear message rather than an AttributeError on the next line.
    raise ValueError("No table with class 'wikitable sortable' was found in the page")

## 2 - Find all the tr tags
postcode = toronto_table.find_all('tr')

## 3 - Extract relevant values within each tr/td tag, append into lists
list_postcode = []
list_borough = []
list_neighbour = []

# Walk every row of the table. The earlier bound, range(1, len(postcode[2:])),
# stopped two rows short and silently dropped the last two data rows.
for row in postcode:
    cells = row.find_all('td')
    if len(cells) < 3:
        # The header row holds <th> cells, so it yields no <td>; incomplete
        # rows are skipped as well instead of raising IndexError.
        continue
    # strip() removes the trailing newline that the last cell of each row carries,
    # plus any other surrounding whitespace that would break the comparisons below.
    list_postcode.append(cells[0].text.strip())
    list_borough.append(cells[1].text.strip())
    list_neighbour.append(cells[2].text.strip())

## 4 - Create a dataframe based on lists created
df_toronto = pd.DataFrame({'Postcode': list_postcode, 'Borough': list_borough, 'Neighbourhood': list_neighbour})

## 5 - Exclude rows where Borough = 'Not assigned'
df_toronto = df_toronto[df_toronto['Borough'] != 'Not assigned'].reset_index(drop=True)

## 6 - Set Neighbourhood = Borough where Neighbourhood is not assigned
# Vectorized replacement: wherever the condition holds, take the Borough value.
df_toronto['Neighbourhood'] = df_toronto['Neighbourhood'].mask(
    df_toronto['Neighbourhood'] == 'Not assigned',
    df_toronto['Borough'],
)

## 7 - Combine rows with the same postcode & borough, then combine Neighbourhoods
# `join` stays defined at cell level in case other cells refer to it.
join = ", ".join
df_toronto_merge = (
    df_toronto
    .groupby(by=['Postcode', 'Borough'])
    .agg({'Neighbourhood': join})
    .sort_values(by='Postcode')
    .reset_index()
)

df_toronto_merge.shape

                          

(103, 3)

Combine the coordinate data with the dataframe, then create a new dataframe filtered to include only boroughs whose name contains "Toronto".

In [26]:
import pandas as pd

# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# where that file exists. Consider a configurable data directory.
# The 'Postal Code' column is renamed so it shares a key name with df_toronto_merge.
df_cord = pd.read_csv('/Users/Chai/Desktop/projects/Geospatial_Coordinates.csv').rename(
    columns={'Postal Code': 'Postcode'}
)

# Left join: every row of df_toronto_merge is kept, with coordinates attached
# where a matching Postcode exists in df_cord.
df_new = df_toronto_merge.merge(df_cord, how='left', on='Postcode')

# Keep only the rows whose Borough text contains "Toronto".
is_toronto_borough = df_new['Borough'].str.contains('Toronto')
df_new_toronto_boroughs = df_new[is_toronto_borough]

df_new_toronto_boroughs.head()


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


Get Toronto Longitude and Latitude

In [24]:
import folium

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

address = 'Toronto, Ontario, Canada'

geolocator = Nominatim(user_agent="toronto_map")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

print('The geograpical coordinate of Toronto, Canada are {}, {}.'.format(latitude, longitude))

Collecting package metadata (repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /Users/Chai/anaconda3

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    conda-4.7.12               |           py36_0         3.0 MB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.1 MB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-1.20.0-py_0

The following packages will be UPDATED:

  conda                                       4.7.11-py36_0 --> 4.7.12-py36_0



Downloading an

Show Toronto Map and mark the boroughs

In [28]:
# Base map centred on the geocoded Toronto coordinates.
# NOTE(review): despite its name, `map_newyork` holds the Toronto map; the name
# is left unchanged so any other cell referring to it keeps working.
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map: one circle per row, with a "neighbourhood, borough" popup
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighbourhood']
for lat, lng, borough, neighbourhood in df_new_toronto_boroughs[marker_columns].itertuples(index=False, name=None):
    popup_text = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(popup_text, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_newyork)

map_newyork

In [32]:
# Display the scraped <table> element to inspect its raw HTML structure
# (the output is the full table markup).
toronto_table

<table class="wikitable sortable">
<tbody><tr>
<th>Postcode</th>
<th>Borough</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Parkwoods" title="Parkwoods">Parkwoods</a>
</td></tr>
<tr>
<td>M4A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Victoria_Village" title="Victoria Village">Victoria Village</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Harbourfront_(Toronto)" title="Harbourfront (Toronto)">Harbourfront</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Regent_Park" title="Regent Park">Regent Park</a>
</td></tr>
<tr>
<td>M6A</td>