In [1]:
from bs4 import BeautifulSoup
from tabulate import tabulate
import pandas as pd
import urllib.request
import requests

Use the Notebook to build the code to scrape the following Wikipedia page, https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M, in order to obtain the data that is in the table of postal codes and to transform the data into a pandas dataframe

In [2]:
url = r'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

filename = requests.get(url)

html_content = filename.text

soup = BeautifulSoup(html_content,'html.parser')

df_table = soup.table

table_rows = df_table.find_all('tr')

res = []

for tr in table_rows:
    td = tr.find_all('td')
    row = [tr.text.strip() for tr in td if tr.text.strip()]
    
    if row:
        res.append(row)
        
df_final = pd.DataFrame(res, columns=['Postcode','Borough','Neighbourhood'])

df_final = df_final[df_final.Borough != 'Not assigned']

df_final.loc[df_final['Neighbourhood'] == 'Not assigned', 'Neighbourhood'] = df_final['Borough']

df_final.reset_index(drop=True, inplace=True)

df_final

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights
5,M6A,North York,Lawrence Manor
6,M7A,Queen's Park,Queen's Park
7,M9A,Etobicoke,Islington Avenue
8,M1B,Scarborough,Rouge
9,M1B,Scarborough,Malvern


In [3]:
df_uniques = df_final.drop_duplicates(['Postcode'])

df_duplicates = df_final[df_final.duplicated(['Postcode'], keep='first')]

for index, each_value in df_duplicates.iterrows():
    df_uniques.loc[df_uniques['Postcode'] == each_value['Postcode'], 'Neighbourhood'] = df_uniques['Neighbourhood']+', '+each_value['Neighbourhood']

df_uniques.reset_index(drop=True, inplace=True)

df_uniques

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


In [4]:
df_uniques.shape

(103, 3)

Pip install the geocoder library

In [5]:
!pip -q install geocoder

In [6]:
# The code was removed by Watson Studio for sharing.

In [7]:
import geocoder # import geocoder

# lists used for latitude and longitude
list_latitude = []
list_longitude = []

for index, each_value in df_uniques.iterrows():
    # initialize your variable to None
    lat_lng_coords = None

    # loop until you get the coordinates
    while(lat_lng_coords is None):
        postal_code = each_value['Postcode']
        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng

    latitude = lat_lng_coords[0]
    list_latitude.append(latitude)
    longitude = lat_lng_coords[1]
    list_longitude.append(longitude)

df_uniques['Latitude'] = list_latitude
df_uniques['Longitude'] = list_longitude

df_uniques

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.654260,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


Pip install the folium library

In [8]:
!pip -q install folium

Create a Map from Toronto using the values of latitude & longitude

In [9]:
import folium # map rendering library

# Latitude & Longitude for Toronto
latitude = 43.651070
longitude = -79.347015

# Create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

Using the map above created and the information about the neighborhoods that we have in the df_uniques dataframe, we can show the map with all the labels using all the data

In [10]:
for lat, lng, borough, neighborhood in zip(df_uniques['Latitude'], df_uniques['Longitude'], df_uniques['Borough'], df_uniques['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto