## Install the required packages and import all libraries

In [1]:
# install BeautifulSoup and lxml parser
! pip install beautifulsoup4
! pip install lxml
from bs4 import BeautifulSoup as soup

! pip install geopy

import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import warnings
warnings.filterwarnings('ignore')
import folium # map rendering library
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
import requests # library to handle requests
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# webscraping imports
import requests
import urllib.request

print('\nLibraries imported.\n')


Libraries imported.



# Part 1: Scrape the Toronto postal codes from Wikipedia

In [2]:
# fetch web page
from io import StringIO
from urllib.request import urlopen

url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# the context manager closes the HTTP response even if reading it fails
with urlopen(url) as response:
    html = response.read()

# web scraper section
bs = soup(html, 'html.parser')
# keep only the elements whose class attribute is "wikitable sortable"
bs = bs.find_all(class_="wikitable sortable")
if not bs:
    # fail with a clear message instead of a parser error further down
    raise ValueError('No "wikitable sortable" table found at {}'.format(url))

# convert scraped web data to dataframe; the markup is wrapped in StringIO
# because passing literal HTML text to read_html is deprecated in newer pandas
postal_codes = pd.read_html(StringIO(str(bs)))[0]

# normalise the column names used by the rest of the notebook
codes = postal_codes.rename(columns= {'Postal Code':'PostalCode', 'Neighbourhood':'Neighborhood'})

In [3]:
# Report the dimensions of the scraped table, then preview its first 15 rows.
codes_shape = codes.shape
print("\nThe data frame's shape is:", codes_shape, '\n')
codes.head(n=15)


The data frame's shape is: (180, 3) 



Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


In [4]:
# Keep only the postal codes whose borough is not the 'Not assigned' placeholder.
is_assigned = codes['Borough'] != 'Not assigned'
codes2 = codes[is_assigned]

# Report the size of the filtered table and preview its first 15 rows.
codes2_shape = codes2.shape
print("\nThe data frame's shape is:", codes2_shape, '\n')
codes2.head(n=15)


The data frame's shape is: (103, 3) 



Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


In [5]:
# Persist the filtered postal codes to disk, leaving the row index out of the file.
codes2.to_csv('codes.csv', index=False)

# Part 2: Add geographic coordinates to each postal code

In [6]:
# Load the class-provided coordinates file and give its key column the same
# name ('PostalCode') that the postal code table uses.
geo_data = (
    pd.read_csv('Geospatial_Coordinates.csv')
    .rename(columns={'Postal Code': 'PostalCode'})
)
geo_data.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [7]:
# Attach latitude/longitude to every postal code. The left join keeps every
# row of codes2, even one without a matching entry in geo_data.
coordinate_columns = geo_data[['PostalCode', 'Latitude', 'Longitude']]
toronto_data = codes2.merge(coordinate_columns, how='left', on='PostalCode')
toronto_data.head(n=15)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [8]:
# Persist the merged table; no index argument is given, so the row index
# is written as the first (unnamed) column of the file.
toronto_data.to_csv('toronto.csv')

# Part 3: Explore and map the Toronto neighborhoods

In [9]:
# Summarise the merged table: distinct boroughs and number of rows.
# The figure reported as "neighborhoods" is the row count of toronto_data.
borough_count = toronto_data['Borough'].nunique(dropna=False)
row_count = len(toronto_data)
summary_line = 'The dataframe has {} boroughs and {} neighborhoods.'.format(
    borough_count, row_count
)

print('\n')
print(summary_line)
print('\n')



The dataframe has 10 boroughs and 103 neighborhoods.




In [10]:
# Look up the coordinates of Toronto; they are used as the centre of the maps.
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; stop here with a
# clear message instead of failing on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('\n')
print('The geograpical coordinate of Toronto, CA are {}, {}.'.format(latitude, longitude))
print('\n')



The geograpical coordinate of Toronto, CA are 43.6534817, -79.3839347.




In [11]:
# Build a folium map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Draw one circle per row of toronto_data; its popup reads "<neighborhood>, <borough>".
marker_rows = zip(
    toronto_data['Latitude'],
    toronto_data['Longitude'],
    toronto_data['Borough'],
    toronto_data['Neighborhood'],
)
for lat, lng, borough, neighborhood in marker_rows:
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

# the last expression renders the map in the notebook
map_toronto

In [12]:
# Restrict the data to the boroughs whose name contains "Toronto".
in_toronto_borough = toronto_data['Borough'].str.contains("Toronto")
toronto_city = toronto_data[in_toronto_borough]
toronto_city.head(n=5)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [13]:
# Summarise the Toronto-only table: distinct boroughs and number of rows.
# The figure reported as "neighborhoods" is the row count of toronto_city.
city_borough_count = toronto_city['Borough'].nunique(dropna=False)
city_row_count = len(toronto_city)
city_summary_line = 'The dataframe has {} boroughs and {} neighborhoods.'.format(
    city_borough_count, city_row_count
)

print('\n')
print(city_summary_line)
print('\n')



The dataframe has 4 boroughs and 39 neighborhoods.




In [14]:
# Build a slightly more zoomed-in folium map centred on the geocoded Toronto coordinates.
map_torontoCity = folium.Map(location=[latitude, longitude], zoom_start=11)

# Draw one circle per row of toronto_city; its popup reads "<neighborhood>, <borough>".
city_marker_rows = zip(
    toronto_city['Latitude'],
    toronto_city['Longitude'],
    toronto_city['Borough'],
    toronto_city['Neighborhood'],
)
for lat, lng, borough, neighborhood in city_marker_rows:
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=7,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_torontoCity)

# the last expression renders the map in the notebook
map_torontoCity