# Import libraries

In [9]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

from geopy.geocoders import Nominatim

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import folium 

# Scrape the Wikipedia page listing Toronto ("M") postal codes

In [10]:
# Download the Wikipedia list of postal codes starting with "M" and parse the HTML.
# The timeout keeps a stalled connection from hanging the kernel indefinitely, and
# raise_for_status() stops the notebook here on an HTTP error instead of letting
# an error page be parsed as if it were the article.
wiki_response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
wiki_response.raise_for_status()
wiki_page = wiki_response.text
soup = BeautifulSoup(wiki_page, 'html.parser')

# Find the table on the Wikipedia page

In [11]:
# Take the first <table> in the parsed page and collect all of its <td> cells.
table = soup.find('table')
if table is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the next line.
    raise ValueError('No <table> element found in the downloaded page')
colvalues = table.find_all('td')

length = len(colvalues)

# The cells are consumed as consecutive (postal code, borough, neighborhood)
# triples. Only complete triples are read, so indexing i+1 / i+2 can never run
# past the end of the list if the cell count is not a multiple of 3.
complete_length = length - (length % 3)

PostalCode = []
Borough = []
Neighborhood = []

for i in range(0, complete_length, 3):
    PostalCode.append(colvalues[i].text.strip())
    Borough.append(colvalues[i + 1].text.strip())
    Neighborhood.append(colvalues[i + 2].text.strip())

# Build dataframe from table

In [12]:
# Assemble the three parallel lists into one frame, one column per list.
df_postalcodes = pd.DataFrame(
    {
        'PostalCode': PostalCode,
        'Borough': Borough,
        'Neighborhood': Neighborhood,
    }
)

# Cleanse the dataframe: drop rows whose borough is "Not assigned", then replace any "Not assigned" neighborhood with the name of its borough

In [13]:
# Remove (in place) every row whose borough is the placeholder 'Not assigned'.
borough_unassigned = df_postalcodes['Borough'] == 'Not assigned'
df_postalcodes.drop(index=df_postalcodes.index[borough_unassigned], inplace=True)

# For the remaining rows, a placeholder neighborhood takes the borough's name.
neighborhood_unassigned = df_postalcodes['Neighborhood'] == 'Not assigned'
df_postalcodes.loc[neighborhood_unassigned, 'Neighborhood'] = (
    df_postalcodes.loc[neighborhood_unassigned, 'Borough']
)

# Group the data by Postal Code and Borough

In [14]:
# Collapse rows sharing a postal code and borough into one row whose
# Neighborhood value is the comma-separated list of the group's neighborhoods.
group_keys = ['PostalCode', 'Borough']
joined_neighborhoods = (
    df_postalcodes
    .groupby(group_keys)['Neighborhood']
    .apply(lambda names: ', '.join(names))
)
df_postalcodes_grouped = joined_neighborhoods.reset_index()
df_postalcodes_grouped.columns = group_keys + ['Neighborhood']

# Read in geospatial data from a CSV file and join the dataframes on the "PostalCode" column; this creates a single dataframe providing the geographical coordinates of each postal code

In [15]:
# Load the coordinates CSV, then rename its columns so the key column is
# called 'PostalCode', matching the grouped postal-code frame.
geospatial_csv_url = 'http://cocl.us/Geospatial_data'
lat_long_columns = ['PostalCode', 'Latitude', 'Longitude']

df_lat_long = pd.read_csv(geospatial_csv_url)
df_lat_long.columns = lat_long_columns

In [16]:
# Inner join: keep only postal codes present in both frames.
df_lat_long_join = df_postalcodes_grouped.merge(
    df_lat_long,
    how='inner',
    on='PostalCode',
)


In [17]:
# Preview the first ten joined rows rather than rendering the entire frame.
df_lat_long_join.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


# Make a copy of the dataframe above and use it for clustering the different neighborhoods in Toronto

In [23]:
# Independent copy holding only the columns used from here on, so later
# changes to `neighborhoods` do not touch df_lat_long_join.
neighborhood_columns = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']
neighborhoods = df_lat_long_join.loc[:, neighborhood_columns].copy()

# Use geopy library to get the latitude and longitude values of Toronto

In [24]:
address = 'Toronto, Canada'

# Nominatim must be given an identifying user agent: omitting it is deprecated
# in older geopy releases and raises a configuration error in geopy 2.x.
geolocator = Nominatim(user_agent='toronto_neighborhoods_notebook')
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the address cannot be resolved.
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of Toronto are 43.653963, -79.387207.


# Create map of Toronto using latitude and longitude values

In [25]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row of `neighborhoods`, with a popup reading
# "<neighborhood>, <borough>".
for lat, lng, borough, neighborhood in zip(
    neighborhoods['Latitude'],
    neighborhoods['Longitude'],
    neighborhoods['Borough'],
    neighborhoods['Neighborhood'],
):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html is an option of folium.Popup, so it is passed only here and
    # not to CircleMarker, where it is not a marker style option.
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

map_toronto