<h1>Segmenting and Clustering Neighborhoods in Toronto: Latitude and Longitude</h1>

In [44]:
import requests as req
import pandas as pd
import geocoder
import folium
from geopy.geocoders import Nominatim

We use the <strong>BeautifulSoup</strong> package to scrape the Wikipedia page that lists Canadian postal codes beginning with "M".

In [2]:
from bs4 import BeautifulSoup

# Download the Wikipedia page listing postal codes that start with "M".
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of letting an error
# page be parsed as if it were the real article.
response = req.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout = 30)
response.raise_for_status()

# NOTE: despite its name, `wiki_url` holds the page's HTML text, not a URL.
# The name is kept so that anything else referring to it keeps working.
wiki_url = response.text
soup = BeautifulSoup(wiki_url, 'lxml')

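Extract the postal code table from the page into a <strong>pandas DataFrame</strong>. Rows whose borough is "Not assigned" are skipped, and when only the neighborhood is "Not assigned" the borough name is used in its place.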

In [3]:
# Build `postalDf` (one row per postal code) from the first
# "wikitable sortable" table on the scraped page.
columns = ['Postalcode', 'Borough', 'Neighborhood']
postalTable = soup.find("table", {"class": "wikitable sortable"})
if postalTable is None:
    # Fail with a clear message rather than an AttributeError on None below.
    raise ValueError("No table with class 'wikitable sortable' was found on the page")

# Collect plain dicts and build the DataFrame once at the end: appending to a
# DataFrame row by row copies the whole frame on every iteration, and
# DataFrame.append no longer exists in pandas 2.x.
records = []
for row in postalTable.find_all('tr')[1:]:  # [1:] skips the header row
    cells = row.find_all('td')
    if len(cells) < 3:
        # Not a data row (e.g. a row made only of <th> cells); nothing to read.
        continue

    # First text node of each of the three cells, without surrounding whitespace.
    postalcode, borough, neighborhood = (
        str(cell.find(string = True)).strip() for cell in cells[:3]
    )

    if borough == 'Not assigned':
        continue  # postal codes without a borough are dropped
    if neighborhood == 'Not assigned':
        neighborhood = borough  # fall back to the borough name

    records.append({'Postalcode': postalcode, 'Borough': borough, 'Neighborhood': neighborhood})

postalDf = pd.DataFrame(records, columns = columns)

In [4]:
# Dimensions of the scraped table as (rows, columns).
postalDf.shape

(103, 3)

In [5]:
# Read the coordinates file and key the resulting frame by its
# 'Postal Code' column so rows can be looked up by postal code.
coordinates = pd.read_csv("Geospatial_Coordinates.csv").set_index('Postal Code')
coordinates.head()

Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476


In [6]:
# Attach the columns of `coordinates` to each postal code row, matching
# postalDf['Postalcode'] against the index of `coordinates`.
#
# The whole `coordinates` frame is joined. The previous `.loc['Latitude':]`
# was a *row* slice by index label; it returned every row only because
# 'Latitude' happens to sort before the postal codes, and it raises KeyError
# when the index is not sorted.
#
# Any coordinate columns left over from an earlier run of this cell are
# dropped first so the cell can be re-executed without a column-overlap error.
postalDf = (
    postalDf
    .drop(columns = coordinates.columns, errors = 'ignore')
    .join(other = coordinates, on = 'Postalcode')
)
postalDf.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [7]:
# Dimensions of postalDf as (rows, columns).
postalDf.shape

(103, 5)

In [63]:
# Look up Toronto's coordinates to centre the map on.
# The query previously ended in "Ottawa", which is a different city; Toronto
# is in the province of Ontario.
toronto = Nominatim(user_agent = 'toronto').geocode('Toronto, Ontario, Canada')
if toronto is None:
    # geocode() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on `toronto.latitude`.
    raise ValueError("Nominatim returned no result for 'Toronto, Ontario, Canada'")

torontoMap = folium.Map([toronto.latitude, toronto.longitude], zoom_start = 11)

# One circle marker per postal code row; clicking it shows the borough name.
for lat, long, borough_name in zip(postalDf['Latitude'], postalDf['Longitude'], postalDf['Borough']):
    folium.CircleMarker(
        location = [lat, long],
        radius = 5,
        # parse_html belongs to Popup, not CircleMarker, so it is set only here.
        popup = folium.Popup(borough_name, parse_html = True),
        color = 'blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7).add_to(torontoMap)
torontoMap