# Capstone assignment - Toronto neighborhood clustering

In [1]:
from bs4 import BeautifulSoup

In [2]:
import requests

In [3]:
# Download the Wikipedia page listing Canadian postal codes that start with "M".
# The timeout stops the notebook from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error into an exception instead of letting an
# error page be parsed as if it were the article.
response = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", timeout=30)
response.raise_for_status()
source = response.text

In [4]:
# Parse the downloaded HTML with the lxml parser.
soup = BeautifulSoup(markup=source, features='lxml')

In [5]:
#print(soup.prettify())   

In [6]:
# Grab the first <table> whose class attribute is "wikitable sortable".
table = soup.find("table", attrs={"class": "wikitable sortable"})
if table is None:
    # find() returns None when nothing matches; fail here with a clear message
    # rather than with an AttributeError in a later cell.
    raise ValueError('No table with class "wikitable sortable" was found in the page')

Function that parses an HTML segment starting with a `table` tag, followed by multiple 'tr' (table row) tags containing 'td' (table data) tags. It returns a list of rows, each of which is a list of column values. Header cells ('th') are only read from the first row.
   

In [7]:
def tableDataText(table):
    """Extract the cell text of an HTML table as a list of rows.

    Parameters
    ----------
    table : object
        Anything exposing ``find_all('tr')`` whose rows in turn expose
        ``find_all(tag)`` and whose cells expose ``get_text(strip=True)``
        (e.g. a BeautifulSoup ``<table>`` tag).

    Returns
    -------
    list of list of str
        One inner list per row. If the first row contains 'th' cells, their
        text is returned as the first inner list and that row's 'td' cells are
        not read. Every remaining row contributes the text of its 'td' cells.
        A table without any 'tr' rows yields an empty list.
    """

    def rowgetDataText(tr, coltag='td'):
        """Return the stripped text of every `coltag` ('td' or 'th') cell in `tr`."""
        return [cell.get_text(strip=True) for cell in tr.find_all(coltag)]

    trs = table.find_all('tr')
    if not trs:
        # Nothing to parse; indexing trs[0] below would raise IndexError.
        return []

    rows = []
    headerow = rowgetDataText(trs[0], 'th')
    if headerow:  # a header row exists: emit it first and skip it in the data pass
        rows.append(headerow)
        trs = trs[1:]
    rows.extend(rowgetDataText(tr, 'td') for tr in trs)
    return rows

In [8]:
# Convert the scraped table into a list of rows (header row first, if present).
list_table = tableDataText(table=table)

In [9]:
import pandas as pd

# The first parsed row holds the column names; the rest are the records.
header = list_table[0]
records = list_table[1:]
dftable = pd.DataFrame(data=records, columns=header)

Drop the "Not assigned" rows

In [10]:
# Flag the rows whose borough is the placeholder value 'Not assigned'.
is_not_assigned = dftable['Borough'] == 'Not assigned'
dftable_notassigned = dftable[is_not_assigned]

# Keep everything else.
dftable = dftable[~is_not_assigned]


In [11]:
# Show (rows, columns) of the filtered frame as a quick sanity check.
dftable.shape

(103, 3)

In [12]:
# CSV with one latitude/longitude pair per postal code.
GEODATA_URL = "http://cocl.us/Geospatial_data"
geodata = pd.read_csv(GEODATA_URL)


Rename the column so that the two data frames share an identical 'Postal code' column.
Then merge the two data frames on 'Postal code'.

In [13]:
# Match the key column's spelling to the scraped table so the two frames can be
# merged on it. Reassigning (rather than inplace=True) leaves the originally
# loaded frame object unmodified.
geodata = geodata.rename(columns={'Postal Code': 'Postal code'})
geodata.columns

Index(['Postal code', 'Latitude', 'Longitude'], dtype='object')

In [16]:
# Attach coordinates to every postal code via a join on the shared key column.
neighbourhoods = dftable.merge(right=geodata, on='Postal code')
neighbourhoods

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.654260,-79.360636
3,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,Parkview Hill / Woodbine Gardens,43.706397,-79.309937
9,M5B,Downtown Toronto,Garden District / Ryerson,43.657162,-79.378937


In [20]:
# Matplotlib and associated plotting modules
#import matplotlib.cm as cm
#import matplotlib.colors as colors

# import k-means from clustering stage
#from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0  
!pip install folium
import folium # map rendering library

print('Libraries imported.')

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/fd/a0/ccb3094026649cda4acd55bf2c3822bb8c277eb11446d13d384e5be35257/folium-0.10.1-py2.py3-none-any.whl (91kB)
[K     |████████████████████████████████| 92kB 5.6MB/s eta 0:00:011
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/81/6d/31c83485189a2521a75b4130f1fee5364f772a0375f81afff619004e5237/branca-0.4.0-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.0 folium-0.10.1
Libraries imported.


In [21]:
# Coordinates used as the initial centre of the map.
latitude = 43.662696
longitude = -79.400049
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per row, with a "<neighborhood>, <borough>" popup.
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in neighbourhoods[marker_columns].itertuples(index=False, name=None):
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)
map_toronto