# Segmenting and Clustering Neighborhoods in Toronto

First, it's needed to import pandas and BeautifulSoup to get the table from wikipedia:

In [267]:
import pandas as pd

import requests

from bs4 import BeautifulSoup

req = requests.get("https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=945633050")

soup = BeautifulSoup(req.content,'lxml')

table = soup.find_all('table')[0]

df = pd.read_html(str(table))

nbh=pd.DataFrame(df[0])

In [268]:
nbh.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


Let's see how many rows and columns exist in this table without processing it's data.

In [269]:
nbh.shape

(287, 3)

Now i need to process the data, removing the rows that contains "Not assigned" in the Borough column and concatenating the neighborhoods from the same borough. 

First, let's remove the "Not Assigned" rows.

In [270]:
nbh.set_index('Borough', inplace=True) 
nbh.drop('Not assigned', axis=0, inplace=True)
nbh.reset_index(inplace=True)
nbh.head()

Unnamed: 0,Borough,Postcode,Neighbourhood
0,North York,M3A,Parkwoods
1,North York,M4A,Victoria Village
2,Downtown Toronto,M5A,Harbourfront
3,North York,M6A,Lawrence Heights
4,North York,M6A,Lawrence Manor


In [271]:
nbh = nbh[['Postcode','Borough','Neighbourhood']]
nbh.sort_values(by='Postcode', inplace=True)
print(nbh.shape)
nbh.head()

(210, 3)


Unnamed: 0,Postcode,Borough,Neighbourhood
7,M1B,Scarborough,Rouge
8,M1B,Scarborough,Malvern
22,M1C,Scarborough,Port Union
21,M1C,Scarborough,Rouge Hill
20,M1C,Scarborough,Highland Creek


Now, let's concatenate the neighborhoods from the same Borough.

In [303]:
nbh2=nbh.groupby(['Postcode','Borough']).apply(lambda x: ','.join(x['Neighbourhood']))
nbh2 = nbh2.reset_index()
nbh2.columns = ['Postcode','Borough','Neighbourhood']
nbh2.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Port Union,Rouge Hill,Highland Creek"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Golden Mile,Oakridge,Clairlea"
8,M1M,Scarborough,"Cliffcrest,Scarborough Village West,Cliffside"
9,M1N,Scarborough,"Cliffside West,Birch Cliff"


In [304]:
nbh2.shape

(103, 3)

In [281]:
!pip install geocoder
import geocoder



In [298]:
geodata = pd.read_csv('http://cocl.us/Geospatial_data')
geodata.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [299]:
geodata.shape

(103, 3)

In [307]:
nbh2['Latitude']=""
nbh2['Longitude']=""
nbh2.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",,
1,M1C,Scarborough,"Port Union,Rouge Hill,Highland Creek",,
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",,
3,M1G,Scarborough,Woburn,,
4,M1H,Scarborough,Cedarbrae,,


In [311]:
for i in range(102):
    for j in range(102):
        if nbh2.loc[i,'Postcode']==geodata.loc[j,'Postal Code']:
            nbh2.loc[i,'Latitude']=geodata.loc[j, 'Latitude']
            nbh2.loc[i,'Longitude']=geodata.loc[j,'Longitude']

In [312]:
nbh2.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.8067,-79.1944
1,M1C,Scarborough,"Port Union,Rouge Hill,Highland Creek",43.7845,-79.1605
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.7636,-79.1887
3,M1G,Scarborough,Woburn,43.771,-79.2169
4,M1H,Scarborough,Cedarbrae,43.7731,-79.2395


In [314]:
nbh2.head(20)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.8067,-79.1944
1,M1C,Scarborough,"Port Union,Rouge Hill,Highland Creek",43.7845,-79.1605
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.7636,-79.1887
3,M1G,Scarborough,Woburn,43.771,-79.2169
4,M1H,Scarborough,Cedarbrae,43.7731,-79.2395
5,M1J,Scarborough,Scarborough Village,43.7447,-79.2395
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.7279,-79.262
7,M1L,Scarborough,"Golden Mile,Oakridge,Clairlea",43.7111,-79.2846
8,M1M,Scarborough,"Cliffcrest,Scarborough Village West,Cliffside",43.7163,-79.2395
9,M1N,Scarborough,"Cliffside West,Birch Cliff",43.6927,-79.2648
