# Segmenting and Clustering Neighborhoods in Toronto

In [1]:
#if read_html raises an error involving lxml, uncomment the following two lines and re-run
# !conda update -n base -c defaults conda --yes
#!conda install -c anaconda lxml --yes
import pandas as pd
#Wikipedia page holding the table of postal codes to scrape
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

#read_html returns a list with one dataframe per table found on the page;
#keep the first table and use its first row as the column header
df = pd.read_html(io=url, header=0)[0]

#preview the first ten scraped rows
df.head(n=10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


In [2]:
#some postcodes have a Borough but no Neighbourhood (e.g. M7A, Queen's Park):
#use the Borough as the Neighbourhood for those rows, then drop every row
#whose Borough is still 'Not assigned'.
#rows are selected by value rather than by a hard-coded position, so the cell
#gives the same result when it is re-run (a positional df.iat[8, 2] would hit a
#different row once the unassigned rows have been dropped) or when the order
#of the scraped table changes.
df = (
    df.assign(
        #keep the Neighbourhood where one is assigned, otherwise fall back to the Borough
        Neighbourhood=df['Neighbourhood'].where(
            df['Neighbourhood'] != 'Not assigned', df['Borough']
        )
    )
    .loc[lambda frame: frame['Borough'] != 'Not assigned']
)

#groups by Postcode: keeps the first Borough seen for the postcode and joins
#all of its neighbourhoods into one comma delimited string
df1 = df.groupby('Postcode').agg({'Borough':'first', 'Neighbourhood': ', '.join}).reset_index()

#quick check to make sure the code did what I wanted
df1.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [3]:
#sanity check: (number of rows, number of columns) of the grouped dataframe df1
df1.shape

(103, 3)

In [4]:
#lookup table with the latitude and longitude of each postal code
df2 = pd.read_csv("Geospatial_Coordinates.csv")

#attaches the coordinates to every postcode in df1, matching on the column "Postcode".
#a left join keeps exactly the rows of df1; an outer join would also append
#postcodes that only exist in the csv, leaving their Borough/Neighbourhood as NaN.
#validate raises a MergeError if either side repeats a Postcode, instead of
#silently duplicating rows in the result.
df3 = pd.merge(df1, df2, on='Postcode', how='left', validate='one_to_one')
df3.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848
