# Segmenting and Clustering Neighborhoods in Toronto

#### Import libraries

In [1]:
import pandas as pd
import numpy as np
#from geopy.geocoders import Nominatim

## Getting and Cleaning Data. Part 1

#### Load data from Wikipedia

In [2]:
# Read the Wikipedia page listing Canadian postal codes that start with "M".
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# read_html returns a list with one DataFrame per HTML table it finds;
# the postal-code table is taken to be the first one on the page.
data = pd.read_html(url)

# Rename on a new frame instead of in place, so the parsed tables in `data`
# stay exactly as they were read and `df` does not alias `data[0]`.
df = data[0].rename(columns={"Postal Code": "PostalCode"})

# Fail early with a readable message if the page layout changed and the
# columns the cleaning step relies on are not present.
missing = {"PostalCode", "Borough", "Neighbourhood"} - set(df.columns)
if missing:
    raise ValueError(f"Unexpected table layout; missing columns: {sorted(missing)}")

#### Cleaning data

In [3]:
# Keep only the rows that have an assigned borough.
df = df[df["Borough"] != "Not assigned"]

# A postal code can cover several neighbourhoods, one per row. Collapse them
# into a single row per (PostalCode, Borough) with the names comma-separated.
# sort=False keeps the groups in their order of first appearance.
df = (
    df.groupby(["PostalCode", "Borough"], sort=False)
    .agg(", ".join)
    .reset_index()
)

# If a row has a borough but a "Not assigned" neighbourhood, use the borough
# name as the neighbourhood. Series.mask substitutes row by row from the
# aligned Borough column, and the result is assigned back to the column
# explicitly. The previous chained `replace(..., inplace=True)` with a Series
# as the replacement value is not a row-aligned substitution and does not
# reliably write back to `df` across pandas versions.
df["Neighbourhood"] = df["Neighbourhood"].mask(
    df["Neighbourhood"] == "Not assigned", df["Borough"]
)

df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [4]:
# Show the size of the cleaned table as (rows, columns).
df.shape

(103, 3)

## Getting Coordinates. Part 2

In [6]:
# Read the lookup table that pairs each postal code with a latitude and
# longitude, renaming the key column to "PostalCode" so it matches df.
cord = pd.read_csv("http://cocl.us/Geospatial_data").rename(
    columns={"Postal Code": "PostalCode"}
)
cord.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [7]:
# Attach the coordinates to each postal-code row.
# Columns that cord contributes (everything except the join key) are dropped
# from df first, so re-running this cell does not merge a second time and
# produce suffixed duplicates such as Latitude_x / Latitude_y. On a first run
# the drop is a no-op.
coord_cols = cord.columns.difference(["PostalCode"])
df = df.drop(columns=coord_cols, errors="ignore").merge(
    cord,
    on="PostalCode",
    how="inner",  # rows whose postal code has no coordinates are dropped
    validate="many_to_one",  # raise if cord lists a postal code more than once
)
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
