# Toronto Clustering

## Obtaining and cleaning data

In [60]:
#import libraries
import pandas as pd
import numpy as np

#Read html making "Not assigned" as NaN
tables = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', header=0,
                      na_values='Not assigned')

#Defines table expected heading
headings = ['Postal Code', 'Borough', 'Neighborhood']

#Loop through the parsed tables to identify the target one
for table in tables:
    #Comparing as a plain list means tables with fewer than three columns simply do not match
    if list(table.columns[:3]) == headings:
        break
else:
    #No table matched: fail loudly instead of silently continuing with the last table parsed
    raise ValueError('No table with headings {} found on the page'.format(headings))

#Keeps the three expected columns and drops NaN Boroughs.
#.copy() gives an independent frame so the assignments below modify df itself
df = table[headings].dropna(subset=['Borough']).copy()

#Renames columns to keep a standard
df.columns = ['PostalCode','Borough','Neighborhood']

#Fill missing Neighborhoods with the Borough name.
#"Not assigned" cells were read as NaN (see na_values above), so NaN must be checked as
#well as the empty string. A single .loc[mask, column] assignment writes into df, whereas
#chained indexing (df.loc[index]['Neighborhood'] = ...) only modifies a temporary copy.
missing_neighborhood = df['Neighborhood'].isna() | (df['Neighborhood'] == "")
df.loc[missing_neighborhood, 'Neighborhood'] = df.loc[missing_neighborhood, 'Borough']

#Group Neighborhoods by Postal Code: keep the first Borough seen for each code and join
#the Neighborhood names with ", ". A plain .sum() would concatenate the strings with no
#separator and would repeat the Borough once per grouped row.
#PostalCode stays as the index, so the frame still has two columns.
df = df.groupby('PostalCode').agg({'Borough': 'first', 'Neighborhood': ', '.join})

df.shape

(103, 2)

## Acquiring Coordinates

In [47]:
#Source of the postal code -> coordinates lookup and the column names used in this notebook
geospat_url = 'https://cocl.us/Geospatial_data'
geospat_columns = ['PostalCode', 'Latitude', 'Longitude']

#Download the lookup table and standardise its headers so the key column is named 'PostalCode'
geospat = pd.read_csv(geospat_url)
geospat.columns = geospat_columns

geospat.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [61]:
#Attach Latitude/Longitude to each postal code.
#An inner join (the pandas default) keeps only the codes present in both tables.
df = df.merge(geospat, how='inner', on='PostalCode')

#Preview the first rows of the merged table
df.head(11)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848
