### Segmenting and Clustering Neighborhoods in Toronto with Coordinates

In [2]:
import numpy as np 
import pandas as pd 
import json 
import requests 
from pandas.io.json import json_normalize 
import matplotlib.cm as cm
import matplotlib.colors as colors 
from sklearn.cluster import KMeans 

#!conda install -c conda-forge beautifulsoup4 --yes
from bs4 import BeautifulSoup 

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim  

import folium 

print("Libraries downloaded and imported.")

Libraries downloaded and imported.


##### Dataset

In [3]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
data = requests.get(url).text
# data

In [4]:
soup = BeautifulSoup(data, 'html.parser')

In [5]:
soup

<!DOCTYPE html>

<html class="client-nojs" dir="ltr" lang="en">
<head>
<meta charset="utf-8"/>
<title>List of postal codes of Canada: M - Wikipedia</title>
<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"X@HFwQpAAEEAAAvkm@0AAABY","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":995657573,"wgRevisionId":995657573,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Short description is different from Wikidata","Communications in Ontario","Postal codes

In [6]:
postalcode = []
borough = []
neighborhood = []

In [7]:
# find all the rows of the table
soup.find('table').find_all('tr')

# for each row of the table, find all the table data
for row in soup.find('table').find_all('tr'):
    cells = row.find_all('td')

In [8]:
for row in soup.find('table').find_all('tr'):
    cells = row.find_all('td')
    if(len(cells) > 0):
        postalcode.append(cells[0].text.rstrip('\n'))
        borough.append(cells[1].text.rstrip('\n'))
        neighborhood.append(cells[2].text.rstrip('\n'))

##### Transformig into Dataframe

In [9]:
toronto_df = pd.DataFrame({"PostalCode": postalcode,
                           "Borough": borough,
                           "Neighborhood": neighborhood})

toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


##### Removing 'Not assogned values'

In [10]:
toronto_df_drop = toronto_df[toronto_df.Borough != "Not assigned"].reset_index(drop=True)
toronto_df_drop.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


##### Combinig the same Postalcodes into one Row

In [11]:
toronto_df_grouped = toronto_df_drop.groupby(["PostalCode", "Borough"], as_index=False).agg(lambda x: ", ".join(x))
toronto_df_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


##### Removing 'Not assogned values'

In [12]:
for index, row in toronto_df_grouped.iterrows():
    if row["Neighborhood"] == "Not assigned":
        row["Neighborhood"] = row["Borough"]
        
toronto_df_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


##### Making the last Arrangmements

In [13]:
column_names = ["PostalCode", "Borough", "Neighborhood"]
test_df = pd.DataFrame(columns=column_names)

test_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]

for postcode in test_list:
    test_df = test_df.append(toronto_df_grouped[toronto_df_grouped["PostalCode"]==postcode], ignore_index=True)
    
test_df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M5G,Downtown Toronto,Central Bay Street
1,M2H,North York,Hillcrest Village
2,M4B,East York,"Parkview Hill, Woodbine Gardens"
3,M1J,Scarborough,Scarborough Village
4,M4G,East York,Leaside
5,M4M,East Toronto,Studio District
6,M1R,Scarborough,"Wexford, Maryvale"
7,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."
8,M9L,North York,Humber Summit
9,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har..."


##### Shape of the Dataframe

In [14]:
test_df.shape

(12, 3)

#### Dowloading Coordinates using 'pd.read_csv'

In [15]:
coordinates = pd.read_csv('https://cocl.us/Geospatial_data')
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Correcting the Column Names

In [16]:
coordinates.rename(columns={"Postal Code": "PostalCode"}, inplace=True)
coordinates.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Adding Coordinate Information into our DataFrame

In [17]:
toronto_df_new = toronto_df_grouped.merge(coordinates, on="PostalCode", how="left")
toronto_df_new.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


#### Shape of the DataFrame

In [18]:
toronto_df_new.shape

(103, 5)

#### Check

In [19]:
column_names = ["PostalCode", "Borough", "Neighborhood", "Latitude", "Longitude"]
test_df = pd.DataFrame(columns=column_names)

test_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]

for postcode in test_list:
    test_df = test_df.append(toronto_df_new[toronto_df_new["PostalCode"]==postcode], ignore_index=True)
    
test_df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
1,M2H,North York,Hillcrest Village,43.803762,-79.363452
2,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
3,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
4,M4G,East York,Leaside,43.70906,-79.363452
5,M4M,East Toronto,Studio District,43.659526,-79.340923
6,M1R,Scarborough,"Wexford, Maryvale",43.750072,-79.295849
7,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
8,M9L,North York,Humber Summit,43.756303,-79.565963
9,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442


#### Create a map and markers

In [23]:
address = 'Toronto'

geolocator = Nominatim(user_agent="myapplication")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [24]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_df_new['Latitude'], toronto_df_new['Longitude'], toronto_df_new['Borough'], toronto_df_new['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    
map_toronto

#### See the unique values

In [25]:
toronto_df_new.groupby('Borough').count()

Unnamed: 0_level_0,PostalCode,Neighborhood,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Central Toronto,9,9,9,9
Downtown Toronto,19,19,19,19
East Toronto,5,5,5,5
East York,5,5,5,5
Etobicoke,12,12,12,12
Mississauga,1,1,1,1
North York,24,24,24,24
Scarborough,17,17,17,17
West Toronto,6,6,6,6
York,5,5,5,5


In [26]:
print('There are {} uniques categories.'.format(len(toronto_df_new['Borough'].unique())))

There are 10 uniques categories.


#### Defining Foursquare Credentials and Version

In [40]:
CLIENT_ID = '' # your Foursquare ID
CLIENT_SECRET = ''  # your Foursquare Secret
VERSION = '20180604' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 
CLIENT_SECRET:


In [28]:
radius = 500
LIMIT = 100

venues = []

for lat, long, post, borough, neighborhood in zip(toronto_df_new['Latitude'], toronto_df_new['Longitude'], toronto_df_new['PostalCode'], toronto_df_new['Borough'], 
                                                  toronto_df_new['Neighborhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id=ZYLC4Q3I000O4R32DVJWJJTOTHCGC4O02TXYEPLDAS211SPQ&client_secret=OPKHF1MTRWKRHVR2DAV0IT1IK2H2XZDXJYTCNHVY5L44T55H&v=20180605 \
     &ll=43.653963,-79.387207&radius=500&limit=100".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
        venues.append((
            post, 
            borough,
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

#### Converting venues list to new dataframe

In [29]:
venues_df = pd.DataFrame(venues)


venues_df.columns = ['PostalCode', 'Borough', 'Neighborhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(4738, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,Downtown Toronto,43.653232,-79.385296,Neighborhood
1,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,Japango,43.655268,-79.385165,Sushi Restaurant
2,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,Textile Museum of Canada,43.654396,-79.3865,Art Museum
3,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,Cafe Plenty,43.654571,-79.38945,Café
4,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,Poke Guys,43.654895,-79.385052,Poke Place


#### Analysing each Area

In [30]:
# one hot encoding
toronto_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add postal, borough and neighborhood column back to dataframe
toronto_onehot['PostalCode'] = venues_df['PostalCode'] 
toronto_onehot['Borough'] = venues_df['Borough'] 
toronto_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move postal, borough and neighborhood column to the first column
fixed_columns = list(toronto_onehot.columns[-3:]) + list(toronto_onehot.columns[:-3])
toronto_onehot = toronto_onehot[fixed_columns]

print(toronto_onehot.shape)
toronto_onehot.head()

(4738, 38)


Unnamed: 0,PostalCode,Borough,Neighborhoods,Art Gallery,Art Museum,Arts & Crafts Store,Bar,Breakfast Spot,Bubble Tea Shop,Café,...,Plaza,Poke Place,Pub,Ramen Restaurant,Salon / Barbershop,Seafood Restaurant,Smoke Shop,Sushi Restaurant,University,Vegetarian / Vegan Restaurant
0,M1B,Scarborough,"Malvern, Rouge",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M1B,Scarborough,"Malvern, Rouge",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,M1B,Scarborough,"Malvern, Rouge",0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M1B,Scarborough,"Malvern, Rouge",0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,M1B,Scarborough,"Malvern, Rouge",0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0


#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [31]:
toronto_grouped = toronto_onehot.groupby(["PostalCode", "Borough", "Neighborhoods"]).mean().reset_index()
toronto_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighborhoods,Art Gallery,Art Museum,Arts & Crafts Store,Bar,Breakfast Spot,Bubble Tea Shop,Café,...,Plaza,Poke Place,Pub,Ramen Restaurant,Salon / Barbershop,Seafood Restaurant,Smoke Shop,Sushi Restaurant,University,Vegetarian / Vegan Restaurant
0,M1B,Scarborough,"Malvern, Rouge",0.086957,0.021739,0.021739,0.021739,0.021739,0.021739,0.065217,...,0.021739,0.021739,0.021739,0.043478,0.021739,0.021739,0.021739,0.043478,0.021739,0.021739
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",0.086957,0.021739,0.021739,0.021739,0.021739,0.021739,0.065217,...,0.021739,0.021739,0.021739,0.043478,0.021739,0.021739,0.021739,0.043478,0.021739,0.021739
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",0.086957,0.021739,0.021739,0.021739,0.021739,0.021739,0.065217,...,0.021739,0.021739,0.021739,0.043478,0.021739,0.021739,0.021739,0.043478,0.021739,0.021739
3,M1G,Scarborough,Woburn,0.086957,0.021739,0.021739,0.021739,0.021739,0.021739,0.065217,...,0.021739,0.021739,0.021739,0.043478,0.021739,0.021739,0.021739,0.043478,0.021739,0.021739
4,M1H,Scarborough,Cedarbrae,0.086957,0.021739,0.021739,0.021739,0.021739,0.021739,0.065217,...,0.021739,0.021739,0.021739,0.043478,0.021739,0.021739,0.021739,0.043478,0.021739,0.021739


#### Let's confirm the new size

In [32]:
toronto_grouped.shape

(103, 38)

#### Let's put that into a *pandas* dataframe

In [33]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
areaColumns = ['PostalCode', 'Borough', 'Neighborhoods']
freqColumns = []
for ind in np.arange(num_top_venues):
    try:
        freqColumns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        freqColumns.append('{}th Most Common Venue'.format(ind+1))
columns = areaColumns+freqColumns

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['PostalCode'] = toronto_grouped['PostalCode']
neighborhoods_venues_sorted['Borough'] = toronto_grouped['Borough']
neighborhoods_venues_sorted['Neighborhoods'] = toronto_grouped['Neighborhoods']

for ind in np.arange(toronto_grouped.shape[0]):
    row_categories = toronto_grouped.iloc[ind, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    neighborhoods_venues_sorted.iloc[ind, 3:] = row_categories_sorted.index.values[0:num_top_venues]

# neighborhoods_venues_sorted.sort_values(freqColumns, inplace=True)
print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted

(103, 13)


Unnamed: 0,PostalCode,Borough,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",Art Gallery,Coffee Shop,Café,Ramen Restaurant,Sushi Restaurant,Hotel,Art Museum,Arts & Crafts Store,Bar,Breakfast Spot
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",Art Gallery,Coffee Shop,Café,Ramen Restaurant,Sushi Restaurant,Hotel,Art Museum,Arts & Crafts Store,Bar,Breakfast Spot
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",Art Gallery,Coffee Shop,Café,Ramen Restaurant,Sushi Restaurant,Hotel,Art Museum,Arts & Crafts Store,Bar,Breakfast Spot
3,M1G,Scarborough,Woburn,Art Gallery,Coffee Shop,Café,Ramen Restaurant,Sushi Restaurant,Hotel,Art Museum,Arts & Crafts Store,Bar,Breakfast Spot
4,M1H,Scarborough,Cedarbrae,Art Gallery,Coffee Shop,Café,Ramen Restaurant,Sushi Restaurant,Hotel,Art Museum,Arts & Crafts Store,Bar,Breakfast Spot
...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,M9N,York,Weston,Art Gallery,Coffee Shop,Café,Ramen Restaurant,Sushi Restaurant,Hotel,Art Museum,Arts & Crafts Store,Bar,Breakfast Spot
99,M9P,Etobicoke,Westmount,Art Gallery,Coffee Shop,Café,Ramen Restaurant,Sushi Restaurant,Hotel,Art Museum,Arts & Crafts Store,Bar,Breakfast Spot
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",Art Gallery,Coffee Shop,Café,Ramen Restaurant,Sushi Restaurant,Hotel,Art Museum,Arts & Crafts Store,Bar,Breakfast Spot
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",Art Gallery,Coffee Shop,Café,Ramen Restaurant,Sushi Restaurant,Hotel,Art Museum,Arts & Crafts Store,Bar,Breakfast Spot


<a id='item4'></a>

### Clustering

In [34]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_df_new[['PostalCode', 'Neighborhood']], prefix="", prefix_sep="")
toronto_df_new_ = toronto_df_new.drop(['PostalCode', 'Neighborhood'], 1)
toronto_df_new2 = pd.concat([toronto_df_new_, toronto_onehot], axis=1)
toronto_df_new2.head()

Unnamed: 0,Borough,Latitude,Longitude,M1B,M1C,M1E,M1G,M1H,M1J,M1K,...,Westmount,Weston,"Wexford, Maryvale","Willowdale, Newtonbrook","Willowdale, Willowdale East","Willowdale, Willowdale West",Woburn,Woodbine Heights,York Mills West,"York Mills, Silver Hills"
0,Scarborough,43.806686,-79.194353,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Scarborough,43.784535,-79.160497,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Scarborough,43.763573,-79.188711,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Scarborough,43.770992,-79.216917,0,0,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,Scarborough,43.773136,-79.239476,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
kclusters = 3

toronto_clustering = toronto_df_new2.drop(["Borough"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:50]

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0,
       0, 0, 0, 2, 2, 2, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 2, 0,
       0, 0, 0, 0, 0, 0], dtype=int32)

In [36]:
#create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
toronto_merged = toronto_df_new.copy()

# add clustering labels
toronto_merged["Cluster Labels"] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
# toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.drop(["Borough"], 1))

print(toronto_merged.shape)
toronto_merged.head(50) # check the last columns!

(103, 6)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,2
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,2
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,2
3,M1G,Scarborough,Woburn,43.770992,-79.216917,2
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,2
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,2
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029,2
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577,2
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476,2
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848,2


#### Examine Clusters

#### Cluster 1

In [37]:
df_toronto1 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 0]
df_toronto1

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels
19,M2K,North York,Bayview Village,43.786947,-79.385975,0
20,M2L,North York,"York Mills, Silver Hills",43.75749,-79.374714,0
21,M2M,North York,"Willowdale, Newtonbrook",43.789053,-79.408493,0
22,M2N,North York,"Willowdale, Willowdale East",43.77012,-79.408493,0
23,M2P,North York,York Mills West,43.752758,-79.400049,0
24,M2R,North York,"Willowdale, Willowdale West",43.782736,-79.442259,0
28,M3H,North York,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259,0
38,M4G,East York,Leaside,43.70906,-79.363452,0
39,M4H,East York,Thorncliffe Park,43.705369,-79.349372,0
40,M4J,East York,"East Toronto, Broadview North (Old East York)",43.685347,-79.338106,0


#### Cluster 2

In [38]:
df_toronto2 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 1]
df_toronto2

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels
29,M3J,North York,"Northwood Park, York University",43.76798,-79.487262,1
30,M3K,North York,Downsview,43.737473,-79.464763,1
31,M3L,North York,Downsview,43.739015,-79.506944,1
32,M3M,North York,Downsview,43.728496,-79.495697,1
33,M3N,North York,Downsview,43.761631,-79.520999,1
79,M6L,North York,"North Park, Maple Leaf Park, Upwood Park",43.713756,-79.490074,1
81,M6N,York,"Runnymede, The Junction North",43.673185,-79.487262,1
84,M6S,West Toronto,"Runnymede, Swansea",43.651571,-79.48445,1
86,M7R,Mississauga,Canada Post Gateway Processing Centre,43.636966,-79.615819,1
88,M8V,Etobicoke,"New Toronto, Mimico South, Humber Bay Shores",43.605647,-79.501321,1


#### Cluster 3

In [39]:
df_toronto3 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 2]
df_toronto3

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,2
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,2
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,2
3,M1G,Scarborough,Woburn,43.770992,-79.216917,2
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,2
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,2
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029,2
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577,2
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476,2
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848,2
