In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files
import os # library to read settings (API credentials) from the environment and check file paths

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

from bs4 import BeautifulSoup

print('Libraries imported.')

Libraries imported.


### Getting Toronto Data from Wikipedia page

In [3]:
# Scrape the Toronto postal-code table (the first <table> on the page) from Wikipedia.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
res = requests.get(url)
res.raise_for_status()  # stop here on an HTTP error instead of parsing an error page
soup = BeautifulSoup(res.content,'lxml')
table = soup.find_all('table')[0]
df = pd.read_html(str(table))
df_toronto_raw = df[0]

# Cleansing data: drop postal codes that have no borough assigned.
df_toronto_temp = df_toronto_raw[df_toronto_raw['Borough'] != 'Not assigned']

# More than one neighborhood can exist in one postal code area: join them into
# one comma-separated string per (Postcode, Borough).
# dict.fromkeys removes duplicates while keeping the source order, so the joined
# text is the same on every run (a set has no stable iteration order).
df_toronto_temp = df_toronto_temp.groupby(['Postcode', 'Borough'], as_index=False).agg(
    lambda x: ', '.join(dict.fromkeys(x.dropna())))

# If a cell has a borough but a 'Not assigned' neighborhood, the neighborhood
# becomes the borough name. This must be a single .loc assignment: the previous
# row loop used `frame.iloc[i][column] = value`, which writes to a temporary
# copy of the row and leaves the frame unchanged.
unnamed = df_toronto_temp['Neighbourhood'] == 'Not assigned'
df_toronto_temp.loc[unnamed, 'Neighbourhood'] = df_toronto_temp.loc[unnamed, 'Borough']

# Getting geospatial data for Toronto.
df_latlong = pd.read_csv('http://cocl.us/Geospatial_data')
df_latlong.columns = ['Postcode', 'Latitude', 'Longitude'] # match the column names of the scraped dataset

# Merging and creating the final dataframe to be worked on.
df_toronto = pd.merge(df_toronto_temp, df_latlong, on=['Postcode'])

# Keep only the required columns, use the same column spelling as the New York
# dataset and tag every row with its city. Built as one chain so the result is
# a new frame rather than a slice that is then assigned into.
df_toronto = (
    df_toronto[['Borough', 'Neighbourhood', 'Latitude', 'Longitude']]
    .rename(columns={'Neighbourhood': 'Neighborhood'})
    .assign(Place='Toronto')
)
df_toronto.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Place
0,Scarborough,"Malvern, Rouge",43.806686,-79.194353,Toronto
1,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Toronto
2,Scarborough,"Morningside, Guildwood, West Hill",43.763573,-79.188711,Toronto
3,Scarborough,Woburn,43.770992,-79.216917,Toronto
4,Scarborough,Cedarbrae,43.773136,-79.239476,Toronto


In [9]:
# Number of Toronto rows that carry a (non-null) borough.
df_toronto.loc[:, 'Borough'].count()

103

### Getting preprocessed New York Data

In [4]:

!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
print('Data downloaded!')
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)
neighborhoods_data = newyork_data['features']
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)
    
df_newyork = neighborhoods
df_newyork['Place'] = 'Newyork'
df_newyork.head()

Data downloaded!


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Place
0,Bronx,Wakefield,40.894705,-73.847201,Newyork
1,Bronx,Co-op City,40.874294,-73.829939,Newyork
2,Bronx,Eastchester,40.887556,-73.827806,Newyork
3,Bronx,Fieldston,40.895437,-73.905643,Newyork
4,Bronx,Riverdale,40.890834,-73.912585,Newyork


In [5]:
# Stack the New York and Toronto neighborhoods into one frame.
# ignore_index=True renumbers the rows 0..n-1; without it both halves keep
# their own labels starting at 0, leaving duplicate index labels in the result.
df_tor_new = pd.concat([df_newyork, df_toronto], axis=0, ignore_index=True)

In [6]:
# Setting up the credentials for the Foursquare API.
# The ID and secret are read from the environment variables
# FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET so that they are never
# stored in the notebook. The values that used to be hardcoded here were
# published with the notebook and should be revoked / regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; Foursquare requests will fail.')

In [7]:
# Overview map of Toronto: one blue circle per neighborhood row, with a
# "neighborhood, borough" popup.
tor_lat, tor_long = 43.6532, -79.3832
map_tor = folium.Map(location=[tor_lat, tor_long], zoom_start=10)

# Only the Toronto rows of the combined frame are plotted.
df_tor = df_tor_new[df_tor_new['Place']=='Toronto']
toronto_columns = [df_tor[column] for column in ('Latitude', 'Longitude', 'Borough', 'Neighborhood')]
for lat, lng, borough, neighborhood in zip(*toronto_columns):
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_tor)

map_tor

In [8]:
# Overview map of New York: one blue circle per neighborhood row, with a
# "neighborhood, borough" popup.
new_lat, new_long = 40.7128, -74.0060
map_new = folium.Map(location=[new_lat, new_long], zoom_start=10)

# Only the New York rows of the combined frame are plotted.
df_new = df_tor_new[df_tor_new['Place']=='Newyork']
newyork_columns = [df_new[column] for column in ('Latitude', 'Longitude', 'Borough', 'Neighborhood')]
for lat, lng, borough, neighborhood in zip(*newyork_columns):
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_new)

map_new

In [8]:
# Foursquare API credentials, read from the environment variables
# FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET so that they are never
# stored in the notebook. The values that used to be hardcoded here were
# published with the notebook and should be revoked / regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; Foursquare requests will fail.')


In [None]:
LIMIT = 50  # default maximum number of venues requested per neighborhood


def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=None, section='coffee', timeout=30):
    """Query the Foursquare "explore" endpoint around each neighborhood.

    Reads the notebook-level CLIENT_ID, CLIENT_SECRET and VERSION settings at
    call time and prints each neighborhood name as it is processed.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences, one entry per neighborhood.
    radius : int
        Value sent as the `radius` query parameter.
    limit : int or None
        Value sent as the `limit` query parameter; None means the current
        value of the module-level LIMIT.
    section : str
        Value sent as the `section` query parameter.
    timeout : float
        Seconds to wait for each HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue was found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    if limit is None:
        limit = LIMIT

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': limit,
                'section': section,
            },
            timeout=timeout,
        )
        response.raise_for_status()

        # A payload without any group is treated as "no venues here".
        groups = response.json().get('response', {}).get('groups') or []
        items = groups[0].get('items', []) if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue may come back without any category
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works for zero rows;
    # assigning .columns on an empty frame raises a length-mismatch ValueError.
    return pd.DataFrame(rows, columns=columns)


# Fetch the venues around every New York and Toronto neighborhood.
new_tor_venues = getNearbyVenues(names=df_tor_new['Neighborhood'],
                                 latitudes=df_tor_new['Latitude'],
                                 longitudes=df_tor_new['Longitude'])

# The neighborhood coordinates are renamed to plain 'Latitude' / 'Longitude',
# the column names df_tor_new uses for the same values.
new_tor_venues.columns = ['Neighborhood',
                          'Latitude',
                          'Longitude',
                          'Venue',
                          'Venue Latitude',
                          'Venue Longitude',
                          'Venue Category']

# A later cell reloads './new_tor_venues.csv', but no other cell creates that
# file, so a fresh "Restart & Run All" would stop there. Save the result when
# the file is missing; an existing file is left untouched.
if not os.path.exists('./new_tor_venues.csv'):
    new_tor_venues.to_csv('./new_tor_venues.csv', index=False)

In [13]:
# Preview the first five venue rows.
new_tor_venues.head(5)

Unnamed: 0,Neighborhood,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Wakefield,40.894705,-73.847201,Dunkin',40.890459,-73.849089,Donut Shop
1,Eastchester,40.887556,-73.827806,Dunkin',40.885384,-73.828099,Donut Shop
2,Kingsbridge,40.881687,-73.902818,Mon Amour Coffee & Wine,40.885009,-73.900332,Coffee Shop
3,Kingsbridge,40.881687,-73.902818,Sugarboy Bakery Cafe,40.877832,-73.902669,Bakery
4,Kingsbridge,40.881687,-73.902818,Dunkin',40.879308,-73.905066,Donut Shop


In [12]:
# Reload the saved venues and attach borough / city to each venue.
# The join matches on exact float equality of the coordinates, so the CSV is
# parsed with the round-trip converter: the default converter is not
# guaranteed to reproduce the written value to the last digit, and any row
# whose coordinates differ that way is silently dropped by the inner join.
new_tor_venues = pd.read_csv('./new_tor_venues.csv', float_precision='round_trip')
new_tor_venues_merged = new_tor_venues.merge(df_tor_new, on=['Latitude', 'Longitude'], how='inner')
new_tor_venues_merged.head(5)

Unnamed: 0,Neighborhood_x,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Borough,Neighborhood_y,Place
0,Eastchester,40.887556,-73.827806,Dunkin',40.885384,-73.828099,Donut Shop,Bronx,Eastchester,Newyork
1,Marble Hill,40.876551,-73.91066,Dunkin',40.877136,-73.906666,Donut Shop,Manhattan,Marble Hill,Newyork
2,Marble Hill,40.876551,-73.91066,Starbucks,40.877531,-73.905582,Coffee Shop,Manhattan,Marble Hill,Newyork
3,Marble Hill,40.876551,-73.91066,Starbucks,40.873755,-73.908613,Coffee Shop,Manhattan,Marble Hill,Newyork
4,Marble Hill,40.876551,-73.91066,Starbucks,40.873234,-73.90873,Coffee Shop,Manhattan,Marble Hill,Newyork


In [None]:
# Distinct venue categories (not displayed: only the last expression of a cell is shown).
new_tor_venues_merged['Venue Category'].unique()

# New York venues whose category is a coffee shop or a café.
in_new_york = new_tor_venues_merged['Place'] == 'Newyork'
serves_coffee = new_tor_venues_merged['Venue Category'].isin(['Coffee Shop', 'Café'])
new_tor_venues_merged[in_new_york & serves_coffee]

In [14]:
# Map of New York venues: coffee shops and cafés in blue, every other venue in red.
new_lat, new_long = 40.7128, -74.0060
map_new = folium.Map(location=[new_lat, new_long], zoom_start=10)


def _add_venue_markers(venues, target_map, color, fill_color):
    """Draw one small circle per row of `venues` on `target_map`, with a "category, borough" popup."""
    venue_columns = [venues[column] for column in ('Venue Latitude', 'Venue Longitude', 'Borough', 'Venue Category')]
    for lat, lng, borough, venue_cat in zip(*venue_columns):
        folium.CircleMarker(
            [lat, lng],
            radius=2,
            popup=folium.Popup('{}, {}'.format(venue_cat, borough), parse_html=True),
            color=color,
            fill=True,
            fill_color=fill_color,
            fill_opacity=0.7,
            parse_html=False).add_to(target_map)


_in_new_york = new_tor_venues_merged['Place'] == 'Newyork'
_serves_coffee = new_tor_venues_merged['Venue Category'].isin(['Coffee Shop', 'Café'])

# coffee shops / cafés first ...
df_new_coffee = new_tor_venues_merged[_in_new_york & _serves_coffee]
_add_venue_markers(df_new_coffee, map_new, color='blue', fill_color='#3186cc')

# ... then all remaining New York venues
df_new_no_coffee = new_tor_venues_merged[_in_new_york & ~_serves_coffee]
_add_venue_markers(df_new_no_coffee, map_new, color='red', fill_color='#ff8080')

map_new

### Clustering New York and Toronto Neighborhoods

In [15]:
# Preview the merged venue table that the clustering is built from.
new_tor_venues_merged.head(5)

Unnamed: 0,Neighborhood_x,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Borough,Neighborhood_y,Place
0,Eastchester,40.887556,-73.827806,Dunkin',40.885384,-73.828099,Donut Shop,Bronx,Eastchester,Newyork
1,Marble Hill,40.876551,-73.91066,Dunkin',40.877136,-73.906666,Donut Shop,Manhattan,Marble Hill,Newyork
2,Marble Hill,40.876551,-73.91066,Starbucks,40.877531,-73.905582,Coffee Shop,Manhattan,Marble Hill,Newyork
3,Marble Hill,40.876551,-73.91066,Starbucks,40.873755,-73.908613,Coffee Shop,Manhattan,Marble Hill,Newyork
4,Marble Hill,40.876551,-73.91066,Starbucks,40.873234,-73.90873,Coffee Shop,Manhattan,Marble Hill,Newyork


In [20]:
# One-hot encode the venue category: one indicator column per category,
# named after the category itself (no prefix).
category_dummies = pd.get_dummies(new_tor_venues_merged[['Venue Category']], prefix="", prefix_sep="")

# Carry the neighborhood name and the city along as the last two columns.
new_tor_venues_onehot = category_dummies.assign(
    Neighborhood=new_tor_venues_merged['Neighborhood_x'],
    Place=new_tor_venues_merged['Place'],
)
print(new_tor_venues_onehot.shape)
new_tor_venues_onehot.head()

(1697, 76)


Unnamed: 0,Accessories Store,American Restaurant,Art Gallery,Arts & Crafts Store,Australian Restaurant,Bagel Shop,Bakery,Bank,Bar,Bike Shop,Bookstore,Breakfast Spot,Bubble Tea Shop,Burger Joint,Cafeteria,Café,Candy Store,Chinese Restaurant,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,College Cafeteria,College Quad,Comic Shop,Convenience Store,Coworking Space,Creperie,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Donut Shop,Dosa Place,Eastern European Restaurant,Flower Shop,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,German Restaurant,Gift Shop,Gourmet Shop,Himalayan Restaurant,Hookah Bar,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Internet Cafe,Italian Restaurant,Japanese Restaurant,Juice Bar,Mediterranean Restaurant,Mexican Restaurant,Motorcycle Shop,Pet Service,Pizza Place,Residential Building (Apartment / Condo),Restaurant,Salon / Barbershop,Sandwich Place,Shoe Store,Shopping Mall,Snack Place,Soup Place,Swiss Restaurant,Tea Room,Used Bookstore,Vegetarian / Vegan Restaurant,Wine Bar,Neighborhood,Place
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Eastchester,Newyork
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Marble Hill,Newyork
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Marble Hill,Newyork
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Marble Hill,Newyork
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Marble Hill,Newyork


In [22]:
# Per (neighborhood, city): the mean of each indicator column, i.e. the share
# of that neighborhood's venues falling into each category.
new_tor_venues_grouped = new_tor_venues_onehot.groupby(['Neighborhood', 'Place'], as_index=False).mean()
new_tor_venues_grouped.head()

Unnamed: 0,Neighborhood,Place,Accessories Store,American Restaurant,Art Gallery,Arts & Crafts Store,Australian Restaurant,Bagel Shop,Bakery,Bank,Bar,Bike Shop,Bookstore,Breakfast Spot,Bubble Tea Shop,Burger Joint,Cafeteria,Café,Candy Store,Chinese Restaurant,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,College Cafeteria,College Quad,Comic Shop,Convenience Store,Coworking Space,Creperie,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Donut Shop,Dosa Place,Eastern European Restaurant,Flower Shop,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,German Restaurant,Gift Shop,Gourmet Shop,Himalayan Restaurant,Hookah Bar,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Internet Cafe,Italian Restaurant,Japanese Restaurant,Juice Bar,Mediterranean Restaurant,Mexican Restaurant,Motorcycle Shop,Pet Service,Pizza Place,Residential Building (Apartment / Condo),Restaurant,Salon / Barbershop,Sandwich Place,Shoe Store,Shopping Mall,Snack Place,Soup Place,Swiss Restaurant,Tea Room,Used Bookstore,Vegetarian / Vegan Restaurant,Wine Bar
0,"Adelaide, Richmond, King",Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.16,0.0,0.0,0.0,0.0,0.0,0.74,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0
1,Agincourt,Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North, Steeles East, L'Amoreaux East...",Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Alderwood, Long Branch",Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Allerton,Newyork,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
# set number of clusters
kclusters = 4

# Cluster on the category shares only. The label columns are dropped with the
# `columns=` keyword: passing the axis positionally (`drop(labels, 1)`) is no
# longer accepted by pandas 2.0+.
new_tor_venues_grouped_clustering = new_tor_venues_grouped.drop(columns=['Neighborhood', 'Place'])

# run k-means clustering
# n_init is set explicitly because its default differs between scikit-learn
# releases; 10 is the value older releases used by default.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(new_tor_venues_grouped_clustering)

# check cluster labels generated for the first rows of the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 0, 3, 0, 1, 1, 0, 3], dtype=int32)

In [35]:
# Attach each neighborhood's cluster label to its name and city.
# The labels are in the row order of new_tor_venues_grouped (the frame the
# model was fitted on) and are downcast to the smallest integer type.
cluster_labels = pd.to_numeric(kmeans.labels_, downcast='integer')
new_tor_merged = new_tor_venues_grouped[['Neighborhood', 'Place']].assign(**{'Cluster Labels': cluster_labels})
new_tor_merged.head() # check the last column!

Unnamed: 0,Neighborhood,Place,Cluster Labels
0,"Adelaide, Richmond, King",Toronto,1
1,Agincourt,Toronto,1
2,"Agincourt North, Steeles East, L'Amoreaux East...",Toronto,1
3,"Alderwood, Long Branch",Toronto,0
4,Allerton,Newyork,3


### Similar Neighborhoods

### Cluster 1

In [36]:
# Neighborhoods whose cluster label is 0.
new_tor_merged.loc[new_tor_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Place,Cluster Labels
3,"Alderwood, Long Branch",Toronto,0
5,Astoria,Newyork,0
8,Bay Ridge,Newyork,0
12,"Bedford Park, Lawrence Manor East",Toronto,0
21,Bulls Head,Newyork,0
22,Bushwick,Newyork,0
23,Business Reply Mail Processing Centre 969 Eastern,Toronto,0
24,Butler Manor,Newyork,0
28,Central Bay Street,Toronto,0
29,Central Harlem,Newyork,0


### Cluster 2

In [37]:
# Neighborhoods whose cluster label is 1.
new_tor_merged.loc[new_tor_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Place,Cluster Labels
0,"Adelaide, Richmond, King",Toronto,1
1,Agincourt,Toronto,1
2,"Agincourt North, Steeles East, L'Amoreaux East...",Toronto,1
6,Astoria Heights,Newyork,1
7,Battery Park City,Newyork,1
14,Bellaire,Newyork,1
18,Briarwood,Newyork,1
20,Bronxdale,Newyork,1
26,Canada Post Gateway Processing Centre,Toronto,1
27,Canarsie,Newyork,1


### Cluster 3 

In [38]:
# Neighborhoods whose cluster label is 2.
new_tor_merged.loc[new_tor_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Place,Cluster Labels
10,Bayview Village,Toronto,2
17,Borough Park,Newyork,2
19,Broad Channel,Newyork,2
25,Caledonia-Fairbanks,Toronto,2
33,Christie,Toronto,2
36,Clifton,Newyork,2
46,Davisville North,Toronto,2
53,East Tremont,Newyork,2
59,"Exhibition Place, Parkdale Village, Brockton",Toronto,2
68,Gerritsen Beach,Newyork,2


### Cluster 4 - New York only: no Toronto neighborhood was assigned to this cluster

In [39]:
# Neighborhoods whose cluster label is 3.
new_tor_merged.loc[new_tor_merged['Cluster Labels'] == 3]

Unnamed: 0,Neighborhood,Place,Cluster Labels
4,Allerton,Newyork,3
9,Baychester,Newyork,3
11,Bedford Park,Newyork,3
13,Beechhurst,Newyork,3
15,Bensonhurst,Newyork,3
16,Blissville,Newyork,3
34,City Line,Newyork,3
42,Corona,Newyork,3
55,Eastchester,Newyork,3
58,Erasmus,Newyork,3


In [53]:
# How many neighborhoods of each city carry cluster label 3.
print('Cluster 4')
new_tor_merged.loc[new_tor_merged['Cluster Labels'] == 3, 'Place'].value_counts()

Cluster 4


Newyork    31
Name: Place, dtype: int64