# Clustering Lab

### Mark Polinkovsky
### May 2, 2020

### Part I

In [1]:
#!conda install -c conda-forge geopy --yes
#!conda install -c conda-forge folium=0.5.0 --yes
#!conda install -c conda-forge beautifulsoup4 --yes
#!conda install -c conda-forge geocoder --yes

In [9]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans
import folium # map rendering library
from bs4 import BeautifulSoup # HTML Parsing library

import geocoder

print('Libraries imported.')

Libraries imported.


Parse Wikipedia page

In [3]:
page = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
soup = BeautifulSoup(page.text,'html.parser')

Get table from the parsed page and put it into the dataframe

In [4]:
df=pd.read_html(str(soup.table))[0]

#The column headings are brought in as row 0, so make column titles, drop the 0th record, and reset the index
df.columns=['Postal code','Borough','Neighborhood']
df.drop(index=0,inplace=True)
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
1,M1A,Not assigned,
2,M2A,Not assigned,
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Regent Park / Harbourfront


In [5]:
#Clean up the data
#Remove 'Not assigned' Boroughs, and switch 'Not assigned' neighborhoods to their borough names (there are none of those)
df=df[df['Borough']!='Not assigned']
df.loc[df['Neighborhood']=='Not assigned','Neighborhood']=df.loc[df['Neighborhood']=='Not assigned','Borough']

#Replace multiple neighborhood separator from / to ,
df['Neighborhood']=df['Neighborhood'].str.replace(pat=r' /',repl=',',regex=False)

#Reset the index
df.reset_index(drop=True,inplace=True)

df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [6]:
df.shape

(103, 3)

### Part II

In [13]:
import geocoder # import geocoder

#Initialize variables
latdict={'Latitude':[0]*len(df),'Longitude':[0]*len(df)}
latlng = pd.DataFrame(data=latdict)
#Default Toronto latitude longitude
torontolatlng = (43.6529,-79.3849)

#Step through each postal code
for n in range(0,len(df)):#len(df)):
    
    #initialize variables for loop
    lat_lng_coords = None
    postal_code = df.loc[n,'Postal code']
    borough = df.loc[n,'Borough']    
    q=0;
    
    # loop until you get the coordinates or do 10 tries
    while((lat_lng_coords is None) and (q<10)):
#        print('{}, {}, Toronto, Ontario, Canada'.format(postal_code, borough))
        
        #Use Arcgis to get location data
        g = geocoder.arcgis('{}, Toronto, Ontario, Canada'.format(postal_code))    
        
        if g.json != None:
            lat_lng_coords = (g.json['lat'],g.json['lng'])
            
        q+=1
    
    #If no latitude, longitude found, set to center of Toronto as default
    if (lat_lng_coords is None):
        lat_lng_coords = torontolatlng
    
#    print(n, q)
#    print(lat_lng_coords)
        
    latlng.loc[n,'Latitude'] = lat_lng_coords[0]
    latlng.loc[n,'Longitude'] = lat_lng_coords[1]

print(latlng.head())

    Latitude  Longitude
0  43.752935 -79.335641
1  43.728102 -79.311890
2  43.650964 -79.353041
3  43.723265 -79.451211
4  43.661790 -79.389390


Concatenate the Latitude and longitude data to the previous dataframe

In [14]:
df=pd.concat([df,latlng],axis=1)

In [16]:
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.752935,-79.335641
1,M4A,North York,Victoria Village,43.728102,-79.31189
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.650964,-79.353041
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.723265,-79.451211
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66179,-79.38939


### Part III

Visualize neighborhoods on map

In [73]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[torontolatlng[0], torontolatlng[1]], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Set up Foursquare details for queries

In [74]:
CLIENT_ID = '3NHOBPCDSRG1R0LMHUGB0ROC2Q1AET32BM5YLMEFFEVTEP4G' # your Foursquare ID
CLIENT_SECRET = '3BF4AGXETKHU3PDUGNW4SPMSBZ5FMWGRCODJW3130MQCWXM5' # your Foursquare Secret
VERSION = '20200505'#'20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 3NHOBPCDSRG1R0LMHUGB0ROC2Q1AET32BM5YLMEFFEVTEP4G
CLIENT_SECRET:3BF4AGXETKHU3PDUGNW4SPMSBZ5FMWGRCODJW3130MQCWXM5


Define function to get venues in each neighborhood

In [76]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=10):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [None]:
toronto_venues = getNearbyVenues(names=df['Neighborhood'],latitudes=df['Latitude'], 
                                 longitudes=df['Longitude'])

Show parameters of the venues dataframe

In [79]:
print(toronto_venues.shape)
toronto_venues.head()

(694, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.752935,-79.335641,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.752935,-79.335641,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Parkwoods,43.752935,-79.335641,MacLeod Exteriors Inc.,43.755014,-79.338688,Construction & Landscaping
3,Victoria Village,43.728102,-79.31189,Tim Hortons,43.725517,-79.313103,Coffee Shop
4,Victoria Village,43.728102,-79.31189,Portugril,43.725819,-79.312785,Portuguese Restaurant


Information on the venue categories

In [80]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 179 uniques categories.


One hot encoding of venues in neighborhoods

In [88]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
cols=list(toronto_onehot)
cols.insert(0, cols.pop(cols.index('Neighborhood')))
toronto_onehot = toronto_onehot.loc[:, cols]
toronto_onehot

toronto_onehot.head()

Unnamed: 0,Neighborhood,Airport,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,BBQ Joint,Baby Store,Badminton Court,Bakery,Bank,Bar,Baseball Field,Basketball Court,Beer Bar,Beer Store,Big Box Store,Bike Shop,Bistro,Bookstore,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Bus Station,Business Service,Butcher,Café,Candy Store,Caribbean Restaurant,Chinese Restaurant,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,College Stadium,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Creperie,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Distribution Center,Doctor's Office,Dog Run,Donut Shop,Eastern European Restaurant,Electronics Store,Farm,Farmers Market,Fast Food Restaurant,Field,Fish & Chips Shop,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Garden,Gas Station,Gastropub,General Entertainment,Gift Shop,Golf Course,Gourmet Shop,Government Building,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,History Museum,Hobby Shop,Hockey Arena,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kitchen Supply Store,Korean Restaurant,Light Rail Station,Liquor Store,Lounge,Market,Massage Studio,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Movie Theater,Museum,Music Venue,Night Market,Organic Grocery,Other Great Outdoors,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Record Shop,Rental Car Location,Residential Building (Apartment / Condo),Restaurant,Rock Climbing Spot,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shoe Store,Shopping Mall,Shopping Plaza,Skating Rink,Smoke Shop,Soccer Field,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Steakhouse,Storage Facility,Supermarket,Sushi Restaurant,Swim School,Tea Room,Tech Startup,Tennis Court,Thai Restaurant,Theater,Theme Park,Theme Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Wine Shop,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Group by neighborhood and frequency of venue category

In [None]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()

Cluster the neighborhoods

In [90]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 3, 3, 4, 1, 3, 3, 3, 1, 3])

In [95]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [96]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Breakfast Spot,Skating Rink,Supermarket,Sushi Restaurant,Badminton Court,Distribution Center,Farmers Market,Farm,Electronics Store,Eastern European Restaurant
1,"Alderwood, Long Branch",Pizza Place,Sandwich Place,Gas Station,Dance Studio,Convenience Store,Pharmacy,Coffee Shop,Pub,Gym,Athletics & Sports
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bridal Shop,Sushi Restaurant,Deli / Bodega,Diner,Middle Eastern Restaurant,Bank,Restaurant,Ice Cream Shop,Farm
3,Bayview Village,Flower Shop,Construction & Landscaping,Trail,Yoga Studio,Discount Store,Farm,Electronics Store,Eastern European Restaurant,Donut Shop,Dog Run
4,"Bedford Park, Lawrence Manor East",Greek Restaurant,Sushi Restaurant,Coffee Shop,Italian Restaurant,Sports Club,Indian Restaurant,Pub,Thai Restaurant,Café,Restaurant


In [114]:
# add clustering labels
#neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.752935,-79.335641,2.0,Construction & Landscaping,Park,Food & Drink Shop,Yoga Studio,Discount Store,Farm,Electronics Store,Eastern European Restaurant,Donut Shop,Dog Run
1,M4A,North York,Victoria Village,43.728102,-79.31189,3.0,Pizza Place,Park,Coffee Shop,Portuguese Restaurant,Intersection,French Restaurant,Diner,Electronics Store,Eastern European Restaurant,Donut Shop
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.650964,-79.353041,1.0,Athletics & Sports,Performing Arts Venue,Chocolate Shop,Mediterranean Restaurant,Café,French Restaurant,Pub,Theater,Tech Startup,Dance Studio
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.723265,-79.451211,3.0,Clothing Store,Kitchen Supply Store,Electronics Store,Cosmetics Shop,Jewelry Store,Toy / Game Store,Shopping Mall,Furniture / Home Store,Men's Store,Distribution Center
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66179,-79.38939,3.0,Coffee Shop,Yoga Studio,Distribution Center,Park,Arts & Crafts Store,Creperie,Italian Restaurant,Sushi Restaurant,Farm,Electronics Store


In [123]:
toronto_merged=toronto_merged[toronto_merged['Cluster Labels'].isnull()==False]

In [124]:
# create map
map_clusters = folium.Map(location=[torontolatlng[0], torontolatlng[1]], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Examine clusters

Cluster 0: Scarborough

In [125]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
85,Scarborough,0.0,Pharmacy,Comic Shop,Farmers Market,Farm,Electronics Store,Eastern European Restaurant,Donut Shop,Dog Run,Doctor's Office,Distribution Center


Cluster 1: Urban area, coffee shops, hotels

In [130]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,1.0,Athletics & Sports,Performing Arts Venue,Chocolate Shop,Mediterranean Restaurant,Café,French Restaurant,Pub,Theater,Tech Startup,Dance Studio
9,Downtown Toronto,1.0,Café,Clothing Store,Burrito Place,Hotel,Tea Room,Theater,Comic Shop,Plaza,Music Venue,Diner
25,Downtown Toronto,1.0,Grocery Store,Café,Coffee Shop,Candy Store,Park,Playground,Convenience Store,Discount Store,Electronics Store,Eastern European Restaurant
30,Downtown Toronto,1.0,Gym / Fitness Center,Restaurant,Coffee Shop,Concert Hall,Vegetarian / Vegan Restaurant,Speakeasy,Plaza,Seafood Restaurant,Café,Steakhouse
33,North York,1.0,Clothing Store,Chocolate Shop,Toy / Game Store,Movie Theater,Burger Joint,Theater,Restaurant,Tea Room,Bakery,Yoga Studio
42,Downtown Toronto,1.0,Coffee Shop,Café,Bakery,Pub,Restaurant,Beer Bar,Tea Room,Gym,Auto Garage,Fish & Chips Shop
48,Downtown Toronto,1.0,Café,Coffee Shop,Gym,Museum,Restaurant,Pub,Gastropub,Bakery,Dance Studio,Discount Store
52,North York,1.0,Bridal Shop,Café,Yoga Studio,Distribution Center,Farmers Market,Farm,Electronics Store,Eastern European Restaurant,Donut Shop,Dog Run
55,North York,1.0,Greek Restaurant,Sushi Restaurant,Coffee Shop,Italian Restaurant,Sports Club,Indian Restaurant,Pub,Thai Restaurant,Café,Restaurant
74,Central Toronto,1.0,Café,American Restaurant,Burger Joint,Vegetarian / Vegan Restaurant,Donut Shop,BBQ Joint,Indian Restaurant,History Museum,Middle Eastern Restaurant,Mexican Restaurant


Cluster 2: Semi-rural - parks, fields, and farms

In [131]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,2.0,Construction & Landscaping,Park,Food & Drink Shop,Yoga Studio,Discount Store,Farm,Electronics Store,Eastern European Restaurant,Donut Shop,Dog Run
5,Etobicoke,2.0,Park,Skating Rink,Baseball Field,Yoga Studio,Discount Store,Farm,Electronics Store,Eastern European Restaurant,Donut Shop,Dog Run
16,York,2.0,Field,Trail,Park,Business Service,Hockey Arena,Grocery Store,Gift Shop,General Entertainment,Eastern European Restaurant,Donut Shop
21,York,2.0,Park,Bakery,Mexican Restaurant,Sporting Goods Shop,Beer Store,Spa,Gym,Women's Store,Gift Shop,Electronics Store
27,North York,2.0,Park,Dog Run,Residential Building (Apartment / Condo),Yoga Studio,Diner,Farm,Electronics Store,Eastern European Restaurant,Donut Shop,Doctor's Office
31,West Toronto,2.0,Park,Gym / Fitness Center,Bakery,Pharmacy,Gym,Café,Brazilian Restaurant,Bank,Grocery Store,BBQ Joint
36,Downtown Toronto,2.0,Harbor / Marina,Theme Park,Farm,Park,Fast Food Restaurant,Yoga Studio,Discount Store,Electronics Store,Eastern European Restaurant,Donut Shop
41,East Toronto,2.0,Grocery Store,Business Service,Bus Line,Discount Store,Park,Cosmetics Shop,Convenience Store,Farm,Electronics Store,Eastern European Restaurant
49,North York,2.0,Bakery,Park,Basketball Court,Yoga Studio,Farmers Market,Farm,Electronics Store,Eastern European Restaurant,Donut Shop,Dog Run
57,North York,2.0,Playground,Park,Diner,Farm,Electronics Store,Eastern European Restaurant,Donut Shop,Dog Run,Doctor's Office,Distribution Center


Cluster 3: Small, middle class restaurants

In [132]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,3.0,Pizza Place,Park,Coffee Shop,Portuguese Restaurant,Intersection,French Restaurant,Diner,Electronics Store,Eastern European Restaurant,Donut Shop
3,North York,3.0,Clothing Store,Kitchen Supply Store,Electronics Store,Cosmetics Shop,Jewelry Store,Toy / Game Store,Shopping Mall,Furniture / Home Store,Men's Store,Distribution Center
4,Downtown Toronto,3.0,Coffee Shop,Yoga Studio,Distribution Center,Park,Arts & Crafts Store,Creperie,Italian Restaurant,Sushi Restaurant,Farm,Electronics Store
7,North York,3.0,Athletics & Sports,Park,Coffee Shop,Other Great Outdoors,Spa,Burger Joint,Restaurant,Bank,Trail,Gym
8,East York,3.0,Gym / Fitness Center,Bank,Gastropub,Fast Food Restaurant,Pet Store,Pharmacy,Café,Rock Climbing Spot,Breakfast Spot,Pizza Place
10,North York,3.0,Pizza Place,Grocery Store,Gas Station,Pub,Rental Car Location,Mediterranean Restaurant,Japanese Restaurant,Fast Food Restaurant,Gastropub,Golf Course
11,Etobicoke,3.0,Pizza Place,Chinese Restaurant,Sandwich Place,Tea Room,Diner,Electronics Store,Eastern European Restaurant,Donut Shop,Dog Run,Doctor's Office
13,North York,3.0,Athletics & Sports,Park,Coffee Shop,Other Great Outdoors,Spa,Burger Joint,Restaurant,Bank,Trail,Gym
14,East York,3.0,Grocery Store,Coffee Shop,Doctor's Office,Sushi Restaurant,Bar,Gas Station,Breakfast Spot,Arts & Crafts Store,Pharmacy,Creperie
15,Downtown Toronto,3.0,Coffee Shop,Gym,Gastropub,Japanese Restaurant,Food Truck,Middle Eastern Restaurant,Restaurant,Cosmetics Shop,Creperie,Deli / Bodega


Cluster 4: Hipster / gentrifying neighborhoods - Yoga studios and Farmer's Markets

In [133]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Scarborough,4.0,Trail,Yoga Studio,Diner,Farmers Market,Farm,Electronics Store,Eastern European Restaurant,Donut Shop,Dog Run,Doctor's Office
12,Scarborough,4.0,Construction & Landscaping,Yoga Studio,Distribution Center,Fast Food Restaurant,Farmers Market,Farm,Electronics Store,Eastern European Restaurant,Donut Shop,Dog Run
26,Scarborough,4.0,Construction & Landscaping,Trail,Yoga Studio,Discount Store,Farmers Market,Farm,Electronics Store,Eastern European Restaurant,Donut Shop,Dog Run
39,North York,4.0,Flower Shop,Construction & Landscaping,Trail,Yoga Studio,Discount Store,Farm,Electronics Store,Eastern European Restaurant,Donut Shop,Dog Run
