# Segmenting and Clustering Neighborhoods in Toronto
## Part 3

## 1. Prepare datasets.

#### Scrape the Wikipedia page.

In [1]:
# Import libraries.

import time
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs

In [2]:
# Download the Wikipedia page that lists the Toronto ("M") postal codes.
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# surfaces HTTP errors instead of silently parsing an error page.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
source = response.text
soup = bs(source, 'lxml')

# Use tag 'table' and class 'wikitable sortable'.
table = soup.find('table', class_='wikitable sortable')
if table is None:
    raise ValueError("Could not find a table with class 'wikitable sortable' on the page.")

# Group by tag <tr>, within this tag group by <th> for the table headers and by <td> for the table entries.
postcodes = table.find_all('tr')

# The first row holds the headers; the first header is renamed to 'PostalCode'.
columns = [column_name.text.strip('\n') for column_name in postcodes[0].find_all('th')]
columns[0] = 'PostalCode'

# Collect every data row first and build the dataframe once
# (appending with df.loc[len(df)] copies the frame on every row).
rows = [[value.text.strip('\n') for value in postcode.find_all('td')] for postcode in postcodes[1:]]
# Rows whose number of <td> cells does not match the header cannot be placed in the table; skip them.
rows = [row for row in rows if len(row) == len(columns)]
df = pd.DataFrame(rows, columns=columns)

# Ignore all cells with the borough that is 'Not assigned'.
# .copy() makes the filtered frame independent so the assignment below is unambiguous.
df = df[df['Borough'] != 'Not assigned'].copy()

# Rename 'Not assigned' neighborhoods with the corresponding borough names.
not_assigned = df['Neighbourhood'] == 'Not assigned'
df.loc[not_assigned, 'Neighbourhood'] = df.loc[not_assigned, 'Borough']

# Combine neighborhoods with the same postal codes (sorted, comma-separated).
df = df.groupby(['PostalCode', 'Borough'])['Neighbourhood'].apply(sorted).apply(', '.join).reset_index()

# Print the number of rows of the dataframe.
print('The shape of the dataframe: %s.' % str(df.shape))
df.head(10)

The shape of the dataframe: (103, 3).


Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Highland Creek, Port Union, Rouge Hill"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


#### Get the latitude and the longitude coordinates of each neighborhood.

Get the coordinates for all postal codes.

In [3]:
# Add empty (NaN) coordinate columns; they are filled in by the geocoding loop below.
df = df.assign(Latitude=np.nan, Longitude=np.nan)

In [4]:
# Scraping coordinates from Google.

def coordinates(request):
    """Scrape a pair of coordinates for `request` from a Google search result page.

    The page is scanned for links starting with ``href="http://maps.google.com/maps?``;
    the first such link whose next 200 characters contain an ``ll=<a>,<b>`` parameter
    with two numeric values supplies the result.

    Parameters
    ----------
    request : str
        Query text inserted verbatim into the search URL (callers join words with '+').

    Returns
    -------
    list of float
        Two values taken from the ``ll=`` parameter (callers use them as latitude and
        longitude, in that order), or ``[np.nan, np.nan]`` when the request fails or
        no usable link is found.
    """
    not_found = [np.nan, np.nan]
    marker = 'href="http://maps.google.com/maps?'

    try:
        # The timeout prevents a single stalled request from blocking the whole loop.
        source = requests.get('http://www.google.com/search?q=%s' % request, timeout=10).text
    except requests.RequestException:
        # Network problems are reported with the same sentinel as "nothing found".
        return not_found

    start = source.find(marker)
    while start != -1:
        # Only the 200 characters after the first character of the link are inspected.
        snippet = source[start + 1:start + 201]
        ll_pos = snippet.find('ll=')
        if ll_pos != -1:
            raw_values = snippet[ll_pos + 3:].split('&amp;')[0].split(',')
            try:
                values = [float(value) for value in raw_values]
            except ValueError:
                values = []
            if len(values) == 2:
                return values
            # Malformed 'll=' payload: fall through and try the next maps link.
        start = source.find(marker, start + 1)

    return not_found

In [5]:
# Column positions are looked up once and reused for positional access.
lat_col = df.columns.get_loc('Latitude')
lon_col = df.columns.get_loc('Longitude')

for row_pos, postal_code in enumerate(df['PostalCode']):
    # Rows that already hold a finite latitude are left untouched.
    if np.isfinite(df.iloc[row_pos, lat_col]):
        continue
    location = coordinates('%s+Canada+postal+code+coordinates' % postal_code)
    df.iloc[row_pos, lat_col] = location[0]
    df.iloc[row_pos, lon_col] = location[1]
    # Pause for one second between requests (presumably to avoid hammering the search page).
    time.sleep(1)

In [6]:
# Preview the first 15 rows, now including the Latitude/Longitude columns.
df.head(15)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Port Union, Rouge Hill",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [7]:
# Number of non-missing latitudes, i.e. how many postal codes received coordinates.
df['Latitude'].count()

103

In [8]:
# The code was removed by Watson Studio for sharing.

Your credentials:
CLIENT_ID: <redacted>
CLIENT_SECRET: <redacted>


## 2. Explore Neighbourhoods

In [56]:
radius = 500
LIMIT = 200

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare ``venues/explore`` endpoint for each location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Parallel sequences; they are zipped, so one request is made per (name, lat, lng) triple.
    radius : int, optional
        Value sent as the ``radius`` query parameter (default 500).

    The module-level names ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and ``LIMIT``
    are read at call time and sent as query parameters.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        'Venue Category' is None for a venue that has no categories.
        The frame is empty, but still has these columns, when no venue is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    column_names = ['Neighbourhood',
                    'Neighbourhood Latitude',
                    'Neighbourhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string instead of formatting it by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # make the GET request
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # return only relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # A venue may come back without any category; avoid indexing an empty list.
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works when no venue was found.
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)

    return(nearby_venues)

#### Run the above function on each neighborhood to create a new dataframe called *venues*.

In [57]:
# One Foursquare request per row of df; the default radius of getNearbyVenues is used.
venues = getNearbyVenues(names=df['Neighbourhood'],
                        latitudes=df['Latitude'],
                        longitudes=df['Longitude'])

Malvern, Rouge
Highland Creek, Port Union, Rouge Hill
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West, Steeles West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Don Mills South, Flemingdon Park
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Parkview Hill, Woodbine Gardens
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
River

Let's check the size of the resulting dataframe.

In [91]:
# Show (rows, columns) of the venues table, then preview its first rows.
print(venues.shape)
venues.head()

(2232, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Highland Creek, Port Union, Rouge Hill",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant


Let's check how many venues were returned for each neighbourhood.

In [92]:
# Non-null entries per column for each neighbourhood, i.e. the number of venues returned for it.
venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Agincourt,5,5,5,5,5,5
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",3,3,3,3,3,3
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",11,11,11,11,11,11
"Alderwood, Long Branch",8,8,8,8,8,8
"Bathurst Manor, Downsview North, Wilson Heights",16,16,16,16,16,16
"Bathurst Quay, CN Tower, Harbourfront West, Island airport, King and Spadina, Railway Lands, South Niagara",12,12,12,12,12,12
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",23,23,23,23,23,23
Berczy Park,55,55,55,55,55,55


## 3. Analyze Each Neighbourhood

In [93]:
# One hot encoding.
onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

# Add neighbourhood column back to dataframe.
onehot['Neighbourhood'] = venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [onehot.columns[-1]] + list(onehot.columns[:-1])
onehot = onehot[fixed_columns]

onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Highland Creek, Port Union, Rouge Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [94]:
# (number of venues, number of category columns + 1 for the neighbourhood label)
onehot.shape

(2232, 279)

#### Let's group rows by neighbourhood and take the mean of the frequency of occurrence of each category.

In [95]:
# Mean of each indicator column per neighbourhood = share of that neighbourhood's venues in the category.
grouped = onehot.groupby('Neighbourhood').mean().reset_index()
grouped.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Let's print each neighborhood along with the top 5 most common venues.

In [96]:
num_top_venues = 5

for hood in grouped['Neighbourhood']:
    print("----%s----" % hood)

    # Turn the neighbourhood's row into a two-column (venue, freq) table.
    hood_table = grouped[grouped['Neighbourhood'] == hood].T.reset_index()
    hood_table.columns = ['venue', 'freq']

    top_venues = (
        hood_table.iloc[1:]                                    # drop the 'Neighbourhood' label row
        .assign(freq=lambda table: table['freq'].astype(float))
        .round({'freq': 2})
        .sort_values('freq', ascending=False)
        .reset_index(drop=True)
        .head(num_top_venues)
    )
    print(top_venues)
    print('\n')

----Adelaide, King, Richmond----
                 venue  freq
0          Coffee Shop  0.06
1                 Café  0.05
2      Thai Restaurant  0.04
3  American Restaurant  0.04
4           Steakhouse  0.04


----Agincourt----
                venue  freq
0      Sandwich Place   0.2
1      Breakfast Spot   0.2
2  Chinese Restaurant   0.2
3              Lounge   0.2
4        Skating Rink   0.2


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
                 venue  freq
0           Playground  0.33
1  Arts & Crafts Store  0.33
2                 Park  0.33
3    Mobile Phone Shop  0.00
4        Movie Theater  0.00


----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
                 venue  freq
0        Grocery Store  0.18
1          Coffee Shop  0.09
2       Sandwich Place  0.09
3       Discount Store  0.09
4  Japanese Restaurant  0.09


----Alderwood, Long Branch----
            venue  freq
0     Pizza P

Function to sort the venues in descending order.

In [97]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the `num_top_venues` largest entries of `row`.

    The first element of `row` is skipped (callers pass a row whose first entry is the
    neighbourhood name); the remaining entries are sorted in descending order and the
    index labels of the leading `num_top_venues` entries are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index[:num_top_venues].values

Let's create the new dataframe and display the top 10 venues for each neighborhood.

In [98]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Create columns according to number of top venues.
columns = ['Neighbourhood']
for ind in range(num_top_venues):
    # Ranks 1-3 use their own suffix; every later rank uses 'th'.
    # An explicit bound check replaces the former bare `except:`, which would also
    # have hidden unrelated errors (including KeyboardInterrupt).
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# Create a new dataframe.
venues_sorted = pd.DataFrame(columns=columns)
venues_sorted['Neighbourhood'] = grouped['Neighbourhood']

# Fill the ranking columns row by row from the per-neighbourhood category frequencies.
for ind in range(grouped.shape[0]):
    venues_sorted.iloc[ind,1:] = return_most_common_venues(grouped.iloc[ind,:], num_top_venues)

venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Thai Restaurant,Steakhouse,American Restaurant,Hotel,Clothing Store,Bar,Bakery,Asian Restaurant
1,Agincourt,Chinese Restaurant,Lounge,Skating Rink,Sandwich Place,Breakfast Spot,Yoga Studio,Dog Run,Doner Restaurant,Donut Shop,Drugstore
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Park,Playground,Arts & Crafts Store,Yoga Studio,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Pharmacy,Fried Chicken Joint,Beer Store,Sandwich Place,Discount Store,Japanese Restaurant,Fast Food Restaurant,Pizza Place,Coffee Shop
4,"Alderwood, Long Branch",Pizza Place,Pharmacy,Gym,Coffee Shop,Skating Rink,Sandwich Place,Pub,Doner Restaurant,Dim Sum Restaurant,Diner


## 4. Cluster neighborhoods.

In [99]:
# Keep only the numeric category frequencies for clustering.
# `columns=` is used because passing the axis positionally (drop('Neighbourhood', 1))
# is rejected by pandas 2.0 and later.
grouped_clustering = grouped.drop(columns='Neighbourhood')

In [100]:
# Matplotlib and associated plotting modules.
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage.
from sklearn.cluster import KMeans

import folium # map rendering library

In [101]:
# Set number of clusters.
kclusters = 5

# Run k-means clustering.
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(grouped_clustering)

# Check cluster labels generated for each row in the dataframe.
kmeans.labels_[0:10]

array([4, 4, 3, 4, 4, 4, 4, 4, 4, 4], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [102]:
# Add clustering labels.
venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [103]:
# Attach the cluster label and the top-venue columns from venues_sorted to the postal-code
# table df (which carries latitude/longitude), matching rows on 'Neighbourhood'.
# The inner join drops neighbourhoods that have no entry in venues_sorted.
merged = (
    df.join(venues_sorted.set_index('Neighbourhood'), on='Neighbourhood', how='inner')
      .astype({'Cluster Labels': int})
)

merged.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,4,Fast Food Restaurant,Yoga Studio,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store
1,M1C,Scarborough,"Highland Creek, Port Union, Rouge Hill",43.784535,-79.160497,2,Bar,Yoga Studio,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Diner
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,4,Rental Car Location,Pizza Place,Medical Center,Breakfast Spot,Electronics Store,Mexican Restaurant,Yoga Studio,Drugstore,Discount Store,Dog Run
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,Coffee Shop,Korean Restaurant,Insurance Office,Yoga Studio,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,4,Athletics & Sports,Hakka Restaurant,Fried Chicken Joint,Caribbean Restaurant,Bakery,Thai Restaurant,Bank,Drugstore,Dog Run,Doner Restaurant


#### Finally, let's visualize the resulting clusters.

In [104]:
# Centre of the map. coordinates() returns NaN values when the lookup fails, so fall back
# to the mean of the neighbourhood coordinates in that case.
latitude, longitude = coordinates('Toronto+Canada')
if not (np.isfinite(latitude) and np.isfinite(longitude)):
    latitude, longitude = merged['Latitude'].mean(), merged['Longitude'].mean()

# Create map.
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# Set color scheme for the clusters: one evenly spaced rainbow colour per cluster.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add markers to the map.
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'],
                                  merged['Neighbourhood'], merged['Cluster Labels']):
    # A marker cannot be placed without coordinates.
    if not (np.isfinite(lat) and np.isfinite(lon)):
        continue
    # Labels are 0-based; the popup shows them 1-based to match the "Cluster N" headings.
    label = folium.Popup('%s Cluster %s' % (str(poi), str(cluster+1)), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # Index with the 0-based label itself (cluster-1 wrapped label 0 around to the last colour).
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

## 5. Examine Clusters

### Cluster 1

In [105]:
# Rows with cluster label 0: keep column 1 (Borough) and every column from position 5 onward.
merged[merged['Cluster Labels'] == 0].iloc[:, [1, *range(5, merged.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Scarborough,0,Coffee Shop,Korean Restaurant,Insurance Office,Yoga Studio,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant
6,Scarborough,0,Discount Store,Bus Station,Department Store,Coffee Shop,Convenience Store,Train Station,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop
29,North York,0,Bar,Massage Studio,Coffee Shop,Miscellaneous Shop,Furniture / Home Store,Yoga Studio,Dog Run,Doner Restaurant,Donut Shop,Drugstore
33,North York,0,Discount Store,Gym / Fitness Center,Coffee Shop,Liquor Store,Yoga Studio,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore
34,North York,0,Intersection,Coffee Shop,Hockey Arena,Portuguese Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Empanada Restaurant,Electronics Store,Dessert Shop
37,East Toronto,0,BBQ Joint,Coffee Shop,Neighborhood,Pub,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
40,East York,0,Park,Coffee Shop,Convenience Store,Yoga Studio,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
57,Downtown Toronto,0,Coffee Shop,Café,Italian Restaurant,Burger Joint,Bar,Chinese Restaurant,Indian Restaurant,Bubble Tea Shop,Sandwich Place,Salad Place
65,Central Toronto,0,Coffee Shop,Sandwich Place,Café,Park,Pizza Place,Pub,Burger Joint,BBQ Joint,American Restaurant,History Museum
80,York,0,Sandwich Place,Coffee Shop,Skating Rink,Check Cashing Service,Yoga Studio,Drugstore,Discount Store,Dog Run,Doner Restaurant,Donut Shop


#### This cluster is clearly characterized by the downtown vibes: cafes, coffee shops, restaurants, etc.

### Cluster 2

In [106]:
# Rows with cluster label 1: keep column 1 (Borough) and every column from position 5 onward.
merged[merged['Cluster Labels'] == 1].iloc[:, [1, *range(5, merged.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,North York,1,Baseball Field,Food Truck,Electronics Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Yoga Studio
91,Etobicoke,1,Baseball Field,Construction & Landscaping,Electronics Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Yoga Studio
97,North York,1,Baseball Field,Electronics Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Yoga Studio,Diner


#### This one is for baseball fans!

### Cluster 3

In [107]:
# Rows with cluster label 2: keep column 1 (Borough) and every column from position 5 onward.
merged[merged['Cluster Labels'] == 2].iloc[:, [1, *range(5, merged.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,2,Bar,Yoga Studio,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Diner


#### Seems like this specific neighbourhood is quite unique.

### Cluster 4

In [108]:
# Rows with cluster label 3: keep column 1 (Borough) and every column from position 5 onward.
merged[merged['Cluster Labels'] == 3].iloc[:, [1, *range(5, merged.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Scarborough,3,Park,Playground,Arts & Crafts Store,Yoga Studio,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
23,North York,3,Park,Bank,Yoga Studio,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant
25,North York,3,Fast Food Restaurant,Park,Food & Drink Shop,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
31,North York,3,Park,Grocery Store,Bank,Yoga Studio,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
50,Downtown Toronto,3,Park,Playground,Trail,Yoga Studio,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
73,York,3,Park,Trail,Field,Hockey Arena,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
74,York,3,Park,Pharmacy,Women's Store,Market,Fast Food Restaurant,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop
90,Etobicoke,3,River,Park,Yoga Studio,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant
98,York,3,Park,Convenience Store,Yoga Studio,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant



#### This cluster is for nature lovers: parks and trails!

### Cluster 5

In [110]:
# Rows with cluster label 4: keep column 1 (Borough) and every column from position 5 onward.
merged[merged['Cluster Labels'] == 4].iloc[:, [1, *range(5, merged.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,4,Fast Food Restaurant,Yoga Studio,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store
2,Scarborough,4,Rental Car Location,Pizza Place,Medical Center,Breakfast Spot,Electronics Store,Mexican Restaurant,Yoga Studio,Drugstore,Discount Store,Dog Run
4,Scarborough,4,Athletics & Sports,Hakka Restaurant,Fried Chicken Joint,Caribbean Restaurant,Bakery,Thai Restaurant,Bank,Drugstore,Dog Run,Doner Restaurant
5,Scarborough,4,Playground,Convenience Store,Yoga Studio,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
7,Scarborough,4,Bakery,Bus Line,Park,Fast Food Restaurant,Metro Station,Food Truck,Bus Station,Intersection,Soccer Field,Cuban Restaurant
8,Scarborough,4,American Restaurant,Skating Rink,Motel,Yoga Studio,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
9,Scarborough,4,College Stadium,General Entertainment,Skating Rink,Café,Concert Hall,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Comfort Food Restaurant
10,Scarborough,4,Indian Restaurant,Pet Store,Vietnamese Restaurant,Light Rail Station,Latin American Restaurant,Chinese Restaurant,Drugstore,Discount Store,Dog Run,Doner Restaurant
11,Scarborough,4,Middle Eastern Restaurant,Breakfast Spot,Shopping Mall,Bakery,Sandwich Place,Auto Garage,Yoga Studio,Drugstore,Doner Restaurant,Donut Shop
12,Scarborough,4,Chinese Restaurant,Lounge,Skating Rink,Sandwich Place,Breakfast Spot,Yoga Studio,Dog Run,Doner Restaurant,Donut Shop,Drugstore


#### Urban mix: restaurants, fast food joints, sports venues, pharmacies, rental car locations, public transportation, etc.