## Battle of Neighborhoods

This is the notebook of my capstone project for the Applied Data Science Capstone course

In [66]:
import pandas as pd
import numpy as np
from requests import get
from bs4 import BeautifulSoup  # for scraping the wikipedia webpage
from geopy.geocoders import Nominatim
import folium 

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

print ("Hello Capstone Project Course!")

Hello Capstone Project Course!


### 1. Download all neighborhoods in Toronto

Scrap the list of city-designated neighbourhoods in Toronto from wikipedia: https://en.wikipedia.org/wiki/List_of_city-designated_neighbourhoods_in_Toronto

In [3]:
wikiurl = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# scrap wiki webpage
response = get(wikiurl)
soup = BeautifulSoup(response.content, 'html.parser')
table = soup.find('table', {'class': 'wikitable sortable'})

# convert the table text to a pandas dataframe
neighborhoods = pd.read_html(str(table))[0]
neighborhoods.shape

(180, 3)

In [4]:
# review the first few rows of the neighborhoods df
neighborhoods.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [5]:
# remove data with "Not asigned" in "Neighbourhood"

neighborhoods = neighborhoods[neighborhoods['Neighbourhood'] != 'Not assigned']
neighborhoods.shape

(103, 3)

In [6]:
# review again
neighborhoods.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


-- 

Use the downloaded CSV file to match postal code and coordinates

In [7]:
lat_lon = pd.read_csv('Geospatial_Coordinates.csv')
lat_lon.shape

(103, 3)

In [8]:
# review df
lat_lon.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [9]:
# join two dfs
neighborhoods_ll = neighborhoods.merge(lat_lon, on='Postal Code')
neighborhoods_ll.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [11]:
# get the coordinates for Toronto

address = 'Toronto, ON, Canada'

geolocator = Nominatim(user_agent="toronto_explorer")
tor_location = geolocator.geocode(address)
tor_lat = tor_location.latitude
tor_lon = tor_location.longitude

print('The geograpical coordinate of {} are {}, {}.'.format(address, tor_lat, tor_lon))

The geograpical coordinate of Toronto, ON, Canada are 43.6534817, -79.3839347.


In [12]:
# create a map of Toronto withe all of the 103 neighborhoods based on postal codes
map_toronto = folium.Map(location=[tor_lat, tor_lon], zoom_start=11)

# add neighborhoods on the map
for lat, lon, bor, neigh, pos in zip(neighborhoods_ll['Latitude'], neighborhoods_ll['Longitude'], neighborhoods_ll['Borough'], neighborhoods_ll['Neighbourhood'], neighborhoods_ll['Postal Code']):
    label = '{} ({}, {})'.format(neigh, bor, pos)
    label = folium.Popup(
        label, 
        parse_html = True, 
        max_width = 150)
    folium.CircleMarker(
        [lat, lon], 
        radius = 8, 
        popup = label, 
        color = 'red', 
        fill = True, 
        fill_opacity = 0.5, 
        parse_html=False).add_to(map_toronto)

map_toronto

## 2. Explore Neighborhoods in Torontos


#### Define Foursquare Credentials and Version

In [13]:
CLIENT_ID = 'UN3245AMKMLJGTFOBM3NCZYTSY2UUMLBNAMIQTBUC2FSWGM1' # your Foursquare ID
CLIENT_SECRET = 'XEY4L3OFGS5ULNXTYLXUSNDRL0AFN0JJJCAD10UK1E3UNRVT' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

#### Download neighborhood venues

In [17]:
# a function to get neighborhood venues based on the central coordinate:

def exploreVenues (neighs, lats, lons, radius = 500): 
    
    venues = []
    
    for neigh, lat, lon in zip (neighs, lats, lons):
        print (neigh, end=': ')
        
        try: 
            # get data from foursqaure
            url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
                CLIENT_ID, 
                CLIENT_SECRET, 
                VERSION, 
                lat, 
                lon, 
                radius, 
                LIMIT)
            re = get(url).json()["response"]['groups'][0]['items']

            # parse data into a dataframe
            venues.append([(
                neigh, 
                lat, 
                lon, 
                v['venue']['name'], 
                v['venue']['location']['lat'], 
                v['venue']['location']['lng'],  
                v['venue']['categories'][0]['name']) for v in re])

            venues_df = pd.DataFrame([item for venues in venues for item in venues])
            venues_df.columns = [
                'Neighborhood', 
                'Neigh_Lat', 
                'Neigg_Lon', 
                'Venue', 
                'Venue_Lat', 
                'Venue_Lon', 
                'Venue_Category']
            
        
            # print results for review
            print ('found {} venues'.format(len(venues_df)))
        
        except: 
            print ('Not found')
        
        print ('')
        
    return (venues_df)
        

In [18]:
# get data using the function
neighs = neighborhoods_ll['Neighbourhood']
lats = neighborhoods_ll['Latitude']
lons = neighborhoods_ll['Longitude']

toronto_venues = exploreVenues(neighs, lats, lons)

Parkwoods: found 2 venues

Victoria Village: found 7 venues

Regent Park, Harbourfront: found 51 venues

Lawrence Manor, Lawrence Heights: found 64 venues

Queen's Park, Ontario Provincial Government: found 97 venues

Islington Avenue, Humber Valley Village: found 97 venues

Malvern, Rouge: found 98 venues

Don Mills: found 102 venues

Parkview Hill, Woodbine Gardens: found 112 venues

Garden District, Ryerson: found 212 venues

Glencairn: found 216 venues

West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale: found 217 venues

Rouge Hill, Port Union, Highland Creek: found 219 venues

Don Mills: found 238 venues

Woodbine Heights: found 245 venues

St. James Town: found 330 venues

Humewood-Cedarvale: found 335 venues

Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood: found 343 venues

Guildwood, Morningside, West Hill: found 351 venues

The Beaches: found 355 venues

Berczy Park: found 410 venues

Caledonia-Fairbanks: found 414 venues

Woburn: found 418 

#### Exploratary neighborhood venue data

In [19]:
# check neighborhood venue summary
toronto_venues.groupby('Neighborhood').count().sort_values(by='Venue_Category', ascending=False)

Unnamed: 0_level_0,Neigh_Lat,Neigg_Lon,Venue,Venue_Lat,Venue_Lon,Venue_Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Toronto Dominion Centre, Design Exchange",100,100,100,100,100,100
"Richmond, Adelaide, King",100,100,100,100,100,100
"Harbourfront East, Union Station, Toronto Islands",100,100,100,100,100,100
"Garden District, Ryerson",100,100,100,100,100,100
"First Canadian Place, Underground city",100,100,100,100,100,100
...,...,...,...,...,...,...
Roselawn,2,2,2,2,2,2
"West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale",1,1,1,1,1,1
"Humberlea, Emery",1,1,1,1,1,1
"Malvern, Rouge",1,1,1,1,1,1


In [20]:
# check neighborhood venue categoires
unique_cats = len(toronto_venues['Venue_Category'].unique())  

print ('There are {} unique categories in the df'.format(unique_cats))

There are 273 unique categories in the df


In [21]:
# check venue categories counts
toronto_venues.groupby('Venue_Category').count().sort_values(by='Neighborhood', ascending=False)

Unnamed: 0_level_0,Neighborhood,Neigh_Lat,Neigg_Lon,Venue,Venue_Lat,Venue_Lon
Venue_Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Coffee Shop,191,191,191,191,191,191
Café,93,93,93,93,93,93
Restaurant,67,67,67,67,67,67
Park,51,51,51,51,51,51
Pizza Place,49,49,49,49,49,49
...,...,...,...,...,...,...
Hookah Bar,1,1,1,1,1,1
Recording Studio,1,1,1,1,1,1
Home Service,1,1,1,1,1,1
Curling Ice,1,1,1,1,1,1


## 3. Analyze Each Neighborhood

#### Pre-process data

In [40]:
# one hot encoding
tor_onehot = pd.get_dummies(toronto_venues['Venue_Category'], prefix = '', prefix_sep = '')
print ('Shape of tor_onehot: ', tor_onehot.shape)

# add neighborhood data to df
tor_onehot['Neighborhood'] = toronto_venues['Neighborhood']

# move neighborhood column to the first column
neigh_index = list(tor_onehot.columns).index('Neighborhood')  # find the index of the 'Neighborhood' column
fixed_columns = [tor_onehot.columns[neigh_index]] + list(tor_onehot.columns[:neigh_index]) + list(tor_onehot.columns[neigh_index+1:])  
tor_onehot = tor_onehot[fixed_columns]

print ('Shape of tor_onehot (after adding Neighborhoods): ', tor_onehot.shape)

Shape of tor_onehot:  (2136, 273)
Shape of tor_onehot (after adding Neighborhoods):  (2136, 273)


#### Check frequency of each category for each neighborhood

In [41]:
tor_grouped = tor_onehot.groupby('Neighborhood').mean().reset_index()
print ('Shape of the df grouped by neighborhood: ', tor_grouped.shape)
tor_grouped

Shape of the df grouped by neighborhood:  (96, 273)


Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,"Willowdale, Willowdale West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
92,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
93,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
94,York Mills West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Top venue categories in each neighborhood

In [42]:
num_top_venues = 5

for hood in tor_grouped['Neighborhood']:
    print("---- " + hood + " ----")
    temp = tor_grouped[tor_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---- Agincourt ----
                       venue  freq
0             Clothing Store   0.2
1             Breakfast Spot   0.2
2                     Lounge   0.2
3               Skating Rink   0.2
4  Latin American Restaurant   0.2


---- Alderwood, Long Branch ----
         venue  freq
0  Pizza Place  0.29
1          Gym  0.14
2  Coffee Shop  0.14
3          Pub  0.14
4     Pharmacy  0.14


---- Bathurst Manor, Wilson Heights, Downsview North ----
                       venue  freq
0                Coffee Shop  0.10
1                       Bank  0.10
2                Pizza Place  0.05
3                Bridal Shop  0.05
4  Middle Eastern Restaurant  0.05


---- Bayview Village ----
                 venue  freq
0  Japanese Restaurant  0.25
1   Chinese Restaurant  0.25
2                 Bank  0.25
3                 Café  0.25
4    Accessories Store  0.00


---- Bedford Park, Lawrence Manor East ----
                     venue  freq
0              Coffee Shop  0.09
1           Sandwich Plac

In [43]:
# convert top venues into a df

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [44]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = tor_grouped['Neighborhood']

for ind in np.arange(tor_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(tor_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Clothing Store,Lounge,Breakfast Spot,Skating Rink,Latin American Restaurant,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore
1,"Alderwood, Long Branch",Pizza Place,Sandwich Place,Coffee Shop,Pub,Pharmacy,Gym,Greek Restaurant,Discount Store,Department Store,Dessert Shop
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Pharmacy,Deli / Bodega,Bridal Shop,Shopping Mall,Sandwich Place,Diner,Restaurant,Middle Eastern Restaurant
3,Bayview Village,Café,Japanese Restaurant,Chinese Restaurant,Bank,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Yoga Studio
4,"Bedford Park, Lawrence Manor East",Italian Restaurant,Sandwich Place,Coffee Shop,Greek Restaurant,Sushi Restaurant,Juice Bar,Café,Thai Restaurant,Restaurant,Indian Restaurant


## 4. Cluster Neighborhoods
   

Run _k_-means to cluster the neighborhood into 5 clusters.


In [83]:
kmeans.labels_

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 4, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1,
       0, 1, 1, 1, 1, 1, 0, 2])

In [80]:
# set number of clusters
kclusters = 5

tor_grouped_clustering = tor_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(tor_grouped_clustering)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

neighborhoods_venues_sorted.head()

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,Agincourt,Clothing Store,Lounge,Breakfast Spot,Skating Rink,Latin American Restaurant,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore
1,1,"Alderwood, Long Branch",Pizza Place,Sandwich Place,Coffee Shop,Pub,Pharmacy,Gym,Greek Restaurant,Discount Store,Department Store,Dessert Shop
2,1,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Pharmacy,Deli / Bodega,Bridal Shop,Shopping Mall,Sandwich Place,Diner,Restaurant,Middle Eastern Restaurant
3,1,Bayview Village,Café,Japanese Restaurant,Chinese Restaurant,Bank,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Yoga Studio
4,1,"Bedford Park, Lawrence Manor East",Italian Restaurant,Sandwich Place,Coffee Shop,Greek Restaurant,Sushi Restaurant,Juice Bar,Café,Thai Restaurant,Restaurant,Indian Restaurant


Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.


In [84]:
# add lat and lon to the clusters 
tor_merged = neighborhoods_ll

# merge tor_grouped with toronto neighborhood data and add latitude/longitude for each neighborhood
tor_merged = tor_merged.merge(neighborhoods_venues_sorted, left_on = 'Neighbourhood', right_on='Neighborhood', how='left')

tor_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,0.0,Parkwoods,Park,Food & Drink Shop,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,1.0,Victoria Village,Pizza Place,Hockey Arena,Coffee Shop,Portuguese Restaurant,Intersection,Electronics Store,Eastern European Restaurant,Escape Room,Ethiopian Restaurant,Event Space
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1.0,"Regent Park, Harbourfront",Coffee Shop,Bakery,Pub,Park,Breakfast Spot,Café,Theater,Spa,Brewery,Shoe Store
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,1.0,"Lawrence Manor, Lawrence Heights",Clothing Store,Accessories Store,Boutique,Gift Shop,Furniture / Home Store,Event Space,Coffee Shop,Women's Store,Vietnamese Restaurant,Airport Service
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,1.0,"Queen's Park, Ontario Provincial Government",Coffee Shop,Yoga Studio,Sushi Restaurant,Bar,Beer Bar,Smoothie Shop,Sandwich Place,Café,Restaurant,Chinese Restaurant


Clean up the merged dataframe

In [86]:
# drop unused columns
tor_merged.drop(['Neighbourhood'], axis=1, inplace=True)

# drop data without a cluster label
n = len(tor_merged)
tor_merged = tor_merged[tor_merged['Cluster Labels'].notna()]
print ('Dropped {} NA rows.'.format(n - len(tor_merged)))

# reset Cluster Labels to be ints
tor_merged['Cluster Labels'] = tor_merged['Cluster Labels'].astype('int')

Dropped 3 NA rows.


Finally, let's visualize the resulting clusters


In [87]:
# create a map of Toronto withe all of the 103 neighborhoods based on postal codes
map_toronto_clusters = folium.Map(location=[tor_lat, tor_lon], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add neighborhoods on the map
for lat, lon, neigh, cluster in zip(tor_merged['Latitude'], tor_merged['Longitude'], tor_merged['Neighborhood'], tor_merged['Cluster Labels']):
    label = '{} (Cluster: {})'.format(neigh, cluster)
    label = folium.Popup(
        label, 
        parse_html = True, 
        max_width = 150)
    folium.CircleMarker(
        [lat, lon], 
        radius = 8, 
        popup = label, 
        color = rainbow[cluster-1],
        fill = True, 
        fill_color = rainbow[cluster-1],
        fill_opacity = 0.5, 
        parse_html=False).add_to(map_toronto_clusters)

map_toronto_clusters

## 5. Examine Clusters

#### Cluster 1: Areas with a lot of activities 

Top venues in these areas include parks, yoga studio, dog run, etc. Good for people with an active personal style

In [88]:
tor_merged[tor_merged['Cluster Labels'] == 0]

Unnamed: 0,Postal Code,Borough,Latitude,Longitude,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,43.753259,-79.329656,0,Parkwoods,Park,Food & Drink Shop,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant
21,M6E,York,43.689026,-79.453512,0,Caledonia-Fairbanks,Park,Women's Store,Pool,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
35,M4J,East York,43.685347,-79.338106,0,"East Toronto, Broadview North (Old East York)",Park,Convenience Store,Intersection,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
49,M6L,North York,43.713756,-79.490074,0,"North Park, Maple Leaf Park, Upwood Park",Park,Construction & Landscaping,Bakery,Yoga Studio,Dumpling Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
61,M4N,Central Toronto,43.72802,-79.38879,0,Lawrence Park,Park,Swim School,Bus Line,Yoga Studio,Drugstore,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
64,M9N,York,43.706876,-79.518188,0,Weston,Park,Yoga Studio,Dumpling Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant
66,M2P,North York,43.752758,-79.400049,0,York Mills West,Park,Convenience Store,Yoga Studio,Dumpling Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
85,M1V,Scarborough,43.815252,-79.284577,0,"Milliken, Agincourt North, Steeles East, L'Amo...",Playground,Park,Bakery,Yoga Studio,Dumpling Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
91,M4W,Downtown Toronto,43.679563,-79.377529,0,Rosedale,Park,Playground,Trail,Yoga Studio,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant


#### Cluster 2: Hipster Hubs

Top venues in these areas include pizza places, different types of resturants, coffee shops, bars, etc. Good for hipster and yound folks who love dining outside and hanging out at a coffee shop or bar. 

In [89]:
tor_merged[tor_merged['Cluster Labels'] == 1]

Unnamed: 0,Postal Code,Borough,Latitude,Longitude,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,M4A,North York,43.725882,-79.315572,1,Victoria Village,Pizza Place,Hockey Arena,Coffee Shop,Portuguese Restaurant,Intersection,Electronics Store,Eastern European Restaurant,Escape Room,Ethiopian Restaurant,Event Space
2,M5A,Downtown Toronto,43.654260,-79.360636,1,"Regent Park, Harbourfront",Coffee Shop,Bakery,Pub,Park,Breakfast Spot,Café,Theater,Spa,Brewery,Shoe Store
3,M6A,North York,43.718518,-79.464763,1,"Lawrence Manor, Lawrence Heights",Clothing Store,Accessories Store,Boutique,Gift Shop,Furniture / Home Store,Event Space,Coffee Shop,Women's Store,Vietnamese Restaurant,Airport Service
4,M7A,Downtown Toronto,43.662301,-79.389494,1,"Queen's Park, Ontario Provincial Government",Coffee Shop,Yoga Studio,Sushi Restaurant,Bar,Beer Bar,Smoothie Shop,Sandwich Place,Café,Restaurant,Chinese Restaurant
7,M3B,North York,43.745906,-79.352188,1,Don Mills,Gym,Beer Store,Coffee Shop,Japanese Restaurant,Caribbean Restaurant,Clothing Store,Italian Restaurant,Restaurant,Discount Store,Café
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,M8X,Etobicoke,43.653654,-79.506944,1,"The Kingsway, Montgomery Road, Old Mill North",River,Pool,Yoga Studio,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Drugstore
99,M4Y,Downtown Toronto,43.665860,-79.383160,1,Church and Wellesley,Coffee Shop,Sushi Restaurant,Gay Bar,Japanese Restaurant,Restaurant,Café,Hotel,Men's Store,Yoga Studio,Bubble Tea Shop
100,M7Y,East Toronto,43.662744,-79.321558,1,"Business reply mail Processing Centre, South C...",Pizza Place,Auto Workshop,Garden Center,Garden,Light Rail Station,Fast Food Restaurant,Farmers Market,Comic Shop,Park,Recording Studio
101,M8Y,Etobicoke,43.636258,-79.498509,1,"Old Mill South, King's Mill Park, Sunnylea, Hu...",Construction & Landscaping,Baseball Field,Yoga Studio,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store


#### Cluster 3: (Undertermined)

There is only 1 neighborhood fits into this cluster, the observation is too limit to generalize its characteristics. Further analysis is required. 

In [92]:
tor_merged[tor_merged['Cluster Labels'] == 2]

Unnamed: 0,Postal Code,Borough,Latitude,Longitude,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
45,M2L,North York,43.75749,-79.374714,2,"York Mills, Silver Hills",Martial Arts School,Yoga Studio,Dumpling Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant


#### Cluster 4: (Undertermined)

There is only 1 neighborhood fits into this cluster, the observation is too limit to generalize its characteristics. Further analysis is required. 

In [93]:
tor_merged[tor_merged['Cluster Labels'] == 3]

Unnamed: 0,Postal Code,Borough,Latitude,Longitude,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,M9B,Etobicoke,43.650943,-79.554724,3,"West Deane Park, Princess Gardens, Martin Grov...",Print Shop,Yoga Studio,Dumpling Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant


#### Cluster 5: (Undertermined)

There is only 1 neighborhood fits into this cluster, the observation is too limit to generalize its characteristics. Further analysis is required. 

In [94]:
tor_merged[tor_merged['Cluster Labels'] == 4]

Unnamed: 0,Postal Code,Borough,Latitude,Longitude,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,M1B,Scarborough,43.806686,-79.194353,4,"Malvern, Rouge",Fast Food Restaurant,Dumpling Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant,Women's Store
