<b>Segmenting and Clustering Neighborhoods in Toronto</b>

In [1]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import json
from pandas.io.json import json_normalize

<b>Read data to explore Dataset</b>

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
page = requests.get(url).text
soup = BeautifulSoup(page,'lxml')
data = soup.find('table')
table_data = data.find_all('td')

In [3]:
postalCode = []
borough = []
neighborhood = []

for i in range(0, len(table_data),3):
    postalCode.append(table_data[i].text.strip())
    borough.append(table_data[i+1].text.strip())
    neighborhood.append(table_data[i+2].text.strip())

In [4]:
postCodeDF = pd.DataFrame(data=[postalCode, borough, neighborhood]).transpose()
postCodeDF.columns = ['PostalCode', 'Borough','Neighborhood']

<b>Delete Boroughs that are not assigned, and Assign Boroughs to not-assigned Neighborhoods</b>

In [5]:
postCodeDF.drop(postCodeDF[postCodeDF['Borough']=='Not assigned'].index, inplace = True)
postCodeDF.loc[postCodeDF.Neighborhood == 'Not assigned', 'Neighborhood'] = postCodeDF.Borough

<b>Group by Postcode and separate different Boroughs with a Comma</b>

In [6]:
postCodeDF = postCodeDF.groupby(['PostalCode','Borough'])['Neighborhood'].apply(', '.join).reset_index()

<b>Read Geospatial data using read_csv</b>

In [7]:
geoDF = pd.read_csv("http://cocl.us/Geospatial_data")
geoDF.rename(columns={'Postal Code': 'PostalCode'}, inplace = True)
postCodeDF = pd.merge(postCodeDF, geoDF, on =['PostalCode'], how = 'inner')
postCodeDF.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [8]:
postCodeDF.shape

(103, 5)

In [8]:
print('The dataframe has {} borough and {} neighborhood.'.format(len(postCodeDF['Borough'].unique()),postCodeDF.shape[0]))

The dataframe has 11 borough and 103 neighborhood.


<b>Use geopy library to get lat and long values of Toronto City, Canada

In [10]:
!conda install -c conda-forge geopy --yes


Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
geopy                     1.18.1                     py_0    conda-forge


In [9]:
from geopy.geocoders import Nominatim
address = 'Toronto City, Canada'
geolocator = Nominatim(user_agent='toronto_agent')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.7394839, -79.369314.


<b>Create a map of Toronto city with neighborhoods

In [12]:

!conda install -c conda-forge folium=0.5.0 --yes


Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
folium                    0.5.0                      py_0    conda-forge


In [10]:
#import Folium library
import folium

# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location =[latitude,longitude],zoom_start = 10)

# Add markets to map
for lat, long, borough, neighborhood in zip(postCodeDF['Latitude'], postCodeDF['Longitude'],postCodeDF['Borough'], postCodeDF['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label,parse_html=True)
    folium.CircleMarker(
        [lat,long],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#c96b30',
        fill_opacity=0.9,
        parse_html=False).add_to(map_toronto)
    
map_toronto

In [11]:
borough_count = postCodeDF.groupby(['Borough']).size().reset_index(name='counts')
borough_count

Unnamed: 0,Borough,counts
0,Central Toronto,9
1,Downtown Toronto,18
2,East Toronto,5
3,East York,5
4,Etobicoke,12
5,Mississauga,1
6,North York,24
7,Queen's Park,1
8,Scarborough,17
9,West Toronto,6


<b>For illustration purpose, let's simplify the map by segmenting and clustering only the neighborhood in North York

In [12]:
# Slice the dataframe and create a new dataframe of North York data
NorthYork_data = postCodeDF[postCodeDF['Borough']=='North York'].reset_index(drop=True)
NorthYork_data.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M2H,North York,Hillcrest Village,43.803762,-79.363452
1,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556
2,M2K,North York,Bayview Village,43.786947,-79.385975
3,M2L,North York,"Silver Hills, York Mills",43.75749,-79.374714
4,M2M,North York,"Newtonbrook, Willowdale",43.789053,-79.408493
5,M2N,North York,Willowdale South,43.77012,-79.408493
6,M2P,North York,York Mills West,43.752758,-79.400049
7,M2R,North York,Willowdale West,43.782736,-79.442259
8,M3A,North York,Parkwoods,43.753259,-79.329656
9,M3B,North York,Don Mills North,43.745906,-79.352188


<b> Let's get the geographical coordinates of North York

In [13]:
address = 'North York, Toronto'

geolocator = Nominatim(user_agent='toronto_agent')
location = geolocator.geocode(address)
lati = location.latitude
longi = location.longitude
print('The geographical coordicates of North York, Totonto are {}, {}'.format(latitude, longitude))

The geographical coordicates of North York, Totonto are 43.7394839, -79.369314


<b>Visualize North York neighborhood

In [14]:
map_ny = folium.Map(location=[lati,longi], zoom_start=11)

for lat,long, label in zip(NorthYork_data['Latitude'], NorthYork_data['Longitude'], NorthYork_data['Neighborhood']):
    label = folium.Popup(label,parse_html=True)
    folium.CircleMarker(
        [lat,long],
        radius =5,
        popup=label,
        color ='blue',
        fill='True',
        fill_color='#3186cc',
        fill_opacity=0.7,
        pasrse_html=False
    ).add_to(map_ny)
map_ny

<b>start utilizing the Foursquare API to explore the neighborhoods and segment them.

In [15]:
#Define Foursquare Credentials and Version
CLIENT_ID = 'WOON0KQHLXOFW1FRY4SBIW4SWJ1ZIGHH5LHWRTRVE5OLVXWI' # your Foursquare ID
CLIENT_SECRET = 'BIXIIMW3ZPRFC32LFUDIRGS2OVUNKUBAO4UT130K424XYQDM' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: WOON0KQHLXOFW1FRY4SBIW4SWJ1ZIGHH5LHWRTRVE5OLVXWI
CLIENT_SECRET:BIXIIMW3ZPRFC32LFUDIRGS2OVUNKUBAO4UT130K424XYQDM


<b> Explore Neighborhoods in North York

In [16]:
#Get Nearby Venues Function
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [17]:
NorthYork_venues = getNearbyVenues(names=NorthYork_data['Neighborhood'],
                                   latitudes=NorthYork_data['Latitude'],
                                   longitudes=NorthYork_data['Longitude']
                                  )

Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Bedford Park, Lawrence Manor East
Lawrence Heights, Lawrence Manor
Glencairn
Maple Leaf Park, North Park, Upwood Park
Humber Summit
Emery, Humberlea


In [18]:
print(NorthYork_venues.shape)

(228, 7)


In [19]:
NorthYork_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Hillcrest Village,43.803762,-79.363452,Eagle's Nest Golf Club,43.805455,-79.364186,Golf Course
1,Hillcrest Village,43.803762,-79.363452,AY Jackson Pool,43.804515,-79.366138,Pool
2,Hillcrest Village,43.803762,-79.363452,Villa Madina,43.801685,-79.363938,Mediterranean Restaurant
3,Hillcrest Village,43.803762,-79.363452,Duncan Creek Park,43.805539,-79.360695,Dog Run
4,Hillcrest Village,43.803762,-79.363452,A.Y. Jackson Secondary School Track,43.805068,-79.366677,Athletics & Sports


In [20]:
NorthYork_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Downsview North, Wilson Heights",17,17,17,17,17,17
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",25,25,25,25,25,25
"CFB Toronto, Downsview East",3,3,3,3,3,3
Don Mills North,5,5,5,5,5,5
Downsview Central,3,3,3,3,3,3
Downsview Northwest,5,5,5,5,5,5
Downsview West,4,4,4,4,4,4
"Emery, Humberlea",2,2,2,2,2,2
"Fairview, Henry Farm, Oriole",60,60,60,60,60,60


In [21]:
print('There are {} uniques categories.'.format(len(NorthYork_venues['Venue Category'].unique())))

There are 102 uniques categories.


<b>Analyze Each Neighborhood

In [22]:
# One hot encoding (convert categirical variables to dummy variables)
NorthYork_onehot = pd.get_dummies(NorthYork_venues[['Venue Category']],prefix="",prefix_sep="")

# Add neighborhood column back to 
NorthYork_onehot['Neighborhood']= NorthYork_venues['Neighborhood']

# Move neighborhood column to the first column
fixed_columns = [NorthYork_onehot.columns[-1]] + list(NorthYork_onehot.columns[:-1])
NorthYork_onehot = NorthYork_onehot[fixed_columns]

NorthYork_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,...,Tailor Shop,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Hillcrest Village,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
NorthYork_onehot.shape

(228, 103)

Group rows by Neighborhood

In [24]:
NorthYork_grouped = NorthYork_onehot.groupby('Neighborhood').mean().reset_index()
NorthYork_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,...,Tailor Shop,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,"Bathurst Manor, Downsview North, Wilson Heights",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CFB Toronto, Downsview East",0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Don Mills North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Downsview Central,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Downsview Northwest,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Downsview West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Emery, Humberlea",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Fairview, Henry Farm, Oriole",0.0,0.0,0.016667,0.0,0.033333,0.0,0.033333,0.016667,0.0,...,0.016667,0.016667,0.0,0.016667,0.016667,0.016667,0.0,0.0,0.016667,0.033333


<b> Print each neighborhood along with top 5 venues

In [25]:
num = 5

for hood in NorthYork_grouped['Neighborhood']:
    print("===="+hood+"====")
    temp = NorthYork_grouped[NorthYork_grouped['Neighborhood']==hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq':2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num),'\n')

====Bathurst Manor, Downsview North, Wilson Heights====
                  venue  freq
0           Coffee Shop  0.12
1           Bridal Shop  0.06
2    Frozen Yogurt Shop  0.06
3   Fried Chicken Joint  0.06
4  Fast Food Restaurant  0.06 

====Bayview Village====
                 venue  freq
0   Chinese Restaurant  0.25
1                 Café  0.25
2                 Bank  0.25
3  Japanese Restaurant  0.25
4    Accessories Store  0.00 

====Bedford Park, Lawrence Manor East====
                  venue  freq
0           Coffee Shop  0.08
1    Italian Restaurant  0.08
2  Fast Food Restaurant  0.08
3           Pizza Place  0.08
4        Sandwich Place  0.04 

====CFB Toronto, Downsview East====
         venue  freq
0      Airport  0.33
1         Park  0.33
2     Bus Stop  0.33
3  Pizza Place  0.00
4    Pet Store  0.00 

====Don Mills North====
                  venue  freq
0  Gym / Fitness Center   0.2
1  Caribbean Restaurant   0.2
2                  Café   0.2
3   Japanese Restaurant   0.2


<b> Put it into Pandas dataframe

In [26]:
# Function to sort the venues in descending order
def return_most_common_venues(row,num_top_venues):
    row_cate = row.iloc[1:]
    row_cate_sorted = row_cate.sort_values(ascending=False)
    
    return row_cate_sorted.index.values[0:num_top_venues]

In [27]:
# Create a new Dataframe and display top 5 venues for each neighborhood
num_top_venues = 5

rank_suffix = ['st','nd','rd']

# Create columns according to number of top venues
col = ['Neighborhood']
for i in range(num_top_venues):
    try:
        col.append('{}{} Most Common Venue'.format(i+1, rank_suffix[i]))
    except:
        col.append('{}th Most Common Venue'.format(i+1))

# Create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns= col)
neighborhoods_venues_sorted['Neighborhood'] = NorthYork_grouped['Neighborhood']

for i in np.arange(NorthYork_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[i,1:] = return_most_common_venues(NorthYork_grouped.iloc[i,:],num_top_venues)

neighborhoods_venues_sorted.head()


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,"Bathurst Manor, Downsview North, Wilson Heights",Coffee Shop,Frozen Yogurt Shop,Pharmacy,Restaurant,Diner
1,Bayview Village,Chinese Restaurant,Café,Bank,Japanese Restaurant,Dog Run
2,"Bedford Park, Lawrence Manor East",Fast Food Restaurant,Pizza Place,Italian Restaurant,Coffee Shop,Greek Restaurant
3,"CFB Toronto, Downsview East",Park,Airport,Bus Stop,Dog Run,Coffee Shop
4,Don Mills North,Japanese Restaurant,Caribbean Restaurant,Gym / Fitness Center,Café,Basketball Court


<b> Cluster Neighborhoods

In [28]:
#Apply k-means to cluster the neighborhood into 5 clusters
kclusters = 5

ny_grouped_clustering = NorthYork_grouped.drop('Neighborhood',1)

# run k-means clustering
kmeans = KMeans(n_clusters = kclusters, random_state=0).fit(ny_grouped_clustering)

#check cluster labels generated for each row in the DF
kmeans.labels_[0:5]

array([3, 3, 3, 1, 3], dtype=int32)

In [29]:
#add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

ny_merged = NorthYork_data

#merge North York_grouped with North York data to add latitude/longitude for each neighborhood
ny_merged = ny_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on=
                          'Neighborhood')



In [46]:
ny_merged.dropna(subset=['Cluster Labels'],inplace = True)
ny_merged['Cluster Labels'] = ny_merged['Cluster Labels'].astype(int)
ny_merged.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M2H,North York,Hillcrest Village,43.803762,-79.363452,3,Golf Course,Dog Run,Pool,Athletics & Sports,Mediterranean Restaurant
1,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556,3,Clothing Store,Fast Food Restaurant,Coffee Shop,Restaurant,Asian Restaurant
2,M2K,North York,Bayview Village,43.786947,-79.385975,3,Chinese Restaurant,Café,Bank,Japanese Restaurant,Dog Run
5,M2N,North York,Willowdale South,43.77012,-79.408493,3,Restaurant,Ramen Restaurant,Sandwich Place,Café,Japanese Restaurant
6,M2P,North York,York Mills West,43.752758,-79.400049,1,Park,Bank,Women's Store,Dog Run,Coffee Shop
7,M2R,North York,Willowdale West,43.782736,-79.442259,3,Pharmacy,Grocery Store,Pizza Place,Coffee Shop,Butcher
8,M3A,North York,Parkwoods,43.753259,-79.329656,1,Park,Food & Drink Shop,Fast Food Restaurant,Women's Store,Discount Store
9,M3B,North York,Don Mills North,43.745906,-79.352188,3,Japanese Restaurant,Caribbean Restaurant,Gym / Fitness Center,Café,Basketball Court
10,M3C,North York,"Flemingdon Park, Don Mills South",43.7259,-79.340923,3,Coffee Shop,Asian Restaurant,Gym,Beer Store,Bike Shop
11,M3H,North York,"Bathurst Manor, Downsview North, Wilson Heights",43.754328,-79.442259,3,Coffee Shop,Frozen Yogurt Shop,Pharmacy,Restaurant,Diner


<b>Visualize the resulting cluster

In [48]:
#create map
map_clusters = folium.Map(location=[latitude,longitude],zoom_start=11)

#set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

#add markers to the map
markers_colors = []
for lat,long,poi, cluster in zip(ny_merged['Latitude'], ny_merged['Longitude'], ny_merged['Neighborhood'], ny_merged['Cluster Labels']):
    label = folium.Popup(str(poi)+ ', Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat,long],
        radius=5,
        popup = label,
        color = rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
map_clusters

<b>Examine Cluster

In [49]:
#Cluster 0
ny_merged.loc[ny_merged['Cluster Labels'] == 0, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
22,North York,0,Pizza Place,Empanada Restaurant,Dog Run,Clothing Store,Coffee Shop


In [50]:
#Cluster 1
ny_merged.loc[ny_merged['Cluster Labels'] == 1, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
6,North York,1,Park,Bank,Women's Store,Dog Run,Coffee Shop
8,North York,1,Park,Food & Drink Shop,Fast Food Restaurant,Women's Store,Discount Store
13,North York,1,Park,Airport,Bus Stop,Dog Run,Coffee Shop
21,North York,1,Park,Construction & Landscaping,Bakery,Basketball Court,Electronics Store


In [51]:
#Cluster 2
ny_merged.loc[ny_merged['Cluster Labels'] == 2, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
14,North York,2,Grocery Store,Moving Target,Bank,Women's Store,Electronics Store
16,North York,2,Liquor Store,Grocery Store,Gym / Fitness Center,Athletics & Sports,Discount Store


In [52]:
#Cluster 3
ny_merged.loc[ny_merged['Cluster Labels'] == 3, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,North York,3,Golf Course,Dog Run,Pool,Athletics & Sports,Mediterranean Restaurant
1,North York,3,Clothing Store,Fast Food Restaurant,Coffee Shop,Restaurant,Asian Restaurant
2,North York,3,Chinese Restaurant,Café,Bank,Japanese Restaurant,Dog Run
5,North York,3,Restaurant,Ramen Restaurant,Sandwich Place,Café,Japanese Restaurant
7,North York,3,Pharmacy,Grocery Store,Pizza Place,Coffee Shop,Butcher
9,North York,3,Japanese Restaurant,Caribbean Restaurant,Gym / Fitness Center,Café,Basketball Court
10,North York,3,Coffee Shop,Asian Restaurant,Gym,Beer Store,Bike Shop
11,North York,3,Coffee Shop,Frozen Yogurt Shop,Pharmacy,Restaurant,Diner
12,North York,3,Coffee Shop,Miscellaneous Shop,Massage Studio,Bar,Dog Run
15,North York,3,Business Service,Food Truck,Baseball Field,Women's Store,Electronics Store


In [53]:
#Cluster 4
ny_merged.loc[ny_merged['Cluster Labels'] == 4, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
23,North York,4,Furniture / Home Store,Baseball Field,Women's Store,Electronics Store,Coffee Shop
