In [None]:
# Import library to process data into pandas DataFrame
import pandas as pd
# Transform JSON file into a pandas dataframe
from pandas.io.json import json_normalize

In [None]:
!pip install lxml

In [None]:
# import libraries for accessing website url
import requests

In [None]:
# Import library for webscrapping
from bs4 import BeautifulSoup
from urllib.request import urlopen

In [None]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [None]:
import numpy as np

In [None]:
import json


In [None]:
import folium

In [None]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
# import k-means from clustering stage
from sklearn.cluster import KMeans

In [None]:
# identify the website
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
html = urlopen(url)
soup = BeautifulSoup(html, 'html.parser')

In [None]:
soup

In [None]:
table_contents=[]
table=soup.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace('/',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

# print(table_contents)
df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business','EtobicokeNorthwest':'Etobicoke Northwest',
'East YorkEast Toronto':'East York/East Toronto','MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

In [None]:
df.head(10)

In [None]:
print('\nThe dataframe has {} boroughs and {} neighborhoods,'.format(
    len(df['Borough'].unique()),
      df.shape[0]
     )
)

In [None]:
# Postal code and geographical coordinates .csv file made available
df_geo = pd.read_csv("Geospatial_data.csv", delimiter = ",")
df_geo.rename(columns={'Postal Code': 'PostalCode'}, inplace=True)
df_geo.head(10)

In [None]:
# Merging the DataFrame and location coordinates
df = pd.merge(df, df_geo, on='PostalCode')
df.head(10)

# To Explore and Cluster Toronto Neighborhoods

### Using the geopy library to get the latitude and longitude values of Toronto City

In [None]:
address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))

In [None]:
# create map of Toronto City using latitude and longitude values
map_toronto = folium.Map(location=[latitude,longitude], zoom_start=10)
# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto
)
map_toronto

### Define Foursquare Credentials and Version

In [None]:
CLIENT_ID = '0C2BD04A2V1DGSC4FXV5GIU01V2WURJBZ2XM044H3IGYJVRX' # My Foursquare ID
CLIENT_SECRET = 'ILVXI2IA2FRQ5V2RKVBSEFDAG2NAAJNEXYODYFMGIZN1OXYJ' # My Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET: ' + CLIENT_SECRET)

In [None]:
# What are the neighborhoods of Downtown Toronto?
downtown_data = df[df['Borough'] == "Downtown Toronto"].reset_index(drop=True)
downtown_data.head(10)

In [None]:
# What is the first neighborhood of Downtown Toronto?
downtown_data.loc[0, 'Neighborhood']

In [None]:
# What are the latitude and longitude values for Downtown, Toronto, Ontario?
address = 'Downtown Toronto, Toronto, Ontario'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Downtown Toronto are {}, {}.'.format(latitude, longitude))

In [None]:
# What are the latitude and longitude values for Regent Park, Harbourfront, Toronto, Ontario?
neighborhood_latitude = downtown_data.loc[0,'Latitude'] # neighborhood latitude value
neighborhood_longitude = downtown_data.loc[0, 'Longitude'] # neighborhood longitude value
neighborhood_name = downtown_data.loc[0, 'Neighborhood'] # neighborhood name
print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name,neighborhood_latitude,
neighborhood_longitude))

In [None]:
# The top 100 venues that are in Regent Park, Harbourfront within a radius of 300 meters.
# Limit the number of venues returned.
LIMIT = 100
# Define the radius explored
radius = 300
# First, we create the GET request URL, which we name 'url'.
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    neighborhood_latitude,
    neighborhood_longitude,
    radius,
    LIMIT)
url
# display URL

In [None]:
# Send the request and examine the results
results = requests.get(url).json()
results

From the Foursquare lab in the previous module, we know that all the
information is in the items key. Before proceeding, borrow the
get_category_type function from the Foursquare lab.

In [None]:
# The function that extracts the category of the venue...

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']


In [None]:
# clean the json and structure it into a pandas dataframe.
venues = results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues) # flatten JSON
# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]
# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
# clean columns
nearby_venues.columns = [col.split(".")[-1]
for col in nearby_venues.columns]
nearby_venues.head(7)

In [None]:
# The number of venues returned by Foursquare:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

### To create a function that repeats the same process to all the neighborhoods in Downtown, Toronto.

In [143]:
def getNearbyVenues(names, latitudes, longitudes, radius=300):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [144]:
downtown_venues = getNearbyVenues(names=downtown_data['Neighborhood'],
                                   latitudes=downtown_data['Latitude'],
                                   longitudes=downtown_data['Longitude']
                                  )

Regent Park , Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Christie
Richmond , Adelaide , King
Harbourfront East , Union Station , Toronto Islands
Toronto Dominion Centre , Design Exchange
Commerce Court , Victoria Hotel
University of Toronto , Harbord
Kensington Market , Chinatown , Grange Park
CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst Quay , South Niagara , Island airport
Rosedale
St. James Town , Cabbagetown
First Canadian Place , Underground city
Church and Wellesley


In [146]:
# What is the resulting dataframe?
print(downtown_venues.shape)
#What does it contain, for exmple?
downtown_venues.head()

(610, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park , Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
1,"Regent Park , Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
2,"Regent Park , Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
3,"Regent Park , Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
4,"Regent Park , Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot


In [147]:
# How many venues were returned for each neighborhood?
downtown_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,6,6,6,6,6,6
"CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst Quay , South Niagara , Island airport",5,5,5,5,5,5
Central Bay Street,34,34,34,34,34,34
Christie,7,7,7,7,7,7
Church and Wellesley,50,50,50,50,50,50
"Commerce Court , Victoria Hotel",79,79,79,79,79,79
"First Canadian Place , Underground city",75,75,75,75,75,75
"Garden District, Ryerson",51,51,51,51,51,51
"Harbourfront East , Union Station , Toronto Islands",54,54,54,54,54,54
"Kensington Market , Chinatown , Grange Park",59,59,59,59,59,59


In [148]:
# Unique categories that can be curated from all the venues returned...
print('There are {} uniques categories.'.format(len(downtown_venues['Venue Category'].unique())))

There are 142 uniques categories.


In [149]:
# To analyze each neighborhood...
# one hot encoding
downtown_onehot = pd.get_dummies(downtown_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
downtown_onehot['Neighborhood'] = downtown_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [downtown_onehot.columns[-1]] + list(downtown_onehot.columns[:-1])
downtown_onehot = downtown_onehot[fixed_columns]

downtown_onehot.head()

Unnamed: 0,Yoga Studio,Adult Boutique,Airport Food Court,Airport Gate,Airport Lounge,Airport Terminal,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,...,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [150]:
# Closer examiniation of the dataframe size
downtown_onehot.shape

(610, 142)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category


In [151]:
downtown_grouped = downtown_onehot.groupby('Neighborhood').mean().reset_index()
downtown_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Adult Boutique,Airport Food Court,Airport Gate,Airport Lounge,Airport Terminal,American Restaurant,Art Gallery,Arts & Crafts Store,...,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"CN Tower , King and Spadina , Railway Lands , ...",0.0,0.0,0.2,0.2,0.2,0.2,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Church and Wellesley,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02
5,"Commerce Court , Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.025316,0.012658,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0
6,"First Canadian Place , Underground city",0.0,0.0,0.0,0.0,0.0,0.0,0.026667,0.013333,0.0,...,0.013333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013333,0.0
7,"Garden District, Ryerson",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.019608,0.019608,0.019608,0.0,0.0,0.0,0.019608,0.019608,0.0,0.0
8,"Harbourfront East , Union Station , Toronto Is...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018519,0.0,...,0.018519,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Kensington Market , Chinatown , Grange Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016949,...,0.033898,0.016949,0.0,0.0,0.016949,0.033898,0.0,0.033898,0.016949,0.0


Confirm the new size

In [152]:
downtown_grouped.shape

(17, 142)

In [153]:
num_top_venues = 5

for hood in downtown_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = downtown_grouped[downtown_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
            venue  freq
0             Spa  0.17
1      Restaurant  0.17
2    Liquor Store  0.17
3        Beer Bar  0.17
4  Sandwich Place  0.17


----CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst Quay , South Niagara , Island airport----
                venue  freq
0  Airport Food Court   0.2
1        Airport Gate   0.2
2      Airport Lounge   0.2
3    Airport Terminal   0.2
4         Coffee Shop   0.2


----Central Bay Street----
            venue  freq
0     Coffee Shop  0.35
1            Café  0.12
2  Sandwich Place  0.09
3        Pharmacy  0.06
4     Pizza Place  0.06


----Christie----
                 venue  freq
0        Grocery Store  0.29
1          Flower Shop  0.14
2  American Restaurant  0.14
3          Coffee Shop  0.14
4                 Café  0.14


----Church and Wellesley----
                 venue  freq
0              Gay Bar  0.08
1       Sandwich Place  0.06
2  Japanese Restaurant  0.06
3         Dessert Shop  0.04
4  

## To put the information above into a pandas dataframework

### First, Write a function to sort the venues in descending order.

In [154]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [155]:
# Create the dtaframe and display the top 10 venues
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = downtown_grouped['Neighborhood']

for ind in np.arange(downtown_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(downtown_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Spa,Restaurant,Liquor Store,Beer Bar,Sandwich Place,Breakfast Spot,Music Venue,Museum,Movie Theater,New American Restaurant
1,"CN Tower , King and Spadina , Railway Lands , ...",Airport Food Court,Airport Gate,Airport Lounge,Airport Terminal,Coffee Shop,Yoga Studio,Music Venue,Middle Eastern Restaurant,Modern European Restaurant,Molecular Gastronomy Restaurant
2,Central Bay Street,Coffee Shop,Café,Sandwich Place,Pharmacy,Pizza Place,Restaurant,Italian Restaurant,Bookstore,Bank,Farmers Market
3,Christie,Grocery Store,Flower Shop,American Restaurant,Coffee Shop,Café,Japanese Restaurant,Yoga Studio,Movie Theater,Middle Eastern Restaurant,Modern European Restaurant
4,Church and Wellesley,Gay Bar,Sandwich Place,Japanese Restaurant,Dessert Shop,Burrito Place,Mexican Restaurant,Coffee Shop,Gym,Diner,Escape Room


# Step 4. Clustering Neighborhoods

### To run k-means to cluster the neighborhood into 5 clusters

In [160]:
# set number of clusters
kclusters = 5

downtown_grouped_clustering = downtown_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(downtown_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([4, 1, 0, 3, 0, 0, 0, 0, 0, 0])

## To create a new dataframe that includes the cluster as well as the top venues for each neighborhood

In [161]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

downtown_merged = downtown_data

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
downtown_merged = downtown_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

downtown_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,0,Spa,Coffee Shop,Park,Bus Stop,Food Truck,Furniture / Home Store,Bakery,Dog Run,Breakfast Spot,Distribution Center
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Coffee Shop,Café,Middle Eastern Restaurant,Sandwich Place,Hotel,Pizza Place,Clothing Store,Bar,Ramen Restaurant,College Rec Center
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Gastropub,Coffee Shop,Restaurant,Italian Restaurant,Japanese Restaurant,Gym,Cosmetics Shop,Salon / Barbershop,Café,Molecular Gastronomy Restaurant
3,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,4,Spa,Restaurant,Liquor Store,Beer Bar,Sandwich Place,Breakfast Spot,Music Venue,Museum,Movie Theater,New American Restaurant
4,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Café,Sandwich Place,Pharmacy,Pizza Place,Restaurant,Italian Restaurant,Bookstore,Bank,Farmers Market


In [162]:
# To visualize the resulting clusters...
# create the map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(downtown_merged['Latitude'], downtown_merged['Longitude'], downtown_merged['Neighborhood'], downtown_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

 ### Examining the clusters to see the discriminating venue category that distinguishes each cluster. 

In [171]:
# Cluster 1
downtown_merged.loc[downtown_merged['Cluster Labels'] == 1, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Downtown Toronto,1,Airport Food Court,Airport Gate,Airport Lounge,Airport Terminal,Coffee Shop,Yoga Studio,Music Venue,Middle Eastern Restaurant,Modern European Restaurant,Molecular Gastronomy Restaurant


In [175]:
# Examining the clusters to see the discriminating venue category that distinguishes each cluster. 
downtown_merged.loc[downtown_merged['Cluster Labels'] == 2, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Downtown Toronto,2,Park,Movie Theater,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Museum,Lounge


In [167]:
# Examining the clusters to see the discriminating venue category that distinguishes each cluster. 
downtown_merged.loc[downtown_merged['Cluster Labels'] == 3, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Downtown Toronto,3,Grocery Store,Flower Shop,American Restaurant,Coffee Shop,Café,Japanese Restaurant,Yoga Studio,Movie Theater,Middle Eastern Restaurant,Modern European Restaurant


In [168]:
# Examining the clusters to see the discriminating venue category that distinguishes each cluster. 
downtown_merged.loc[downtown_merged['Cluster Labels'] == 4, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Downtown Toronto,4,Spa,Restaurant,Liquor Store,Beer Bar,Sandwich Place,Breakfast Spot,Music Venue,Museum,Movie Theater,New American Restaurant


In [174]:
# Examining the clusters to see the discriminating venue category that distinguishes each cluster. 
downtown_merged.loc[downtown_merged['Cluster Labels'] == 6, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
