# Capstone Project Week 3 Assignment

## 1. Transform Wikipedia page data into dataframe

We start by using read_html function to read the Wiki page data unfiltered:

In [4]:
from IPython.display import display_html
import pandas as pd
unfiltered_df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]
unfiltered_df.columns = ['PostalCode', 'Borough','Neighborhood']
unfiltered_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


Then drop the rows in which Borough equals Not assigned:

In [12]:
indexNames = unfiltered_df[ unfiltered_df['Borough'] == 'Not assigned' ].index
unfiltered_df.drop(indexNames , inplace=True)
unfiltered_df = unfiltered_df.reset_index(drop=True)
unfiltered_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


Then replace the Not assigned value for Queen's Park neighborhood:

In [15]:
unfiltered_df.loc[(unfiltered_df.Neighborhood == 'Not assigned'),'Neighborhood']="Queen's Park"
unfiltered_df.loc[(unfiltered_df.Neighborhood == "Queen's Park")]

Unnamed: 0,PostalCode,Borough,Neighborhood
5,M7A,Downtown Toronto,Queen's Park
6,M9A,Queen's Park,Queen's Park


Then we group our Neighborhoods by PostalCode

In [17]:
unfiltered_df = unfiltered_df.groupby('PostalCode', as_index=False).agg(lambda x : ','.join(set(x)))
unfiltered_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern,Rouge"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Morningside,Guildwood,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


Then we filter the PostalCodes required by the assignment and order them accordingly:

In [19]:
filtered_df = unfiltered_df[unfiltered_df['PostalCode'].isin(['M5G', 'M2H','M4B','M1J','M4G','M4M','M1R','M9V','M9L','M5V','M1B','M5A']) ]
filtered_df = filtered_df.reset_index(drop=True)
filtered_df = filtered_df.loc[[8, 3,4,1,5,6,2,11,10,9,0,7], :].reset_index(drop=True)
filtered_df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M5G,Downtown Toronto,Central Bay Street
1,M2H,North York,Hillcrest Village
2,M4B,East York,"Woodbine Gardens,Parkview Hill"
3,M1J,Scarborough,Scarborough Village
4,M4G,East York,Leaside
5,M4M,East Toronto,Studio District
6,M1R,Scarborough,"Wexford,Maryvale"
7,M9V,Etobicoke,"Mount Olive,South Steeles,Thistletown,Silverst..."
8,M9L,North York,Humber Summit
9,M5V,Downtown Toronto,"South Niagara,Bathurst Quay,King and Spadina,R..."


## 2. Add latitude and longitude

We assign the latitude and longitude values as provided on the csv file:

In [41]:
filtered_df = filtered_df.assign(Latitude = ['43.6579524', '43.8037622', '43.7063972', '43.7447342','43.7090604','43.6595255','43.7500715','43.7394164','43.7563033','43.6289467','43.8066863','43.6542599'])
filtered_df = filtered_df.assign(Longitude = ['-79.3873826','-79.3634517','-79.309937', '-79.2394761','-79.3634517','-79.340923','-79.2958491','-79.5884369','-79.5659633','-79.3944199','-79.1943534','-79.3606359'])
neighborhoods = filtered_df
neighborhoods[["Latitude", "Longitude"]] = neighborhoods[["Latitude", "Longitude"]].apply(pd.to_numeric)
neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
1,M2H,North York,Hillcrest Village,43.803762,-79.363452
2,M4B,East York,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937
3,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
4,M4G,East York,Leaside,43.70906,-79.363452


Let's use shape function to get the number of rows as required by assignment:

In [42]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['Borough'].unique()),
        neighborhoods.shape[0]
    )
)

The dataframe has 6 boroughs and 12 neighborhoods.


## 3. Cluster the neighborhoods in Toronto

We use geocoder to find the coordinates of Toronto

In [43]:
from geopy.geocoders import Nominatim 

address = 'Toronto, ON'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

lat_lng_coords = None

The geograpical coordinate of Toronto are 43.653963, -79.387207.


Let's create a map using our previously defined dataframe:

In [44]:
import folium 
map_tor = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
       [lat, lng],
       radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    parse_html=False).add_to(map_tor)  
    

map_tor

Let's define our Foursquare credentials:

In [45]:
CLIENT_ID = 'AKQWLMLUSZBWCRKIHFEJO4SMAFPS0T1RQEVTWU4GO4MJUWCM' # your Foursquare ID
CLIENT_SECRET = 'A5VVSAA4PPYBKR0ZT0ICAUVHIUJ3LJZSLD0XIBELGP31U02M' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: AKQWLMLUSZBWCRKIHFEJO4SMAFPS0T1RQEVTWU4GO4MJUWCM
CLIENT_SECRET:A5VVSAA4PPYBKR0ZT0ICAUVHIUJ3LJZSLD0XIBELGP31U02M


Let's explore the first neighborhood in our dataset:

In [48]:
neighborhoods.loc[0, 'Neighborhood']
neighborhood_latitude = neighborhoods.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = neighborhoods.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = neighborhoods.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))


Latitude and longitude values of Central Bay Street are 43.6579524, -79.3873826.


Let's get the top 100 venues from Central Bay Street:

In [86]:
# type your answer here
LIMIT = 100
radius = 500 # define radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
#url # display URL
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
results = requests.get(url).json()
#results
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

Let's get  a list of nearby venues:

In [87]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Jimmy's Coffee,Coffee Shop,43.658421,-79.385613
1,Tim Hortons,Coffee Shop,43.65857,-79.385123
2,Hailed Coffee,Coffee Shop,43.658833,-79.383684
3,The Elm Tree Restaurant,Modern European Restaurant,43.657397,-79.383761
4,College Park Area,Park,43.659751,-79.384911


In [88]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

86 venues were returned by Foursquare.


Now lets do the same for all neighborhoods in our dataframe:

In [55]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [56]:
toronto_venues = getNearbyVenues(names=neighborhoods['Neighborhood'],
                                   latitudes=neighborhoods['Latitude'],
                                   longitudes=neighborhoods['Longitude']
                                  )

Central Bay Street
Hillcrest Village
Woodbine Gardens,Parkview Hill
Scarborough Village
Leaside
Studio District
Wexford,Maryvale
Mount Olive,South Steeles,Thistletown,Silverstone,Beaumond Heights,Albion Gardens,Humbergate,Jamestown
Humber Summit
South Niagara,Bathurst Quay,King and Spadina,Railway Lands,CN Tower,Harbourfront West,Island airport
Malvern,Rouge
Harbourfront


In [57]:
print(toronto_venues.shape)
toronto_venues.head()

(256, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Central Bay Street,43.657952,-79.387383,Jimmy's Coffee,43.658421,-79.385613,Coffee Shop
1,Central Bay Street,43.657952,-79.387383,Tim Hortons,43.65857,-79.385123,Coffee Shop
2,Central Bay Street,43.657952,-79.387383,Hailed Coffee,43.658833,-79.383684,Coffee Shop
3,Central Bay Street,43.657952,-79.387383,The Elm Tree Restaurant,43.657397,-79.383761,Modern European Restaurant
4,Central Bay Street,43.657952,-79.387383,College Park Area,43.659751,-79.384911,Park


In [64]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Stationery Store,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Thrift / Vintage Store,Vegetarian / Vegan Restaurant,Wine Bar
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [66]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Stationery Store,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Thrift / Vintage Store,Vegetarian / Vegan Restaurant,Wine Bar
0,Central Bay Street,0.011628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,...,0.0,0.011628,0.0,0.011628,0.011628,0.011628,0.0,0.0,0.011628,0.011628
1,Harbourfront,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0
2,Hillcrest Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Humber Summit,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Leaside,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.03125,0.03125,0.0,0.0,0.0,0.0,0.0,0.0
5,"Malvern,Rouge",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Mount Olive,South Steeles,Thistletown,Silverst...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Scarborough Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"South Niagara,Bathurst Quay,King and Spadina,R...",0.0,0.0,0.066667,0.066667,0.066667,0.133333,0.133333,0.133333,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Studio District,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04878,...,0.02439,0.0,0.0,0.0,0.0,0.02439,0.0,0.02439,0.0,0.02439


In [67]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Central Bay Street----
                venue  freq
0         Coffee Shop  0.15
1                Café  0.07
2  Italian Restaurant  0.05
3      Sandwich Place  0.03
4        Burger Joint  0.03


----Harbourfront----
         venue  freq
0  Coffee Shop  0.18
1         Park  0.07
2       Bakery  0.07
3          Pub  0.07
4         Café  0.04


----Hillcrest Village----
                      venue  freq
0               Golf Course  0.25
1                      Pool  0.25
2  Mediterranean Restaurant  0.25
3                   Dog Run  0.25
4              Liquor Store  0.00


----Humber Summit----
                       venue  freq
0        Empanada Restaurant   0.5
1                Pizza Place   0.5
2                Yoga Studio   0.0
3  Latin American Restaurant   0.0
4                  Pet Store   0.0


----Leaside----
                    venue  freq
0             Coffee Shop  0.09
1     Sporting Goods Shop  0.09
2  Furniture / Home Store  0.06
3            Burger Joint  0.06
4           

In [68]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [73]:
import numpy as np
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Bay Street,Coffee Shop,Café,Italian Restaurant,Burger Joint,Sandwich Place,Ice Cream Shop,Salad Place,Bakery,Bar,Juice Bar
1,Harbourfront,Coffee Shop,Pub,Bakery,Park,Breakfast Spot,Café,Mexican Restaurant,Cosmetics Shop,Brewery,Chocolate Shop
2,Hillcrest Village,Golf Course,Pool,Mediterranean Restaurant,Dog Run,Diner,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Comic Shop
3,Humber Summit,Pizza Place,Empanada Restaurant,Discount Store,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Cosmetics Shop
4,Leaside,Sporting Goods Shop,Coffee Shop,Furniture / Home Store,Burger Joint,Breakfast Spot,Restaurant,Bank,Beer Store,Bike Shop,Pet Store


In [76]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 1, 4, 0, 3, 0, 2, 0, 0], dtype=int32)

In [79]:
# add clustering labels
#neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = neighborhoods

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Café,Italian Restaurant,Burger Joint,Sandwich Place,Ice Cream Shop,Salad Place,Bakery,Bar,Juice Bar
1,M2H,North York,Hillcrest Village,43.803762,-79.363452,1,Golf Course,Pool,Mediterranean Restaurant,Dog Run,Diner,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Comic Shop
2,M4B,East York,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937,0,Fast Food Restaurant,Pizza Place,Pharmacy,Athletics & Sports,Gym / Fitness Center,Intersection,Pet Store,Café,Bank,Gastropub
3,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,2,Playground,Spa,Wine Bar,Discount Store,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store
4,M4G,East York,Leaside,43.70906,-79.363452,0,Sporting Goods Shop,Coffee Shop,Furniture / Home Store,Burger Joint,Breakfast Spot,Restaurant,Bank,Beer Store,Bike Shop,Pet Store


In [80]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [82]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,0,Coffee Shop,Café,Italian Restaurant,Burger Joint,Sandwich Place,Ice Cream Shop,Salad Place,Bakery,Bar,Juice Bar
2,East York,0,Fast Food Restaurant,Pizza Place,Pharmacy,Athletics & Sports,Gym / Fitness Center,Intersection,Pet Store,Café,Bank,Gastropub
4,East York,0,Sporting Goods Shop,Coffee Shop,Furniture / Home Store,Burger Joint,Breakfast Spot,Restaurant,Bank,Beer Store,Bike Shop,Pet Store
5,East Toronto,0,Café,Coffee Shop,American Restaurant,Bakery,Italian Restaurant,Gastropub,Wine Bar,Diner,Middle Eastern Restaurant,Latin American Restaurant
6,Scarborough,0,Shopping Mall,Breakfast Spot,Smoke Shop,Sandwich Place,Auto Garage,Bakery,Middle Eastern Restaurant,Accessories Store,Comic Shop,Comfort Food Restaurant
7,Etobicoke,0,Grocery Store,Pharmacy,Beer Store,Fast Food Restaurant,Pizza Place,Sandwich Place,Fried Chicken Joint,Airport Service,Dog Run,Coffee Shop
9,Downtown Toronto,0,Airport Terminal,Airport Lounge,Airport Service,Bar,Boutique,Boat or Ferry,Sculpture Garden,Harbor / Marina,Coffee Shop,Airport
11,Downtown Toronto,0,Coffee Shop,Pub,Bakery,Park,Breakfast Spot,Café,Mexican Restaurant,Cosmetics Shop,Brewery,Chocolate Shop


In [83]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,1,Golf Course,Pool,Mediterranean Restaurant,Dog Run,Diner,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Comic Shop


In [84]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Scarborough,2,Playground,Spa,Wine Bar,Discount Store,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store
