<a href="https://colab.research.google.com/github/prajualpillai/Coursera_Capstone/blob/master/Exploring%20and%20clustering%20near%20Toronto.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import pandas as pd
import numpy as np 
from bs4 import BeautifulSoup as bs
import requests as req
import matplotlib.cm as cm
import matplotlib.colors as colors

# Scraping data from the website

In [0]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
web = req.get(url)

## Checking if data has been scraped from website

In [157]:
web.status_code

200

## Extracting the content of the scraped data

In [158]:
x = web.content
print(x)

b'\n<!DOCTYPE html>\n<html class="client-nojs" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8"/>\n<title>List of postal codes of Canada: M - Wikipedia</title>\n<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"XrRJvgpAEKcAAI@-EI0AAAAQ","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":955414546,"wgRevisionId":955414546,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Communications in Ontario","Postal codes in Canada","Toronto","Ontario-related 

# Calling beautifulsoup to convert the content into a managable form

In [0]:
soup = bs(x,"lxml")

# Data Preprocessing to extract the features we require

In [0]:
links = soup.find_all("td")
i = []
nn = []
nn1=[]
c=0
for l in links:
    l = str(l)
    if len(l) >= 500: #we know the biggest sentance/adress is aroung 150 words so we do this to avoid useless iterations
        break
    if len(l) == 13:
        i.append(l[4:-6])   # we eliminate the td present at the starting and ending therefore giving us the entire sentance
    else:
        if c==0:
            nn.append(l[4:-6])
            c+=1
        else:
            nn1.append(l[4:-6])
            c=0

# Converting to DataFrame

In [0]:
df1 = pd.DataFrame({
    "Postal Code":i,
    "Borough":nn,
    "Neighborhood":nn1
},columns = ["Postal Code","Borough","Neighborhood"])

## Removing codes which don't have a borough

In [0]:
df2 = df1[df1["Borough"]!="Not assigned"]
df2 = df2.reset_index(drop = True) 

## Checking if any neighborhood value is mssing after initial preprocessing

In [163]:
df2['Neighborhood'].isnull().value_counts()

False    103
Name: Neighborhood, dtype: int64

# Checking shape of final dataframe

In [164]:
df2.shape

(103, 3)

# Try to use geocoders for latitudes and longitudes

In [165]:
!pip install geocoder
!pip install geopy
import geocoder
from geopy.geocoders import Nominatim



### Geocoders were not able to give out locations for majority of the adresses so a decision was made to not use them instead the csv file "Geospatial_Coordinates.csv" is used for our dataframe setup

In [166]:
loca = pd.read_csv("Geospatial_Coordinates.csv")
loca.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [167]:
loca['Postal Code'].isin(df2['Postal Code']).value_counts()

True    103
Name: Postal Code, dtype: int64

# Merging both the datasets into a final dataset

In [0]:
final = pd.merge(df2, loca[["Postal Code","Latitude","Longitude"]], on = 'Postal Code')

In [169]:
final.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


# Exporting the dataframe to csv for further uses

In [0]:
final.to_csv("Wiki_final.csv")

# Checking the shape

In [171]:
final.columns

Index(['Postal Code', 'Borough', 'Neighborhood', 'Latitude', 'Longitude'], dtype='object')

# Selecting boroughs having Toronto in their name

In [0]:
g = -1
col = final.columns
final1 = pd.DataFrame(columns = col)
for f in final["Borough"]:
  g+=1
  if f[-7:] == 'Toronto':
    final1 = final1.append({"Postal Code": final.loc[g,"Postal Code"],
                            "Borough": final.loc[g,"Borough"],
                            "Neighborhood": final.loc[g, "Neighborhood"],
                            "Latitude": final.loc[g, "Latitude"],
                            "Longitude": final.loc[g, "Longitude"]}, ignore_index=True)

In [0]:
import folium

In [0]:
address = 'Ontario, Toronto'
geo = Nominatim(user_agent = 'tr_explorer')
loc = geo.geocode(address)
lat = loc.latitude
long = loc.longitude 

In [0]:
map_tr = folium.Map(location = [lat, long], zoom_start = 10)

# Plotting the boroughs near Toronto

In [0]:
for la,lo,bo,nei in zip(final1['Latitude'],final1['Longitude'],final1['Borough'],final1['Neighborhood']):
  label = '{},{}'.format(nei, bo)
  label = folium.Popup(label, parse_html=True)
  folium.CircleMarker(
        [la, lo],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_tr)

In [178]:
map_tr

# Using foursquare database

In [0]:
c_id = 'EE500GIJJWSAWLLSAJFKXSDXZ1KHSITBDHWYW52RTH4BKKAK'
c_se = 'QM1D2VGHTUF3IN00AYTV25PRY0WOID203BG2Q5JZPXPMADLF'
ver = '20180605'

In [0]:
# type your answer here
radius = 500
limit=100
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(c_id,c_se,lat,long,ver,radius,limit)

In [0]:
result = req.get(url).json()

### Here we use get_category function from foursquare api

In [0]:
def get_category_type(row):
  try:
    cat_lis = row['categories']
  except:
    cat_lis = row['venue.categories']
  if len(cat_lis)==0:
    return None
  else:
    return cat_lis[0]['name']

In [0]:
venue = result['response']['groups'][0]['items']

In [184]:
from pandas.io.json import json_normalize 
n_ven = json_normalize(venue)

  


In [185]:
n_ven.tail()

Unnamed: 0,referralId,reasons.count,reasons.items,venue.id,venue.name,venue.location.lat,venue.location.lng,venue.location.labeledLatLngs,venue.location.distance,venue.location.cc,venue.location.city,venue.location.state,venue.location.country,venue.location.formattedAddress,venue.categories,venue.photos.count,venue.photos.groups,venue.location.address,venue.location.crossStreet,venue.location.postalCode,venue.location.neighborhood,venue.venuePage.id
66,e-0-4ad9ffbbf964a520091d21e3-66,0,"[{'summary': 'This spot is popular', 'type': '...",4ad9ffbbf964a520091d21e3,Jack Astor's Bar & Grill,43.656019,-79.380326,"[{'label': 'display', 'lat': 43.65601939992059...",405,CA,Toronto,ON,Canada,"[10 Dundas St. E (at Yonge St.), Toronto ON M5...","[{'id': '4bf58dd8d48988d1c4941735', 'name': 'R...",0,[],10 Dundas St. E,at Yonge St.,M5B 0A1,,
67,e-0-4ae61cf6f964a520caa421e3-67,0,"[{'summary': 'This spot is popular', 'type': '...",4ae61cf6f964a520caa421e3,Pantages Hotel & Spa,43.654498,-79.379035,"[{'label': 'display', 'lat': 43.65449797039222...",410,CA,Toronto,ON,Canada,"[200 Victoria St (at Shuter St), Toronto ON, C...","[{'id': '4bf58dd8d48988d1fa931735', 'name': 'H...",0,[],200 Victoria St,at Shuter St,,,
68,e-0-4b7aa994f964a5200c362fe3-68,0,"[{'summary': 'This spot is popular', 'type': '...",4b7aa994f964a5200c362fe3,Tim Hortons,43.655212,-79.380063,"[{'label': 'display', 'lat': 43.65521249025681...",366,CA,Toronto,ON,Canada,"[261 Yonge St, Toronto ON M5B 1N8, Canada]","[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",0,[],261 Yonge St,,M5B 1N8,,
69,e-0-4b6a0069f964a520f3c02be3-69,0,"[{'summary': 'This spot is popular', 'type': '...",4b6a0069f964a520f3c02be3,Pantages Lounge & Bar,43.654493,-79.379,"[{'label': 'display', 'lat': 43.65449343126956...",413,CA,Toronto,ON,Canada,"[200 Victoria St. (at Shuter St.), Toronto ON ...","[{'id': '4bf58dd8d48988d11e941735', 'name': 'C...",0,[],200 Victoria St.,at Shuter St.,M5B 1W8,,
70,e-0-4b5219ccf964a520d56727e3-70,0,"[{'summary': 'This spot is popular', 'type': '...",4b5219ccf964a520d56727e3,Tim Hortons,43.65369,-79.378356,"[{'label': 'display', 'lat': 43.6536897, 'lng'...",449,CA,Toronto,ON,Canada,"[30 Bond St (in St. Michael's Hospital), Toron...","[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",0,[],30 Bond St,in St. Michael's Hospital,M5B 1W8,,


In [0]:
n_ven1=[]

### Converting the useful information in respone/groups/0/item to a dataframe

In [0]:
re_col = ['venue.categories','venue.location.lat','venue.location.lng']
n_ven1 = pd.DataFrame(n_ven.loc[:,'venue.name'])
for f in re_col:
  n_ven1[f] = n_ven[f]

In [188]:
n_ven1.head()

Unnamed: 0,venue.name,venue.categories,venue.location.lat,venue.location.lng
0,Downtown Toronto,"[{'id': '4f2a25ac4b909258e854f55f', 'name': 'N...",43.653232,-79.385296
1,Nathan Phillips Square,"[{'id': '4bf58dd8d48988d164941735', 'name': 'P...",43.65227,-79.383516
2,Indigo,"[{'id': '4bf58dd8d48988d114951735', 'name': 'B...",43.653515,-79.380696
3,Chatime 日出茶太,"[{'id': '52e81612bcbc57f1066b7a0c', 'name': 'B...",43.655542,-79.384684
4,Textile Museum of Canada,"[{'id': '4bf58dd8d48988d18f941735', 'name': 'A...",43.654396,-79.3865


In [0]:
n_ven1['venue.categories'] = n_ven1.apply(get_category_type, axis = 1)

In [190]:
n_ven1.head()

Unnamed: 0,venue.name,venue.categories,venue.location.lat,venue.location.lng
0,Downtown Toronto,Neighborhood,43.653232,-79.385296
1,Nathan Phillips Square,Plaza,43.65227,-79.383516
2,Indigo,Bookstore,43.653515,-79.380696
3,Chatime 日出茶太,Bubble Tea Shop,43.655542,-79.384684
4,Textile Museum of Canada,Art Museum,43.654396,-79.3865


# Exploring the neighborhoods using foursquare data 

In [0]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            c_id, 
            c_se, 
            ver, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # make the GET request
        results = req.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
    #print(venues_list)
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [0]:
# type your answer here

t_ven = getNearbyVenues(names=final1['Neighborhood'],
                                   latitudes=final1['Latitude'],
                                   longitudes=final1['Longitude']
                                  )


In [200]:
print(t_ven.shape)
t_ven

(1602, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.654260,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.654260,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.654260,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
3,"Regent Park, Harbourfront",43.654260,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
4,"Regent Park, Harbourfront",43.654260,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
...,...,...,...,...,...,...,...
1597,Business reply mail Processing Centre,43.662744,-79.321558,Jonathan Ashbridge Park,43.664702,-79.319898,Park
1598,Business reply mail Processing Centre,43.662744,-79.321558,Olliffe On Queen,43.664503,-79.324768,Butcher
1599,Business reply mail Processing Centre,43.662744,-79.321558,Greenwood Cigar & Variety,43.664538,-79.325379,Smoke Shop
1600,Business reply mail Processing Centre,43.662744,-79.321558,Revolution Recording,43.662561,-79.326940,Recording Studio


In [201]:
len(set(t_ven['Venue Category']))

235

# Add neighborhood to the oh encoded dataframe and bringing it to the front

In [0]:
t_ven_oh = pd.get_dummies(t_ven[['Venue Category']], prefix ="", prefix_sep="")
t_ven_oh.drop(["Neighborhood"],axis=1,inplace=True)
t_ven_oh['Neighborhood'] = t_ven['Neighborhood'] 

In [0]:
cols = [t_ven_oh.columns[-1]]+ list(t_ven_oh.columns[:-1])
t_ven_oh = t_ven_oh[cols]

In [0]:
t_gr = t_ven_oh.groupby('Neighborhood').mean().reset_index()

# Finding the top 5 commom venues in a neighborhood

In [0]:
num_top_venues = 5

for hood in t_gr['Neighborhood']:
    print("----"+hood+"----")
    temp = t_gr[t_gr['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

# Find top 10 common venues in a neighborhood and convert it to a Dataframe

In [0]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [213]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = t_gr['Neighborhood']

for ind in np.arange(t_gr.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(t_gr.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Beer Bar,Café,Restaurant,Cheese Shop,Seafood Restaurant,Bakery,Jazz Club,Hotel
1,"Brockton, Parkdale Village, Exhibition Place",Café,Nightclub,Breakfast Spot,Coffee Shop,Pet Store,Stadium,Burrito Place,Restaurant,Climbing Gym,Yoga Studio
2,Business reply mail Processing Centre,Light Rail Station,Smoke Shop,Recording Studio,Fast Food Restaurant,Farmers Market,Auto Workshop,Burrito Place,Restaurant,Pizza Place,Brewery
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Lounge,Airport Terminal,Coffee Shop,Harbor / Marina,Plane,Rental Car Location,Sculpture Garden,Boutique,Boat or Ferry
4,Central Bay Street,Coffee Shop,Italian Restaurant,Sandwich Place,Café,Burger Joint,Bar,Ice Cream Shop,Japanese Restaurant,Salad Place,Bubble Tea Shop


# Clustering

In [218]:
# set number of clusters
kclusters = 5

t_gr_clustering = t_gr.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(t_gr_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

# add clustering labels
#neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

t_merged = final1

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
t_merged = t_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

t_merged.head() # check the last columns!

# create map
map_clusters = folium.Map(location=[lat, long], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(t_merged['Latitude'], t_merged['Longitude'], t_merged['Neighborhood'], t_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Exploring clusters

In [224]:
t_merged.loc[t_merged['Cluster Labels'] == 0, t_merged.columns[[1] + list(range(5, t_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,0,Coffee Shop,Bakery,Park,Breakfast Spot,Café,Pub,Theater,Yoga Studio,Mexican Restaurant,Shoe Store
1,Downtown Toronto,0,Coffee Shop,College Cafeteria,Sushi Restaurant,Yoga Studio,Burrito Place,Bar,Beer Bar,Smoothie Shop,Italian Restaurant,Sandwich Place
2,Downtown Toronto,0,Clothing Store,Coffee Shop,Café,Cosmetics Shop,Bubble Tea Shop,Japanese Restaurant,Italian Restaurant,Restaurant,Middle Eastern Restaurant,Tea Room
3,Downtown Toronto,0,Café,Coffee Shop,Cocktail Bar,American Restaurant,Gastropub,Italian Restaurant,Restaurant,Department Store,Clothing Store,Theater
4,East Toronto,0,Trail,Pub,Health Food Store,Distribution Center,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Yoga Studio
5,Downtown Toronto,0,Coffee Shop,Cocktail Bar,Beer Bar,Café,Restaurant,Cheese Shop,Seafood Restaurant,Bakery,Jazz Club,Hotel
6,Downtown Toronto,0,Coffee Shop,Italian Restaurant,Sandwich Place,Café,Burger Joint,Bar,Ice Cream Shop,Japanese Restaurant,Salad Place,Bubble Tea Shop
7,Downtown Toronto,0,Grocery Store,Café,Park,Baby Store,Restaurant,Coffee Shop,Candy Store,Nightclub,Diner,Italian Restaurant
8,Downtown Toronto,0,Coffee Shop,Café,Restaurant,Clothing Store,Deli / Bodega,Hotel,Thai Restaurant,Gym,Salad Place,Sushi Restaurant
9,West Toronto,0,Bakery,Pharmacy,Bank,Bar,Café,Pool,Bus Stop,Supermarket,Middle Eastern Restaurant,Music Venue


In [220]:
t_merged.loc[t_merged['Cluster Labels'] == 1, t_merged.columns[[1] + list(range(5, t_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Central Toronto,1,Park,Swim School,Bus Line,Yoga Studio,Deli / Bodega,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run


In [221]:
t_merged.loc[t_merged['Cluster Labels'] == 2, t_merged.columns[[1] + list(range(5, t_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,Central Toronto,2,Park,Gym,Tennis Court,Trail,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run,Cupcake Shop,Distribution Center
33,Downtown Toronto,2,Park,Playground,Trail,Yoga Studio,Cupcake Shop,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center


In [222]:
t_merged.loc[t_merged['Cluster Labels'] == 3, t_merged.columns[[1] + list(range(5, t_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,Central Toronto,3,Park,Jewelry Store,Trail,Sushi Restaurant,Dance Studio,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run


In [223]:
t_merged.loc[t_merged['Cluster Labels'] == 4, t_merged.columns[[1] + list(range(5, t_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,Central Toronto,4,Home Service,Garden,Yoga Studio,Dance Studio,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run
