In [68]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
#!conda install -c conda-forge folium --yes
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

I am using BeautifulSoup to parse the HTML and using a dictionary with the postal codes as keys to handle the situation where we can have multiple neighborhoods per postal code. I then create a dataframe from the dictionary.

In [3]:
# Download the Wikipedia page listing the "M" postal codes and locate its table.
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(source, 'lxml')
table = soup.find('table', class_='wikitable sortable')
if table is None:
    # Fail with a clear message instead of an AttributeError further down.
    raise ValueError("Could not find the 'wikitable sortable' table in the downloaded page")

# postal code -> [borough, neighborhoods]. A postal code may appear on several
# rows, so its neighborhoods are accumulated into one comma-separated string.
# (Named `postal_codes` so the builtin `dict` is not shadowed.)
postal_codes = {}

for row in table.find_all('tr')[1:]:  # [1:] skips the header row
    cells = [cell.text.strip() for cell in row.find_all('td')]
    if len(cells) < 3:
        # Not a (postal code, borough, neighborhood) data row.
        continue
    postal, borough, hood = cells[:3]

    # Postal codes without a borough are dropped.
    if borough == 'Not assigned':
        continue
    # A neighborhood without a name takes the name of its borough.
    if hood == 'Not assigned':
        hood = borough

    if postal in postal_codes:
        # Later rows are prepended to the neighborhoods already collected.
        postal_codes[postal] = [borough, hood + ", " + postal_codes[postal][1]]
    else:
        postal_codes[postal] = [borough, hood]

# Build the frame in one step: DataFrame.append was removed in pandas 2.0 and
# growing a frame row by row is quadratic.
neighborhoods = pd.DataFrame(
    [(postal, borough, hood) for postal, (borough, hood) in postal_codes.items()],
    columns=['PostalCode', 'Borough', 'Neighborhood'],
)
neighborhoods.head(10)

    

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills North
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [4]:
# Sanity check: (rows, columns) of the scraped postal-code table.
neighborhoods.shape

(103, 3)

In [5]:
# Read the postal-code coordinates CSV directly from its URL into a DataFrame.
lat_lng = pd.read_csv('http://cocl.us/Geospatial_data')

In [6]:
# Preview the first rows of the coordinates table.
lat_lng.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [7]:
# Join every scraped postal code with its coordinates. The key column is
# spelled 'PostalCode' on the left and 'Postal Code' on the right.
hood_Toronto = pd.merge(
    neighborhoods,
    lat_lng,
    left_on='PostalCode',
    right_on='Postal Code',
)

In [8]:
# 'Postal Code' duplicates 'PostalCode' after the merge. Reassign rather than
# mutate in place, and tolerate an already-removed column, so that re-running
# this cell does not raise KeyError.
hood_Toronto = hood_Toronto.drop(columns=['Postal Code'], errors='ignore')

The dataframe hood_Toronto combines each postal code's borough and neighborhoods with its latitude and longitude.

In [9]:
# Preview the merged postal-code / coordinates frame.
hood_Toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


In [75]:
# The code was removed by Watson Studio for sharing.

The following function, getNearbyVenues, performs the venue explore query for each of the postal codes in the dataframe

In [35]:
LIMIT = 100  # value sent as the `limit` query parameter of every explore request


def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare venues/explore endpoint once per location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location; they are consumed in parallel
        with ``zip``.
    radius : number, default 500
        Sent unchanged as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue was returned at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Relies on the module-level names CLIENT_ID, CLIENT_SECRET, VERSION and
    LIMIT. Each location label is printed as a progress indicator.
    """
    venue_columns = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string instead of formatting
        # the URL by hand; the timeout keeps a stalled connection from hanging
        # the notebook indefinitely.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        # Surface API failures directly rather than as a KeyError below.
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # Keep only the relevant information for each nearby venue.
        for v in results:
            venue = v['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # A venue with an empty category list gets no category instead
                # of raising IndexError.
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works when `rows` is
    # empty; assigning `.columns` afterwards raised a length-mismatch error.
    return pd.DataFrame(rows, columns=venue_columns)

In [36]:
# Fetch the venues around every postal code's coordinates, using a radius of
# 1000 instead of the function's default of 500. The postal code is passed as
# the location label.
venues_Toronto = getNearbyVenues(
    names=hood_Toronto['PostalCode'],
    latitudes=hood_Toronto['Latitude'],
    longitudes=hood_Toronto['Longitude'],
    radius=1000,
)

M3A
M4A
M5A
M6A
M7A
M9A
M1B
M3B
M4B
M5B
M6B
M9B
M1C
M3C
M4C
M5C
M6C
M9C
M1E
M4E
M5E
M6E
M1G
M4G
M5G
M6G
M1H
M2H
M3H
M4H
M5H
M6H
M1J
M2J
M3J
M4J
M5J
M6J
M1K
M2K
M3K
M4K
M5K
M6K
M1L
M2L
M3L
M4L
M5L
M6L
M9L
M1M
M2M
M3M
M4M
M5M
M6M
M9M
M1N
M2N
M3N
M4N
M5N
M6N
M9N
M1P
M2P
M4P
M5P
M6P
M9P
M1R
M2R
M4R
M5R
M6R
M7R
M9R
M1S
M4S
M5S
M6S
M1T
M4T
M5T
M1V
M4V
M5V
M8V
M9V
M1W
M4W
M5W
M8W
M9W
M1X
M4X
M5X
M8X
M4Y
M7Y
M8Y
M8Z


In [37]:
# The label column holds postal codes (that is what was passed as `names`),
# so give it a matching header.
venues_Toronto = venues_Toronto.rename(columns={'Neighborhood': 'Postal Code'})
venues_Toronto.head()

Unnamed: 0,Postal Code,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M3A,43.753259,-79.329656,Allwyn's Bakery,43.75984,-79.324719,Caribbean Restaurant
1,M3A,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
2,M3A,43.753259,-79.329656,A&W Canada,43.760643,-79.326865,Fast Food Restaurant
3,M3A,43.753259,-79.329656,Tim Hortons,43.760668,-79.326368,Café
4,M3A,43.753259,-79.329656,Bruno's valu-mart,43.746143,-79.32463,Grocery Store


In [39]:
# Number of venues returned for each postal code (non-null count per column).
venues_Toronto.groupby('Postal Code').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
M1B,16,16,16,16,16,16
M1C,5,5,5,5,5,5
M1E,23,23,23,23,23,23
M1G,8,8,8,8,8,8
M1H,29,29,29,29,29,29
M1J,12,12,12,12,12,12
M1K,26,26,26,26,26,26
M1L,31,31,31,31,31,31
M1M,12,12,12,12,12,12
M1N,11,11,11,11,11,11


In [40]:
# Count the distinct venue categories across all postal codes.
category_count = len(venues_Toronto['Venue Category'].unique())
print('There are {} unique categories.'.format(category_count))

There are 336 unique categories.


We convert the venue categories into separate one-hot columns in a new dataframe.

In [57]:
# One indicator column per venue category; the empty prefix and separator keep
# the bare category name as the column header.
category_frame = venues_Toronto[['Venue Category']]
venues_onehot = pd.get_dummies(category_frame, prefix="", prefix_sep="")
venues_onehot.head()

Unnamed: 0,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [58]:
# Attach the postal code of every venue so the indicators can be grouped by it.
venues_onehot = venues_onehot.assign(**{'Postal Code': venues_Toronto['Postal Code']})
venues_onehot.head()

Unnamed: 0,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,...,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo,Postal Code
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,M3A
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,M3A
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,M3A
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,M3A
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,M3A


In [59]:
# Move 'Postal Code' to the front. Selecting the column by name rather than
# "whatever is last" keeps this cell idempotent: the positional version moved
# a different column to the front every time the cell was re-run.
fixed_columns = ['Postal Code'] + [col for col in venues_onehot.columns if col != 'Postal Code']
venues_onehot = venues_onehot[fixed_columns]
venues_onehot.head()

Unnamed: 0,Postal Code,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,M3A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M3A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M3A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M3A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M3A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


We calculate the frequency of each venue category for each postal code 

In [60]:
# The mean of each indicator column within a postal code is the share of that
# postal code's venues falling in the category.
category_means = venues_onehot.groupby('Postal Code').mean()
venues_grouped = category_means.reset_index()
venues_grouped.head()

Unnamed: 0,Postal Code,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,M1B,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M1C,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M1E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M1G,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M1H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.034483,0.0


In [62]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of `row`, ignoring its first element.

    `row` is a pandas Series whose first element is skipped (the caller passes
    a row whose first entry is the postal code). The remaining values are
    sorted in descending order and the index labels of the first
    `num_top_venues` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

We select the top 10 most common venues for each postal code

In [64]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Column headers: 'Postal Code' followed by '1st Most Common Venue',
# '2nd Most Common Venue', ... up to num_top_venues.
columns = ['Postal Code']
for ind in range(num_top_venues):
    # Ranks beyond the end of `indicators` take the generic 'th' suffix. The
    # explicit bounds check replaces a bare `except:` that would also have
    # swallowed unrelated errors.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# Rank the categories of every postal code, then build the frame in a single
# step instead of assigning into an empty frame row by row.
top_venues = [
    return_most_common_venues(venues_grouped.iloc[position, :], num_top_venues)
    for position in range(venues_grouped.shape[0])
]
codes_venues_sorted = pd.DataFrame(top_venues, columns=columns[1:])
codes_venues_sorted.insert(0, 'Postal Code', venues_grouped['Postal Code'].values)

codes_venues_sorted.head()

Unnamed: 0,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Fast Food Restaurant,Caribbean Restaurant,Greek Restaurant,Gym,Coffee Shop,Fruit & Vegetable Store,Paper / Office Supplies Store,Bakery,Spa,Sandwich Place
1,M1C,Breakfast Spot,Italian Restaurant,Park,Burger Joint,Playground,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant
2,M1E,Pizza Place,Fast Food Restaurant,Coffee Shop,Burger Joint,Grocery Store,Beer Store,Discount Store,Bank,Sandwich Place,Fried Chicken Joint
3,M1G,Park,Coffee Shop,Fast Food Restaurant,Chinese Restaurant,Indian Restaurant,Electronics Store,Fish & Chips Shop,Ethiopian Restaurant,Donut Shop,Dry Cleaner
4,M1H,Bakery,Coffee Shop,Pharmacy,Indian Restaurant,Board Shop,Fried Chicken Joint,Caribbean Restaurant,Grocery Store,German Restaurant,Athletics & Sports


We run k-means clustering with 5 clusters

In [69]:
kclusters = 5

# Cluster on the category frequencies only; the postal code is an identifier,
# not a feature.
venues_clustering = venues_grouped.drop('Postal Code',axis=1)

# n_init is pinned to 10 (the historical default) so the result does not
# change with the installed scikit-learn version, whose default for this
# parameter became 'auto' in 1.4. random_state makes the run repeatable.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(venues_clustering)

In [70]:
# kmeans.labels_ follows the row order of venues_grouped, which is the row
# order codes_venues_sorted was built in. Drop a label column left over from a
# previous run first, because DataFrame.insert raises ValueError when the
# column already exists.
codes_venues_sorted = codes_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
codes_venues_sorted.insert(0,'Cluster Labels', kmeans.labels_)

In [73]:
# Attach the cluster label and ranked venue categories to every postal code,
# then discard the duplicate join key contributed by the right-hand frame.
final_df = (
    hood_Toronto
    .merge(codes_venues_sorted, left_on='PostalCode', right_on='Postal Code')
    .drop('Postal Code', axis=1)
)
final_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,1,Park,Convenience Store,Bus Stop,Pharmacy,Shopping Mall,Pizza Place,Café,Caribbean Restaurant,Laundry Service,Shop & Service
1,M4A,North York,Victoria Village,43.725882,-79.315572,2,Coffee Shop,Hockey Arena,Park,Golf Course,French Restaurant,Gym / Fitness Center,Men's Store,Intersection,Boxing Gym,Sporting Goods Shop
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,2,Coffee Shop,Café,Park,Theater,Italian Restaurant,Restaurant,Breakfast Spot,Bakery,Diner,Gastropub
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,2,Clothing Store,Fast Food Restaurant,Furniture / Home Store,Coffee Shop,Dessert Shop,Restaurant,Vietnamese Restaurant,Sushi Restaurant,Fried Chicken Joint,Supplement Shop
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494,2,Coffee Shop,Gastropub,Pizza Place,Park,Italian Restaurant,Sushi Restaurant,Café,Ice Cream Shop,Clothing Store,Restaurant


We map the city of Toronto by postal code and cluster

In [74]:
# Coordinates the map is initially centered on.
latitude = 43.651070
longitude = -79.347015

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow color per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(final_df['Latitude'], final_df['Longitude'], final_df['PostalCode'], final_df['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Cluster labels run from 0 to kclusters - 1, so they index the palette
    # directly; `cluster - 1` sent label 0 to the last color through negative
    # indexing.
    marker_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

Cluster 1 (cluster label 0) - Parks as the most common venue

In [79]:
# Postal codes carrying cluster label 0: show the postal code (column 0) and
# the ranked venue columns (column 6 onwards).
in_cluster = final_df['Cluster Labels'] == 0
shown_columns = [final_df.columns[0]] + list(final_df.columns[6:])
final_df.loc[in_cluster, shown_columns]

Unnamed: 0,PostalCode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,M9B,Park,Pizza Place,Hotel,Clothing Store,Gym,Café,Fish & Chips Shop,Bank,Mexican Restaurant,American Restaurant
12,M1C,Breakfast Spot,Italian Restaurant,Park,Burger Joint,Playground,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant
22,M1G,Park,Coffee Shop,Fast Food Restaurant,Chinese Restaurant,Indian Restaurant,Electronics Store,Fish & Chips Shop,Ethiopian Restaurant,Donut Shop,Dry Cleaner
45,M2L,Park,Pool,Zoo,Event Service,Donut Shop,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant
46,M3L,Park,Bank,Grocery Store,Spa,Shopping Mall,Pizza Place,Vietnamese Restaurant,Gym / Fitness Center,Dog Run,Doner Restaurant
66,M2P,Park,Coffee Shop,Restaurant,Bank,Tennis Court,Optical Shop,Golf Course,French Restaurant,Dentist's Office,Business Service
100,M8Y,Park,Italian Restaurant,Ice Cream Shop,Eastern European Restaurant,Gym / Fitness Center,Ethiopian Restaurant,Doner Restaurant,Donut Shop,Dry Cleaner,Dumpling Restaurant


Cluster 2 (cluster label 1) - Restaurants and stores as the most common venues

In [80]:
# Postal codes carrying cluster label 1: show the postal code (column 0) and
# the ranked venue columns (column 6 onwards).
in_cluster = final_df['Cluster Labels'] == 1
shown_columns = [final_df.columns[0]] + list(final_df.columns[6:])
final_df.loc[in_cluster, shown_columns]

Unnamed: 0,PostalCode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,Park,Convenience Store,Bus Stop,Pharmacy,Shopping Mall,Pizza Place,Café,Caribbean Restaurant,Laundry Service,Shop & Service
5,M9A,Pharmacy,Bakery,Café,Convenience Store,Bank,Playground,Shopping Mall,Skating Rink,Golf Course,Park
6,M1B,Fast Food Restaurant,Caribbean Restaurant,Greek Restaurant,Gym,Coffee Shop,Fruit & Vegetable Store,Paper / Office Supplies Store,Bakery,Spa,Sandwich Place
8,M4B,Fast Food Restaurant,Pizza Place,Brewery,Construction & Landscaping,Gym / Fitness Center,Bank,Bakery,Café,Rock Climbing Spot,Coffee Shop
10,M6B,Fast Food Restaurant,Grocery Store,Pizza Place,Coffee Shop,Pharmacy,Latin American Restaurant,Fish Market,Metro Station,Pet Store,Japanese Restaurant
14,M4C,Park,Coffee Shop,Pizza Place,Sandwich Place,Convenience Store,Asian Restaurant,Bus Line,Bus Stop,Café,Skating Rink
16,M6C,Convenience Store,Pizza Place,Coffee Shop,Grocery Store,Hockey Arena,Bank,Gastropub,Korean Restaurant,Frozen Yogurt Shop,Food Truck
17,M9C,Coffee Shop,Convenience Store,Breakfast Spot,Farmers Market,Café,Beer Store,Liquor Store,Shopping Mall,Park,Transportation Service
18,M1E,Pizza Place,Fast Food Restaurant,Coffee Shop,Burger Joint,Grocery Store,Beer Store,Discount Store,Bank,Sandwich Place,Fried Chicken Joint
21,M6E,Bus Stop,Park,Pharmacy,Mexican Restaurant,Grocery Store,Sporting Goods Shop,Fast Food Restaurant,Falafel Restaurant,Market,Discount Store


Cluster 3 (cluster label 2) - Coffee shops as the most common venue

In [81]:
# Postal codes carrying cluster label 2: show the postal code (column 0) and
# the ranked venue columns (column 6 onwards).
in_cluster = final_df['Cluster Labels'] == 2
shown_columns = [final_df.columns[0]] + list(final_df.columns[6:])
final_df.loc[in_cluster, shown_columns]

Unnamed: 0,PostalCode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,M4A,Coffee Shop,Hockey Arena,Park,Golf Course,French Restaurant,Gym / Fitness Center,Men's Store,Intersection,Boxing Gym,Sporting Goods Shop
2,M5A,Coffee Shop,Café,Park,Theater,Italian Restaurant,Restaurant,Breakfast Spot,Bakery,Diner,Gastropub
3,M6A,Clothing Store,Fast Food Restaurant,Furniture / Home Store,Coffee Shop,Dessert Shop,Restaurant,Vietnamese Restaurant,Sushi Restaurant,Fried Chicken Joint,Supplement Shop
4,M7A,Coffee Shop,Gastropub,Pizza Place,Park,Italian Restaurant,Sushi Restaurant,Café,Ice Cream Shop,Clothing Store,Restaurant
7,M3B,Japanese Restaurant,Pizza Place,Burger Joint,Coffee Shop,Breakfast Spot,Salad Place,Liquor Store,Office,Basketball Court,Mobile Phone Shop
9,M5B,Coffee Shop,Clothing Store,Cosmetics Shop,Middle Eastern Restaurant,Ramen Restaurant,Italian Restaurant,Café,Gastropub,Restaurant,Diner
13,M3C,Restaurant,Gym,Supermarket,Japanese Restaurant,Coffee Shop,Beer Store,American Restaurant,Sporting Goods Shop,Asian Restaurant,Clothing Store
15,M5C,Café,Coffee Shop,Restaurant,Italian Restaurant,Bakery,Hotel,Gastropub,Seafood Restaurant,Cosmetics Shop,Breakfast Spot
19,M4E,Coffee Shop,Pub,Pizza Place,Beach,Japanese Restaurant,Breakfast Spot,Bar,Tea Room,Restaurant,Burger Joint
20,M5E,Coffee Shop,Café,Hotel,Beer Bar,Restaurant,Japanese Restaurant,Cocktail Bar,Italian Restaurant,Park,BBQ Joint


Cluster 4 (cluster label 3)

In [82]:
# Postal codes carrying cluster label 3: show the postal code (column 0) and
# the ranked venue columns (column 6 onwards).
in_cluster = final_df['Cluster Labels'] == 3
shown_columns = [final_df.columns[0]] + list(final_df.columns[6:])
final_df.loc[in_cluster, shown_columns]

Unnamed: 0,PostalCode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
94,M9W,Coffee Shop,Hotel,Rental Car Location,Dog Run,Event Space,Donut Shop,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store


Cluster 5 (cluster label 4)

In [83]:
# Postal codes carrying cluster label 4: show the postal code (column 0) and
# the ranked venue columns (column 6 onwards).
in_cluster = final_df['Cluster Labels'] == 4
shown_columns = [final_df.columns[0]] + list(final_df.columns[6:])
final_df.loc[in_cluster, shown_columns]

Unnamed: 0,PostalCode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
53,M3M,Vietnamese Restaurant,Baseball Field,Restaurant,Zoo,Event Service,Donut Shop,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
