In [1]:
#import necessary libraries 

import json
import os

import folium
import geocoder
import matplotlib.cm as cm
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

In [2]:
# Read the CSV file of Airbnb listings.
# The location can be overridden through the LISTINGS_CSV environment variable so the
# notebook is not tied to one machine; the original path is kept as the default.
LISTINGS_CSV = os.environ.get(
    "LISTINGS_CSV",
    "/Users/korey.stegared-pace/Documents/Projects/DataScienceCapstone/data/listings.csv",
)
listings = pd.read_csv(LISTINGS_CSV)
listings.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,42808,Quiet room right DOWNTOWN Stockholm,186922,Nina,,Norrmalms,59.34342,18.05298,Private room,853,1,64,2019-08-28,0.58,1,272
1,53895,Modern Architecture in Stockholm,252075,Eva,,Skarpnäcks,59.27054,18.11231,Private room,1079,3,7,2017-05-28,0.07,1,0
2,145320,In the middle of it all - with a view!,703851,Kim,,Södermalms,59.31364,18.05256,Private room,1285,2,72,2019-06-25,2.62,1,0
3,155220,"Stockholm, new spacoius villa",746396,Madeleine,,Skarpnäcks,59.24667,18.17799,Entire home/apt,1197,3,0,,,2,0
4,155685,Hornstull with water view!,748592,Robert,,Södermalms,59.31535,18.03277,Entire home/apt,3247,4,22,2015-12-01,0.23,1,0


In [3]:
# Keep only the columns used by the rest of the analysis
relevant_columns = [
    "neighbourhood",
    "latitude",
    "longitude",
    "price",
    "room_type",
    "number_of_reviews",
    "availability_365",
]
df = listings[relevant_columns]

In [4]:
df.head()

Unnamed: 0,neighbourhood,latitude,longitude,price,room_type,number_of_reviews,availability_365
0,Norrmalms,59.34342,18.05298,853,Private room,64,272
1,Skarpnäcks,59.27054,18.11231,1079,Private room,7,0
2,Södermalms,59.31364,18.05256,1285,Private room,72,0
3,Skarpnäcks,59.24667,18.17799,1197,Entire home/apt,0,0
4,Södermalms,59.31535,18.03277,3247,Entire home/apt,22,0


In [5]:
# Drop private and shared rooms, and listings with zero available days in the year
is_not_room = ~df.room_type.isin(["Private room", "Shared room"])
has_availability = df.availability_365 != 0
df = df[is_not_room & has_availability]

# Coordinates are rounded to two decimals
df = df.round({'latitude': 2, 'longitude': 2})
df.head()

Unnamed: 0,neighbourhood,latitude,longitude,price,room_type,number_of_reviews,availability_365
6,Södermalms,59.32,18.03,745,Entire home/apt,32,197
13,Södermalms,59.3,18.11,1962,Entire home/apt,8,242
14,Södermalms,59.32,18.1,3188,Entire home/apt,109,343
15,Farsta,59.25,18.09,412,Entire home/apt,51,359
17,Södermalms,59.32,18.07,1099,Entire home/apt,473,222


In [6]:
df.shape

(2604, 7)

In [7]:
df.neighbourhood.unique()

array(['Södermalms', 'Farsta', 'Norrmalms', 'Östermalms', 'Skarpnäcks',
       'Enskede-Årsta-Vantörs', 'Hägersten-Liljeholmens', 'Bromma',
       'Kungsholmens', 'Skärholmens', 'Älvsjö', 'Hässelby-Vällingby',
       'Spånga-Tensta', 'Rinkeby-Tensta'], dtype=object)

In [8]:
# Number of listings per neighbourhood, largest first. count() tallies the non-null rows of
# each column, so the 'number_of_reviews' column used for sorting is a listing count here,
# not a total or average of reviews.
df.groupby('neighbourhood').count().sort_values('number_of_reviews', ascending=False)

Unnamed: 0_level_0,latitude,longitude,price,room_type,number_of_reviews,availability_365
neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Södermalms,748,748,748,748,748,748
Norrmalms,328,328,328,328,328,328
Kungsholmens,262,262,262,262,262,262
Östermalms,255,255,255,255,255,255
Hägersten-Liljeholmens,209,209,209,209,209,209
Enskede-Årsta-Vantörs,166,166,166,166,166,166
Bromma,154,154,154,154,154,154
Rinkeby-Tensta,151,151,151,151,151,151
Skarpnäcks,101,101,101,101,101,101
Hässelby-Vällingby,68,68,68,68,68,68


In [9]:
# Collapse the listings to one row per neighbourhood holding the mean of each numeric column.
# numeric_only=True is required because room_type is a string column: pandas >= 2.0 raises
# TypeError instead of silently dropping it, and older versions drop it either way.
df = df.groupby('neighbourhood').mean(numeric_only=True).reset_index()
df = df.round(2)

In [10]:
# Strip the trailing 's' characters from every neighbourhood name
# (e.g. 'Södermalms' becomes 'Södermalm'); names without a trailing 's' are unchanged.
df['neighbourhood'] = [hood.rstrip('s') for hood in df['neighbourhood']]

In [11]:
# df holds one row per neighbourhood at this point (it was grouped by neighbourhood and
# averaged earlier), so this is the unweighted mean of the neighbourhood averages rather
# than the mean over individual listings.
mean_price = df["price"].astype("float").mean(axis=0)
print("The average price of an Airbnb in Stockholm", mean_price)

The average price of an Airbnb in Stockholm 1185.5385714285715


In [12]:
# Centre the map on Stockholm using the coordinates returned by Nominatim.
address = 'Stockholm, Sweden'
geolocator = Nominatim(user_agent="stockholm_mapper")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError("Could not geocode address: {!r}".format(address))
latitude = location.latitude
longitude = location.longitude

# One circle marker per neighbourhood row; the popup shows the average price.
map_stockholm = folium.Map(location=[latitude, longitude], zoom_start=12)
for lat, lng, price in zip(df['latitude'], df['longitude'], df['price']):
    label = folium.Popup('{}'.format(price), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    ).add_to(map_stockholm)
map_stockholm

## Retrieve Foursquare information

In [13]:
# Foursquare credentials are read from the environment so that no secret is stored in the
# notebook or its outputs. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
# running; a missing variable raises KeyError here rather than failing later inside a request.
# NOTE(review): the keys previously committed in this cell should be treated as exposed
# and rotated.
CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID']  # your Foursquare ID
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET']  # your Foursquare Secret
ACCESS_TOKEN = os.environ.get('FOURSQUARE_ACCESS_TOKEN', '')  # optional

VERSION = '20180605'  # Foursquare API version
LIMIT = 500
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Never echo the secret itself; show only that a value was loaded.
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [14]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Iterated in parallel with zip(); one request is made per (name, lat, lng) triple.
    radius : int, default 500
        Passed to the API as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude' and 'Venue Category'. The frame has these columns even when
        no venue is returned at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and escape the query string instead of formatting it by hand.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,  # never hang the notebook on a stalled connection
        )
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups') or []
        items = groups[0].get('items', []) if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            # A venue may come back without any category; keep it with a missing value
            # rather than failing with IndexError.
            category = categories[0]['name'] if categories else None
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # Passing columns= keeps the schema intact for an empty result set.
    return pd.DataFrame(rows, columns=columns)
          


In [15]:
# Unique neighbourhood names, in the order they appear in df
neighbourhoods = df.neighbourhood.unique()

# One Foursquare lookup per neighbourhood, at the coordinates stored in df
venues = getNearbyVenues(
    names=neighbourhoods,
    latitudes=df['latitude'],
    longitudes=df['longitude'],
)



In [16]:
venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Bromma,59.34,17.94,Sushibar Kirin,59.339206,17.937934,Sushi Restaurant
1,Bromma,59.34,17.94,Restaurang Sorbon,59.339998,17.940353,Pub
2,Bromma,59.34,17.94,Coop,59.338748,17.939877,Supermarket
3,Bromma,59.34,17.94,SALK Tennis Park,59.342273,17.94341,Tennis Court
4,Bromma,59.34,17.94,ICA Supermarket Brommaplan,59.338417,17.938905,Supermarket


In [17]:
venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bromma,10,10,10,10,10,10
Enskede-Årsta-Vantör,2,2,2,2,2,2
Farsta,10,10,10,10,10,10
Hägersten-Liljeholmen,36,36,36,36,36,36
Hässelby-Vällingby,9,9,9,9,9,9
Kungsholmen,21,21,21,21,21,21
Norrmalm,100,100,100,100,100,100
Rinkeby-Tensta,38,38,38,38,38,38
Skarpnäck,6,6,6,6,6,6
Skärholmen,9,9,9,9,9,9


In [18]:
venues.shape

(379, 7)

## Analyze Each Neighborhood

In [19]:
# One indicator column per venue category. dtype=int keeps the 0/1 values identical across
# pandas versions (the default dummy dtype became bool in pandas 2.0).
stockholm_onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="", dtype=int)

# A venue category that is itself called "Neighborhood" would be overwritten by the label
# column added below, and the column would then not be the last one; rename it out of the way.
stockholm_onehot = stockholm_onehot.rename(columns={'Neighborhood': 'Neighborhood (venue category)'})

stockholm_onehot['Neighborhood'] = venues['Neighborhood']

# Move the neighbourhood label to the first position, selecting it by name rather than
# assuming it is the last column.
fixed_columns = ['Neighborhood'] + [col for col in stockholm_onehot.columns if col != 'Neighborhood']
stockholm_onehot = stockholm_onehot[fixed_columns]

stockholm_onehot.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,American Restaurant,Arcade,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,...,Theme Restaurant,Thrift / Vintage Store,Track,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,Bromma,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Bromma,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Bromma,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Bromma,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Bromma,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
stockholm_onehot.shape

(379, 127)

In [21]:
# Share of each venue category among the venues found in every neighbourhood
category_share = stockholm_onehot.groupby('Neighborhood').mean()
stockholm_grouped = category_share.reset_index()
stockholm_grouped

Unnamed: 0,Neighborhood,ATM,Accessories Store,American Restaurant,Arcade,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,...,Theme Restaurant,Thrift / Vintage Store,Track,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,Bromma,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Enskede-Årsta-Vantör,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Farsta,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Hägersten-Liljeholmen,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,...,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Hässelby-Vällingby,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Kungsholmen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Norrmalm,0.01,0.0,0.0,0.01,0.01,0.0,0.02,0.0,0.01,...,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.03,0.01,0.01
7,Rinkeby-Tensta,0.0,0.0,0.026316,0.0,0.0,0.0,0.078947,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.026316,0.0,0.0
8,Skarpnäck,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Skärholmen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
stockholm_grouped.shape

(14, 127)

In [23]:
# Print the most frequent venue categories for every neighbourhood.
num_top_venues = 5
for hood in stockholm_grouped['Neighborhood']:
    print("---"+hood+"---")
    # Transpose the neighbourhood's single row into (venue, freq) pairs.
    temp = stockholm_grouped[stockholm_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue', 'freq']
    # Drop the leading 'Neighborhood' entry, then convert and round on a new frame rather
    # than assigning into a slice (which can raise SettingWithCopyWarning).
    temp = temp.iloc[1:].astype({'freq': float}).round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---Bromma---
                  venue  freq
0           Supermarket   0.2
1      Sushi Restaurant   0.1
2                Bakery   0.1
3        Farmers Market   0.1
4  Fast Food Restaurant   0.1


---Enskede-Årsta-Vantör---
             venue  freq
0            Hotel   0.5
1    Metro Station   0.5
2              ATM   0.0
3  Motorcycle Shop   0.0
4      Record Shop   0.0


---Farsta---
                     venue  freq
0  Scandinavian Restaurant   0.1
1                   Bakery   0.1
2        Convenience Store   0.1
3                    Plaza   0.1
4              Bus Station   0.1


---Hägersten-Liljeholmen---
             venue  freq
0  Thai Restaurant  0.08
1             Café  0.08
2      Supermarket  0.06
3      Pizza Place  0.06
4       Restaurant  0.06


---Hässelby-Vällingby---
                 venue  freq
0        Metro Station  0.22
1          Supermarket  0.11
2          Event Space  0.11
3  Sporting Goods Shop  0.11
4           Smoke Shop  0.11


---Kungsholmen---
              

In [24]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of ``row``, ignoring its first element.

    The first element of ``row`` is skipped, the remaining values are sorted in descending
    order, and the index labels of the first ``num_top_venues`` of them are returned as an
    array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]

In [25]:
num_top_venues = 5


def _ordinal(n):
    """Return n followed by its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st)."""
    if 10 <= n % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return '{}{}'.format(n, suffix)


# Column headers: 'Neighborhood', '1st Most Common Venue', '2nd Most Common Venue', ...
# Built explicitly instead of relying on a bare except around a list lookup.
columns = ['Neighborhood'] + [
    '{} Most Common Venue'.format(_ordinal(rank)) for rank in range(1, num_top_venues + 1)
]

# Build every row first and create the frame once, instead of filling an empty frame
# row by row.
rows = [
    [stockholm_grouped['Neighborhood'].iloc[pos]]
    + list(return_most_common_venues(stockholm_grouped.iloc[pos, :], num_top_venues))
    for pos in range(stockholm_grouped.shape[0])
]
neighborhood_venues_sorted = pd.DataFrame(rows, columns=columns, index=stockholm_grouped.index)

neighborhood_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Bromma,Supermarket,Hotel,Bakery,Farmers Market,Fast Food Restaurant
1,Enskede-Årsta-Vantör,Hotel,Metro Station,Food Truck,Convenience Store,Creperie
2,Farsta,Plaza,Scandinavian Restaurant,Sushi Restaurant,Bakery,Bus Station
3,Hägersten-Liljeholmen,Thai Restaurant,Café,Restaurant,Supermarket,Sushi Restaurant
4,Hässelby-Vällingby,Metro Station,Plaza,Supermarket,Grocery Store,Event Space
5,Kungsholmen,Park,Café,Scandinavian Restaurant,Grocery Store,Liquor Store
6,Norrmalm,Café,Scandinavian Restaurant,Hotel,Bakery,Hostel
7,Rinkeby-Tensta,Middle Eastern Restaurant,Asian Restaurant,Coffee Shop,Clothing Store,Gym / Fitness Center
8,Skarpnäck,Metro Station,Bakery,Supermarket,Sushi Restaurant,Café
9,Skärholmen,Metro Station,Gym / Fitness Center,Plaza,Supermarket,Japanese Restaurant


In [26]:
# Attach the top-venue columns to the per-neighbourhood frame by neighbourhood NAME.
# Joining on the name (rather than concatenating side by side on the row index) keeps rows
# correct even if some neighbourhood has no venues, and leaves neighborhood_venues_sorted
# untouched so the cell can be re-run.
venue_columns = [col for col in neighborhood_venues_sorted.columns if col != 'Neighborhood']
df = (
    df.drop(columns=venue_columns, errors='ignore')  # discard results of a previous run
      .merge(neighborhood_venues_sorted, how='left',
             left_on='neighbourhood', right_on='Neighborhood')
      .drop(columns='Neighborhood')
)
df

Unnamed: 0,neighbourhood,latitude,longitude,price,number_of_reviews,availability_365,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Bromma,59.34,17.94,1202.82,16.98,147.31,Supermarket,Hotel,Bakery,Farmers Market,Fast Food Restaurant
1,Enskede-Årsta-Vantör,59.28,18.06,1154.63,15.39,121.61,Hotel,Metro Station,Food Truck,Convenience Store,Creperie
2,Farsta,59.26,18.09,1097.31,15.04,134.07,Plaza,Scandinavian Restaurant,Sushi Restaurant,Bakery,Bus Station
3,Hägersten-Liljeholmen,59.3,18.0,1166.46,14.0,116.62,Thai Restaurant,Café,Restaurant,Supermarket,Sushi Restaurant
4,Hässelby-Vällingby,59.37,17.85,907.47,7.94,139.59,Metro Station,Plaza,Supermarket,Grocery Store,Event Space
5,Kungsholmen,59.33,18.02,1270.92,15.98,127.52,Park,Café,Scandinavian Restaurant,Grocery Store,Liquor Store
6,Norrmalm,59.34,18.05,1648.27,22.49,126.84,Café,Scandinavian Restaurant,Hotel,Bakery,Hostel
7,Rinkeby-Tensta,59.4,17.94,787.42,3.31,276.56,Middle Eastern Restaurant,Asian Restaurant,Coffee Shop,Clothing Store,Gym / Fitness Center
8,Skarpnäck,59.29,18.11,1200.28,13.65,119.53,Metro Station,Bakery,Supermarket,Sushi Restaurant,Café
9,Skärholmen,59.28,17.91,879.06,35.69,140.19,Metro Station,Gym / Fitness Center,Plaza,Supermarket,Japanese Restaurant


## Cluster Neighborhoods

In [27]:
kclusters = 5

# Feature matrix: every category-frequency column, without the neighbourhood label.
# drop(columns=...) replaces the positional axis argument, which pandas 2.0 no longer accepts.
stockholm_grouped_clustering = stockholm_grouped.drop(columns='Neighborhood')

# run k-means clustering
# n_init is pinned to 10 (the long-standing default) so results do not change with the
# scikit-learn version, whose default moved to 'auto'.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(stockholm_grouped_clustering)

kmeans.labels_[0:10]

array([3, 1, 0, 0, 4, 0, 0, 0, 3, 0], dtype=int32)

In [28]:
# add clustering labels
# kmeans.labels_ follows the row order of stockholm_grouped, so look each label up by
# neighbourhood name instead of assuming df has the same rows in the same order.
labels_by_neighbourhood = pd.Series(kmeans.labels_, index=stockholm_grouped['Neighborhood'].values)

# Remove labels left over from a previous run so the cell can be executed again.
if 'Cluster Labels' in df.columns:
    df = df.drop(columns='Cluster Labels')
df.insert(0, 'Cluster Labels', df['neighbourhood'].map(labels_by_neighbourhood))

df

Unnamed: 0,Cluster Labels,neighbourhood,latitude,longitude,price,number_of_reviews,availability_365,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,3,Bromma,59.34,17.94,1202.82,16.98,147.31,Supermarket,Hotel,Bakery,Farmers Market,Fast Food Restaurant
1,1,Enskede-Årsta-Vantör,59.28,18.06,1154.63,15.39,121.61,Hotel,Metro Station,Food Truck,Convenience Store,Creperie
2,0,Farsta,59.26,18.09,1097.31,15.04,134.07,Plaza,Scandinavian Restaurant,Sushi Restaurant,Bakery,Bus Station
3,0,Hägersten-Liljeholmen,59.3,18.0,1166.46,14.0,116.62,Thai Restaurant,Café,Restaurant,Supermarket,Sushi Restaurant
4,4,Hässelby-Vällingby,59.37,17.85,907.47,7.94,139.59,Metro Station,Plaza,Supermarket,Grocery Store,Event Space
5,0,Kungsholmen,59.33,18.02,1270.92,15.98,127.52,Park,Café,Scandinavian Restaurant,Grocery Store,Liquor Store
6,0,Norrmalm,59.34,18.05,1648.27,22.49,126.84,Café,Scandinavian Restaurant,Hotel,Bakery,Hostel
7,0,Rinkeby-Tensta,59.4,17.94,787.42,3.31,276.56,Middle Eastern Restaurant,Asian Restaurant,Coffee Shop,Clothing Store,Gym / Fitness Center
8,3,Skarpnäck,59.29,18.11,1200.28,13.65,119.53,Metro Station,Bakery,Supermarket,Sushi Restaurant,Café
9,0,Skärholmen,59.28,17.91,879.06,35.69,140.19,Metro Station,Gym / Fitness Center,Plaza,Supermarket,Japanese Restaurant


### Cluster 1 

In [29]:
# Neighbourhoods with cluster label 0. The column selector keeps position 1 (neighbourhood)
# plus everything from position 5 onward, so the columns at positions 2-4 are not shown.
df.loc[df['Cluster Labels'] == 0, df.columns[[1] + list(range(5, df.shape[1]))]]

Unnamed: 0,neighbourhood,number_of_reviews,availability_365,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,Farsta,15.04,134.07,Plaza,Scandinavian Restaurant,Sushi Restaurant,Bakery,Bus Station
3,Hägersten-Liljeholmen,14.0,116.62,Thai Restaurant,Café,Restaurant,Supermarket,Sushi Restaurant
5,Kungsholmen,15.98,127.52,Park,Café,Scandinavian Restaurant,Grocery Store,Liquor Store
6,Norrmalm,22.49,126.84,Café,Scandinavian Restaurant,Hotel,Bakery,Hostel
7,Rinkeby-Tensta,3.31,276.56,Middle Eastern Restaurant,Asian Restaurant,Coffee Shop,Clothing Store,Gym / Fitness Center
9,Skärholmen,35.69,140.19,Metro Station,Gym / Fitness Center,Plaza,Supermarket,Japanese Restaurant
10,Spånga-Tensta,5.85,120.48,Pizza Place,Pub,Park,Restaurant,Butcher
11,Södermalm,32.03,140.0,Scandinavian Restaurant,Bakery,Bar,Pub,Café
13,Östermalm,19.29,144.59,Italian Restaurant,Bakery,Café,French Restaurant,Supermarket


In [30]:
# Neighbourhoods with cluster label 1. The column selector keeps position 1 (neighbourhood)
# plus everything from position 5 onward, so the columns at positions 2-4 are not shown.
df.loc[df['Cluster Labels'] == 1, df.columns[[1] + list(range(5, df.shape[1]))]]

Unnamed: 0,neighbourhood,number_of_reviews,availability_365,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,Enskede-Årsta-Vantör,15.39,121.61,Hotel,Metro Station,Food Truck,Convenience Store,Creperie


In [31]:
# Neighbourhoods with cluster label 2. The column selector keeps position 1 (neighbourhood)
# plus everything from position 5 onward, so the columns at positions 2-4 are not shown.
df.loc[df['Cluster Labels'] == 2, df.columns[[1] + list(range(5, df.shape[1]))]]

Unnamed: 0,neighbourhood,number_of_reviews,availability_365,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
12,Älvsjö,8.71,142.06,Playground,Pizza Place,Bakery,Bus Station,Yoga Studio


In [32]:
# Neighbourhoods with cluster label 3. The column selector keeps position 1 (neighbourhood)
# plus everything from position 5 onward, so the columns at positions 2-4 are not shown.
df.loc[df['Cluster Labels'] == 3, df.columns[[1] + list(range(5, df.shape[1]))]]

Unnamed: 0,neighbourhood,number_of_reviews,availability_365,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Bromma,16.98,147.31,Supermarket,Hotel,Bakery,Farmers Market,Fast Food Restaurant
8,Skarpnäck,13.65,119.53,Metro Station,Bakery,Supermarket,Sushi Restaurant,Café


In [33]:
# Neighbourhoods with cluster label 4. The column selector keeps position 1 (neighbourhood)
# plus everything from position 5 onward, so the columns at positions 2-4 are not shown.
df.loc[df['Cluster Labels'] == 4, df.columns[[1] + list(range(5, df.shape[1]))]]

Unnamed: 0,neighbourhood,number_of_reviews,availability_365,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
4,Hässelby-Vällingby,7.94,139.59,Metro Station,Plaza,Supermarket,Grocery Store,Event Space
