# IBM Capstone Project

## 01. Scraping Top EU Cities Table

We will first read the list of top European Union cities (by population) from Wikipedia

In [1]:
import os

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans

In [37]:
url = 'https://en.wikipedia.org/wiki/List_of_cities_in_the_European_Union_by_population_within_city_limits'

# Fail fast on network problems: requests applies no timeout by default, and
# without raise_for_status() an HTTP error page would be parsed as the article.
response = requests.get(url, timeout=30)
response.raise_for_status()
page = response.text
soup = BeautifulSoup(page, 'lxml') # get the page html

table = soup.find_all('table')[1] # pick second table in the page
df = pd.read_html(str(table)) # read table as a list of DataFrame objects
df = df[0] # Change list of objects to a DF by picking the 1st object

new_header = df.iloc[0] # grab first row for header
df = df[1:].copy() # remove header row from DF; copy so the slice owns its data
df.columns = new_header # set header row as the DF header

# Change type from object to int/str
df = df.astype({'City': str, 'Member State': str, 'Officialpopulation': int})

df.head()

Unnamed: 0,City,Member State,Officialpopulation,Date of census,Reference,Photography
1,London,United Kingdom,8908081,30 June 2018,[1],
2,Berlin,Germany,3748148,31 December 2018,[2],
3,Madrid,Spain,3223334,31 January 2018,[3],
4,Rome,Italy,2856133,31 December 2018,[4],
5,Paris,France,2140526,1 January 2019,[5][6],


We will select only those cities that have a population of more than half a million people.

In [38]:
# Select cities with population > 0.5 million
df2 = df.loc[df['Officialpopulation'] > 500000]

# Keep only relevant columns; .copy() makes df2 an independent frame so the
# in-place edits made to it in later cells do not raise SettingWithCopyWarning
df2 = df2[['City', 'Member State', 'Officialpopulation']].copy()

df2

Unnamed: 0,City,Member State,Officialpopulation
1,London,United Kingdom,8908081
2,Berlin,Germany,3748148
3,Madrid,Spain,3223334
4,Rome,Italy,2856133
5,Paris,France,2140526
6,Vienna,Austria,1910370
7,Bucharest,Romania,1877155
8,Hamburg,Germany,1841179
9,Warsaw,Poland,1777972
10,Budapest,Hungary,1752286


In [39]:
# Strip the redundant text from the Brussels entry. Match on the name instead of
# on index label 16: .at[16, ...] would silently edit the wrong city (or append
# a new row) if the Wikipedia ranking ever shifts.
is_brussels = df2['City'].str.startswith('Brussels')
df2.loc[is_brussels, 'City'] = 'Brussels'

# Merge City and State name under a single variable
df2['Location'] = df2['City'] + ', ' + df2['Member State']

Unnamed: 0,City,Member State,Officialpopulation,Location
1,London,United Kingdom,8908081,"London, United Kingdom"
2,Berlin,Germany,3748148,"Berlin, Germany"
3,Madrid,Spain,3223334,"Madrid, Spain"
4,Rome,Italy,2856133,"Rome, Italy"
5,Paris,France,2140526,"Paris, France"
6,Vienna,Austria,1910370,"Vienna, Austria"
7,Bucharest,Romania,1877155,"Bucharest, Romania"
8,Hamburg,Germany,1841179,"Hamburg, Germany"
9,Warsaw,Poland,1777972,"Warsaw, Poland"
10,Budapest,Hungary,1752286,"Budapest, Hungary"


In [40]:
# (rows, columns) of the filtered city table
df2.shape

(62, 4)

In [41]:
# Checkpoint: write the filtered city table to a CSV file (relative path)
df2.to_csv(r'df_EU_Cities.csv')

## 02. Adding Geographic Coordinates

In [42]:
df2["Latitude"] = np.nan
df2["Longitude"] = np.nan

# One geocoder client is enough; it does not need to be rebuilt for every city
geolocator = Nominatim(user_agent="EU")

for index, address in df2["Location"].items():
    loc = geolocator.geocode(address, timeout=10)
    if loc is None:
        # geocode() returns None when nothing matches; leave the coordinates
        # as NaN and report the city instead of crashing on loc.latitude
        print('No geocoding result for {}'.format(address))
        continue
    df2.at[index, 'Latitude'] = loc.latitude
    df2.at[index, 'Longitude'] = loc.longitude

df2.head()

Unnamed: 0,City,Member State,Officialpopulation,Location,Latitude,Longitude
1,London,United Kingdom,8908081,"London, United Kingdom",51.507322,-0.127647
2,Berlin,Germany,3748148,"Berlin, Germany",52.517037,13.38886
3,Madrid,Spain,3223334,"Madrid, Spain",40.416705,-3.703582
4,Rome,Italy,2856133,"Rome, Italy",41.894802,12.485338
5,Paris,France,2140526,"Paris, France",48.85661,2.351499


In [44]:
# Checkpoint: write the city table, now including Latitude/Longitude, to CSV
df2.to_csv(r'df_EU_Cities_Coord.csv')

In [45]:
map_T = folium.Map(location=[51.5, -0.1], zoom_start=2)

# Drop one circle marker per city, with a "City, Member State" popup
city_points = zip(df2['Latitude'], df2['Longitude'], df2['City'], df2['Member State'])
for lat, lng, city, state in city_points:
    label = folium.Popup('{}, {}'.format(city, state), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_T)

map_T

## 03. Adding Venues for Each City

In [46]:
# Foursquare credentials and parameters
#
# Secrets are read from the environment so they never end up in the notebook
# or its version history. Export FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel; a missing variable
# raises KeyError here rather than failing later inside an API call.
# NOTE(review): the key pair that used to be hard-coded in this cell has been
# published and should be rotated.

CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID']
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET']
VERSION = '20180605'

LIMIT = 100

#### Function to get venues around each city

In [47]:
def getNearbyVenues(names, latitudes, longitudes, radius=20000):
    """Collect Foursquare "explore" venues around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Label and coordinates of each location; consumed together via zip().
    radius : int, default 20000
        Value sent as the ``radius`` query parameter of every request.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns City, City Latitude,
        City Longitude, Venue, Venue Latitude, Venue Longitude and
        Venue Category. Empty (but with these columns) if nothing came back.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    endpoint = 'https://api.foursquare.com/v2/venues/explore'
    columns = ['City',
               'City Latitude',
               'City Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and escape the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # make the GET request; time out instead of hanging forever and surface
        # HTTP errors directly instead of as a KeyError on the JSON body
        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # a venue may come without any category; record None for it
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # passing columns= keeps the schema even when no venue was returned at all
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return(nearby_venues)

In [48]:
# Fetch venues for every city in df2, keyed by its "City, Member State" label
EU_venues = getNearbyVenues(
    names=df2['Location'],
    latitudes=df2['Latitude'],
    longitudes=df2['Longitude'],
)

London, United Kingdom
Berlin, Germany
Madrid, Spain
Rome, Italy
Paris, France
Vienna, Austria
Bucharest, Romania
Hamburg, Germany
Warsaw, Poland
Budapest, Hungary
Barcelona, Spain
Munich, Germany
Milan, Italy
Prague, Czech Republic
Sofia, Bulgaria
Brussels, Belgium
Birmingham, United Kingdom
Cologne, Germany
Stockholm, Sweden
Naples, Italy
Turin, Italy
Amsterdam, Netherlands
Marseille, France
Zagreb, Croatia
Valencia, Spain
Leeds, United Kingdom
Kraków, Poland
Frankfurt, Germany
Seville, Spain
Łódź, Poland
Zaragoza, Spain
Athens, Greece
Palermo, Italy
Helsinki, Finland
Rotterdam, Netherlands
Wrocław, Poland
Stuttgart, Germany
Riga, Latvia
Copenhagen, Denmark
Glasgow, United Kingdom
Düsseldorf, Germany
Leipzig, Germany
Dortmund, Germany
Essen, Germany
Genoa, Italy
Sheffield, United Kingdom
Gothenburg, Sweden
Málaga, Spain
Bremen, Germany
Vilnius, Lithuania
Dublin, Ireland
Dresden, Germany
Manchester, United Kingdom
The Hague, Netherlands
Hanover, Germany
Poznań, Poland
Bradford, United

In [52]:
# Checkpoint the venue table to CSV, then show its (rows, columns)
EU_venues.to_csv(r'df_EU_Cities_Venues.csv')
EU_venues.shape

(6200, 7)

In [53]:
# Preview the first rows of the venue table
EU_venues.head()

Unnamed: 0,City,City Latitude,City Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"London, United Kingdom",51.507322,-0.127647,National Gallery,51.508876,-0.128478,Art Museum
1,"London, United Kingdom",51.507322,-0.127647,Corinthia Hotel,51.506607,-0.12446,Hotel
2,"London, United Kingdom",51.507322,-0.127647,Trafalgar Square,51.507987,-0.128048,Plaza
3,"London, United Kingdom",51.507322,-0.127647,Gordon's Wine Bar,51.507911,-0.123293,Wine Bar
4,"London, United Kingdom",51.507322,-0.127647,Whitehall Gardens,51.506354,-0.1229,Garden


In [54]:
# Number of distinct venue categories across all cities
category_count = len(EU_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(category_count))

There are 346 uniques categories.


## 04. Analyzing Venues in Each City

In [56]:
# one hot encoding
EU_onehot = pd.get_dummies(EU_venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category that is itself named "City" would be overwritten by the key
# column added below, so move it out of the way first (no-op when absent)
EU_onehot = EU_onehot.rename(columns={'City': 'City (Venue Category)'})

# add city column back to dataframe, directly as the first column
EU_onehot.insert(0, 'City', EU_venues['City'])

EU_onehot.head()

Unnamed: 0,City,Accessories Store,Adult Boutique,African Restaurant,American Restaurant,Antique Shop,Apple Wine Pub,Aquarium,Arcade,Argentinian Restaurant,...,Waterfall,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Winery,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,"London, United Kingdom",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"London, United Kingdom",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"London, United Kingdom",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"London, United Kingdom",0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,"London, United Kingdom",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [59]:
# Average the one-hot rows per city: each value becomes the share of that
# city's venues that fall into the category

EU_grouped = EU_onehot.groupby('City', as_index=False).mean()
EU_grouped.head()

Unnamed: 0,City,Accessories Store,Adult Boutique,African Restaurant,American Restaurant,Antique Shop,Apple Wine Pub,Aquarium,Arcade,Argentinian Restaurant,...,Waterfall,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Winery,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,"Amsterdam, Netherlands",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0
1,"Antwerp, Belgium",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.01,0.0
2,"Athens, Greece",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.03,0.02,0.0,0.0,0.0,0.0,0.0,0.0
3,"Barcelona, Spain",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,...,0.0,0.0,0.0,0.04,0.01,0.0,0.0,0.01,0.0,0.0
4,"Berlin, Germany",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0


#### Function to sort venues in descending order

In [60]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of a row.

    The first entry of ``row`` is skipped; the remaining entries are ranked
    by value in descending order and the labels of the first
    ``num_top_venues`` are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.head(num_top_venues).index.values

#### Create dataframe with Top 10 venues for each city

In [62]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['City']
for ind in np.arange(num_top_venues):
    # ranks 1-3 take 'st'/'nd'/'rd', every later rank falls back to 'th';
    # an explicit bounds check replaces a bare `except:` that hid any error
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe
city_venues_sorted = pd.DataFrame(columns=columns)
city_venues_sorted['City'] = EU_grouped['City']

# fill each city's row with its top categories, most frequent first
for ind in np.arange(EU_grouped.shape[0]):
    city_venues_sorted.iloc[ind, 1:] = return_most_common_venues(EU_grouped.iloc[ind, :], num_top_venues)

city_venues_sorted.head()

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Amsterdam, Netherlands",Hotel,French Restaurant,Restaurant,Coffee Shop,Breakfast Spot,Café,Yoga Studio,Theater,Bookstore,Cocktail Bar
1,"Antwerp, Belgium",Coffee Shop,Cocktail Bar,Clothing Store,Plaza,Italian Restaurant,Juice Bar,Sushi Restaurant,Wine Bar,Bar,Boutique
2,"Athens, Greece",Café,Coffee Shop,Historic Site,Bar,Meze Restaurant,Hotel,Cocktail Bar,Dessert Shop,Theater,History Museum
3,"Barcelona, Spain",Hotel,Tapas Restaurant,Plaza,Spanish Restaurant,Pizza Place,Wine Bar,Cocktail Bar,Italian Restaurant,Dessert Shop,Coffee Shop
4,"Berlin, Germany",Coffee Shop,Park,Bookstore,Concert Hall,Café,Gourmet Shop,Bakery,Art Gallery,Hotel,Wine Bar


## 05. Clustering Cities

In [64]:
# set number of clusters
kclusters = 8

# drop the label column so only the category frequencies are clustered;
# `columns=` replaces the positional axis argument, which pandas 2.0 removed
EU_grouped_clustering = EU_grouped.drop(columns='City')

# run k-means clustering; n_init is pinned so the result does not change with
# the scikit-learn version's default
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(EU_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([4, 6, 0, 4, 0, 7, 7, 0, 0, 6])

#### Create dataframe that includes cluster as well as top 10 venues for each city

In [72]:
# add clustering labels
# Written to be idempotent: a label column left over from an earlier run of this
# cell is dropped before inserting, so the cell works both on a fresh kernel and
# when re-run (a plain insert() fails the second time).
city_venues_sorted = (
    city_venues_sorted
    .rename(columns={'City': 'Location'})
    .drop(columns='Cluster Labels', errors='ignore')
)
city_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# merge the per-city venue summary into the city table (which holds latitude/longitude)
city_merged = df2.join(city_venues_sorted.set_index('Location'), on='Location')

# Change cluster float to integer
# NOTE(review): a city without a match in city_venues_sorted gets label 0 here
# and becomes indistinguishable from genuine members of cluster 0.
city_merged['Cluster Labels'] = city_merged['Cluster Labels'].fillna(0.0).astype(int)

city_merged.head()

Unnamed: 0,City,Member State,Officialpopulation,Location,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,London,United Kingdom,8908081,"London, United Kingdom",51.507322,-0.127647,4,Hotel,Cocktail Bar,Bookstore,Park,Art Museum,Clothing Store,Theater,Department Store,Bakery,Boutique
2,Berlin,Germany,3748148,"Berlin, Germany",52.517037,13.38886,0,Coffee Shop,Park,Bookstore,Concert Hall,Café,Gourmet Shop,Bakery,Art Gallery,Hotel,Wine Bar
3,Madrid,Spain,3223334,"Madrid, Spain",40.416705,-3.703582,4,Hotel,Restaurant,Plaza,Café,Spanish Restaurant,Art Museum,Monument / Landmark,Ice Cream Shop,Cocktail Bar,Tapas Restaurant
4,Rome,Italy,2856133,"Rome, Italy",41.894802,12.485338,2,Plaza,Historic Site,Ice Cream Shop,Monument / Landmark,Sandwich Place,Italian Restaurant,Wine Bar,Church,Pizza Place,Fountain
5,Paris,France,2140526,"Paris, France",48.85661,2.351499,3,Plaza,Wine Bar,Hotel,Bookstore,Cocktail Bar,French Restaurant,Italian Restaurant,Historic Site,Bakery,Garden


In [73]:
# Checkpoint: write the merged city/cluster table to CSV
city_merged.to_csv(r'df_EU_Cities_Clusters.csv')

In [74]:
# create map
map_clusters = folium.Map(location=[51.5,-0.1], zoom_start=2)

# set color scheme for the clusters: one evenly spaced rainbow colour per
# cluster, converted to hex strings
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(city_merged['Latitude'], city_merged['Longitude'], city_merged['Location'], city_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster-1 keeps the existing colour assignment; cluster 0 wraps around
    # to the last colour through negative indexing
    marker_color = rainbow[cluster-1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### Examine Clusters

#### Cluster 0

In [77]:
# Cluster 0 members: first two columns plus everything from column 6 onwards
city_merged[city_merged['Cluster Labels'] == 0].iloc[:, [0, 1, *range(6, city_merged.shape[1])]]

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Berlin,Germany,0,Coffee Shop,Park,Bookstore,Concert Hall,Café,Gourmet Shop,Bakery,Art Gallery,Hotel,Wine Bar
9,Warsaw,Poland,0,Park,Theater,Cocktail Bar,Coffee Shop,Sushi Restaurant,Café,Hotel,Vegetarian / Vegan Restaurant,Boutique,Beer Bar
12,Munich,Germany,0,Café,Plaza,German Restaurant,Hotel,Cocktail Bar,Ice Cream Shop,Coffee Shop,Church,Pastry Shop,Gourmet Shop
14,Prague,Czech Republic,0,Café,Park,Cocktail Bar,Hotel,Italian Restaurant,Ice Cream Shop,Garden,Wine Bar,Burger Joint,Theater
15,Sofia,Bulgaria,0,Park,Bakery,Italian Restaurant,Restaurant,Bar,Dessert Shop,Theater,Coffee Shop,Beer Store,Café
16,Brussels,Belgium,0,Bar,Italian Restaurant,Concert Hall,Hotel,Chocolate Shop,Plaza,Park,Bakery,Boutique,Steakhouse
18,Cologne,Germany,0,Italian Restaurant,Café,Park,Hotel,Bakery,Coffee Shop,Sushi Restaurant,Art Museum,Plaza,Cocktail Bar
24,Zagreb,Croatia,0,Café,Bar,Plaza,Restaurant,Dessert Shop,BBQ Joint,Mediterranean Restaurant,Hostel,Theater,Park
27,Kraków,Poland,0,Hotel,Café,Park,Bar,Plaza,Food Truck,Church,Ice Cream Shop,Restaurant,Pub
28,Frankfurt,Germany,0,Café,Park,Hotel,Japanese Restaurant,Bar,Plaza,Coffee Shop,Art Museum,Burger Joint,Wine Bar


#### Cluster 1

In [78]:
# Cluster 1 members: first two columns plus everything from column 6 onwards
city_merged[city_merged['Cluster Labels'] == 1].iloc[:, [0, 1, *range(6, city_merged.shape[1])]]

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
26,Leeds,United Kingdom,1,Bar,Coffee Shop,Pub,Café,Restaurant,Thai Restaurant,Indian Restaurant,Shopping Mall,Beer Bar,Burger Joint
40,Glasgow,United Kingdom,1,Bar,Coffee Shop,Italian Restaurant,Hotel,Pub,Café,Restaurant,Steakhouse,Whisky Bar,Music Venue
53,Manchester,United Kingdom,1,Coffee Shop,Pub,Bar,Café,Beer Bar,Italian Restaurant,Asian Restaurant,Indian Restaurant,Hotel,Tea Room


#### Cluster 2

In [79]:
# Cluster 2 members: first two columns plus everything from column 6 onwards
city_merged[city_merged['Cluster Labels'] == 2].iloc[:, [0, 1, *range(6, city_merged.shape[1])]]

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Rome,Italy,2,Plaza,Historic Site,Ice Cream Shop,Monument / Landmark,Sandwich Place,Italian Restaurant,Wine Bar,Church,Pizza Place,Fountain
20,Naples,Italy,2,Pizza Place,Plaza,Italian Restaurant,Café,Historic Site,Ice Cream Shop,Scenic Lookout,Hotel,Art Museum,Castle
21,Turin,Italy,2,Plaza,Ice Cream Shop,Hotel,Café,Piedmontese Restaurant,Historic Site,History Museum,Pizza Place,Japanese Restaurant,Boutique
33,Palermo,Italy,2,Ice Cream Shop,Italian Restaurant,Pizza Place,Plaza,Café,Mediterranean Restaurant,Historic Site,Park,Dessert Shop,Museum
45,Genoa,Italy,2,Italian Restaurant,Plaza,Pizza Place,Ice Cream Shop,Café,Hotel,Historic Site,Bar,Pub,Scenic Lookout


#### Cluster 3

In [80]:
# Cluster 3 members: first two columns plus everything from column 6 onwards
city_merged[city_merged['Cluster Labels'] == 3].iloc[:, [0, 1, *range(6, city_merged.shape[1])]]

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Paris,France,3,Plaza,Wine Bar,Hotel,Bookstore,Cocktail Bar,French Restaurant,Italian Restaurant,Historic Site,Bakery,Garden
10,Budapest,Hungary,3,Coffee Shop,Hotel,Bakery,Italian Restaurant,Restaurant,Theater,Tea Room,Wine Bar,Dessert Shop,Cocktail Bar
19,Stockholm,Sweden,3,Scandinavian Restaurant,Hotel,Café,Coffee Shop,Park,Bookstore,Falafel Restaurant,Seafood Restaurant,Beer Bar,Cocktail Bar
34,Helsinki,Finland,3,Scandinavian Restaurant,Coffee Shop,Hotel,Café,Park,Wine Bar,Restaurant,French Restaurant,Art Museum,Beer Bar
39,Copenhagen,Denmark,3,Beer Bar,Café,Coffee Shop,Cocktail Bar,Wine Bar,Scandinavian Restaurant,Park,Bakery,Art Museum,Breakfast Spot
47,Gothenburg,Sweden,3,Coffee Shop,Hotel,Burger Joint,Scandinavian Restaurant,Pub,Italian Restaurant,Theme Park Ride / Attraction,Café,Bar,Wine Bar


#### Cluster 4

In [81]:
# Cluster 4 members: first two columns plus everything from column 6 onwards
city_merged[city_merged['Cluster Labels'] == 4].iloc[:, [0, 1, *range(6, city_merged.shape[1])]]

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,London,United Kingdom,4,Hotel,Cocktail Bar,Bookstore,Park,Art Museum,Clothing Store,Theater,Department Store,Bakery,Boutique
3,Madrid,Spain,4,Hotel,Restaurant,Plaza,Café,Spanish Restaurant,Art Museum,Monument / Landmark,Ice Cream Shop,Cocktail Bar,Tapas Restaurant
6,Vienna,Austria,4,Plaza,Hotel,Restaurant,Austrian Restaurant,Park,Italian Restaurant,Ice Cream Shop,Concert Hall,Art Museum,Breakfast Spot
8,Hamburg,Germany,4,Hotel,Coffee Shop,Café,Steakhouse,Theater,Cocktail Bar,Pizza Place,Restaurant,Park,Neighborhood
11,Barcelona,Spain,4,Hotel,Tapas Restaurant,Plaza,Spanish Restaurant,Pizza Place,Wine Bar,Cocktail Bar,Italian Restaurant,Dessert Shop,Coffee Shop
13,Milan,Italy,4,Hotel,Boutique,Italian Restaurant,Pizza Place,Monument / Landmark,Plaza,Ice Cream Shop,Wine Bar,Art Museum,Bakery
22,Amsterdam,Netherlands,4,Hotel,French Restaurant,Restaurant,Coffee Shop,Breakfast Spot,Café,Yoga Studio,Theater,Bookstore,Cocktail Bar
23,Marseille,France,4,Hotel,French Restaurant,Plaza,Bar,Church,Provençal Restaurant,Pub,Historic Site,Tea Room,Park
41,Düsseldorf,Germany,4,Hotel,Coffee Shop,Bar,Café,Park,Brewery,Pizza Place,Japanese Restaurant,Cocktail Bar,Grocery Store
50,Vilnius,Lithuania,4,Hotel,Coffee Shop,Café,Bar,Park,Restaurant,Plaza,Scenic Lookout,Museum,Pizza Place


#### Cluster 5

In [82]:
# Cluster 5 members: first two columns plus everything from column 6 onwards
city_merged[city_merged['Cluster Labels'] == 5].iloc[:, [0, 1, *range(6, city_merged.shape[1])]]

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
25,Valencia,Spain,5,Tapas Restaurant,Plaza,Italian Restaurant,Restaurant,Hotel,Café,Coffee Shop,Garden,Ice Cream Shop,Museum
29,Seville,Spain,5,Tapas Restaurant,Plaza,Spanish Restaurant,Hotel,Ice Cream Shop,Historic Site,Restaurant,Café,Monument / Landmark,Park
31,Zaragoza,Spain,5,Tapas Restaurant,Restaurant,Hotel,Spanish Restaurant,Bar,Plaza,Park,Coffee Shop,Mediterranean Restaurant,Bistro
48,Málaga,Spain,5,Spanish Restaurant,Tapas Restaurant,Hotel,Restaurant,Mediterranean Restaurant,Plaza,Beach,Art Museum,Café,Park


#### Cluster 6

In [83]:
# Cluster 6 members: first two columns plus everything from column 6 onwards
city_merged[city_merged['Cluster Labels'] == 6].iloc[:, [0, 1, *range(6, city_merged.shape[1])]]

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Bucharest,Romania,6,Coffee Shop,Dessert Shop,Hotel,Restaurant,Park,Pizza Place,Cocktail Bar,Romanian Restaurant,Bar,Gym / Fitness Center
58,Antwerp,Belgium,6,Coffee Shop,Cocktail Bar,Clothing Store,Plaza,Italian Restaurant,Juice Bar,Sushi Restaurant,Wine Bar,Bar,Boutique


#### Cluster 7

In [84]:
# Cluster 7 members: first two columns plus everything from column 6 onwards
city_merged[city_merged['Cluster Labels'] == 7].iloc[:, [0, 1, *range(6, city_merged.shape[1])]]

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Birmingham,United Kingdom,7,Pub,Bar,Indian Restaurant,Coffee Shop,Hotel,Pizza Place,Brewery,Café,Shopping Mall,Burger Joint
46,Sheffield,United Kingdom,7,Pub,Coffee Shop,Park,Café,Bar,Pizza Place,Gym / Fitness Center,Indian Restaurant,Bakery,Deli / Bodega
57,Bradford,United Kingdom,7,Pub,Bar,Coffee Shop,Indian Restaurant,Park,Beer Bar,Brewery,Thai Restaurant,Beer Store,Café
