### Import libraries

In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

import json # library to handle JSON files

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

### Read london csv file from link

In [2]:
london_data = pd.read_csv('https://www.doogal.co.uk/UKPostcodesCSV.ashx?region=E12000007')
london_data.head()

Unnamed: 0,Postcode,In Use?,Latitude,Longitude,Easting,Northing,Grid Ref,County,District,Ward,...,Quality,User Type,Last updated,Nearest station,Distance to station,Postcode area,Postcode district,Police force,Water company,Plus Code
0,BR1 1AA,Yes,51.401546,0.015415,540291,168873,TQ402688,Greater London,Bromley,Bromley Town,...,1,0,2019-11-23,Bromley South,0.218257,BR,BR1,Metropolitan Police,Thames Water,9F32C228+J5
1,BR1 1AB,Yes,51.406333,0.015208,540262,169405,TQ402694,Greater London,Bromley,Bromley Town,...,1,0,2019-11-23,Bromley North,0.253666,BR,BR1,Metropolitan Police,Thames Water,9F32C248+G3
2,BR1 1AD,No,51.400057,0.016715,540386,168710,TQ403687,Greater London,Bromley,Bromley Town,...,1,1,2019-11-23,Bromley South,0.044559,BR,BR1,Metropolitan Police,,9F32C228+2M
3,BR1 1AE,Yes,51.404543,0.014195,540197,169204,TQ401692,Greater London,Bromley,Bromley Town,...,1,0,2019-11-23,Bromley North,0.462939,BR,BR1,Metropolitan Police,Thames Water,9F32C237+RM
4,BR1 1AF,Yes,51.401392,0.014948,540259,168855,TQ402688,Greater London,Bromley,Bromley Town,...,1,0,2019-11-23,Bromley South,0.227664,BR,BR1,Metropolitan Police,Thames Water,9F32C227+HX


In [3]:
london_data.columns

Index(['Postcode', 'In Use?', 'Latitude', 'Longitude', 'Easting', 'Northing',
       'Grid Ref', 'County', 'District', 'Ward', 'District Code', 'Ward Code',
       'Country', 'County Code', 'Constituency', 'Introduced', 'Terminated',
       'Parish', 'National Park', 'Population', 'Households', 'Built up area',
       'Built up sub-division', 'Lower layer super output area', 'Rural/urban',
       'Region', 'Altitude', 'London zone', 'LSOA Code', 'Local authority',
       'MSOA Code', 'Middle layer super output area', 'Parish Code',
       'Census output area', 'Constituency Code',
       'Index of Multiple Deprivation', 'Quality', 'User Type', 'Last updated',
       'Nearest station', 'Distance to station', 'Postcode area',
       'Postcode district', 'Police force', 'Water company', 'Plus Code'],
      dtype='object')

### Data processing to clean and group the table by zones and Districts

In [7]:
# clean up postal codes not in use

# Get names of indexes for which column In use has value No
indexNames = london_data[london_data['In Use?'] == 'No' ].index
 
# Delete these row indexes from dataFrame
london_data.drop(indexNames , inplace=True)
london_data_2 = london_data[['London zone','District','Latitude','Longitude']]

In [31]:
# group data by zones and districts and find the mean of coordinates
london_grouped = london_data_2.groupby(['London zone','District']).mean().reset_index()
london_grouped

Unnamed: 0,London zone,District,Latitude,Longitude
0,1,Camden,51.524655,-0.129067
1,1,City of London,51.514622,-0.092233
2,1,Hackney,51.528535,-0.083053
3,1,Islington,51.526789,-0.106001
4,1,Kensington and Chelsea,51.495620,-0.180086
5,1,Lambeth,51.490308,-0.119084
6,1,Southwark,51.499403,-0.092212
7,1,Tower Hamlets,51.518024,-0.071222
8,1,Wandsworth,51.480774,-0.135710
9,1,Westminster,51.511460,-0.146574


## Now we will check the geographical coordinates of London

In [32]:
#import geocoders
from geopy.geocoders import Nominatim

#import folium
#!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

In [33]:
address = 'London'

latitude = 51.5074
longitude = 0.1278
#geolocator = Nominatim(user_agent="sc_explorer")
#location = geolocator.geocode(address)
#latitude = location.latitude
#longitude = location.longitude
print('The geograpical coordinate of London are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of London are 51.5074, 0.1278.


In [34]:
map_london = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(london_grouped['Latitude'], london_grouped['Longitude'], london_grouped['District']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_london)  
    
map_london

## Define Foursquare Credentials and Version

In [35]:
CLIENT_ID = '1Y01OHXCZ2EN5ZYSLRLWIF25AFXW13FGAZP1I3TREPT0PMYS' # your Foursquare ID
CLIENT_SECRET = '5JQEYSSEYQVM1SE1G0XUO5JEG4PWCAESZ43RVO2UX3WUQFKM' # your Foursquare Secret
VERSION = '20200120' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 1Y01OHXCZ2EN5ZYSLRLWIF25AFXW13FGAZP1I3TREPT0PMYS
CLIENT_SECRET:5JQEYSSEYQVM1SE1G0XUO5JEG4PWCAESZ43RVO2UX3WUQFKM


## Top 100 venues from the region within radius of 500m

In [36]:
#define function 

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [37]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

london_venues = getNearbyVenues(names=london_grouped['District'],
                                   latitudes=london_grouped['Latitude'],
                                   longitudes=london_grouped['Longitude']
                                  )

Camden
City of London
Hackney
Islington
Kensington and Chelsea
Lambeth
Southwark
Tower Hamlets
Wandsworth
Westminster
Barnet
Brent
Camden
Ealing
Greenwich
Hackney
Hammersmith and Fulham
Haringey
Hounslow
Islington
Kensington and Chelsea
Lambeth
Lewisham
Newham
Richmond upon Thames
Southwark
Tower Hamlets
Waltham Forest
Wandsworth
Westminster
Barking and Dagenham
Barnet
Brent
Bromley
Camden
Croydon
Ealing
Enfield
Greenwich
Hackney
Hammersmith and Fulham
Haringey
Hounslow
Islington
Kingston upon Thames
Lambeth
Lewisham
Merton
Newham
Redbridge
Richmond upon Thames
Southwark
Sutton
Tower Hamlets
Waltham Forest
Wandsworth
Barking and Dagenham
Barnet
Bexley
Brent
Bromley
Croydon
Ealing
Enfield
Greenwich
Haringey
Harrow
Hillingdon
Hounslow
Kingston upon Thames
Lewisham
Merton
Newham
Redbridge
Richmond upon Thames
Sutton
Waltham Forest
Barking and Dagenham
Barnet
Bexley
Bromley
Croydon
Ealing
Enfield
Harrow
Havering
Hillingdon
Hounslow
Kingston upon Thames
Redbridge
Richmond upon Thames
Sutton

In [38]:
print(london_venues.shape)
print('There are {} uniques categories.'.format(len(london_venues['Venue Category'].unique())))
london_venues.head()

(2093, 7)
There are 272 uniques categories.


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Camden,51.524655,-0.129067,Bloomsbury Coffee House,51.525282,-0.126495,Café
1,Camden,51.524655,-0.129067,Lever & Bloom Coffee,51.523334,-0.131076,Coffee Shop
2,Camden,51.524655,-0.129067,Tavistock Square,51.525006,-0.129068,Park
3,Camden,51.524655,-0.129067,Gordon Square,51.524098,-0.130763,Park
4,Camden,51.524655,-0.129067,Gay's The Word,51.525386,-0.125369,Bookstore


# Data analysis

In [39]:
# one hot encoding
london_onehot = pd.get_dummies(london_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
london_onehot['Neighbourhood'] = london_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [london_onehot.columns[-1]] + list(london_onehot.columns[:-1])
london_onehot = london_onehot[fixed_columns]

london_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Argentinian Restaurant,Art Gallery,Art Museum,...,Vietnamese Restaurant,Warehouse Store,Watch Shop,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Camden,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Camden,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Camden,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Camden,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Camden,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [40]:
london_onehot_grouped = london_onehot.groupby('Neighbourhood').mean().reset_index()
london_onehot_grouped

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Argentinian Restaurant,Art Gallery,Art Museum,...,Vietnamese Restaurant,Warehouse Store,Watch Shop,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Barking and Dagenham,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Barnet,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bexley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Brent,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667
4,Bromley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Camden,0.0,0.0,0.0,0.0,0.005263,0.0,0.005263,0.005263,0.005263,...,0.0,0.0,0.0,0.0,0.0,0.0,0.005263,0.005263,0.005263,0.0
6,City of London,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,...,0.02,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.02
7,Croydon,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Ealing,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0
9,Enfield,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0


## Finding the top venues 

In [42]:
num_top_venues = 5

for hood in london_onehot_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = london_onehot_grouped[london_onehot_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Barking and Dagenham----
            venue  freq
0             Pub  0.13
1   Train Station  0.07
2  Shipping Store  0.07
3     Gas Station  0.07
4     Bus Station  0.07


----Barnet----
           venue  freq
0  Grocery Store  0.17
1    Golf Course  0.11
2       Bus Stop  0.06
3    Pizza Place  0.06
4           Café  0.06


----Bexley----
               venue  freq
0          Pet Store   0.1
1               Lake   0.1
2  Fish & Chips Shop   0.1
3      Grocery Store   0.1
4        Golf Course   0.1


----Brent----
               venue  freq
0  Fish & Chips Shop  0.13
1               Park  0.13
2        Yoga Studio  0.07
3                Bar  0.07
4                Pub  0.07


----Bromley----
                  venue  freq
0                  Park  0.11
1  Gym / Fitness Center  0.07
2    Italian Restaurant  0.07
3           Supermarket  0.07
4         Movie Theater  0.04


----Camden----
                venue  freq
0                Café  0.08
1         Coffee Shop  0.07
2               

In [85]:
#write to pandas dataframe
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [86]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = london_onehot_grouped['Neighbourhood']

for ind in np.arange(london_onehot_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(london_onehot_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Barking and Dagenham,Pub,Grocery Store,Liquor Store,Shipping Store,Chinese Restaurant,Train Station,Bookstore,Metro Station,Gas Station,Construction & Landscaping
1,Barnet,Grocery Store,Golf Course,Indian Restaurant,Athletics & Sports,Bus Stop,Stables,Market,Tennis Court,Beer Garden,Coffee Shop
2,Bexley,Museum,Lake,Pub,Grocery Store,Restaurant,Clothing Store,Home Service,Pet Store,Fish & Chips Shop,Golf Course
3,Brent,Park,Fish & Chips Shop,Yoga Studio,Bar,Pub,Department Store,Fast Food Restaurant,Café,Deli / Bodega,Metro Station
4,Bromley,Park,Italian Restaurant,Gym / Fitness Center,Supermarket,Hotel,Stationery Store,Food,Movie Theater,Fast Food Restaurant,Office


In [None]:
## Cluster Neighborhood

In [87]:
# set number of clusters
kclusters = 4

london_grouped_clustering = london_onehot_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(london_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 3, 3, 1, 1, 1, 1, 3, 1], dtype=int32)

In [88]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)



In [90]:
london_merged = london_grouped 

# merge london data with london groups to add latitude/longitude for each neighborhood
london_merged = london_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='District')

london_merged.head()

Unnamed: 0,London zone,District,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,Camden,51.524655,-0.129067,1,Café,Coffee Shop,Pub,Bakery,Italian Restaurant,Hotel,Bookstore,Pizza Place,Park,Museum
1,1,City of London,51.514622,-0.092233,1,Coffee Shop,Gym / Fitness Center,Italian Restaurant,Steakhouse,Wine Bar,Hotel,Seafood Restaurant,French Restaurant,Scenic Lookout,Sushi Restaurant
2,1,Hackney,51.528535,-0.083053,1,Coffee Shop,Café,Pub,Bar,Cocktail Bar,Hotel,Vietnamese Restaurant,Yoga Studio,Italian Restaurant,Burger Joint
3,1,Islington,51.526789,-0.106001,1,Pub,Café,Coffee Shop,Sandwich Place,Park,Pizza Place,Bar,Italian Restaurant,Wine Shop,Gym / Fitness Center
4,1,Kensington and Chelsea,51.49562,-0.180086,1,Hotel,Science Museum,Pub,Bakery,Exhibit,Garden,Coffee Shop,Italian Restaurant,Pizza Place,Café


In [91]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=13)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(london_merged['Latitude'], london_merged['Longitude'], london_merged['District'], london_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=9,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Look at each cluster to see the most common venues

In [92]:
#Cluser 0 
london_merged.loc[london_merged['Cluster Labels'] == 0, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,London zone,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
85,5,Chinese Restaurant,Bar,Indian Restaurant,Grocery Store,Event Service,Park,Fish & Chips Shop,Farmers Market,Fast Food Restaurant,Field
100,6,Chinese Restaurant,Bar,Indian Restaurant,Grocery Store,Event Service,Park,Fish & Chips Shop,Farmers Market,Fast Food Restaurant,Field
108,7,Chinese Restaurant,Bar,Indian Restaurant,Grocery Store,Event Service,Park,Fish & Chips Shop,Farmers Market,Fast Food Restaurant,Field


### Cluster 1 is located further away to East London. If Chinese food is popular, it might be possible that a high population of Chinese community is located there.  Seems like specific cuisines are in favour


In [93]:
# Cluster 1 
london_merged.loc[london_merged['Cluster Labels'] == 1, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,London zone,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,Café,Coffee Shop,Pub,Bakery,Italian Restaurant,Hotel,Bookstore,Pizza Place,Park,Museum
1,1,Coffee Shop,Gym / Fitness Center,Italian Restaurant,Steakhouse,Wine Bar,Hotel,Seafood Restaurant,French Restaurant,Scenic Lookout,Sushi Restaurant
2,1,Coffee Shop,Café,Pub,Bar,Cocktail Bar,Hotel,Vietnamese Restaurant,Yoga Studio,Italian Restaurant,Burger Joint
3,1,Pub,Café,Coffee Shop,Sandwich Place,Park,Pizza Place,Bar,Italian Restaurant,Wine Shop,Gym / Fitness Center
4,1,Hotel,Science Museum,Pub,Bakery,Exhibit,Garden,Coffee Shop,Italian Restaurant,Pizza Place,Café
7,1,Coffee Shop,Hotel,Indian Restaurant,Pub,Clothing Store,Pizza Place,Café,Italian Restaurant,Flea Market,Canal Lock
8,1,Café,Pub,Coffee Shop,Grocery Store,Bus Stop,Gym / Fitness Center,Hotel,Platform,Park,Cocktail Bar
9,1,Clothing Store,Café,Hotel,Boutique,Italian Restaurant,Art Gallery,Pub,Coffee Shop,Lounge,Japanese Restaurant
10,2,Grocery Store,Golf Course,Indian Restaurant,Athletics & Sports,Bus Stop,Stables,Market,Tennis Court,Beer Garden,Coffee Shop
12,2,Café,Coffee Shop,Pub,Bakery,Italian Restaurant,Hotel,Bookstore,Pizza Place,Park,Museum


### Interesting to see cluster 0 is overwhelmed with cafe, leisure and restaurants. It is randomly distributed all over 7 zones in London. Here are more like shopping malls or busy streets where coffee shops are very popular


In [94]:
#Cluster 2
london_merged.loc[london_merged['Cluster Labels'] == 2, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,London zone,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
49,3,Bowling Alley,Middle Eastern Restaurant,Pub,Fast Food Restaurant,Hookah Bar,Pizza Place,Farmers Market,Field,Fish & Chips Shop,Flea Market
73,4,Bowling Alley,Middle Eastern Restaurant,Pub,Fast Food Restaurant,Hookah Bar,Pizza Place,Farmers Market,Field,Fish & Chips Shop,Flea Market
89,5,Bowling Alley,Middle Eastern Restaurant,Pub,Fast Food Restaurant,Hookah Bar,Pizza Place,Farmers Market,Field,Fish & Chips Shop,Flea Market


### Cluster 3 is mostly for Middle East population, bowling alley seems to be the local activity. Fastfood or Pub are more popular

In [95]:
# Cluster 3
london_merged.loc[london_merged['Cluster Labels'] == 3, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,London zone,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,1,Pub,Hotel,Restaurant,Gym / Fitness Center,Park,Portuguese Restaurant,Café,Art Gallery,Arts & Crafts Store,Nightclub
6,1,Pub,Park,Fast Food Restaurant,Café,Garden,Sandwich Place,Argentinian Restaurant,Italian Restaurant,Bus Stop,Cricket Ground
11,2,Park,Fish & Chips Shop,Yoga Studio,Bar,Pub,Department Store,Fast Food Restaurant,Café,Deli / Bodega,Metro Station
13,2,Pub,Café,Park,Hotel,Gym / Fitness Center,Mini Golf,Chinese Restaurant,Gas Station,Train Station,Scenic Lookout
21,2,Pub,Hotel,Restaurant,Gym / Fitness Center,Park,Portuguese Restaurant,Café,Art Gallery,Arts & Crafts Store,Nightclub
23,2,Bus Stop,Café,Fast Food Restaurant,Historic Site,Pub,Supermarket,Boutique,Park,Gym / Fitness Center,Waterfront
25,2,Pub,Park,Fast Food Restaurant,Café,Garden,Sandwich Place,Argentinian Restaurant,Italian Restaurant,Bus Stop,Cricket Ground
32,3,Park,Fish & Chips Shop,Yoga Studio,Bar,Pub,Department Store,Fast Food Restaurant,Café,Deli / Bodega,Metro Station
36,3,Pub,Café,Park,Hotel,Gym / Fitness Center,Mini Golf,Chinese Restaurant,Gas Station,Train Station,Scenic Lookout
45,3,Pub,Hotel,Restaurant,Gym / Fitness Center,Park,Portuguese Restaurant,Café,Art Gallery,Arts & Crafts Store,Nightclub


### Cluster 3 is quite similar to cluster 1 but with more pub - more "localness" in it. It goes up to the northwest area of London. 