# IBM Coursera Capstone Project

## Battle of the Neighbourhoods: Where in Chelsea, London should a company open a new restaurant?

### Import Libraries

In [341]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from bs4 import BeautifulSoup
import requests

from pandas.io.json import json_normalize

from sklearn.cluster import KMeans

import matplotlib.cm as cm
import matplotlib.colors as colors

### Scrape Data from the Wikipedia Page Into a Data Frame

In [342]:
url = requests.get("https://en.wikipedia.org/wiki/List_of_areas_of_London")
soup = BeautifulSoup(url.text, 'html.parser')
soup.prettify()

'<!DOCTYPE html>\n<html class="client-nojs" dir="ltr" lang="en">\n <head>\n  <meta charset="utf-8"/>\n  <title>\n   List of areas of London - Wikipedia\n  </title>\n  <script>\n   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgMonthNamesShort":["","Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],"wgRequestId":"XmEQDwpAMNEAA0-yCZ4AAADT","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_areas_of_London","wgTitle":"List of areas of London","wgCurRevisionId":943969323,"wgRevisionId":943969323,"wgArticleId":11915713,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Use dmy dates from August 

In [343]:
table = soup.find('table',{'class':'wikitable sortable'})
table

<table class="wikitable sortable" style="clear:both;">
<tbody><tr>
<th>Location</th>
<th>London borough</th>
<th>Post town</th>
<th>Postcode district</th>
<th>Dial code</th>
<th>OS grid ref
</th></tr>
<tr>
<td><a href="/wiki/Abbey_Wood" title="Abbey Wood">Abbey Wood</a></td>
<td>Bexley,  Greenwich <sup class="reference" id="cite_ref-mills1_7-0"><a href="#cite_note-mills1-7">[7]</a></sup></td>
<td>LONDON</td>
<td>SE2</td>
<td>020</td>
<td><span class="plainlinks nourlexpansion" style="white-space: nowrap"><a class="external text" href="https://tools.wmflabs.org/geohack/en/51.48648031512;0.10859224316653_region:GB_scale:25000?pagename=List_of_areas_of_London">TQ465785</a></span>
</td></tr>
<tr>
<td><a href="/wiki/Acton,_London" title="Acton, London">Acton</a></td>
<td>Ealing, Hammersmith and Fulham<sup class="reference" id="cite_ref-mills2_8-0"><a href="#cite_note-mills2-8">[8]</a></sup></td>
<td>LONDON</td>
<td>W3, W4</td>
<td>020</td>
<td><span class="plainlinks nourlexpansion" style="

In [344]:
links = table.findAll('td')
links

[<td><a href="/wiki/Abbey_Wood" title="Abbey Wood">Abbey Wood</a></td>,
 <td>Bexley,  Greenwich <sup class="reference" id="cite_ref-mills1_7-0"><a href="#cite_note-mills1-7">[7]</a></sup></td>,
 <td>LONDON</td>,
 <td>SE2</td>,
 <td>020</td>,
 <td><span class="plainlinks nourlexpansion" style="white-space: nowrap"><a class="external text" href="https://tools.wmflabs.org/geohack/en/51.48648031512;0.10859224316653_region:GB_scale:25000?pagename=List_of_areas_of_London">TQ465785</a></span>
 </td>,
 <td><a href="/wiki/Acton,_London" title="Acton, London">Acton</a></td>,
 <td>Ealing, Hammersmith and Fulham<sup class="reference" id="cite_ref-mills2_8-0"><a href="#cite_note-mills2-8">[8]</a></sup></td>,
 <td>LONDON</td>,
 <td>W3, W4</td>,
 <td>020</td>,
 <td><span class="plainlinks nourlexpansion" style="white-space: nowrap"><a class="external text" href="https://tools.wmflabs.org/geohack/en/51.51058775877;-0.26498902668911_region:GB_scale:25000?pagename=List_of_areas_of_London">TQ205805</a></

In [345]:
Neighbourhood = []
Borough = []
PostTown = []
PostCodeDistrict = []
DialCode = []
GridRef = []

In [346]:
for i in range(0, len(links), 6):
    Neighbourhood.append(links[i].find(text=True))
    Borough.append(links[i+1].find(text=True))
    PostTown.append(links[i+2].find(text=True))
    PostCodeDistrict.append(links[i+3].find(text=True))
    DialCode.append(links[i+4].find(text=True))
    GridRef.append(links[i+5].find(text=True).rstrip())
    
df = pd.DataFrame(data=[Neighbourhood, Borough, PostTown, PostCodeDistrict, DialCode, GridRef]).transpose()
df.columns = ['Neighbourhood', 'Borough', 'PostTown', 'PostCode', 'DialCode', 'GridRef']
df.head(10)

Unnamed: 0,Neighbourhood,Borough,PostTown,PostCode,DialCode,GridRef
0,Abbey Wood,"Bexley, Greenwich",LONDON,SE2,20,TQ465785
1,Acton,"Ealing, Hammersmith and Fulham",LONDON,"W3, W4",20,TQ205805
2,Addington,Croydon,CROYDON,CR0,20,TQ375645
3,Addiscombe,Croydon,CROYDON,CR0,20,TQ345665
4,Albany Park,Bexley,"BEXLEY, SIDCUP","DA5, DA14",20,TQ478728
5,Aldborough Hatch,Redbridge,ILFORD,IG2,20,TQ455895
6,Aldgate,City,LONDON,EC3,20,TQ334813
7,Aldwych,Westminster,LONDON,WC2,20,TQ307810
8,Alperton,Brent,WEMBLEY,HA0,20,TQ185835
9,Anerley,Bromley,LONDON,SE20,20,TQ345695


#### Where there are locations with two postcodes, these locations will be split onto different rows

In [347]:
df1 = df.drop('PostCode', axis=1).join(df['PostCode'].str.split(',', expand=True).stack().reset_index(level=1, drop=True).rename('PostCode'))
df1.head()

Unnamed: 0,Neighbourhood,Borough,PostTown,DialCode,GridRef,PostCode
0,Abbey Wood,"Bexley, Greenwich",LONDON,20,TQ465785,SE2
1,Acton,"Ealing, Hammersmith and Fulham",LONDON,20,TQ205805,W3
1,Acton,"Ealing, Hammersmith and Fulham",LONDON,20,TQ205805,W4
2,Addington,Croydon,CROYDON,20,TQ375645,CR0
3,Addiscombe,Croydon,CROYDON,20,TQ345665,CR0


In [348]:
df1.shape

(636, 6)

#### Drop the columns we do not need, DialCode and GridRef

In [349]:
df1.drop(['DialCode', 'GridRef'], axis=1, inplace=True)
df1.head()

Unnamed: 0,Neighbourhood,Borough,PostTown,PostCode
0,Abbey Wood,"Bexley, Greenwich",LONDON,SE2
1,Acton,"Ealing, Hammersmith and Fulham",LONDON,W3
1,Acton,"Ealing, Hammersmith and Fulham",LONDON,W4
2,Addington,Croydon,CROYDON,CR0
3,Addiscombe,Croydon,CROYDON,CR0


### Use only the SW Postcodes

In [350]:
df_london = df1 # re-assigns to df_london
# Strips whitespaces before postcode
df_london.PostCode = df_london.PostCode.str.strip()
# New dataframe for South East London postcodes - df_se
df_sw = df_london[df_london['PostCode'].str.startswith(('SW'))].reset_index(drop=True)
df_sw.head()

Unnamed: 0,Neighbourhood,Borough,PostTown,PostCode
0,Balham,Wandsworth,LONDON,SW12
1,Barnes,Richmond upon Thames,LONDON,SW13
2,Battersea,Wandsworth,LONDON,SW11
3,Belgravia,Westminster,LONDON,SW1
4,Brixton,Lambeth,LONDON,SW2


In [351]:
df_sw.shape

(50, 4)

### Get the Latitude and Longitude coordinates of each Neighbourhood

Next, we will get the Latitude and Longitude coordinates of each Neighbourhood. To do this, we will use the Geocoder package with the arcgis_geocoder to obtain the location coordinates. We will first import the relevent packages, and then define a function to get the Lat/Long coordinates of each location. We will then be able to get the Lat/Long coordinates of any of the Post Codes in df1.

In [352]:
!pip -q install geopy
print('Geopy Installed')

Geopy Installed


In [353]:
!pip -q install geocoder
print('Geocoder Installed')

Geocoder Installed


In [354]:
import geocoder
from geopy.geocoders import Nominatim

In [355]:
def get_latlong(arcgis_geocoder):
    
    lat_long = None
    
    # While loop helps to create a continous run until all the location coordinates are geocoded
    while(lat_long is None):
        g = geocoder.arcgis('{}, London, United Kingdom'.format(arcgis_geocoder))
        lat_long = g.latlng
    return lat_long

#### Get the Lat/Long coordinates for the postcode 'CRO' to test the function works

In [356]:
example = get_latlong('SW12')
example

[51.44822000000005, -0.1483899999999494]

In [357]:
gg = geocoder.geocodefarm(example, method = 'reverse')
gg

<[OK] Geocodefarm - Reverse [Catherine Baird Court, 34 Balham High Road, London, SW12 9DJ, United Kingdom]>

#### Apply the function to the Data Frame

In [358]:
post_codes = df_sw['PostCode']    
coordinates = [get_latlong(post_code) for post_code in post_codes.tolist()]

In [359]:
df2 = df_sw

df_se_coordinates = pd.DataFrame(coordinates, columns = ['Latitude', 'Longitude'])
df2['Latitude'] = df_se_coordinates['Latitude']
df2['Longitude'] = df_se_coordinates['Longitude']
df2.head(5)

Unnamed: 0,Neighbourhood,Borough,PostTown,PostCode,Latitude,Longitude
0,Balham,Wandsworth,LONDON,SW12,51.44822,-0.14839
1,Barnes,Richmond upon Thames,LONDON,SW13,51.47457,-0.24212
2,Battersea,Wandsworth,LONDON,SW11,51.4676,-0.1629
3,Belgravia,Westminster,LONDON,SW1,51.49713,-0.13829
4,Brixton,Lambeth,LONDON,SW2,51.45295,-0.12083


In [360]:
df2.shape

(50, 6)

### Foursquare API Location Data

Now we can start our analysis on the final Data Frame. First, we need to get the venu location data for each Neighbourhood. We are most interested in the Movie Theatre category within the Arts and Entertainment category. We need to know exactly where there are already cinemas located, as the company does not want to open their cinema in close proximity to another due to the risk of decreased profit. When we know which Neighbourhoods have the least cinemas, we can use them for further analysis.

#### Firstly, for visualisation, we will create a map of Neighbourhoods in London using the Folium package

In [361]:
#Import the Folium Package
!conda install -c conda-forge folium=0.5.0 --yes
import folium

Solving environment: done

# All requested packages already installed.



In [362]:
#Get the Latitude and Longitude coordinates of London
address = 'London, United Kingdom'

geolocator = Nominatim(user_agent="london_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of London are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of London are 51.5073219, -0.1276474.


In [363]:
#Create the map of the Neighbourhoods in London
map_london = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, lng, borough, neighbourhood, in zip(df2['Latitude'], df2['Longitude'], df2['Borough'], df2['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='purple',
        fill=True,
        fill_color='#3199cc',
        fill_opacity=0.3,
        parse_html=False).add_to(map_london)  
    
map_london

#### We will use Foursquare to obtain the venu data and create a function to extract the category data of the venues.

We need to get the Foursquare credentials and version

In [364]:
LIMIT = 1000
CLIENT_ID = '2PKR3OO53ZLSCZYTCDF1FQ12XN0HQS2LO21BMGPEGYKCMIYO' 
CLIENT_SECRET = 'MCE5RBGJJY3UZVHB1IB2F5JJSMWZLUNGXY0RXOU5URK3WWNO' 
VERSION = '20180605' 

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 2PKR3OO53ZLSCZYTCDF1FQ12XN0HQS2LO21BMGPEGYKCMIYO
CLIENT_SECRET:MCE5RBGJJY3UZVHB1IB2F5JJSMWZLUNGXY0RXOU5URK3WWNO


In [365]:
radius = 500
LIMIT = 1000

venues = []

for lat, long, neighbourhood in zip(df2['Latitude'], df2['Longitude'], df2['Neighbourhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighbourhood,
            lat, 
            long,
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))


In [366]:
# convert the venues list into a new DataFrame
London_venues = pd.DataFrame(venues)

# define the column names
London_venues.columns = ['Neighborhood', 'Latitude', 'Longitude', 'Venue Name', 'Venue Latitude', 'Venue Longitude', 'Venue Category']

print("The venues data set contains",London_venues.shape[0],"venues.")
London_venues.head()

The venues data set contains 2539 venues.


Unnamed: 0,Neighborhood,Latitude,Longitude,Venue Name,Venue Latitude,Venue Longitude,Venue Category
0,Balham,51.44822,-0.14839,Ciullosteria,51.447144,-0.148981,Italian Restaurant
1,Balham,51.44822,-0.14839,M1LK,51.44445,-0.150913,Coffee Shop
2,Balham,51.44822,-0.14839,We Brought Beer,51.444324,-0.150656,Beer Store
3,Balham,51.44822,-0.14839,Brickwood Coffee & Bread,51.444509,-0.151127,Coffee Shop
4,Balham,51.44822,-0.14839,The Grove,51.448286,-0.150964,Pub


In [367]:
unique = London_venues['Venue Category'].value_counts().to_frame(name='Count')
unique.head(10)

Unnamed: 0,Count
Coffee Shop,174
Pub,153
Hotel,124
Café,112
Italian Restaurant,85
Sandwich Place,67
Indian Restaurant,67
Bar,62
Gym / Fitness Center,62
Bakery,53


#### Extracting only Restaurants

In [368]:
df_restaurants = London_venues.loc[London_venues['Venue Category'].str.contains('Restaurant') == True]
df_restaurants.head(10)

Unnamed: 0,Neighborhood,Latitude,Longitude,Venue Name,Venue Latitude,Venue Longitude,Venue Category
0,Balham,51.44822,-0.14839,Ciullosteria,51.447144,-0.148981,Italian Restaurant
9,Balham,51.44822,-0.14839,The Georgian,51.452242,-0.147677,Caucasian Restaurant
16,Balham,51.44822,-0.14839,Holy Cow,51.447371,-0.149012,Indian Restaurant
17,Balham,51.44822,-0.14839,The Kebab Company,51.445773,-0.150407,Fast Food Restaurant
20,Balham,51.44822,-0.14839,Nando's,51.445837,-0.150603,Portuguese Restaurant
29,Balham,51.44822,-0.14839,Megan’s On The Hill,51.443974,-0.150208,Restaurant
31,Balham,51.44822,-0.14839,Chatkhara,51.452637,-0.147465,Indian Restaurant
33,Balham,51.44822,-0.14839,Chicken Cottage,51.452127,-0.147835,Fast Food Restaurant
37,Barnes,51.47457,-0.24212,Awesome Thai Cuisine,51.474905,-0.240909,Thai Restaurant
45,Barnes,51.47457,-0.24212,Côte Brasserie,51.472424,-0.246774,French Restaurant


In [369]:
df_restaurants.shape

(597, 7)

#### Create a Folium map of the Restaurant Data Frame

In [370]:
map_restaurants = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, lng, neighbourhood, venue in zip(df_restaurants['Venue Latitude'], df_restaurants['Venue Longitude'], df_restaurants['Neighborhood'], df_restaurants['Venue Name']):
    label = '{}, {}'.format(neighbourhood, venue)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='purple',
        fill=True,
        fill_color='#3199cc',
        fill_opacity=0.3,
        parse_html=False).add_to(map_restaurants)  
    
map_restaurants

#### We will now analyse each venue in their neighbourhood locations

First, we will use one hot encoding to turn the two venue categories into numerical data. 

In [301]:
london_onehot = pd.get_dummies(London_venues[['Venue Category']], prefix = "", prefix_sep = "")
london_onehot['Neighborhood'] = London_venues['Neighborhood']
fixed_columns = [london_onehot.columns[-1]] + list(london_onehot.columns[:-1])
london_onehot = london_onehot[fixed_columns]

In [302]:
london_onehot.head(5)

Unnamed: 0,Neighborhood,American Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Australian Restaurant,Auto Garage,BBQ Joint,Bagel Shop,Bakery,Bar,Bed & Breakfast,Beer Bar,Beer Store,Bike Shop,Bistro,Bookstore,Boutique,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Burger Joint,Burrito Place,Bus Stop,Café,Camera Store,Candy Store,Caribbean Restaurant,Caucasian Restaurant,Champagne Bar,Cheese Shop,Chinese Restaurant,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Quad,Concert Hall,Convenience Store,Convention Center,Cosmetics Shop,Creperie,Cupcake Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Donut Shop,Electronics Store,English Restaurant,Exhibit,Fabric Shop,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,Gastropub,Gay Bar,Gift Shop,Gourmet Shop,Grilled Meat Restaurant,Grocery Store,Gym,Gym / Fitness Center,Health & Beauty Service,Health Food Store,Historic Site,Hookah Bar,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Irish Pub,Italian Restaurant,Japanese Restaurant,Juice Bar,Kids Store,Kitchen Supply Store,Korean Restaurant,Lake,Lebanese Restaurant,Library,Lounge,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Modern European Restaurant,Moroccan Restaurant,Movie Theater,Multiplex,Music Venue,Nightclub,Office,Optical Shop,Pakistani Restaurant,Park,Pedestrian Plaza,Performing Arts Venue,Peruvian Restaurant,Pharmacy,Pizza Place,Playground,Plaza,Polish Restaurant,Portuguese Restaurant,Pub,Restaurant,River,Road,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Science Museum,Seafood Restaurant,Shop & Service,Skate Park,Skating Rink,South American Restaurant,South Indian Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Stationery Store,Steakhouse,Street Food Gathering,Student Center,Supermarket,Sushi Restaurant,Tapas Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Trail,Train Station,Tram Station,Turkish Restaurant,University,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Windmill,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Balham,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Balham,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Balham,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Balham,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Balham,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [303]:
london_onehot.shape

(2539, 169)

In [313]:
london_onehot.loc[london_onehot['Asian Restaurant'] != 0].head()

Unnamed: 0,Neighborhood,American Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Australian Restaurant,Auto Garage,BBQ Joint,Bagel Shop,Bakery,Bar,Bed & Breakfast,Beer Bar,Beer Store,Bike Shop,Bistro,Bookstore,Boutique,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Burger Joint,Burrito Place,Bus Stop,Café,Camera Store,Candy Store,Caribbean Restaurant,Caucasian Restaurant,Champagne Bar,Cheese Shop,Chinese Restaurant,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Quad,Concert Hall,Convenience Store,Convention Center,Cosmetics Shop,Creperie,Cupcake Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Donut Shop,Electronics Store,English Restaurant,Exhibit,Fabric Shop,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,Gastropub,Gay Bar,Gift Shop,Gourmet Shop,Grilled Meat Restaurant,Grocery Store,Gym,Gym / Fitness Center,Health & Beauty Service,Health Food Store,Historic Site,Hookah Bar,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Irish Pub,Italian Restaurant,Japanese Restaurant,Juice Bar,Kids Store,Kitchen Supply Store,Korean Restaurant,Lake,Lebanese Restaurant,Library,Lounge,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Modern European Restaurant,Moroccan Restaurant,Movie Theater,Multiplex,Music Venue,Nightclub,Office,Optical Shop,Pakistani Restaurant,Park,Pedestrian Plaza,Performing Arts Venue,Peruvian Restaurant,Pharmacy,Pizza Place,Playground,Plaza,Polish Restaurant,Portuguese Restaurant,Pub,Restaurant,River,Road,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Science Museum,Seafood Restaurant,Shop & Service,Skate Park,Skating Rink,South American Restaurant,South Indian Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Stationery Store,Steakhouse,Street Food Gathering,Student Center,Supermarket,Sushi Restaurant,Tapas Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Trail,Train Station,Tram Station,Turkish Restaurant,University,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Windmill,Wine Bar,Wine Shop,Women's Store,Yoga Studio
458,Clapham,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
507,Colliers Wood,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
664,Earls Court,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
674,Earls Court,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
682,Earlsfield,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [314]:
London_grouped = london_onehot.groupby('Neighborhood').mean().reset_index()
London_grouped.head()

Unnamed: 0,Neighborhood,American Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Australian Restaurant,Auto Garage,BBQ Joint,Bagel Shop,Bakery,Bar,Bed & Breakfast,Beer Bar,Beer Store,Bike Shop,Bistro,Bookstore,Boutique,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Burger Joint,Burrito Place,Bus Stop,Café,Camera Store,Candy Store,Caribbean Restaurant,Caucasian Restaurant,Champagne Bar,Cheese Shop,Chinese Restaurant,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Quad,Concert Hall,Convenience Store,Convention Center,Cosmetics Shop,Creperie,Cupcake Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Donut Shop,Electronics Store,English Restaurant,Exhibit,Fabric Shop,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,Gastropub,Gay Bar,Gift Shop,Gourmet Shop,Grilled Meat Restaurant,Grocery Store,Gym,Gym / Fitness Center,Health & Beauty Service,Health Food Store,Historic Site,Hookah Bar,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Irish Pub,Italian Restaurant,Japanese Restaurant,Juice Bar,Kids Store,Kitchen Supply Store,Korean Restaurant,Lake,Lebanese Restaurant,Library,Lounge,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Modern European Restaurant,Moroccan Restaurant,Movie Theater,Multiplex,Music Venue,Nightclub,Office,Optical Shop,Pakistani Restaurant,Park,Pedestrian Plaza,Performing Arts Venue,Peruvian Restaurant,Pharmacy,Pizza Place,Playground,Plaza,Polish Restaurant,Portuguese Restaurant,Pub,Restaurant,River,Road,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Science Museum,Seafood Restaurant,Shop & Service,Skate Park,Skating Rink,South American Restaurant,South Indian Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Stationery Store,Steakhouse,Street Food Gathering,Student Center,Supermarket,Sushi Restaurant,Tapas Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Trail,Train Station,Tram Station,Turkish Restaurant,University,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Windmill,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Balham,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.0,0.028571,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.114286,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.085714,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.028571,0.085714,0.028571,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.028571
1,Barnes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.090909,0.0,0.0,0.045455,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.090909,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Battersea,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.08,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.08,0.04,0.0,0.0,0.04,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0
3,Belgravia,0.0,0.0,0.010101,0.0,0.010101,0.0,0.010101,0.0,0.0,0.0,0.010101,0.010101,0.0,0.010101,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.010101,0.020202,0.0,0.0,0.020202,0.010101,0.0,0.0,0.0,0.0,0.010101,0.020202,0.0,0.0,0.0,0.010101,0.10101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.020202,0.0,0.010101,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.0,0.010101,0.0,0.010101,0.0,0.0,0.010101,0.0,0.0,0.010101,0.0,0.0,0.0,0.010101,0.030303,0.0,0.0,0.020202,0.0,0.0,0.090909,0.020202,0.0,0.020202,0.0,0.0,0.0,0.030303,0.0,0.020202,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.010101,0.0,0.0,0.010101,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.010101,0.010101,0.0,0.0,0.010101,0.030303,0.030303,0.0,0.0,0.070707,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.0,0.010101,0.0,0.020202,0.020202,0.010101,0.010101,0.0,0.010101,0.040404,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0
4,Brixton,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.115385,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.038462,0.153846,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0


In [305]:
num_top_venues = 4 # Top common venues needed
for hood in London_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = London_grouped[London_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue', 'freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending = False).reset_index(drop = True).head(num_top_venues))
    print('\n')

----Balham----
                  venue  freq
0           Coffee Shop  0.11
1         Grocery Store  0.09
2                   Pub  0.09
3  Fast Food Restaurant  0.06


----Barnes----
               venue  freq
0        Pizza Place  0.09
1     Farmers Market  0.09
2               Park  0.09
3  Food & Drink Shop  0.09


----Battersea----
               venue  freq
0               Café  0.12
1                Pub  0.08
2  Indian Restaurant  0.08
3        Supermarket  0.08


----Belgravia----
            venue  freq
0     Coffee Shop  0.10
1           Hotel  0.09
2  Sandwich Place  0.07
3         Theater  0.04


----Brixton----
                  venue  freq
0                   Pub  0.15
1           Coffee Shop  0.12
2     Indian Restaurant  0.08
3  Caribbean Restaurant  0.08


----Brompton----
                 venue  freq
0                 Café  0.06
1                  Pub  0.05
2                Hotel  0.05
3  Japanese Restaurant  0.05


----Castelnau----
               venue  freq
0        

In [306]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending = False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [307]:
num_top_venues = 10
indicators = ['st', 'nd', 'rd']
# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighborhood'] = London_grouped['Neighborhood']
for ind in np.arange(London_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(London_grouped.iloc[ind, :], num_top_venues)
neighbourhoods_venues_sorted.head(5)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Balham,Coffee Shop,Grocery Store,Pub,Bakery,Indian Restaurant,Fast Food Restaurant,Burger Joint,Café,Caucasian Restaurant,Shop & Service
1,Barnes,Food & Drink Shop,Farmers Market,Pub,Pizza Place,Park,Bakery,French Restaurant,Thai Restaurant,Movie Theater,Gastropub
2,Battersea,Café,Bar,Supermarket,Pub,Indian Restaurant,Seafood Restaurant,Breakfast Spot,Sandwich Place,Restaurant,Italian Restaurant
3,Belgravia,Coffee Shop,Hotel,Sandwich Place,Theater,Restaurant,Gym / Fitness Center,Italian Restaurant,Pub,Hotel Bar,Sporting Goods Shop
4,Brixton,Pub,Coffee Shop,Indian Restaurant,Caribbean Restaurant,Portuguese Restaurant,Bike Shop,Convenience Store,Pizza Place,Restaurant,Music Venue


### Cluster the Neighbourhoods

In [315]:
london_grouped_clustering = London_grouped.drop('Neighborhood', 1)

In [316]:
# set number of clusters
kclusters = 5
# run k-means clustering
kmeans = KMeans(n_clusters = kclusters, random_state=0).fit(london_grouped_clustering)
# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([0, 4, 0, 3, 0, 0, 4, 0, 0, 0, 3, 0, 0, 0, 3, 0, 3, 0, 3, 0, 2, 1,
       2, 0, 3, 0, 0, 0, 0, 3, 0, 0, 3, 2, 1, 0, 0, 2, 0, 0, 3, 0],
      dtype=int32)

In [320]:
London_merged = df2
# match/merge SE London data with latitude/longitude for each neighborhood
london_merged_latlong = London_merged.join(neighbourhoods_venues_sorted.set_index('Neighborhood'), on = 'Neighbourhood')
london_merged_latlong.head(5)

Unnamed: 0,Neighbourhood,Borough,PostTown,PostCode,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,Cluster Labels,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Balham,Wandsworth,LONDON,SW12,51.44822,-0.14839,Coffee Shop,Grocery Store,Pub,0,Bakery,Indian Restaurant,Fast Food Restaurant,Burger Joint,Café,Caucasian Restaurant,Shop & Service
1,Barnes,Richmond upon Thames,LONDON,SW13,51.47457,-0.24212,Food & Drink Shop,Farmers Market,Pub,4,Pizza Place,Park,Bakery,French Restaurant,Thai Restaurant,Movie Theater,Gastropub
2,Battersea,Wandsworth,LONDON,SW11,51.4676,-0.1629,Café,Bar,Supermarket,0,Pub,Indian Restaurant,Seafood Restaurant,Breakfast Spot,Sandwich Place,Restaurant,Italian Restaurant
3,Belgravia,Westminster,LONDON,SW1,51.49713,-0.13829,Coffee Shop,Hotel,Sandwich Place,3,Theater,Restaurant,Gym / Fitness Center,Italian Restaurant,Pub,Hotel Bar,Sporting Goods Shop
4,Brixton,Lambeth,LONDON,SW2,51.45295,-0.12083,Pub,Coffee Shop,Indian Restaurant,0,Caribbean Restaurant,Portuguese Restaurant,Bike Shop,Convenience Store,Pizza Place,Restaurant,Music Venue


#### Visualise the resulting clusters

In [372]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(london_merged_latlong['Latitude'], london_merged_latlong['Longitude'], london_merged_latlong['Neighbourhood'], london_merged_latlong['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=20,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.8).add_to(map_clusters)
       
map_clusters

### Examine each cluster

#### Cluster 1

In [335]:
london_merged_latlong.loc[london_merged_latlong['Cluster Labels'] == 0, london_merged_latlong.columns[[1] + list(range(5, london_merged_latlong.shape[1]))]]

Unnamed: 0,Borough,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,Cluster Labels,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Wandsworth,-0.14839,Coffee Shop,Grocery Store,Pub,0,Bakery,Indian Restaurant,Fast Food Restaurant,Burger Joint,Café,Caucasian Restaurant,Shop & Service
2,Wandsworth,-0.1629,Café,Bar,Supermarket,0,Pub,Indian Restaurant,Seafood Restaurant,Breakfast Spot,Sandwich Place,Restaurant,Italian Restaurant
4,Lambeth,-0.12083,Pub,Coffee Shop,Indian Restaurant,0,Caribbean Restaurant,Portuguese Restaurant,Bike Shop,Convenience Store,Pizza Place,Restaurant,Music Venue
5,Lambeth,-0.11249,Pub,Coffee Shop,Indian Restaurant,0,Caribbean Restaurant,Portuguese Restaurant,Bike Shop,Convenience Store,Pizza Place,Restaurant,Music Venue
6,Kensington and Chelsea,-0.16248,Café,Pub,Hotel,0,Japanese Restaurant,Clothing Store,Coffee Shop,Gym / Fitness Center,English Restaurant,Bakery,Plaza
8,Kensington and Chelsea,-0.16248,Café,Pub,Hotel,0,Japanese Restaurant,Clothing Store,Coffee Shop,Gym / Fitness Center,English Restaurant,Bakery,Plaza
9,"Lambeth, Wandsworth",-0.13922,Pub,Café,Burger Joint,0,Bar,Grocery Store,Gym / Fitness Center,Italian Restaurant,Coffee Shop,Restaurant,Bus Stop
10,Merton,-0.20796,Bar,Coffee Shop,Pub,0,Sushi Restaurant,Clothing Store,Sandwich Place,Indian Restaurant,Optical Shop,Grocery Store,Italian Restaurant
13,Wandsworth,-0.18997,Coffee Shop,Pub,Café,0,Supermarket,Gym,Park,Tennis Court,Fast Food Restaurant,Chocolate Shop,Burger Joint
14,Richmond upon Thames,-0.26591,Coffee Shop,Grocery Store,Pub,0,Pizza Place,American Restaurant,Beer Store,Pharmacy,Creperie,Chinese Restaurant,Middle Eastern Restaurant


#### Cluster 2

In [336]:
london_merged_latlong.loc[london_merged_latlong['Cluster Labels'] == 1, london_merged_latlong.columns[[1] + list(range(5, london_merged_latlong.shape[1]))]]

Unnamed: 0,Borough,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,Cluster Labels,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,Croydon,-0.12753,Pub,Bar,Pizza Place,1,Park,Diner,Burger Joint,Supermarket,Playground,Video Game Store,Grilled Meat Restaurant
41,Lambeth,-0.12753,Pub,Bar,Pizza Place,1,Park,Diner,Burger Joint,Supermarket,Playground,Video Game Store,Grilled Meat Restaurant


#### Cluster 3

In [337]:
london_merged_latlong.loc[london_merged_latlong['Cluster Labels'] == 2, london_merged_latlong.columns[[1] + list(range(5, london_merged_latlong.shape[1]))]]

Unnamed: 0,Borough,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,Cluster Labels,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Wandsworth,-0.13056,Pub,Portuguese Restaurant,Gym / Fitness Center,2,Fish & Chips Shop,Indian Restaurant,Pizza Place,Performing Arts Venue,Bakery,Coffee Shop,Bus Stop
24,Lambeth,-0.13056,Pub,Portuguese Restaurant,Indian Restaurant,2,Bakery,Park,Gym / Fitness Center,Pizza Place,Performing Arts Venue,Restaurant,Caribbean Restaurant
25,Lambeth,-0.11249,Pub,Portuguese Restaurant,Indian Restaurant,2,Bakery,Park,Gym / Fitness Center,Pizza Place,Performing Arts Venue,Restaurant,Caribbean Restaurant
39,Lambeth,-0.13056,Pub,Portuguese Restaurant,Indian Restaurant,2,Bakery,Park,Gym / Fitness Center,Pizza Place,Performing Arts Venue,Restaurant,Caribbean Restaurant
40,Lambeth,-0.11249,Pub,Portuguese Restaurant,Indian Restaurant,2,Bakery,Park,Gym / Fitness Center,Pizza Place,Performing Arts Venue,Restaurant,Caribbean Restaurant
44,Lambeth,-0.13056,Pub,Portuguese Restaurant,Gym / Fitness Center,2,Fish & Chips Shop,Indian Restaurant,Pizza Place,Performing Arts Venue,Bakery,Coffee Shop,Bus Stop


#### Cluster 4

In [338]:
london_merged_latlong.loc[london_merged_latlong['Cluster Labels'] == 3, london_merged_latlong.columns[[1] + list(range(5, london_merged_latlong.shape[1]))]]

Unnamed: 0,Borough,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,Cluster Labels,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Westminster,-0.13829,Coffee Shop,Hotel,Sandwich Place,3,Theater,Restaurant,Gym / Fitness Center,Italian Restaurant,Pub,Hotel Bar,Sporting Goods Shop
11,Kensington and Chelsea,-0.18971,Hotel,Italian Restaurant,Garden,3,Pub,Grocery Store,Café,Cocktail Bar,Juice Bar,Indian Restaurant,Chinese Restaurant
12,Kensington and Chelsea,-0.18144,Hotel,Italian Restaurant,Garden,3,Pub,Grocery Store,Café,Cocktail Bar,Juice Bar,Indian Restaurant,Chinese Restaurant
16,Kensington and Chelsea,-0.17404,Café,Exhibit,Science Museum,3,Hotel,Italian Restaurant,Garden,Bar,Gift Shop,Bakery,Ice Cream Shop
18,Westminster,-0.13829,Coffee Shop,Hotel,Sandwich Place,3,Theater,Restaurant,Gym / Fitness Center,Italian Restaurant,Pub,Hotel Bar,Sporting Goods Shop
20,Westminster,-0.13829,Coffee Shop,Hotel,Sandwich Place,3,Theater,Restaurant,Gym / Fitness Center,Italian Restaurant,Pub,Hotel Bar,Sporting Goods Shop
27,Westminster,-0.13829,Coffee Shop,Hotel,Sandwich Place,3,Theater,Restaurant,Gym / Fitness Center,Italian Restaurant,Pub,Hotel Bar,Sporting Goods Shop
32,Kensington and Chelsea,-0.17404,Hotel,Café,Exhibit,3,Pub,Italian Restaurant,Garden,Science Museum,Japanese Restaurant,Bakery,Coffee Shop
33,Kensington and Chelsea,-0.16248,Hotel,Café,Exhibit,3,Pub,Italian Restaurant,Garden,Science Museum,Japanese Restaurant,Bakery,Coffee Shop
34,Kensington and Chelsea,-0.18971,Hotel,Café,Exhibit,3,Pub,Italian Restaurant,Garden,Science Museum,Japanese Restaurant,Bakery,Coffee Shop


#### Cluster 5

In [339]:
london_merged_latlong.loc[london_merged_latlong['Cluster Labels'] == 4, london_merged_latlong.columns[[1] + list(range(5, london_merged_latlong.shape[1]))]]

Unnamed: 0,Borough,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,Cluster Labels,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Richmond upon Thames,-0.24212,Food & Drink Shop,Farmers Market,Pub,4,Pizza Place,Park,Bakery,French Restaurant,Thai Restaurant,Movie Theater,Gastropub
7,Richmond upon Thames,-0.24212,Food & Drink Shop,Farmers Market,Pub,4,Pizza Place,Park,Bakery,French Restaurant,Thai Restaurant,Movie Theater,Gastropub
