# Coffee Shops near the British Museum
## Download Data

In [None]:
# import necessary libraries
import pandas as pd
import numpy as np
import json

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
geopy                     1.18.1                     py_0    conda-forge
Fetching package metadata .............
Solving package specifications: 

### Define Foursquare Credentials and Version

In [None]:
# Foursquare credentials are read from the environment so the secrets are never
# stored in (or committed with) the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hard-coded here has been exposed
# and should be revoked and regenerated.
import os

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
VERSION = '20180604'  # sent as the `v` query parameter of the search request
LIMIT = 50  # sent as the `limit` query parameter of the search request

# Point out the problem here rather than letting the API call fail later on.
if not CLIENT_ID or not CLIENT_SECRET:
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'the Foursquare request will be rejected.')

### Search for a specific venue category

#### Find British Museum's latitude and longitude

In [None]:
# Street address to geocode; the resulting coordinates are the centre of the
# venue search and the position of the museum marker on the maps.
address = 'Great Russell St, Bloomsbury, London WC1B 3DG, UK'

# Pass an explicit user agent: geopy deprecates the default one and newer
# releases refuse to build a Nominatim geocoder without it.
geolocator = Nominatim(user_agent='british_museum_coffee_explorer')
location = geolocator.geocode(address)

# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

#### Search for coffee that is within 500 metres from the British Museum and define the corresponding URL

In [None]:
# Search term and search radius (500, matching the 500 metres stated above)
# for the venue search around the museum coordinates.
search_query = 'coffee'
radius = 500

# Assemble the search endpoint URL; the placeholders are filled positionally
# in the order they appear in the query string.
url = (
    'https://api.foursquare.com/v2/venues/search'
    '?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'
).format(
    CLIENT_ID,
    CLIENT_SECRET,
    latitude,
    longitude,
    VERSION,
    search_query,
    radius,
    LIMIT,
)
url

#### Send the GET Request and examine the results

In [None]:
# Send the search request. The timeout stops the cell from hanging forever on a
# stalled connection, and raise_for_status() turns an HTTP error (for example
# rejected credentials) into an immediate exception instead of a confusing
# KeyError when the payload is unpacked later.
response = requests.get(url, timeout=30)
response.raise_for_status()

results = response.json()
results

#### Get the relevant part of the JSON and transform it into a pandas dataframe

In [None]:
# Unwrap the response envelope to reach the list of venue records
response_payload = results['response']
venues = response_payload['venues']

# Flatten the nested venue records into a dataframe and preview the first rows
dataframe = json_normalize(venues)
dataframe.head()

#### Define information of interest and filter dataframe

In [None]:
# Keep the venue name, its categories, and every flattened 'location.*' column
location_columns = [col for col in dataframe.columns if col.startswith('location.')]
filtered_columns = ['name', 'categories'] + location_columns
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category listed for a venue row.

    Parameters
    ----------
    row : mapping-like
        Object indexable by column name (for example a dataframe row). The
        category list is read from ``row['categories']``; when that key is
        missing, ``row['venue.categories']`` is used instead.

    Returns
    -------
    The ``'name'`` entry of the first category, or ``None`` when the category
    list is empty or the cell does not hold a list at all (for example NaN).

    Raises
    ------
    KeyError
        If neither ``'categories'`` nor ``'venue.categories'`` is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and must not be silently swallowed.
        categories_list = row['venue.categories']

    # A missing value is not a list, so check the type before taking len().
    if not isinstance(categories_list, (list, tuple)) or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

# Replace each row's category list with a single category name
category_names = dataframe_filtered.apply(get_category_type, axis=1)
dataframe_filtered['categories'] = category_names

# Shorten dotted column labels to their last component
# (e.g. 'location.lat' becomes 'lat')
dataframe_filtered.columns = [
    label.rsplit('.', 1)[-1] for label in dataframe_filtered.columns
]

dataframe_filtered

#### DataFrame Pre-processing

In [None]:
# Keep only the venues whose distance is at most 500 m and renumber the rows
within_500m = dataframe_filtered['distance'] <= 500
dataframe_filtered = dataframe_filtered[within_500m].reset_index(drop=True)

dataframe_filtered

There are still quite a lot of coffee shops within a 500 m radius, so let's narrow the selection to 400 m.

In [None]:
# Keep only the venues whose distance is at most 400 m and renumber the rows
within_400m = dataframe_filtered['distance'] <= 400
dataframe_filtered = dataframe_filtered[within_400m].reset_index(drop=True)

dataframe_filtered

The number of coffee shops now looks manageable.  
Let me check which columns the data contains.

In [None]:
# Show the column labels of the filtered dataframe
dataframe_filtered.columns

There are too many unnecessary columns in the dataframe. Extract only the information I need: the coffee shop name, latitude, longitude, and distance to the British Museum.

In [None]:
# Narrow the table down to the shop name, its coordinates and its distance
columns_of_interest = ['name', 'lat', 'lng', 'distance']
coffee_shop = dataframe_filtered[columns_of_interest]
coffee_shop

## Explore the coffee shops on the map

In [None]:
# generate map centred on the geocoded British Museum coordinates, so the
# centre always agrees with the marker below and with the address geocoded
# earlier in the notebook
venues_map = folium.Map(location=[latitude, longitude], zoom_start=15)

# add a red circle marker to represent the Museum
# (folium.CircleMarker is the same top-level name the cluster map uses)
folium.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='British Museum',
    fill=True,
    fill_color='red',
    fill_opacity=0.6
).add_to(venues_map)

# add the coffee shops as blue circle markers, using the venue category as
# the popup text
for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill=True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

# display map
venues_map

## Cluster the shops

In [None]:
# set number of clusters
kclusters = 3

# Select the numeric feature columns by name. This yields the same frame as
# dropping 'name', but it does not depend on the positional axis argument of
# drop() (rejected by recent pandas) and it stays correct if this cell is
# re-run after the Cluster_Labels column has been attached to coffee_shop.
# NOTE(review): the features are not scaled; 'distance' presumably spans a much
# larger numeric range than 'lat'/'lng' and may dominate the clustering - confirm
# whether that is intended.
coffee_shop_clustering = coffee_shop[['lat', 'lng', 'distance']]

# run k-means clustering (random_state fixed so the labels are reproducible)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(coffee_shop_clustering)

# check cluster labels generated for the first rows in the dataframe
kmeans.labels_[0:10]

In [None]:
# Wrap the fitted labels in a Series
Cluster_Labels = pd.Series(kmeans.labels_)

# Attach the labels as a new column; passing the raw values makes the
# assignment positional rather than index-aligned
label_column = {'Cluster_Labels': Cluster_Labels.values}
coffee_shop = coffee_shop.assign(**label_column)
coffee_shop.head()

## Visualize the resulting clusters

In [None]:
# create map centred on the geocoded British Museum coordinates
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=15)

# add a red circle marker to represent the British Museum
folium.CircleMarker(
    [latitude, longitude],
    radius=15,
    color='red',
    popup='British Museum',
    fill=True,
    fill_color='red',
    fill_opacity=0.6
).add_to(map_clusters)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map, coloured by cluster and labelled with the shop name
for lat, lon, poi, cluster in zip(coffee_shop['lat'], coffee_shop['lng'],
                                  coffee_shop['name'],
                                  coffee_shop['Cluster_Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels run from 0 to kclusters - 1, so they index the palette directly
    # (no reliance on negative-index wraparound for cluster 0).
    folium.CircleMarker(
        [lat, lon],
        radius=7,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=1).add_to(map_clusters)

map_clusters

## Examine Clusters

In [None]:
# Rows assigned to cluster 0, showing the columns at positions 0-3 plus any
# column from position 5 onwards (position 4 is left out)
in_cluster_0 = coffee_shop['Cluster_Labels'] == 0
shown_positions_0 = [0, 1, 2, 3] + list(range(5, coffee_shop.shape[1]))
Cluster0 = coffee_shop.loc[in_cluster_0, coffee_shop.columns[shown_positions_0]]
Cluster0

In [None]:
# Rows assigned to cluster 1, showing the columns at positions 0-3 plus any
# column from position 5 onwards (position 4 is left out)
in_cluster_1 = coffee_shop['Cluster_Labels'] == 1
shown_positions_1 = [0, 1, 2, 3] + list(range(5, coffee_shop.shape[1]))
Cluster1 = coffee_shop.loc[in_cluster_1, coffee_shop.columns[shown_positions_1]]
Cluster1

In [None]:
# Rows assigned to cluster 2, showing the columns at positions 0-3 plus any
# column from position 5 onwards (position 4 is left out)
in_cluster_2 = coffee_shop['Cluster_Labels'] == 2
shown_positions_2 = [0, 1, 2, 3] + list(range(5, coffee_shop.shape[1]))
Cluster2 = coffee_shop.loc[in_cluster_2, coffee_shop.columns[shown_positions_2]]
Cluster2