# Notebook for Coursera Capstone project

In [7]:
import pandas as pd
import numpy as np

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
from matplotlib.colors import to_hex
import matplotlib.pyplot as plt

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

from sklearn.cluster import DBSCAN
import sklearn.utils
from sklearn.preprocessing import StandardScaler


print('Libraries imported.')

Libraries imported.


Setting the city's geolocation.

In [11]:
# Resolve the city name to latitude/longitude with the Nominatim geocoder.
# `latitude` and `longitude` are reused by later cells as the search centre
# and as the map centre.
address = 'Zurich'

geolocator = Nominatim(user_agent="zh_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; stop here with a
# clear message instead of failing on `.latitude` with an AttributeError.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Zurich are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Zurich are 47.3723941, 8.5423328.


Providing Foursquare credentials.

In [12]:
# Foursquare credentials are read from the environment so that secrets are
# never stored in the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
import os

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20180605' # Foursquare API version

# Surface missing credentials here rather than as an opaque API error later.
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will be rejected.')

Getting venues within a 5 km radius of the city center for the "food" category using the Foursquare API, and storing the name, geolocation and category of each venue.

In [13]:
# Page through the Foursquare "explore" endpoint and collect, for every venue,
# its name, coordinates and first listed category into `zrh_venues`.
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 5000 # define radius
categoryId = "4d4b7105d754a06374d81259" # "food" category id

url = 'https://api.foursquare.com/v2/venues/explore'

venues_list = []
# Same six pages as before (offsets 0, 100, ..., 500 with LIMIT = 100).
for offset in range(0, 600, LIMIT):
    # let requests build and escape the query string
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(latitude, longitude),
        'categoryId': categoryId,
        'radius': radius,
        'limit': LIMIT,
        'offset': offset,
    }

    # make the GET request; fail loudly on HTTP errors (bad credentials,
    # exhausted quota, ...) instead of with a KeyError while parsing the body
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    groups = response.json()['response'].get('groups', [])
    results = groups[0]['items'] if groups else []

    # an empty page means there is nothing left to fetch
    if not results:
        break

    # return only relevant information for each nearby venue; venues without
    # any category are skipped because the later cells need a category
    venues_list.append([(
        v['venue']['name'],
        v['venue']['location']['lat'],
        v['venue']['location']['lng'],
        v['venue']['categories'][0]['name'])
        for v in results
        if v['venue'].get('categories')])


# Passing `columns` to the constructor also works when no venue was returned
# (assigning four column names to an empty frame raises a ValueError).
zrh_venues = pd.DataFrame(
    [item for venue_list in venues_list for item in venue_list],
    columns=['Name', 'Latitude', 'Longitude', 'Category'])

zrh_venues

Unnamed: 0,Name,Latitude,Longitude,Category
0,Coco Grill & Bar,47.368959,8.538431,Swiss Restaurant
1,Dachterrasse Hiltl,47.375686,8.539650,Vegetarian / Vegan Restaurant
2,Äss-Bar,47.372561,8.543693,Bakery
3,Chez Marion,47.374133,8.544701,French Restaurant
4,John Baker Ltd,47.367208,8.547293,Bakery
...,...,...,...,...
242,New Point Oerlikon,47.413928,8.545514,Falafel Restaurant
243,NOERD Kantine,47.413696,8.533357,Restaurant
244,Ruen Thai 2,47.385274,8.494748,Thai Restaurant
245,Food Center,47.390779,8.489297,Burger Joint


Displaying all the locations on a map.

In [14]:
# Base map centred on the geocoded city coordinates
map_zrh = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per venue; the popup reads "<name>, <category>"
venue_columns = (zrh_venues[col] for col in ('Latitude', 'Longitude', 'Name', 'Category'))
for lat, lng, name, category in zip(*venue_columns):
    popup = folium.Popup('{}, {}'.format(name, category), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_zrh)

# last expression of the cell: render the map
map_zrh

Assigning a numeric code to each food venue category. Codes are chosen so that venues of a similar character receive adjacent codes. The DBSCAN algorithm applied later relies on this property to find clusters of similar venues.

In [15]:
# Numeric code for every Foursquare food category. Related categories are given
# nearby codes so that the code can serve as a clustering feature.
cat_dict = {
"Swiss Restaurant": 0,

"Italian Restaurant": 10,
"Pizza Place": 11,
"Mediterranean Restaurant": 12,
"Trattoria/Osteria": 13,
"Greek Restaurant": 14,
"Seafood Restaurant": 15,

"Restaurant": 16,
"French Restaurant": 17,

"Vegetarian / Vegan Restaurant": 20,
"Salad Place": 21,
"Southern / Soul Food Restaurant": 22,
"Breakfast Spot": 23,
"Molecular Gastronomy Restaurant":24,
"Modern European Restaurant":25,

"Tapas Restaurant": 30,
"Spanish Restaurant": 31,
"Mexican Restaurant": 32,

"Middle Eastern Restaurant": 40,
"Lebanese Restaurant": 41,
"Ethiopian Restaurant": 42,
"Moroccan Restaurant": 43,
"Argentinian Restaurant": 44,

"Japanese Restaurant": 50,
"Thai Restaurant": 51,
"Indian Restaurant": 52,
"Asian Restaurant": 53,
"Sushi Restaurant": 54,
"Chinese Restaurant": 55,
"Vietnamese Restaurant": 56,
"Tibetan Restaurant": 57,
"Indonesian Restaurant": 58,

"Bagel Shop": 65,
"Diner": 66,
"BBQ Joint": 67,
"American Restaurant": 68,
"Steakhouse": 69,

"Burrito Place": 80,
"Falafel Restaurant": 81,
"Food Court": 82,
"Food Truck": 83,
"Fast Food Restaurant": 84,
"Sandwich Place": 85,
"Snack Place": 86,
"Burger Joint": 87,
"Kebab Restaurant": 88,
"Gastropub": 89,
"Doner Restaurant": 90,
"Bratwurst Joint": 91,

"Café": 100,
"Bakery": 101,
"Bistro": 102,
"Creperie": 103,
"Taverna": 110,
"Irish Pub": 111,
}

# Look every category up in one pass instead of one full-column replace per key.
category_codes = zrh_venues["Category"].map(cat_dict)

# A category missing from cat_dict would otherwise end up as a non-numeric
# value in a numeric feature column; report it here by name instead.
missing_code = category_codes.isna()
if missing_code.any():
    unmapped = sorted(set(zrh_venues.loc[missing_code, "Category"].astype(str)))
    raise ValueError("No category code defined in cat_dict for: {}".format(", ".join(unmapped)))

zrh_venues["CategoryCode"] = category_codes.astype(int)

zrh_venues

Unnamed: 0,Name,Latitude,Longitude,Category,CategoryCode
0,Coco Grill & Bar,47.368959,8.538431,Swiss Restaurant,0
1,Dachterrasse Hiltl,47.375686,8.539650,Vegetarian / Vegan Restaurant,20
2,Äss-Bar,47.372561,8.543693,Bakery,101
3,Chez Marion,47.374133,8.544701,French Restaurant,17
4,John Baker Ltd,47.367208,8.547293,Bakery,101
...,...,...,...,...,...
242,New Point Oerlikon,47.413928,8.545514,Falafel Restaurant,81
243,NOERD Kantine,47.413696,8.533357,Restaurant,16
244,Ruen Thai 2,47.385274,8.494748,Thai Restaurant,51
245,Food Center,47.390779,8.489297,Burger Joint,87


Applying the DBSCAN algorithm to find clusters of venues of similar characteristics.

In [16]:
# Build the clustering features: venue position plus the numeric category code.
# No random state is set up here: sklearn.utils.check_random_state() only
# returns a RandomState object and does not seed anything globally.
# The explicit float conversion fails with a clear error if a non-numeric
# category code is still present.
Clus_dataSet = zrh_venues[['Latitude', 'Longitude', 'CategoryCode']].astype(float).values
# NOTE(review): NaN values become 0 here, which is a real coordinate/code value;
# confirm that this is the intended treatment of missing data.
Clus_dataSet = np.nan_to_num(Clus_dataSet)
# Standardise each feature to zero mean and unit variance.
Clus_dataSet = StandardScaler().fit_transform(Clus_dataSet)

In [30]:
# Run DBSCAN on the standardised features
db = DBSCAN(eps=0.2, min_samples=2).fit(Clus_dataSet)
labels = db.labels_

# Boolean mask that is True for the samples DBSCAN reports as core samples
core_samples_mask = np.zeros_like(labels, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True

# Store each venue's cluster label next to its other attributes
zrh_venues["Clus_Db"] = labels

# Number of distinct labels, with and without the -1 label
clusterNum = len(set(labels))
realClusterNum = clusterNum - (1 if -1 in labels else 0)


# Venues together with their cluster label
zrh_venues[["Name", 'Latitude', 'Longitude', 'Category', 'CategoryCode', "Clus_Db"]]

Unnamed: 0,Name,Latitude,Longitude,Category,CategoryCode,Clus_Db
0,Coco Grill & Bar,47.368959,8.538431,Swiss Restaurant,0,0
1,Dachterrasse Hiltl,47.375686,8.539650,Vegetarian / Vegan Restaurant,20,1
2,Äss-Bar,47.372561,8.543693,Bakery,101,2
3,Chez Marion,47.374133,8.544701,French Restaurant,17,3
4,John Baker Ltd,47.367208,8.547293,Bakery,101,-1
...,...,...,...,...,...,...
242,New Point Oerlikon,47.413928,8.545514,Falafel Restaurant,81,-1
243,NOERD Kantine,47.413696,8.533357,Restaurant,16,-1
244,Ruen Thai 2,47.385274,8.494748,Thai Restaurant,51,-1
245,Food Center,47.390779,8.489297,Burger Joint,87,-1


Displaying obtained clusters on a map.

In [56]:
# Map centred on the geocoded city coordinates, one marker per venue,
# outlined in the colour of the venue's cluster.
map_zrh = folium.Map(location=[latitude, longitude], zoom_start=10)

# One random RGB colour per distinct label. A seeded generator keeps the
# colours identical between runs. The list is named `cluster_colors` so that it
# does not overwrite the `matplotlib.colors as colors` module import.
color_rng = np.random.RandomState(1000)
cluster_colors = [color_rng.rand(3) for _ in range(len(set(labels)))]

# add markers to map
for lat, lng, name, category, code, clust_number in zip(zrh_venues['Latitude'], zrh_venues['Longitude'], zrh_venues['Name'], zrh_venues['Category'], zrh_venues['CategoryCode'], zrh_venues["Clus_Db"]):
    label = '{}, {}, {}, {}'.format(name, category, code, clust_number)
    label = folium.Popup(label, parse_html=True)
    # Venues labelled -1 get a fixed grey outline; every other venue gets its
    # cluster colour. The built-in int() replaces np.int, which was removed
    # from NumPy.
    if clust_number == -1:
        outline_color = "#7b7b7b"
    else:
        outline_color = to_hex(cluster_colors[int(clust_number)])
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=outline_color,
        fill=True,
        fill_color='gray',
        fill_opacity=0.5,
        parse_html=False).add_to(map_zrh)

map_zrh