# The Battle of Neighborhoods

## A New Restaurant in Vancouver, BC

### Summary:
A businessperson (the stakeholder) plans to open a French cuisine restaurant in the city of Vancouver, British Columbia, Canada.

This project aims to find the most suitable location in Vancouver for that French cuisine restaurant.

### Import all libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


### Create Dataframe

The CSV file was uploaded to IBM Watson Studio.

In [2]:
# The code was removed by Watson Studio for sharing.

Unnamed: 0,MAPID,NAME
0,SUN,Sunset
1,MP,Mount Pleasant
2,RP,Riley Park
3,CBD,Downtown
4,KITS,Kitsilano


In [3]:
df_data_1.shape #shape of dataframe

(22, 2)

### Clean data

In [4]:
# Build a 'LocalArea' column by appending ', Vancouver, BC' to every NAME value,
# so that geocoding each local area returns more precise coordinates.
df_data_1['LocalArea'] = df_data_1['NAME'].map(lambda area: str(area) + ', Vancouver, BC')

In [5]:
df_data_1.head() #read first 5 rows

Unnamed: 0,MAPID,NAME,LocalArea
0,SUN,Sunset,"Sunset, Vancouver, BC"
1,MP,Mount Pleasant,"Mount Pleasant, Vancouver, BC"
2,RP,Riley Park,"Riley Park, Vancouver, BC"
3,CBD,Downtown,"Downtown, Vancouver, BC"
4,KITS,Kitsilano,"Kitsilano, Vancouver, BC"


In [6]:
# "LocalArea" already carries the complete location name, so "NAME" is redundant.
# errors="ignore" keeps this cell re-runnable: a second execution finds no
# "NAME" column and would otherwise raise KeyError.
df_data_1 = df_data_1.drop(columns="NAME", errors="ignore")
df_data_1.head()

Unnamed: 0,MAPID,LocalArea
0,SUN,"Sunset, Vancouver, BC"
1,MP,"Mount Pleasant, Vancouver, BC"
2,RP,"Riley Park, Vancouver, BC"
3,CBD,"Downtown, Vancouver, BC"
4,KITS,"Kitsilano, Vancouver, BC"


### Get latitude and longitude of each local area

In [7]:
# Geocode every LocalArea with Nominatim and store the coordinates on a copy
# of the cleaned dataframe.
geolocator = Nominatim(user_agent="va1_explorer")

df2 = df_data_1.copy()

latlon = df2.LocalArea.apply(lambda addr: geolocator.geocode(addr))

# geocode() yields None for an address it cannot resolve; report those by name
# instead of failing later with an AttributeError on None.
unresolved = [addr for addr, loc in zip(df2['LocalArea'], latlon) if loc is None]
if unresolved:
    raise ValueError('Could not geocode: {}'.format('; '.join(unresolved)))

df2["Latitude"] = [loc.latitude for loc in latlon]
df2["Longitude"] = [loc.longitude for loc in latlon]

In [8]:
df2 #viewing the new dataframe 

Unnamed: 0,MAPID,LocalArea,Latitude,Longitude
0,SUN,"Sunset, Vancouver, BC",49.219593,-123.090239
1,MP,"Mount Pleasant, Vancouver, BC",49.26333,-123.096588
2,RP,"Riley Park, Vancouver, BC",49.247438,-123.102966
3,CBD,"Downtown, Vancouver, BC",49.283393,-123.117456
4,KITS,"Kitsilano, Vancouver, BC",49.26941,-123.155267
5,DS,"Dunbar-Southlands, Vancouver, BC",49.25346,-123.185044
6,KERR,"Kerrisdale, Vancouver, BC",49.234673,-123.155389
7,AR,"Arbutus-Ridge, Vancouver, BC",49.240968,-123.167001
8,WPG,"West Point Grey, Vancouver, BC",49.264484,-123.185433
9,MARP,"Marpole, Vancouver, BC",49.209223,-123.13615


In [9]:
#checking shape of new dataframe
df2.shape

(22, 4)

### Create a map of Vancouver with its Local Areas

Find coordinates of Vancouver

In [10]:
# Coordinates of the city itself; used below as the centre of every map.
address_van = 'Vancouver, BC'
geolocator = Nominatim(user_agent="van_explorer")
location_van = geolocator.geocode(address_van)
# geocode() yields None when the address cannot be resolved.
if location_van is None:
    raise ValueError('Could not geocode {!r}'.format(address_van))
latitude_van = location_van.latitude
longitude_van = location_van.longitude
print('The geograpical coordinates of Vancouver are {}, {}.'.format(latitude_van, longitude_van))

The geograpical coordinates of Vancouver are 49.2608724, -123.1139529.


Visualise all the Local Areas in Vancouver

In [11]:
# Base map centred on the geocoded city coordinates.
map_vancouver = folium.Map(location=[latitude_van, longitude_van], zoom_start=11)

# Shared appearance of every local-area marker.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per local area; its popup shows the area name.
for lat, lng, LocalArea in zip(df2['Latitude'], df2['Longitude'], df2['LocalArea']):
    label = folium.Popup('{}'.format(LocalArea), parse_html=True)
    marker = folium.CircleMarker([lat, lng], popup=label, **marker_style)
    marker.add_to(map_vancouver)

map_vancouver #vancouver map displays local areas

### Define Foursquare API and Version

In [12]:
# The code was removed by Watson Studio for sharing.

Credentails and Version set!


### Explore all Local Areas in Vancouver

Get the top 100 venues within radius=500m in each Local Area of Vancouver

In [13]:
#Define function to get venues
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=None):
    """Collect Foursquare "explore" venues around each local area.

    Relies on the notebook-level ``CLIENT_ID``, ``CLIENT_SECRET`` and
    ``VERSION`` credentials. Prints each area name as it is queried.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Label and coordinates of every local area; iterated in parallel.
    radius : int, default 500
        Value sent as the request's ``radius`` parameter.
    limit : int, optional
        Value sent as the request's ``limit`` parameter. When omitted, the
        notebook-level ``LIMIT`` constant is used (the original behaviour).

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'LocalArea', 'LocalArea Latitude',
        'LocalArea Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude'
        and 'Venue Category'. The columns are present even when no venue was
        returned at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    columns = ['LocalArea',
               'LocalArea Latitude',
               'LocalArea Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    max_results = LIMIT if limit is None else limit

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and escape the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': max_results,
        }

        # a timeout keeps a stalled connection from hanging the notebook
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params, timeout=30)
        response.raise_for_status()

        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            # a venue without any category must not abort the whole run
            category = categories[0]['name'] if categories else 'Uncategorized'
            rows.append((name,
                         lat,
                         lng,
                         venue['name'],
                         venue['location']['lat'],
                         venue['location']['lng'],
                         category))

    # passing the columns to the constructor also works when `rows` is empty,
    # where assigning `.columns` afterwards raises a length-mismatch error
    return pd.DataFrame(rows, columns=columns)

In [14]:
# getNearbyVenues reads this notebook-level constant as the per-request venue cap
LIMIT = 100

# query every local area using the default 500 radius
van_venues = getNearbyVenues(df2['LocalArea'], df2['Latitude'], df2['Longitude'])

Sunset, Vancouver, BC
Mount Pleasant, Vancouver, BC
Riley Park, Vancouver, BC
Downtown, Vancouver, BC
Kitsilano, Vancouver, BC
Dunbar-Southlands, Vancouver, BC
Kerrisdale, Vancouver, BC
Arbutus-Ridge, Vancouver, BC
West Point Grey, Vancouver, BC
Marpole, Vancouver, BC
Oakridge, Vancouver, BC
Shaughnessy, Vancouver, BC
Fairview, Vancouver, BC
South Cambie, Vancouver, BC
West End, Vancouver, BC
Killarney, Vancouver, BC
Renfrew-Collingwood, Vancouver, BC
Hastings-Sunrise, Vancouver, BC
Victoria-Fraserview, Vancouver, BC
Kensington-Cedar Cottage, Vancouver, BC
Strathcona, Vancouver, BC
Grandview-Woodland, Vancouver, BC


In [15]:
print('The shape of the new dataframe is:',van_venues.shape)
print('The content of the new dataframe is the following:')
van_venues.head()

The shape of the new dataframe is: (661, 7)
The content of the new dataframe is the following:


Unnamed: 0,LocalArea,LocalArea Latitude,LocalArea Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Sunset, Vancouver, BC",49.219593,-123.090239,Kalai's Dosa Hut,49.218998,-123.09116,South Indian Restaurant
1,"Sunset, Vancouver, BC",49.219593,-123.090239,New Novelty Restaurant and Sweets,49.223925,-123.090885,Dessert Shop
2,"Sunset, Vancouver, BC",49.219593,-123.090239,Vision 2000,49.224026,-123.090798,Cosmetics Shop
3,"Sunset, Vancouver, BC",49.219593,-123.090239,Sartaj Sweets & Restaurant,49.224046,-123.090841,Indian Restaurant
4,"Mount Pleasant, Vancouver, BC",49.26333,-123.096588,Dude Chilling Park,49.26373,-123.096796,Outdoor Sculpture


In [16]:
# number of distinct venue categories (missing values, if any, count as one)
category_count = van_venues['Venue Category'].nunique(dropna=False)
print('There are {} uniques categories.'.format(category_count))

There are 153 uniques categories.


Group venues by LocalArea, count, and sort them in descending order

In [19]:
# venues per local area, busiest areas first
(
    van_venues
    .groupby('LocalArea')
    .count()
    .sort_values(by='Venue', ascending=False)
)

Unnamed: 0_level_0,LocalArea Latitude,LocalArea Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
LocalArea,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Downtown, Vancouver, BC",100,100,100,100,100,100
"Mount Pleasant, Vancouver, BC",68,68,68,68,68,68
"Grandview-Woodland, Vancouver, BC",65,65,65,65,65,65
"West End, Vancouver, BC",61,61,61,61,61,61
"Riley Park, Vancouver, BC",56,56,56,56,56,56
"Kitsilano, Vancouver, BC",47,47,47,47,47,47
"West Point Grey, Vancouver, BC",44,44,44,44,44,44
"Kerrisdale, Vancouver, BC",40,40,40,40,40,40
"Marpole, Vancouver, BC",33,33,33,33,33,33
"Fairview, Vancouver, BC",28,28,28,28,28,28


In [33]:
# Keep only venues whose category name contains 'Restaurant'
is_restaurant = van_venues['Venue Category'].str.contains('Restaurant')
van_venues2 = van_venues.loc[is_restaurant].reset_index(drop=True)
van_venues2.head()

Unnamed: 0,LocalArea,LocalArea Latitude,LocalArea Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Sunset, Vancouver, BC",49.219593,-123.090239,Kalai's Dosa Hut,49.218998,-123.09116,South Indian Restaurant
1,"Sunset, Vancouver, BC",49.219593,-123.090239,Sartaj Sweets & Restaurant,49.224046,-123.090841,Indian Restaurant
2,"Mount Pleasant, Vancouver, BC",49.26333,-123.096588,Sushiyama,49.262897,-123.097169,Sushi Restaurant
3,"Mount Pleasant, Vancouver, BC",49.26333,-123.096588,Carp,49.262393,-123.09607,Sushi Restaurant
4,"Mount Pleasant, Vancouver, BC",49.26333,-123.096588,Congee Noodle House 粥麵館 (Congee Noodle House),49.263029,-123.102105,Chinese Restaurant


In [31]:
van_venues2.shape

(227, 7)

In [73]:
# restaurants per local area, busiest areas first
(
    van_venues2
    .groupby('LocalArea')
    .count()
    .sort_values(by='Venue', ascending=False)
)

Unnamed: 0_level_0,LocalArea Latitude,LocalArea Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
LocalArea,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"West End, Vancouver, BC",28,28,28,28,28,28
"Grandview-Woodland, Vancouver, BC",28,28,28,28,28,28
"Downtown, Vancouver, BC",20,20,20,20,20,20
"Riley Park, Vancouver, BC",20,20,20,20,20,20
"Kitsilano, Vancouver, BC",19,19,19,19,19,19
"Kerrisdale, Vancouver, BC",15,15,15,15,15,15
"Mount Pleasant, Vancouver, BC",15,15,15,15,15,15
"Marpole, Vancouver, BC",14,14,14,14,14,14
"West Point Grey, Vancouver, BC",14,14,14,14,14,14
"Fairview, Vancouver, BC",13,13,13,13,13,13


* The local areas of West End and Grandview-Woodland have the largest number of restaurants, with 28 each.
* The local areas of Killarney, Shaughnessy, and Victoria-Fraserview have the smallest number of restaurants, with 1 each.

### Analysing each Local Area

In [34]:
# One indicator column per restaurant category (no prefix on the column names)
van_onehot = pd.get_dummies(van_venues2[['Venue Category']], prefix="", prefix_sep="")

# Re-attach the local area of each venue; the new column lands at the end
van_onehot['LocalArea'] = van_venues2['LocalArea']

# Rotate that last column to the front so LocalArea leads the frame
fixed_columns = van_onehot.columns[-1:].tolist() + van_onehot.columns[:-1].tolist()
van_onehot = van_onehot.loc[:, fixed_columns]

van_onehot.head()

Unnamed: 0,LocalArea,American Restaurant,Asian Restaurant,Australian Restaurant,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Cuban Restaurant,Dim Sum Restaurant,...,Shanghai Restaurant,South American Restaurant,South Indian Restaurant,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,"Sunset, Vancouver, BC",0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,"Sunset, Vancouver, BC",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Mount Pleasant, Vancouver, BC",0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
3,"Mount Pleasant, Vancouver, BC",0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,"Mount Pleasant, Vancouver, BC",0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
#cheking the shape of the dataframe
print('The shape of the dataframe is:', van_onehot.shape)

The shape of the dataframe is: (227, 45)


Group rows by LocalArea, taking the mean of the frequency of occurrence of each category

In [36]:
# Averaging the one-hot indicators gives each category's share of the
# restaurants found in a local area.
van_grouped = van_onehot.groupby('LocalArea', as_index=False).mean()
van_grouped

Unnamed: 0,LocalArea,American Restaurant,Asian Restaurant,Australian Restaurant,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Cuban Restaurant,Dim Sum Restaurant,...,Shanghai Restaurant,South American Restaurant,South Indian Restaurant,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,"Downtown, Vancouver, BC",0.1,0.0,0.05,0.0,0.0,0.0,0.05,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1
1,"Dunbar-Southlands, Vancouver, BC",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0
2,"Fairview, Vancouver, BC",0.0,0.153846,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,...,0.0,0.0,0.0,0.076923,0.076923,0.0,0.0,0.0,0.0,0.076923
3,"Grandview-Woodland, Vancouver, BC",0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.035714,0.0,...,0.0,0.0,0.0,0.142857,0.0,0.0,0.035714,0.035714,0.035714,0.0
4,"Hastings-Sunrise, Vancouver, BC",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.666667
5,"Kensington-Cedar Cottage, Vancouver, BC",0.0,0.0,0.0,0.0,0.0,0.0,0.375,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25
6,"Kerrisdale, Vancouver, BC",0.0,0.066667,0.0,0.0,0.0,0.0,0.2,0.0,0.0,...,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.066667,0.0,0.133333
7,"Killarney, Vancouver, BC",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Kitsilano, Vancouver, BC",0.105263,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.157895,0.0,0.0,0.0,0.105263,0.052632,0.0
9,"Marpole, Vancouver, BC",0.0,0.0,0.0,0.0,0.0,0.0,0.214286,0.0,0.071429,...,0.071429,0.0,0.0,0.214286,0.0,0.071429,0.0,0.071429,0.0,0.071429


In [37]:
#chek the shape of the dataframe
print('The shape of the grouped dataframe is:', van_grouped.shape)

The shape of the grouped dataframe is: (21, 45)


There are now 21 rows instead of 22 because 'Arbutus-Ridge, Vancouver, BC' has no restaurants among its venues, so it dropped out when only restaurant venues were kept.

In [38]:
# All venues (of any category) returned for Arbutus-Ridge
arbutus_mask = van_venues['LocalArea'].eq('Arbutus-Ridge, Vancouver, BC')
van_venues[arbutus_mask]

Unnamed: 0,LocalArea,LocalArea Latitude,LocalArea Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
322,"Arbutus-Ridge, Vancouver, BC",49.240968,-123.167001,Butter Baked Goods,49.242209,-123.170381,Bakery
323,"Arbutus-Ridge, Vancouver, BC",49.240968,-123.167001,The Haven,49.241377,-123.166331,Spa
324,"Arbutus-Ridge, Vancouver, BC",49.240968,-123.167001,Barktholomews Pet Supplies,49.242746,-123.170193,Pet Store
325,"Arbutus-Ridge, Vancouver, BC",49.240968,-123.167001,The Dragon's Layer,49.238518,-123.169029,Nightlife Spot
326,"Arbutus-Ridge, Vancouver, BC",49.240968,-123.167001,The Heights Market,49.237902,-123.170949,Grocery Store


From the above, under 'Venue Category', there are no restaurants within a radius=500m in this particular local area.

Print each Local Area along with the top 5 most common venues

In [39]:
num_top_venues = 5

for hood in van_grouped['LocalArea']:
    print("----"+hood+"----")
    # turn this area's single row into one (category, frequency) pair per line
    hood_row = van_grouped.loc[van_grouped['LocalArea'] == hood]
    temp = hood_row.T.reset_index()
    temp.columns = ['venue','freq']
    # the first transposed line is the LocalArea label itself, not a category
    temp = temp.iloc[1:].astype({'freq': float}).round({'freq': 2})
    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Downtown, Vancouver, BC----
                      venue  freq
0       American Restaurant  0.10
1                Restaurant  0.10
2        Seafood Restaurant  0.10
3     Vietnamese Restaurant  0.10
4  Mediterranean Restaurant  0.05


----Dunbar-Southlands, Vancouver, BC----
                  venue  freq
0      Sushi Restaurant   0.4
1  Fast Food Restaurant   0.2
2    Italian Restaurant   0.2
3     Indian Restaurant   0.2
4    Seafood Restaurant   0.0


----Fairview, Vancouver, BC----
                   venue  freq
0       Asian Restaurant  0.15
1  Vietnamese Restaurant  0.08
2      Indian Restaurant  0.08
3       Malay Restaurant  0.08
4     Chinese Restaurant  0.08


----Grandview-Woodland, Vancouver, BC----
                 venue  freq
0     Sushi Restaurant  0.14
1   Italian Restaurant  0.11
2    Indian Restaurant  0.11
3  Japanese Restaurant  0.07
4    French Restaurant  0.07


----Hastings-Sunrise, Vancouver, BC----
                      venue  freq
0     Vietnamese Restaurant

Put that into a pandas dataframe

In [40]:
#function to sort venues in descending order
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first element of ``row`` is skipped, the remaining values are ranked
    in descending order, and the index labels of the first ``num_top_venues``
    of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [51]:
# Build a table with the 10 most common venue categories of each local area
num_top_venues = 10

# ordinal suffixes of the first three ranks; every later rank uses 'th'
indicators = ['st', 'nd', 'rd']

# one column per rank, e.g. '1st Most Common Venue', '4th Most Common Venue'
columns = ['LocalArea']
for ind in range(num_top_venues):
    # explicit bounds check instead of a bare `except:` that would also
    # swallow unrelated errors (including KeyboardInterrupt)
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# empty frame with one row per local area
localarea_venues_sorted = pd.DataFrame(columns=columns)
localarea_venues_sorted['LocalArea'] = van_grouped['LocalArea']

# fill every row with that area's categories, most frequent first
for ind in range(van_grouped.shape[0]):
    localarea_venues_sorted.iloc[ind, 1:] = return_most_common_venues(van_grouped.iloc[ind, :], num_top_venues)

localarea_venues_sorted.head()

Unnamed: 0,LocalArea,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Downtown, Vancouver, BC",Vietnamese Restaurant,Seafood Restaurant,Restaurant,American Restaurant,Hawaiian Restaurant,Mexican Restaurant,Japanese Curry Restaurant,Australian Restaurant,French Restaurant,Chinese Restaurant
1,"Dunbar-Southlands, Vancouver, BC",Sushi Restaurant,Italian Restaurant,Indian Restaurant,Fast Food Restaurant,Vietnamese Restaurant,Falafel Restaurant,Japanese Curry Restaurant,Hawaiian Restaurant,Greek Restaurant,French Restaurant
2,"Fairview, Vancouver, BC",Asian Restaurant,Vietnamese Restaurant,Chinese Restaurant,Korean Restaurant,Indian Restaurant,Malay Restaurant,Fast Food Restaurant,Falafel Restaurant,Restaurant,Sushi Restaurant
3,"Grandview-Woodland, Vancouver, BC",Sushi Restaurant,Italian Restaurant,Indian Restaurant,Japanese Restaurant,French Restaurant,Seafood Restaurant,Greek Restaurant,Vegetarian / Vegan Restaurant,Mexican Restaurant,Middle Eastern Restaurant
4,"Hastings-Sunrise, Vancouver, BC",Vietnamese Restaurant,Sushi Restaurant,Falafel Restaurant,Japanese Curry Restaurant,Italian Restaurant,Indian Restaurant,Hawaiian Restaurant,Greek Restaurant,French Restaurant,Filipino Restaurant


### Cluster Local Areas

Run k-means to group the local areas into 6 clusters.

In [52]:
# set number of clusters
kclusters = 6

# k-means needs the numeric frequency columns only; `columns=` replaces the
# positional axis argument (`drop('LocalArea', 1)`) that pandas 2.x rejects
van_grouped_clustering = van_grouped.drop(columns='LocalArea')

# run k-means clustering; n_init is pinned to the historical default of 10 so
# the labels do not depend on the installed scikit-learn version
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(van_grouped_clustering)

# check cluster labels generated for the first rows of the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 4, 0, 0, 1, 0, 0], dtype=int32)

Create a new dataframe that includes the cluster label as well as the top 10 venues for each local area.

In [53]:
# add clustering labels as the first column; drop a label column left over from
# a previous run first, because insert() raises if the column already exists
if 'Cluster Labels' in localarea_venues_sorted.columns:
    localarea_venues_sorted = localarea_venues_sorted.drop(columns='Cluster Labels')
localarea_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# keep only the local areas that were clustered (those with at least one
# restaurant) instead of hard-coding the single area that currently has none
van_merged = df2.loc[df2['LocalArea'].isin(localarea_venues_sorted['LocalArea'])]

# add the cluster label and top venues to each area's latitude/longitude
van_merged = van_merged.join(localarea_venues_sorted.set_index('LocalArea'), on='LocalArea')

van_merged.head() # check the last columns!

Unnamed: 0,MAPID,LocalArea,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,SUN,"Sunset, Vancouver, BC",49.219593,-123.090239,3,Indian Restaurant,South Indian Restaurant,Vietnamese Restaurant,Falafel Restaurant,Japanese Curry Restaurant,Italian Restaurant,Hawaiian Restaurant,Greek Restaurant,French Restaurant,Filipino Restaurant
1,MP,"Mount Pleasant, Vancouver, BC",49.26333,-123.096588,0,Sushi Restaurant,Vietnamese Restaurant,Indian Restaurant,Chinese Restaurant,Mexican Restaurant,New American Restaurant,Peruvian Restaurant,Ethiopian Restaurant,Restaurant,Asian Restaurant
2,RP,"Riley Park, Vancouver, BC",49.247438,-123.102966,0,Restaurant,Thai Restaurant,Japanese Restaurant,Vegetarian / Vegan Restaurant,Sushi Restaurant,Chinese Restaurant,Vietnamese Restaurant,Middle Eastern Restaurant,Caribbean Restaurant,Seafood Restaurant
3,CBD,"Downtown, Vancouver, BC",49.283393,-123.117456,0,Vietnamese Restaurant,Seafood Restaurant,Restaurant,American Restaurant,Hawaiian Restaurant,Mexican Restaurant,Japanese Curry Restaurant,Australian Restaurant,French Restaurant,Chinese Restaurant
4,KITS,"Kitsilano, Vancouver, BC",49.26941,-123.155267,0,Sushi Restaurant,American Restaurant,French Restaurant,Japanese Restaurant,Thai Restaurant,Restaurant,Asian Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant,Seafood Restaurant


In [54]:
van_merged.shape

(21, 15)

Visualise the resulting clusters

In [55]:
# create map
map_clusters = folium.Map(location=[latitude_van, longitude_van], zoom_start=11)

# one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add one marker per local area, coloured by its cluster
for lat, lon, poi, cluster in zip(van_merged['Latitude'], van_merged['Longitude'], van_merged['LocalArea'], van_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # int() tolerates labels stored as floats; the `- 1` offset is kept so the
    # colours stay as before (label 0 wraps around to the last palette entry)
    marker_color = rainbow[int(cluster) - 1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### Examine Clusters
Examine each cluster and determine the venue categories that distinguish each cluster.

#### Cluster 1 (label 0)

In [56]:
# Local areas with k-means label 0: show the column at position 1 (LocalArea)
# followed by every column from position 4 onwards.
cluster_columns = van_merged.columns[[1] + list(range(4, van_merged.shape[1]))]
van_merged.loc[van_merged['Cluster Labels'] == 0, cluster_columns]

Unnamed: 0,LocalArea,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,"Mount Pleasant, Vancouver, BC",0,Sushi Restaurant,Vietnamese Restaurant,Indian Restaurant,Chinese Restaurant,Mexican Restaurant,New American Restaurant,Peruvian Restaurant,Ethiopian Restaurant,Restaurant,Asian Restaurant
2,"Riley Park, Vancouver, BC",0,Restaurant,Thai Restaurant,Japanese Restaurant,Vegetarian / Vegan Restaurant,Sushi Restaurant,Chinese Restaurant,Vietnamese Restaurant,Middle Eastern Restaurant,Caribbean Restaurant,Seafood Restaurant
3,"Downtown, Vancouver, BC",0,Vietnamese Restaurant,Seafood Restaurant,Restaurant,American Restaurant,Hawaiian Restaurant,Mexican Restaurant,Japanese Curry Restaurant,Australian Restaurant,French Restaurant,Chinese Restaurant
4,"Kitsilano, Vancouver, BC",0,Sushi Restaurant,American Restaurant,French Restaurant,Japanese Restaurant,Thai Restaurant,Restaurant,Asian Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant,Seafood Restaurant
5,"Dunbar-Southlands, Vancouver, BC",0,Sushi Restaurant,Italian Restaurant,Indian Restaurant,Fast Food Restaurant,Vietnamese Restaurant,Falafel Restaurant,Japanese Curry Restaurant,Hawaiian Restaurant,Greek Restaurant,French Restaurant
6,"Kerrisdale, Vancouver, BC",0,Chinese Restaurant,Vietnamese Restaurant,Fast Food Restaurant,Sushi Restaurant,Thai Restaurant,Asian Restaurant,Italian Restaurant,Mediterranean Restaurant,Japanese Restaurant,Portuguese Restaurant
8,"West Point Grey, Vancouver, BC",0,Japanese Restaurant,Vegetarian / Vegan Restaurant,Asian Restaurant,Sushi Restaurant,Fast Food Restaurant,Italian Restaurant,South American Restaurant,Falafel Restaurant,Japanese Curry Restaurant,Indian Restaurant
9,"Marpole, Vancouver, BC",0,Sushi Restaurant,Chinese Restaurant,Japanese Restaurant,Vietnamese Restaurant,Thai Restaurant,Taiwanese Restaurant,Shanghai Restaurant,Dim Sum Restaurant,Falafel Restaurant,Fast Food Restaurant
12,"Fairview, Vancouver, BC",0,Asian Restaurant,Vietnamese Restaurant,Chinese Restaurant,Korean Restaurant,Indian Restaurant,Malay Restaurant,Fast Food Restaurant,Falafel Restaurant,Restaurant,Sushi Restaurant
14,"West End, Vancouver, BC",0,Japanese Restaurant,Greek Restaurant,Indian Restaurant,American Restaurant,Ramen Restaurant,Sushi Restaurant,Italian Restaurant,Restaurant,Chinese Restaurant,Falafel Restaurant


#### Cluster 2 (label 1)

In [57]:
# Local areas with k-means label 1: show the column at position 1 (LocalArea)
# followed by every column from position 4 onwards.
cluster_columns = van_merged.columns[[1] + list(range(4, van_merged.shape[1]))]
van_merged.loc[van_merged['Cluster Labels'] == 1, cluster_columns]

Unnamed: 0,LocalArea,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,"Killarney, Vancouver, BC",1,Italian Restaurant,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Japanese Restaurant,Japanese Curry Restaurant,Indian Restaurant,Hawaiian Restaurant,Greek Restaurant,French Restaurant,Filipino Restaurant


#### Cluster 3 (label 2)

In [58]:
# Local areas with k-means label 2: show the column at position 1 (LocalArea)
# followed by every column from position 4 onwards.
cluster_columns = van_merged.columns[[1] + list(range(4, van_merged.shape[1]))]
van_merged.loc[van_merged['Cluster Labels'] == 2, cluster_columns]

Unnamed: 0,LocalArea,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,"Shaughnessy, Vancouver, BC",2,French Restaurant,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Japanese Restaurant,Japanese Curry Restaurant,Italian Restaurant,Indian Restaurant,Hawaiian Restaurant,Greek Restaurant,Filipino Restaurant


#### Cluster 4 (label 3)

In [59]:
# Local areas with k-means label 3: show the column at position 1 (LocalArea)
# followed by every column from position 4 onwards.
cluster_columns = van_merged.columns[[1] + list(range(4, van_merged.shape[1]))]
van_merged.loc[van_merged['Cluster Labels'] == 3, cluster_columns]

Unnamed: 0,LocalArea,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Sunset, Vancouver, BC",3,Indian Restaurant,South Indian Restaurant,Vietnamese Restaurant,Falafel Restaurant,Japanese Curry Restaurant,Italian Restaurant,Hawaiian Restaurant,Greek Restaurant,French Restaurant,Filipino Restaurant


#### Cluster 5 (label 4)

In [60]:
# Local areas with k-means label 4: show the column at position 1 (LocalArea)
# followed by every column from position 4 onwards.
cluster_columns = van_merged.columns[[1] + list(range(4, van_merged.shape[1]))]
van_merged.loc[van_merged['Cluster Labels'] == 4, cluster_columns]

Unnamed: 0,LocalArea,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,"Oakridge, Vancouver, BC",4,Vietnamese Restaurant,Sushi Restaurant,Fast Food Restaurant,Falafel Restaurant,Japanese Curry Restaurant,Italian Restaurant,Indian Restaurant,Hawaiian Restaurant,Greek Restaurant,French Restaurant
13,"South Cambie, Vancouver, BC",4,Vietnamese Restaurant,Malay Restaurant,Sushi Restaurant,Falafel Restaurant,Japanese Curry Restaurant,Italian Restaurant,Indian Restaurant,Hawaiian Restaurant,Greek Restaurant,French Restaurant
17,"Hastings-Sunrise, Vancouver, BC",4,Vietnamese Restaurant,Sushi Restaurant,Falafel Restaurant,Japanese Curry Restaurant,Italian Restaurant,Indian Restaurant,Hawaiian Restaurant,Greek Restaurant,French Restaurant,Filipino Restaurant


#### Cluster 6 (label 5)

In [61]:
# Local areas with k-means label 5: show the column at position 1 (LocalArea)
# followed by every column from position 4 onwards.
cluster_columns = van_merged.columns[[1] + list(range(4, van_merged.shape[1]))]
van_merged.loc[van_merged['Cluster Labels'] == 5, cluster_columns]

Unnamed: 0,LocalArea,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,"Victoria-Fraserview, Vancouver, BC",5,Fast Food Restaurant,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Japanese Restaurant,Japanese Curry Restaurant,Italian Restaurant,Indian Restaurant,Hawaiian Restaurant,Greek Restaurant,French Restaurant


### Find French Restaurants in Vancouver

In [62]:
#Define function to search for all French Restaurants
def searchVenues(names, latitudes, longitudes, radius=500, query=None):
    """Search Foursquare "explore" results around each local area for a query.

    Relies on the notebook-level ``CLIENT_ID``, ``CLIENT_SECRET`` and
    ``VERSION`` credentials. Prints each area name as it is queried.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Label and coordinates of every local area; iterated in parallel.
    radius : int, default 500
        Value sent as the request's ``radius`` parameter.
    query : str, optional
        Value sent as the request's ``query`` parameter. When omitted, the
        notebook-level ``search_query`` variable is used (the original
        behaviour).

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'LocalArea', 'LocalArea Latitude',
        'LocalArea Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude'
        and 'Venue Category'. The columns are present even when no venue was
        found at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    columns = ['LocalArea',
               'LocalArea Latitude',
               'LocalArea Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    query_text = search_query if query is None else query

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and escape the query string (the query text
        # may contain spaces)
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'query': query_text,
        }

        # a timeout keeps a stalled connection from hanging the notebook
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params, timeout=30)
        response.raise_for_status()

        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            # a venue without any category must not abort the whole run
            category = categories[0]['name'] if categories else 'Uncategorized'
            rows.append((name,
                         lat,
                         lng,
                         venue['name'],
                         venue['location']['lat'],
                         venue['location']['lng'],
                         category))

    # passing the columns to the constructor also works when `rows` is empty,
    # where assigning `.columns` afterwards raises a length-mismatch error
    return pd.DataFrame(rows, columns=columns)

In [63]:
# searchVenues reads this notebook-level variable as its query text
search_query = 'French Restaurant'

# query every local area using the default 500 radius
french_venues = searchVenues(df2['LocalArea'], df2['Latitude'], df2['Longitude'])

Sunset, Vancouver, BC
Mount Pleasant, Vancouver, BC
Riley Park, Vancouver, BC
Downtown, Vancouver, BC
Kitsilano, Vancouver, BC
Dunbar-Southlands, Vancouver, BC
Kerrisdale, Vancouver, BC
Arbutus-Ridge, Vancouver, BC
West Point Grey, Vancouver, BC
Marpole, Vancouver, BC
Oakridge, Vancouver, BC
Shaughnessy, Vancouver, BC
Fairview, Vancouver, BC
South Cambie, Vancouver, BC
West End, Vancouver, BC
Killarney, Vancouver, BC
Renfrew-Collingwood, Vancouver, BC
Hastings-Sunrise, Vancouver, BC
Victoria-Fraserview, Vancouver, BC
Kensington-Cedar Cottage, Vancouver, BC
Strathcona, Vancouver, BC
Grandview-Woodland, Vancouver, BC


In [64]:
print('The shape of the new dataframe is:',french_venues.shape) #cheching the shape of the new dataframe
print('The content of the new dataframe is the following:')
french_venues.head() #displaying dataframe

The shape of the new dataframe is: (18, 7)
The content of the new dataframe is the following:


Unnamed: 0,LocalArea,LocalArea Latitude,LocalArea Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Riley Park, Vancouver, BC",49.247438,-123.102966,Coco et Olive,49.25176,-123.101089,French Restaurant
1,"Riley Park, Vancouver, BC",49.247438,-123.102966,The French Table,49.250073,-123.100949,French Restaurant
2,"Riley Park, Vancouver, BC",49.247438,-123.102966,Yugo Restaurant,49.247488,-123.101384,French Restaurant
3,"Downtown, Vancouver, BC",49.283393,-123.117456,L'Hermitage,49.280139,-123.11748,French Restaurant
4,"Downtown, Vancouver, BC",49.283393,-123.117456,Bacchus Lounge,49.282187,-123.122333,French Restaurant


Find French Venues per Local Area

In [65]:
# Number of venues found per local area, kept as a one-column frame
french_venues2 = french_venues.groupby('LocalArea')[['Venue']].count()
# Display the areas with the most venues first (french_venues2 stays unsorted)
french_venues2.sort_values(by='Venue', ascending=False)

Unnamed: 0_level_0,Venue
LocalArea,Unnamed: 1_level_1
"Downtown, Vancouver, BC",4
"Grandview-Woodland, Vancouver, BC",4
"Kitsilano, Vancouver, BC",4
"Riley Park, Vancouver, BC",3
"Kerrisdale, Vancouver, BC",1
"Shaughnessy, Vancouver, BC",1
"West End, Vancouver, BC",1


In [72]:
# Base map centred on the geocoded city coordinates, zoomed in one step
# further than the local-area map.
map_fvenues = folium.Map(location=[latitude_van, longitude_van], zoom_start=13)

# Shared appearance of every venue marker.
venue_marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per venue found; the popup shows "<venue>, <local area>".
for lat, lng, Venue, LocalArea in zip(french_venues['Venue Latitude'], french_venues['Venue Longitude'], french_venues['Venue'], french_venues['LocalArea']):
    label = folium.Popup('{}, {}'.format(Venue, LocalArea), parse_html=True)
    marker = folium.CircleMarker([lat, lng], popup=label, **venue_marker_style)
    marker.add_to(map_fvenues)

map_fvenues