# Finding a Best Location in Montreal, Canada to Open a new Restaurant
* Opening a New Restaurant in Montreal, QC, Canada
* Build a dataframe of borough or subarbs in Montreal, Canada by web scraping the data from Wikipedia page
* Get the geographical coordinates of the neighborhoods
* Obtain the venue data for the borough from Foursquare API
* Explore and cluster the borough
* Select the best cluster to open a new Restaurant

### 1. Upload all library

In [None]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

# !conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder # to get coordinates

import requests # library to handle requests
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# !conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

### 2. Get all the borought form Wiki and create Dataframe

In [2]:
url = "https://en.wikipedia.org/wiki/Boroughs_of_Montreal"
mtl_data = requests.get(url).text
soup = BeautifulSoup(mtl_data, 'html.parser')

In [3]:
# create lists to store table data
borough = []
for row in soup.find('table').find_all('tr'): # for each row of the table
    cells = row.find_all('td') # find all to get the lists includine 'td'
    if(len(cells) > 0):
        s = cells[1].text.strip('\n')
        borough.append(s)

In [4]:
borough

['Ahuntsic-Cartierville',
 'Anjou',
 'Côte-des-Neiges–Notre-Dame-de-Grâce',
 'Lachine',
 'LaSalle',
 'Le Plateau-Mont-Royal',
 'Le Sud-Ouest',
 "L'Île-Bizard–Sainte-Geneviève",
 'Mercier–Hochelaga-Maisonneuve',
 'Montréal-Nord',
 'Outremont',
 'Pierrefonds-Roxboro',
 'Rivière-des-Prairies–Pointe-aux-Trembles',
 'Rosemont–La Petite-Patrie',
 'Saint-Laurent',
 'Saint-Léonard',
 'Verdun',
 'Ville-Marie',
 'Villeray–Saint-Michel–Parc-Extension']

In [5]:
mtl_df = pd.DataFrame({"Borough": borough})
mtl_df

Unnamed: 0,Borough
0,Ahuntsic-Cartierville
1,Anjou
2,Côte-des-Neiges–Notre-Dame-de-Grâce
3,Lachine
4,LaSalle
5,Le Plateau-Mont-Royal
6,Le Sud-Ouest
7,L'Île-Bizard–Sainte-Geneviève
8,Mercier–Hochelaga-Maisonneuve
9,Montréal-Nord


### 3. Get all the coordinates and add to dataframe

In [6]:
# define a function to get coordinates
def get_latlng(borough):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Montreal, Canada'.format(borough))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [7]:
mtl_coords = [ get_latlng(borough) for borough in mtl_df["Borough"].tolist() ]
mtl_coords

[[45.54003000000006, -73.68184999999994],
 [45.60218000000003, -73.55959999999999],
 [45.49073000000004, -73.62947999999994],
 [45.43297000000007, -73.68111999999996],
 [45.42965000000004, -73.66282999999999],
 [45.52919000000003, -73.57861999999994],
 [45.47426000000007, -73.58157999999997],
 [45.497340000000065, -73.89176999999995],
 [45.58211000000006, -73.53065999999995],
 [45.59490000000005, -73.63947999999993],
 [45.51413000000008, -73.61096999999995],
 [45.49642000000006, -73.84702999999996],
 [45.65175606375262, -73.5625510327336],
 [45.552820000000054, -73.57926999999995],
 [45.50735000000003, -73.68355999999994],
 [45.58624000000003, -73.59600999999998],
 [45.45927000000006, -73.57173999999998],
 [45.52974000000006, -73.55360999999994],
 [45.560280000000034, -73.60465999999997]]

In [8]:
mtl_df_coords = pd.DataFrame(mtl_coords, columns=['Latitude', 'Longitude'])

In [9]:
mtl_df['Latitude'] = mtl_df_coords['Latitude']
mtl_df['Longitude'] = mtl_df_coords['Longitude']
mtl_df

Unnamed: 0,Borough,Latitude,Longitude
0,Ahuntsic-Cartierville,45.54003,-73.68185
1,Anjou,45.60218,-73.5596
2,Côte-des-Neiges–Notre-Dame-de-Grâce,45.49073,-73.62948
3,Lachine,45.43297,-73.68112
4,LaSalle,45.42965,-73.66283
5,Le Plateau-Mont-Royal,45.52919,-73.57862
6,Le Sud-Ouest,45.47426,-73.58158
7,L'Île-Bizard–Sainte-Geneviève,45.49734,-73.89177
8,Mercier–Hochelaga-Maisonneuve,45.58211,-73.53066
9,Montréal-Nord,45.5949,-73.63948


In [10]:
mtl_df.to_csv("mtl_df.csv", index=False)

### 4. Create a map of Montreal with borough superimposed on top

In [11]:
address = 'Montreal'

geolocator = Nominatim(user_agent="rokibmath00@gmail.com")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 45.4972159, -73.6103642.


In [12]:
map_mtl = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, borough in zip(mtl_df['Latitude'], mtl_df['Longitude'], mtl_df['Borough']):
    label = '{}'.format(borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_mtl)  
    
map_mtl

### 5. Use the Foursquare API to explore the borough and get the top 100 venues that are within a radius of 500 meters.

In [211]:
CLIENT_ID = 'X4EEQWLR3KS2O0K14D5OU4WGZIRE134VJIK2BINL31A2GCYU' # your Foursquare ID
CLIENT_SECRET = 'TYCATYZDN3ZBOBICH1SJKIM5PLXSJXTOPGR5C30IF3GE0HXP' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: X4EEQWLR3KS2O0K14D5OU4WGZIRE134VJIK2BINL31A2GCYU
CLIENT_SECRET:TYCATYZDN3ZBOBICH1SJKIM5PLXSJXTOPGR5C30IF3GE0HXP


In [212]:
radius = 500
LIMIT = 100

venues = []

for lat, long, borough in zip(mtl_df['Latitude'], mtl_df['Longitude'], mtl_df['Borough']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            borough,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

### 6. convert the venues list into a new DataFrame

In [213]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Borough', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(228, 7)


Unnamed: 0,Borough,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Ahuntsic-Cartierville,45.54003,-73.68185,Sapori Di Napoli,45.540799,-73.685707,Italian Restaurant
1,Ahuntsic-Cartierville,45.54003,-73.68185,Parc Marcelin-Wilson,45.540585,-73.68573,Park
2,Ahuntsic-Cartierville,45.54003,-73.68185,Subway,45.537975,-73.679188,Sandwich Place
3,Ahuntsic-Cartierville,45.54003,-73.68185,Aréna Marcellin-Wilson,45.540662,-73.685701,Hockey Arena
4,Ahuntsic-Cartierville,45.54003,-73.68185,Restaurant Dima,45.540614,-73.685741,Middle Eastern Restaurant


In [183]:
venues_df.groupby(["Borough"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ahuntsic-Cartierville,8,8,8,8,8,8
Anjou,26,26,26,26,26,26
Côte-des-Neiges–Notre-Dame-de-Grâce,7,7,7,7,7,7
L'Île-Bizard–Sainte-Geneviève,1,1,1,1,1,1
LaSalle,5,5,5,5,5,5
Lachine,13,13,13,13,13,13
Le Plateau-Mont-Royal,45,45,45,45,45,45
Le Sud-Ouest,22,22,22,22,22,22
Mercier–Hochelaga-Maisonneuve,4,4,4,4,4,4
Montréal-Nord,2,2,2,2,2,2


In [214]:
#Find out how many unique categories in the result.
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 102 uniques categories.


In [185]:
# print out the list of categories
venues_df['VenueCategory'].unique()

array(['Italian Restaurant', 'Park', 'Sandwich Place', 'Hockey Arena',
       'Middle Eastern Restaurant', 'Chinese Restaurant', 'Train Station',
       'Convenience Store', 'Grocery Store', 'Shopping Mall',
       'Restaurant', 'Tea Room', 'Coffee Shop', 'Sporting Goods Shop',
       'Burger Joint', 'Clothing Store', 'Pizza Place', 'Pharmacy',
       'Fabric Shop', 'Fast Food Restaurant', 'Toy / Game Store',
       'Hardware Store', 'Baby Store', 'Electronics Store',
       'Furniture / Home Store', 'Greek Restaurant', 'Cosmetics Shop',
       'Vietnamese Restaurant', 'Sri Lankan Restaurant', 'Gym',
       'Filipino Restaurant', 'Café', 'Mexican Restaurant',
       'Farmers Market', 'Pub', 'Ice Cream Shop', 'Cheese Shop',
       'Historic Site', 'History Museum', 'Playground', 'Record Shop',
       'Cocktail Bar', 'Dessert Shop', 'Creperie', 'Bagel Shop',
       'Wine Bar', 'Bookstore', 'French Restaurant', 'Bar', 'Bakery',
       'Breakfast Spot', 'Asian Restaurant', 'Swiss Restauran

In [216]:
new_venues_df = venues_df.copy()

### 7. Convert all Reataurant categories to single category "Restaurant". Same as for Coffee shop. 

In [217]:
def encode_row(row):
    s = row[6]
    if s.find("Restaurant") == - 1:
        return s
    else:
        return "Restaurant"

In [218]:
def encode_row_Café(row):
    s = row[7]
    if s.find("Café") == - 1:
        return s
    else:
        return "Coffee Shop"

In [219]:
new_venues_df["New_VenueCategory"] = new_venues_df.apply(lambda row : encode_row(row), axis=1)
# new_venues_df

In [220]:
new_venues_df["New_VenueCategory"] = new_venues_df.apply(lambda row : encode_row_Café(row), axis=1)
# new_venues_df

In [221]:
new_venues_df = new_venues_df[["Borough","New_VenueCategory"]]
new_venues_df

Unnamed: 0,Borough,New_VenueCategory
0,Ahuntsic-Cartierville,Restaurant
1,Ahuntsic-Cartierville,Park
2,Ahuntsic-Cartierville,Sandwich Place
3,Ahuntsic-Cartierville,Hockey Arena
4,Ahuntsic-Cartierville,Restaurant
5,Ahuntsic-Cartierville,Restaurant
6,Ahuntsic-Cartierville,Train Station
7,Ahuntsic-Cartierville,Convenience Store
8,Anjou,Grocery Store
9,Anjou,Shopping Mall


### 8. Analyzing each borough now in Montreal.

In [222]:
# Analize each neighbour
# one hot encoding
mtl_onehot = pd.get_dummies(new_venues_df[['New_VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
mtl_onehot['Borough'] = new_venues_df['Borough'] 

# move neighborhood column to the first column
fixed_columns = [mtl_onehot.columns[-1]] + list(mtl_onehot.columns[:-1])
mtl_onehot = mtl_onehot[fixed_columns]

print(mtl_onehot.shape)
mtl_onehot.head()

(228, 81)


Unnamed: 0,Borough,Adult Boutique,Arts & Entertainment,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Bed & Breakfast,Beer Store,Bike Rental / Bike Share,Bookstore,Breakfast Spot,Brewery,Building,Burger Joint,Business Service,Cheese Shop,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,Comedy Club,Construction & Landscaping,Convenience Store,Cosmetics Shop,Creperie,Department Store,Dessert Shop,Diner,Discount Store,Dive Bar,Drugstore,Electronics Store,Fabric Shop,Farmers Market,Fish & Chips Shop,Flea Market,Flower Shop,Food,Frozen Yogurt Shop,Furniture / Home Store,Gas Station,Gastropub,Golf Course,Grocery Store,Gym,Gym Pool,Hardware Store,Historic Site,History Museum,Hockey Arena,Ice Cream Shop,Intersection,Irish Pub,Metro Station,Motorcycle Shop,Music Store,Office,Park,Performing Arts Venue,Pharmacy,Pizza Place,Playground,Pool,Pub,Record Shop,Restaurant,Sandwich Place,Shopping Mall,Sporting Goods Shop,Sports Bar,Taco Place,Tea Room,Thrift / Vintage Store,Toy / Game Store,Track,Train,Train Station,Wine Bar
0,Ahuntsic-Cartierville,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
1,Ahuntsic-Cartierville,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Ahuntsic-Cartierville,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
3,Ahuntsic-Cartierville,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Ahuntsic-Cartierville,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0


In [226]:
mtl_grouped = mtl_onehot.groupby(["Borough"]).mean().reset_index()
print(mtl_grouped.shape)
mtl_grouped

(19, 81)


Unnamed: 0,Borough,Adult Boutique,Arts & Entertainment,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Bed & Breakfast,Beer Store,Bike Rental / Bike Share,Bookstore,Breakfast Spot,Brewery,Building,Burger Joint,Business Service,Cheese Shop,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,Comedy Club,Construction & Landscaping,Convenience Store,Cosmetics Shop,Creperie,Department Store,Dessert Shop,Diner,Discount Store,Dive Bar,Drugstore,Electronics Store,Fabric Shop,Farmers Market,Fish & Chips Shop,Flea Market,Flower Shop,Food,Frozen Yogurt Shop,Furniture / Home Store,Gas Station,Gastropub,Golf Course,Grocery Store,Gym,Gym Pool,Hardware Store,Historic Site,History Museum,Hockey Arena,Ice Cream Shop,Intersection,Irish Pub,Metro Station,Motorcycle Shop,Music Store,Office,Park,Performing Arts Venue,Pharmacy,Pizza Place,Playground,Pool,Pub,Record Shop,Restaurant,Sandwich Place,Shopping Mall,Sporting Goods Shop,Sports Bar,Taco Place,Tea Room,Thrift / Vintage Store,Toy / Game Store,Track,Train,Train Station,Wine Bar
0,Ahuntsic-Cartierville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.375,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0
1,Anjou,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.192308,0.0,0.115385,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.038462,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.038462,0.0,0.0,0.0,0.0,0.153846,0.0,0.038462,0.038462,0.0,0.0,0.038462,0.0,0.038462,0.0,0.0,0.0,0.0
2,Côte-des-Neiges–Notre-Dame-de-Grâce,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.857143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,L'Île-Bizard–Sainte-Geneviève,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,LaSalle,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Lachine,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.153846,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.076923,0.0,0.0,0.076923,0.0,0.307692,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Le Plateau-Mont-Royal,0.0,0.0,0.0,0.0,0.022222,0.088889,0.0,0.066667,0.0,0.0,0.0,0.066667,0.022222,0.022222,0.0,0.0,0.0,0.022222,0.0,0.022222,0.177778,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.022222,0.177778,0.022222,0.0,0.0,0.022222,0.0,0.044444,0.0,0.0,0.0,0.0,0.0,0.022222
7,Le Sud-Ouest,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.045455,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.045455,0.045455,0.090909,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.318182,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.045455,0.0,0.0
8,Mercier–Hochelaga-Maisonneuve,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Montréal-Nord,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### 8.1 Create the new dataframe and display the top 8 venues for each neighborhood.

In [224]:
# sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [225]:
# create the new dataframe and display the top 8 venues for each neighborhood.
num_top_venues = 8

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Borough']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Borough'] = mtl_grouped['Borough']

for ind in np.arange(mtl_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(mtl_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue
0,Ahuntsic-Cartierville,Restaurant,Sandwich Place,Train Station,Park,Hockey Arena,Convenience Store,College Gym,Comedy Club
1,Anjou,Clothing Store,Restaurant,Coffee Shop,Electronics Store,Fabric Shop,Pharmacy,Pizza Place,Cosmetics Shop
2,Côte-des-Neiges–Notre-Dame-de-Grâce,Restaurant,Gym,Wine Bar,Dessert Shop,College Gym,Comedy Club,Construction & Landscaping,Convenience Store
3,L'Île-Bizard–Sainte-Geneviève,Golf Course,Wine Bar,Dessert Shop,College Gym,Comedy Club,Construction & Landscaping,Convenience Store,Cosmetics Shop
4,LaSalle,Park,Playground,History Museum,Historic Site,Restaurant,Wine Bar,Cocktail Bar,College Gym
5,Lachine,Restaurant,Coffee Shop,Farmers Market,Cheese Shop,Park,Ice Cream Shop,Pizza Place,Historic Site
6,Le Plateau-Mont-Royal,Coffee Shop,Restaurant,Bakery,Dessert Shop,Bar,Bookstore,Tea Room,Wine Bar
7,Le Sud-Ouest,Restaurant,Grocery Store,Park,Music Store,Department Store,Breakfast Spot,Building,Ice Cream Shop
8,Mercier–Hochelaga-Maisonneuve,Coffee Shop,Park,Restaurant,Department Store,College Gym,Comedy Club,Construction & Landscaping,Convenience Store
9,Montréal-Nord,Comedy Club,Restaurant,Wine Bar,Cocktail Bar,College Gym,Construction & Landscaping,Convenience Store,Cosmetics Shop


In [196]:
len(mtl_grouped[mtl_grouped["Restaurant"] > 0])

15

#### 8.2 Create a new DataFrame for Restaurant data only

In [258]:
mtl_restaurant = mtl_grouped[["Borough","Restaurant"]]
mtl_restaurant

Unnamed: 0,Borough,Restaurant
0,Ahuntsic-Cartierville,0.375
1,Anjou,0.153846
2,Côte-des-Neiges–Notre-Dame-de-Grâce,0.857143
3,L'Île-Bizard–Sainte-Geneviève,0.0
4,LaSalle,0.2
5,Lachine,0.307692
6,Le Plateau-Mont-Royal,0.177778
7,Le Sud-Ouest,0.318182
8,Mercier–Hochelaga-Maisonneuve,0.25
9,Montréal-Nord,0.5


### 9. Cluster Borough

In [259]:
# set number of clusters
kclusters = 4

mtl_clustering = mtl_restaurant.drop(["Borough"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(mtl_clustering)

# check cluster labels generated for each row in the dataframe
# kmeans.labels_[0:10]

#### 9.1 create a new dataframe that includes the cluster, latitude/longitude

In [260]:
# create a new dataframe that includes the cluster 
mtl_merged = mtl_restaurant.copy()

# add clustering labels
mtl_merged["Cluster Labels"] = kmeans.labels_
# merge mtl_grouped with mtl_data to add latitude/longitude for each borough
mtl_merged = mtl_merged.join(mtl_df.set_index("Borough"), on="Borough")
mtl_merged

Unnamed: 0,Borough,Restaurant,Cluster Labels,Latitude,Longitude
0,Ahuntsic-Cartierville,0.375,2,45.54003,-73.68185
1,Anjou,0.153846,0,45.60218,-73.5596
2,Côte-des-Neiges–Notre-Dame-de-Grâce,0.857143,1,45.49073,-73.62948
3,L'Île-Bizard–Sainte-Geneviève,0.0,3,45.49734,-73.89177
4,LaSalle,0.2,0,45.42965,-73.66283
5,Lachine,0.307692,2,45.43297,-73.68112
6,Le Plateau-Mont-Royal,0.177778,0,45.52919,-73.57862
7,Le Sud-Ouest,0.318182,2,45.47426,-73.58158
8,Mercier–Hochelaga-Maisonneuve,0.25,0,45.58211,-73.53066
9,Montréal-Nord,0.5,2,45.5949,-73.63948


#### 9.2 Sort dataframe according to "Cluster Labels"

In [261]:
# sort the results by Cluster Labels
print(mtl_merged.shape)
mtl_merged.sort_values(["Cluster Labels"], inplace=True)
mtl_merged

(19, 5)


Unnamed: 0,Borough,Restaurant,Cluster Labels,Latitude,Longitude
1,Anjou,0.153846,0,45.60218,-73.5596
16,Verdun,0.285714,0,45.45927,-73.57174
4,LaSalle,0.2,0,45.42965,-73.66283
6,Le Plateau-Mont-Royal,0.177778,0,45.52919,-73.57862
13,Rosemont–La Petite-Patrie,0.166667,0,45.55282,-73.57927
8,Mercier–Hochelaga-Maisonneuve,0.25,0,45.58211,-73.53066
17,Ville-Marie,0.227273,0,45.52974,-73.55361
11,Pierrefonds-Roxboro,0.25,0,45.49642,-73.84703
2,Côte-des-Neiges–Notre-Dame-de-Grâce,0.857143,1,45.49073,-73.62948
14,Saint-Laurent,0.75,1,45.50735,-73.68356


#### 9.3 Finally, visualize the resulting clusters

In [262]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(.15, 1.1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(mtl_merged['Latitude'], mtl_merged['Longitude'], mtl_merged['Borough'], mtl_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### 10. Examine Clusters
#### Clusters 0

In [263]:
mtl_merged.loc[mtl_merged['Cluster Labels'] == 0]

Unnamed: 0,Borough,Restaurant,Cluster Labels,Latitude,Longitude
1,Anjou,0.153846,0,45.60218,-73.5596
16,Verdun,0.285714,0,45.45927,-73.57174
4,LaSalle,0.2,0,45.42965,-73.66283
6,Le Plateau-Mont-Royal,0.177778,0,45.52919,-73.57862
13,Rosemont–La Petite-Patrie,0.166667,0,45.55282,-73.57927
8,Mercier–Hochelaga-Maisonneuve,0.25,0,45.58211,-73.53066
17,Ville-Marie,0.227273,0,45.52974,-73.55361
11,Pierrefonds-Roxboro,0.25,0,45.49642,-73.84703


#### Clusters 1

In [264]:
mtl_merged.loc[mtl_merged['Cluster Labels'] == 1]

Unnamed: 0,Borough,Restaurant,Cluster Labels,Latitude,Longitude
2,Côte-des-Neiges–Notre-Dame-de-Grâce,0.857143,1,45.49073,-73.62948
14,Saint-Laurent,0.75,1,45.50735,-73.68356


#### Clusters 2

In [265]:
mtl_merged.loc[mtl_merged['Cluster Labels'] == 2]

Unnamed: 0,Borough,Restaurant,Cluster Labels,Latitude,Longitude
0,Ahuntsic-Cartierville,0.375,2,45.54003,-73.68185
9,Montréal-Nord,0.5,2,45.5949,-73.63948
7,Le Sud-Ouest,0.318182,2,45.47426,-73.58158
5,Lachine,0.307692,2,45.43297,-73.68112
18,Villeray–Saint-Michel–Parc-Extension,0.4,2,45.56028,-73.60466


#### Clusters 3

In [266]:
mtl_merged.loc[mtl_merged['Cluster Labels'] == 3]

Unnamed: 0,Borough,Restaurant,Cluster Labels,Latitude,Longitude
12,Rivière-des-Prairies–Pointe-aux-Trembles,0.0,3,45.651756,-73.562551
15,Saint-Léonard,0.0,3,45.58624,-73.59601
3,L'Île-Bizard–Sainte-Geneviève,0.0,3,45.49734,-73.89177
10,Outremont,0.0,3,45.51413,-73.61097


### Conclusion
Most of the restaurant are concentrated in the central area of Montreal city, 
with the highest number in cluster 1 and moderate numbers are in cluster 2 and cluster 0. On the other hand, in cluster 3 has very low number to totally no restaurant in the borough. This represents that cluster 3 has a great opportunity and high potential to open new restaurant as there are no competition from existing restaurant. Meanwhile, restaurant in cluster 1 are likely suffering from intense competition due to oversupply and high concentration of restaurants. \
Therefore, this project recommends restaurant owner to capitalize on these findings to open new restaurant
in neighborhoods in cluster 3 with little to no competition. 