# Final project 

## Packages import

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


### Creation of the neighbourhood data frame & map

In [2]:
# load the data
with open('quartier_paris.json') as json_data:
    paris_data = json.load(json_data)

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=['Borough', 'Neighborhood', 'Latitude', 'Longitude'])

# loop over the items and append the dataframe
for data in paris_data:
    #numéro arrondissement
    borough = data['fields']['c_ar']

    #nom quartier
    neighborhood_name = data['fields']['l_qu']

    #latitude / longitude
    neighborhood_lat = data['fields']['geom_x_y'][0]
    neighborhood_lon = data['fields']['geom_x_y'][1]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,3,Enfants-Rouges,48.863887,2.363123
1,4,Notre-Dame,48.852896,2.352775
2,5,Jardin-des-Plantes,48.84194,2.356894
3,7,Saint-Thomas-d'Aquin,48.855263,2.325588
4,9,Faubourg-Montmartre,48.873935,2.343253


In [3]:
#neighborhoods.loc[neighborhoods["Neighborhood"] == 'Picpus'].drop()
ind = neighborhoods.loc[neighborhoods["Neighborhood"] == 'Picpus'].index

neighborhoods.drop(index = ind, inplace = True)

In [4]:
# getting the coordinates of paris
address = 'Paris, FR'

geolocator = Nominatim(user_agent="paris_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude


# create map of New York using latitude and longitude values
map_paris = folium.Map(location=[latitude, longitude], zoom_start=15)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_paris)  
    
map_paris


### Foursquare parametrization & request

Foursquare login

In [5]:
# foursquare
CLIENT_ID = '4BXUD5U1B2DD3RX1IFSRMZYCSAMGP5WUZINDB4GXI035XOGY' # your Foursquare ID
CLIENT_SECRET = 'ZIUT2PKYPRKSDJXSRIISVETCDK0OXAQBJIVOF3GHU3SAKOAC' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
radius = 1000 # radius in meters
LIMIT = 1000 # limit of items

Function to batch requests for all neighborhoods

In [6]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

Calling the function

In [7]:
paris_venues = getNearbyVenues(names=neighborhoods['Neighborhood'],
                               latitudes=neighborhoods['Latitude'],
                                   longitudes=neighborhoods['Longitude']
                                  )

Enfants-Rouges
Notre-Dame
Jardin-des-Plantes
Saint-Thomas-d'Aquin
Faubourg-Montmartre
Rochechouart
Porte-Saint-Denis
Porte-Saint-Martin
Sainte-Marguerite
Bercy
Combat
Ternes
Epinettes
Arts-et-Métiers
Sainte-Avoie
Monnaie
Notre-Dame-des-Champs
Ecole-Militaire
Saint-Georges
Maison-Blanche
Parc-de-Montsouris
Plaisance
Palais-Royal
Pont-de-Flandre
Muette
Chaillot
Mail
Archives
Arsenal
Val-de-Grâce
Champs-Elysées
Chaussée-d'Antin
Saint-Vincent-de-Paul
Hôpital-Saint-Louis
Petit-Montrouge
Necker
Saint-Germain-l'Auxerrois
Charonne
Porte-Dauphine
Plaine de Monceaux
Batignolles
Bonne-Nouvelle
Saint-Merri
Saint-Gervais
Saint-Victor
Sorbonne
Odéon
Folie-Méricourt
Bel-Air
Quinze-Vingts
Salpêtrière
Saint-Lambert
Grenelle
Saint-Fargeau
Père-Lachaise
Montparnasse
Javel
Grandes-Carrières
La Chapelle
Villette
Vivienne
Saint-Germain-des-Prés
Invalides
Gros-Caillou
Faubourg-du-Roule
Madeleine
Europe
Saint-Ambroise
Roquette
Gare
Croulebarbe
Halles
Place-Vendôme
Gaillon
Amérique
Belleville
Auteuil
Clignanco

### Duplicates removal

A lot of duplicated venues exist because the function operates within a specific radius. We don't want to miss venues so the radius is bigger than the distance between bouroughs. For each duplicate we will retain the venue that is the closest to the center of the borough
- 1st step : computing the distance between all venues and the center of their respective boroughs
- 2nd step : removing duplicated from the main data frame and storing them in another data frame
- 3rd step : comparing the distances of all duplicated pairs of venues and retaining the closest one

In [8]:
# 1st step : computing the distance between all venues and the center of their respective boroughs
paris_venues['Distance'] = 0

for index, row in paris_venues.iterrows():
    paris_venues.iloc[index, 7] = np.sqrt((row['Neighborhood Latitude']-row['Venue Latitude'])**2 + (row['Neighborhood Longitude']-row['Venue Longitude'])**2)

# 2nd step : removing duplicated from the main data frame and storing them in another data frame
dupl = paris_venues[paris_venues.duplicated(['Venue', 'Venue Latitude', 'Venue Longitude'])]

print("There are " + str(dupl.shape[0]) + "/" + str(paris_venues.shape[0]) + " lines which are duplicates")

ind = dupl.index

paris_venues.drop(index = ind, inplace = True)
paris_venues.reset_index(inplace = True, drop = True)
dupl.reset_index(inplace = True, drop = True)


There are 3449/7662 lines which are duplicates


--> Almost half of the venues are present 2 times in the list

In [9]:
#- 3rd step : comparing the distances of all duplicated pairs of venues and retaining the closest one

for index, row in dupl.iterrows():
    indexFind = paris_venues[(row['Venue'] == paris_venues['Venue']) & (row['Venue Latitude'] == paris_venues['Venue Latitude']) & (row['Venue Longitude'] == paris_venues['Venue Longitude'])].index
    
    if (paris_venues.iloc[indexFind, 7] > dupl.iloc[index, 7]).any():
        for i in range(0,7):
            paris_venues.iloc[indexFind, i ] = dupl.iloc[index, i]
    


In [10]:
paris_venues.shape

(4213, 8)

The number of remaining venues seem correct

### Data pre-processing for clustering

Creating dummies dataframe

In [11]:
paris_onehot = pd.get_dummies(paris_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
paris_onehot['Neighborhood'] = paris_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [paris_onehot.columns[-1]] + list(paris_onehot.columns[:-1])
paris_onehot = paris_onehot[fixed_columns]

paris_grouped = paris_onehot.groupby('Neighborhood').mean().reset_index()
paris_grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,African Restaurant,Alsatian Restaurant,American Restaurant,Antique Shop,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Auvergne Restaurant,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bar,Basketball Court,Basque Restaurant,Bed & Breakfast,Beer Bar,Beer Garden,Beer Store,Belgian Restaurant,Bike Rental / Bike Share,Bike Trail,Bistro,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Bowling Alley,Boxing Gym,Brasserie,Brazilian Restaurant,Breakfast Spot,Breton Restaurant,Brewery,Bridge,Bubble Tea Shop,Burger Joint,Bus Station,Bus Stop,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Cambodian Restaurant,Camera Store,Canal,Canal Lock,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Castle,Cemetery,Champagne Bar,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Circus,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Corsican Restaurant,Cosmetics Shop,Creperie,Cultural Center,Cupcake Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Doner Restaurant,Donut Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space,Exhibit,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Film Studio,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food & Drink Shop,Food Truck,Forest,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Government Building,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hawaiian Restaurant,Health Food Store,Heliport,Historic Site,History Museum,Hobby Shop,Hookah Bar,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indonesian Restaurant,Island,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Jiangxi Restaurant,Juice Bar,Karaoke Bar,Kebab Restaurant,Kids Store,Korean Restaurant,Lake,Latin American Restaurant,Leather Goods Store,Lebanese Restaurant,Library,Lingerie Store,Liquor Store,Lounge,Lyonese Bouchon,Market,Martial Arts Dojo,Massage Studio,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Motel,Movie Theater,Multiplex,Museum,Music Store,Music Venue,National Park,New American Restaurant,Nightclub,Noodle House,Office,Okonomiyaki Restaurant,Opera House,Optical Shop,Organic Grocery,Outdoor Sculpture,Outdoors & Recreation,Park,Pastry Shop,Pedestrian Plaza,Performing Arts Venue,Perfume Shop,Persian Restaurant,Peruvian Restaurant,Pet Café,Pet Store,Pharmacy,Pizza Place,Planetarium,Playground,Plaza,Pool,Pop-Up Shop,Portuguese Restaurant,Provençal Restaurant,Pub,Public Art,Racecourse,Ramen Restaurant,Record Shop,Recording Studio,Recreation Center,Resort,Restaurant,Road,Rock Club,Roof Deck,Rugby Stadium,Russian Restaurant,Salad Place,Sandwich Place,Savoyard Restaurant,Scandinavian Restaurant,Scenic Lookout,Science Museum,Sculpture Garden,Seafood Restaurant,Shanxi Restaurant,Shoe Store,Shopping Mall,Shopping Plaza,Skate Park,Smoke Shop,Soba Restaurant,Soccer Field,Soccer Stadium,South American Restaurant,Southern / Soul Food Restaurant,Southwestern French Restaurant,Souvenir Shop,Souvlaki Shop,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,Steakhouse,Street Art,Street Food Gathering,Supermarket,Sushi Restaurant,Szechuan Restaurant,Taco Place,Tailor Shop,Tapas Restaurant,Tattoo Parlor,Tea Room,Tech Startup,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Park Ride / Attraction,Thrift / Vintage Store,Tourist Information Center,Toy / Game Store,Track,Trail,Train Station,Tram Station,Trattoria/Osteria,Turkish Restaurant,Udon Restaurant,Used Bookstore,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Vietnamese Restaurant,Vineyard,Water Park,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,Amérique,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.014706,0.088235,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.014706,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.147059,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.014706,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.073529,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.014706,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.058824,0.0,0.0,0.044118,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.044118,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Archives,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.027778,0.0,0.0,0.0,0.027778,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.027778,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Arsenal,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.017857,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.017857,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.053571,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.178571,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.017857,0.035714,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.053571,0.0,0.0,0.053571,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Arts-et-Métiers,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.222222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Auteuil,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.132353,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.014706,0.014706,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.044118,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.014706,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Defining the most common venues per borough

In [12]:
# function most common venues
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]
num_top_venues = 10

### getting the sorted venues
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = paris_grouped['Neighborhood']

for ind in np.arange(paris_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(paris_grouped.iloc[ind, :], num_top_venues)

In [13]:
neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Amérique,French Restaurant,Bar,Hotel,Pizza Place,Supermarket,Theater,Plaza,Dance Studio,Japanese Restaurant,Sandwich Place
1,Archives,Art Gallery,French Restaurant,Clothing Store,Café,Hotel,Tea Room,Pizza Place,Provençal Restaurant,Coffee Shop,Burger Joint
2,Arsenal,French Restaurant,Plaza,Cocktail Bar,Pizza Place,Bakery,Tapas Restaurant,Pastry Shop,Thai Restaurant,Coffee Shop,Pedestrian Plaza
3,Arts-et-Métiers,Hotel,Coffee Shop,French Restaurant,Cocktail Bar,Breakfast Spot,Italian Restaurant,Juice Bar,Food Truck,Film Studio,Event Space
4,Auteuil,Tennis Court,French Restaurant,Supermarket,Plaza,Restaurant,Italian Restaurant,Sporting Goods Shop,Shopping Plaza,Roof Deck,Market


### Clustering

#### 1st step : computation
Let's cluster the venues data with a kMeans method.
Note : The number of clusters is incremented until clusters with a single borough are found

In [14]:
# set number of clusters

cond = True # initialisation of the stop condition
kclusters = 1 # initialisation of the clusters number

paris_grouped_clustering = paris_grouped.drop('Neighborhood', 1)
neighborhoods_venues_sorted['Cluster Labels'] = 0

# while the condition is satisfied the number of clusters is incremented and the computation is re-run
while cond == True:
    
    kclusters = kclusters + 1

    # run k-means clustering
    kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(paris_grouped_clustering)

    # add clustering labels
    kmeans.labels_[0:10] 
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
    
    # checks if all clusters contain at least 2 borough
    cond = (neighborhoods_venues_sorted['Cluster Labels'].value_counts() > 1).all()
    # if true the dataframe is updated and we go on for another cycle
    if cond == True:
        paris_merged = neighborhoods
        paris_merged = paris_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
    

#### 2nd step : Map rendering of the results

In [15]:
# referencing the focus point on Paris
address = 'Paris'

geolocator = Nominatim(user_agent="paris_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude


# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(paris_merged['Latitude'], paris_merged['Longitude'], paris_merged['Neighborhood'], paris_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

There are 3 clusters let's now look at the content

### 3rd step : Clusters interpretation 
We are going to look at all clusters one by one and take our decision on qualitative aspects

#### Cluster 1

In [16]:
paris_merged.loc[paris_merged['Cluster Labels'] == 0, paris_merged.columns[[1] + list(range(4, paris_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
9,Bercy,French Restaurant,Hotel,Bistro,Bookstore,Bakery,Italian Restaurant,Garden,Coffee Shop,Chinese Restaurant,Plaza,0
21,Parc-de-Montsouris,Hotel,Café,Bistro,French Restaurant,Plaza,Vietnamese Restaurant,Supermarket,Park,Sandwich Place,Chinese Restaurant,0
23,Palais-Royal,Japanese Restaurant,Hotel,French Restaurant,Café,Udon Restaurant,Theater,Italian Restaurant,Plaza,New American Restaurant,Furniture / Home Store,0
24,Pont-de-Flandre,French Restaurant,Hotel,Music Venue,Bistro,Japanese Restaurant,Bar,Tram Station,Pizza Place,Concert Hall,Restaurant,0
27,Mail,Cocktail Bar,Souvlaki Shop,Greek Restaurant,Pedestrian Plaza,Pastry Shop,Bakery,Donut Shop,Hotel,Peruvian Restaurant,Ice Cream Shop,0
28,Archives,Art Gallery,French Restaurant,Clothing Store,Café,Hotel,Tea Room,Pizza Place,Provençal Restaurant,Coffee Shop,Burger Joint,0
32,Chaussée-d'Antin,Hotel,Theater,Opera House,Pastry Shop,Electronics Store,Bistro,Men's Store,Cheese Shop,Tea Room,Scenic Lookout,0
37,Saint-Germain-l'Auxerrois,Art Museum,Exhibit,Plaza,Italian Restaurant,Pedestrian Plaza,Bridge,Historic Site,Hotel,Café,Garden,0
43,Saint-Merri,Plaza,Art Gallery,Cocktail Bar,Flower Shop,Burger Joint,Bookstore,Middle Eastern Restaurant,Gourmet Shop,Beer Store,Theater,0
54,Saint-Fargeau,Bakery,Bar,Hotel,Plaza,Supermarket,French Restaurant,Chinese Restaurant,Japanese Restaurant,Fast Food Restaurant,Market,0


A lot of hotels, french restaurants and bistros = touristic places

#### Cluster 2

In [17]:
paris_merged.loc[paris_merged['Cluster Labels'] == 1, paris_merged.columns[[1] + list(range(4, paris_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
1,Notre-Dame,French Restaurant,Ice Cream Shop,Park,Pub,Auvergne Restaurant,Pedestrian Plaza,Bistro,Mexican Restaurant,Scenic Lookout,Scandinavian Restaurant,1
4,Faubourg-Montmartre,French Restaurant,Pizza Place,Hotel,Candy Store,Wine Bar,Indie Movie Theater,Empanada Restaurant,Restaurant,Bed & Breakfast,Gourmet Shop,1
5,Rochechouart,French Restaurant,Bakery,Hotel,Music Venue,Record Shop,Vegetarian / Vegan Restaurant,Bar,Wine Bar,Italian Restaurant,Candy Store,1
8,Sainte-Marguerite,French Restaurant,Hotel,Thai Restaurant,Wine Bar,Café,Vietnamese Restaurant,Coffee Shop,Moroccan Restaurant,Burger Joint,Music Venue,1
11,Ternes,French Restaurant,Hotel,Italian Restaurant,Seafood Restaurant,Bakery,Breton Restaurant,Tea Room,Coffee Shop,Bistro,Gym / Fitness Center,1
12,Epinettes,French Restaurant,Hotel,Italian Restaurant,Japanese Restaurant,Pizza Place,Bistro,Restaurant,Supermarket,Bar,Bakery,1
16,Notre-Dame-des-Champs,French Restaurant,Hotel,Italian Restaurant,Japanese Restaurant,Creperie,Bakery,Auvergne Restaurant,Bistro,Gourmet Shop,Market,1
17,Ecole-Militaire,French Restaurant,Hotel,Plaza,Italian Restaurant,Historic Site,Café,Asian Restaurant,Bistro,Farmers Market,Bakery,1
18,Saint-Georges,Hotel,Italian Restaurant,French Restaurant,Bakery,Cocktail Bar,Pub,Sushi Restaurant,Café,Bar,Comedy Club,1
29,Arsenal,French Restaurant,Plaza,Cocktail Bar,Pizza Place,Bakery,Tapas Restaurant,Pastry Shop,Thai Restaurant,Coffee Shop,Pedestrian Plaza,1


A lot of bars, international food and coffee shops that's were the cool places sure are.

#### Cluster 3

In [18]:
paris_merged.loc[paris_merged['Cluster Labels'] == 2, paris_merged.columns[[1] + list(range(4, paris_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
0,Enfants-Rouges,Bistro,Coffee Shop,French Restaurant,Wine Bar,Sandwich Place,Cocktail Bar,Burger Joint,Clothing Store,Japanese Restaurant,Circus,2
6,Porte-Saint-Denis,Pizza Place,French Restaurant,Burger Joint,Hotel,Coffee Shop,Bakery,Seafood Restaurant,Restaurant,Cocktail Bar,Thai Restaurant,2
7,Porte-Saint-Martin,French Restaurant,Coffee Shop,Wine Bar,Pizza Place,Restaurant,Asian Restaurant,Comedy Club,Tapas Restaurant,Bookstore,Gaming Cafe,2
10,Combat,Bar,French Restaurant,Chinese Restaurant,Italian Restaurant,Thai Restaurant,Wine Bar,Park,Historic Site,Scenic Lookout,Diner,2
14,Sainte-Avoie,French Restaurant,Moroccan Restaurant,Vegetarian / Vegan Restaurant,Thai Restaurant,Chinese Restaurant,Bar,Bakery,Garden,Historic Site,Café,2
15,Monnaie,French Restaurant,Hotel,Restaurant,Chocolate Shop,Tea Room,Plaza,Ice Cream Shop,Cocktail Bar,Sandwich Place,Seafood Restaurant,2
30,Val-de-Grâce,French Restaurant,Bar,Creperie,Italian Restaurant,Café,Hotel,Asian Restaurant,Cupcake Shop,Falafel Restaurant,Tapas Restaurant,2
34,Hôpital-Saint-Louis,French Restaurant,Pizza Place,Bistro,Restaurant,Bar,Asian Restaurant,Breakfast Spot,Middle Eastern Restaurant,Thai Restaurant,Bakery,2
38,Charonne,French Restaurant,Bar,Hotel,Pizza Place,Japanese Restaurant,Supermarket,Sandwich Place,Bistro,Gym,Tram Station,2
41,Batignolles,French Restaurant,Bar,Wine Bar,Park,Mediterranean Restaurant,Coffee Shop,Pizza Place,Restaurant,BBQ Joint,Cheese Shop,2


Diversificated places, it looks like these are quite residential areas, so maybe not the target for our audience.

Given all of these information we choose cluster 2, it has a full potential for the success of our business.

### Cluster 4

In [19]:
paris_merged.loc[paris_merged['Cluster Labels'] == 3, paris_merged.columns[[1] + list(range(4, paris_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
3,Saint-Thomas-d'Aquin,French Restaurant,Hotel,Garden,Art Gallery,Bookstore,Bakery,Art Museum,Bistro,Chocolate Shop,Restaurant,3
13,Arts-et-Métiers,Hotel,Coffee Shop,French Restaurant,Cocktail Bar,Breakfast Spot,Italian Restaurant,Juice Bar,Food Truck,Film Studio,Event Space,3
22,Plaisance,Hotel,French Restaurant,Bakery,Café,Japanese Restaurant,Bistro,Grocery Store,Thai Restaurant,Bar,Seafood Restaurant,3
26,Chaillot,French Restaurant,Hotel,Plaza,Italian Restaurant,Art Museum,Bakery,Hotel Bar,Museum,Burger Joint,Yoga Studio,3
31,Champs-Elysées,French Restaurant,Boutique,Hotel,Art Gallery,Garden,Italian Restaurant,Plaza,Steakhouse,Historic Site,Japanese Restaurant,3
47,Odéon,Hotel,French Restaurant,Fountain,Italian Restaurant,Playground,Theater,Bistro,Toy / Game Store,Garden,Boutique,3
65,Faubourg-du-Roule,Hotel,French Restaurant,Cosmetics Shop,Jewelry Store,Pastry Shop,Tea Room,Café,Italian Restaurant,Japanese Restaurant,Clothing Store,3
66,Madeleine,Boutique,Hotel,French Restaurant,Men's Store,Hotel Bar,Women's Store,Clothing Store,Salad Place,Garden,Lingerie Store,3
73,Place-Vendôme,Hotel,French Restaurant,Gourmet Shop,Accessories Store,Plaza,Art Museum,Bookstore,Hotel Bar,Japanese Restaurant,Leather Goods Store,3
74,Gaillon,Hotel,French Restaurant,Japanese Restaurant,Plaza,Bookstore,Pastry Shop,Jewelry Store,Tapas Restaurant,Men's Store,Boutique,3


In [20]:
# selection of the cluster 2
chosenCluster = 1 
paris_cluster = paris_merged[paris_merged['Cluster Labels'] == 1]

### Getting the trendiest bouroughs of the cluster

As mentioned in the report, we will run a new 4square request batch on all selected borough, looking for trendy places.
 * We will retain the 50% boroughs where the trendy places concentrate.
 * Because we want to evaluate which boroughs are the trendiest at night, the request was performed at 8 pm 

In [21]:
# batch function to trending places
def getTrendingVenues(names, latitudes, longitudes):
    
    nbVenues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/trending?client_id={}&client_secret={}&ll={},{}&v={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            lat, 
            lng, 
            VERSION)
            
        # make the GET request
        results = requests.get(url).json()
        
        nbVenues = len(results['response']['venues'])
        nbVenues_list.append(nbVenues)
    
    return(nbVenues_list)

In [22]:
# Calling the batch request function
cluster_trending_venues = getTrendingVenues(names=paris_cluster['Neighborhood'],
                                           latitudes=paris_cluster['Latitude'],
                                           longitudes=paris_cluster['Longitude']
                                              )

Notre-Dame
Faubourg-Montmartre
Rochechouart
Sainte-Marguerite
Ternes
Epinettes
Notre-Dame-des-Champs
Ecole-Militaire
Saint-Georges
Arsenal
Saint-Vincent-de-Paul
Petit-Montrouge
Necker
Porte-Dauphine
Plaine de Monceaux
Saint-Victor
Quinze-Vingts
Salpêtrière
Saint-Lambert
Grenelle
Montparnasse
Javel
Grandes-Carrières
Vivienne
Saint-Germain-des-Prés
Invalides
Gros-Caillou
Europe
Croulebarbe


We here compute the median value of trending places

In [23]:
from numpy import median

median_value = median(cluster_trending_venues)
median_value

0.0

It's not a lot then ... we will keep borough which have at least 1 trending place

In [24]:
paris_cluster.insert(2, 'Trending', cluster_trending_venues, allow_duplicates = False)
paris_cluster_trending = paris_cluster.loc[paris_cluster['Trending'] > median_value]

We now have the list of our trendiest borough among our selected data cluster

In [25]:
paris_cluster_trending

Unnamed: 0,Borough,Neighborhood,Trending,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
1,4,Notre-Dame,2,48.852896,2.352775,French Restaurant,Ice Cream Shop,Park,Pub,Auvergne Restaurant,Pedestrian Plaza,Bistro,Mexican Restaurant,Scenic Lookout,Scandinavian Restaurant,1
4,9,Faubourg-Montmartre,1,48.873935,2.343253,French Restaurant,Pizza Place,Hotel,Candy Store,Wine Bar,Indie Movie Theater,Empanada Restaurant,Restaurant,Bed & Breakfast,Gourmet Shop,1
8,11,Sainte-Marguerite,1,48.852097,2.388765,French Restaurant,Hotel,Thai Restaurant,Wine Bar,Café,Vietnamese Restaurant,Coffee Shop,Moroccan Restaurant,Burger Joint,Music Venue,1
16,6,Notre-Dame-des-Champs,1,48.846428,2.327357,French Restaurant,Hotel,Italian Restaurant,Japanese Restaurant,Creperie,Bakery,Auvergne Restaurant,Bistro,Gourmet Shop,Market,1
29,4,Arsenal,1,48.851585,2.364768,French Restaurant,Plaza,Cocktail Bar,Pizza Place,Bakery,Tapas Restaurant,Pastry Shop,Thai Restaurant,Coffee Shop,Pedestrian Plaza,1
45,5,Saint-Victor,2,48.847664,2.354093,French Restaurant,Hotel,Japanese Restaurant,Wine Bar,Historic Site,Indie Movie Theater,Café,Bistro,Sushi Restaurant,Miscellaneous Shop,1
50,12,Quinze-Vingts,1,48.846916,2.374402,French Restaurant,Bar,Italian Restaurant,Hotel,Cocktail Bar,Wine Bar,Farmers Market,Beer Garden,Furniture / Home Store,Butcher,1
51,13,Salpêtrière,1,48.837406,2.363319,French Restaurant,Nightclub,Hotel,Sushi Restaurant,Chinese Restaurant,Italian Restaurant,Noodle House,Bistro,Garden Center,Cajun / Creole Restaurant,1
61,2,Vivienne,1,48.8691,2.339461,French Restaurant,Historic Site,Plaza,Cocktail Bar,Pizza Place,Pedestrian Plaza,Nightclub,Music Store,Sushi Restaurant,Gourmet Shop,1
62,6,Saint-Germain-des-Prés,1,48.855289,2.333657,French Restaurant,Hotel,Clothing Store,Plaza,Italian Restaurant,Café,Wine Bar,Japanese Restaurant,Bistro,Chinese Restaurant,1


We will now compute the occurrence of both labels "Italian restaurant" and "Pizza Place" and will keep the one borough with the least of these items

In [26]:
labels = ['Italian Restaurant', 'Pizza Place']
list_prob = []

for ind, row in paris_cluster_trending.iterrows():
    prob = paris_grouped.loc[paris_grouped['Neighborhood'] == row['Neighborhood'], labels].values
    list_prob.append(sum(prob[0]))

paris_cluster_trending.insert(3, 'Italian', list_prob, allow_duplicates = False)


In [27]:
minProb = min(list_prob) # minimal occurence value
# selection of the final borough
chosen_Borough = paris_cluster_trending.loc[ paris_cluster_trending['Italian'] == minProb]
chosen_Borough

Unnamed: 0,Borough,Neighborhood,Trending,Italian,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
1,4,Notre-Dame,2,0.0,48.852896,2.352775,French Restaurant,Ice Cream Shop,Park,Pub,Auvergne Restaurant,Pedestrian Plaza,Bistro,Mexican Restaurant,Scenic Lookout,Scandinavian Restaurant,1


The borough "Bonne-Nouvelle" will see its first Italian restaurant opening in a few months :)