<h1>Paris' Hotels, Venues and Activities</h1>
<h4>By: Oscar F. D'Cuire</h4>

<b> Step 1: </b> Importing libraries

In [1]:
!conda install -c conda-forge folium=0.5.0
import folium
import pandas as pd

import numpy as np
import json
import requests

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2019.9.11          |           py36_0         147 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    altair-3.2.0               |           py36_0         770 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    ca-certificates-2019.9.11  |       hecc5488_0         144 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.3 MB

The following NEW packages will be 

<strong>Step 2:</strong> Loading Paris Boroughs Data in Data Frame

In [2]:
# Download the arrondissement records once. The previous version re-requested
# the same URL on every loop iteration just to pick out a single record.
boroughs_url = 'https://opendata.paris.fr/api/records/1.0/search/?dataset=arrondissements&rows=21&start=0&sort=-c_ar'
boroughs_response = requests.get(boroughs_url)
boroughs_response.raise_for_status()  # fail loudly on an HTTP error instead of on a missing key

# One tuple per borough, taken from the first 20 records of the response.
Boroughs = []
for record in boroughs_response.json()["records"][:20]:
    fields = record['fields']
    Boroughs.append((
        fields['objectid'],
        fields['l_ar'],
        fields['l_aroff'],
        fields['c_arinsee'],
        fields['geom_x_y'][0],
        fields['geom_x_y'][1],
        fields['geom']['coordinates']
    ))

dfBorough = pd.DataFrame(Boroughs,
                         columns=['Borough Id',
                                  'Short name',
                                  'Long name',
                                  'INSEE',
                                  'Latitude',
                                  'Longitude',
                                  'Polygon'])
dfBorough.head()


Unnamed: 0,Borough Id,Short name,Long name,INSEE,Latitude,Longitude,Polygon
0,1,1er Ardt,Louvre,75101,48.862563,2.336443,"[[[2.328007329038849, 48.86991742140715], [2.3..."
1,2,2ème Ardt,Bourse,75102,48.868279,2.342803,"[[[2.351518483670821, 48.8644258050741], [2.35..."
2,3,3ème Ardt,Temple,75103,48.862872,2.360001,"[[[2.363828096062925, 48.86750443060333], [2.3..."
3,4,4ème Ardt,Hôtel-de-Ville,75104,48.854341,2.35763,"[[[2.368512371393433, 48.85573412813671], [2.3..."
4,5,5ème Ardt,Panthéon,75105,48.844443,2.350715,"[[[2.364433128355796, 48.84614003688701], [2.3..."


<strong>2.1</strong> Loading Borough Postal Codes

In [3]:
# Download the INSEE -> postal code table once. The previous version
# re-requested the same URL on every loop iteration.
postcodes_url = 'https://public.opendatasoft.com/api/records/1.0/search/?dataset=correspondance-code-insee-code-postal&rows=20&facet=insee_com&facet=nom_dept&facet=nom_region&facet=statut&refine.nom_dept=PARIS'
postcodes_response = requests.get(postcodes_url)
postcodes_response.raise_for_status()  # fail loudly on an HTTP error instead of on a missing key

# One (INSEE code, postal code) tuple per record, first 20 records only.
PostalCodes = []
for record in postcodes_response.json()["records"][:20]:
    fields = record['fields']
    PostalCodes.append((
        fields['insee_com'],
        fields['postal_code']
    ))

dfPostCode = pd.DataFrame(PostalCodes, columns=['INSEE', 'Postal Code'])

# reset_index() (without drop=True) is kept on purpose: it preserves the
# pre-sort position as an 'index' column, exactly as before.
dfPostCode = dfPostCode.sort_values(by=['INSEE']).reset_index()

dfPostCode.head()


Unnamed: 0,index,INSEE,Postal Code
0,8,75101,75001
1,9,75102,75002
2,5,75103,75003
3,7,75104,75004
4,13,75105,75005


<strong>2.2</strong> Merging Arrondissements with Postal Codes 

In [4]:
# pd.concat(axis=1) pastes the frames together by row label, so both tables
# must list the boroughs in the same order. Sort each by INSEE code and
# renumber the rows so the pairing no longer depends on the order in which
# the two APIs happened to return their records.
boroughs_by_insee = dfBorough.sort_values(by='INSEE').reset_index(drop=True)
postcodes_by_insee = dfPostCode.sort_values(by='INSEE').reset_index(drop=True)

# Refuse to build a table whose rows pair a borough with another borough's postal code.
if boroughs_by_insee['INSEE'].astype(str).tolist() != postcodes_by_insee['INSEE'].astype(str).tolist():
    raise ValueError('dfBorough and dfPostCode do not contain the same INSEE codes')

result = pd.concat([boroughs_by_insee, postcodes_by_insee], axis=1)
result['Postal Code'] = result['Postal Code'].astype(str).astype(int)
result

Unnamed: 0,Borough Id,Short name,Long name,INSEE,Latitude,Longitude,Polygon,index,INSEE.1,Postal Code
0,1,1er Ardt,Louvre,75101,48.862563,2.336443,"[[[2.328007329038849, 48.86991742140715], [2.3...",8,75101,75001
1,2,2ème Ardt,Bourse,75102,48.868279,2.342803,"[[[2.351518483670821, 48.8644258050741], [2.35...",9,75102,75002
2,3,3ème Ardt,Temple,75103,48.862872,2.360001,"[[[2.363828096062925, 48.86750443060333], [2.3...",5,75103,75003
3,4,4ème Ardt,Hôtel-de-Ville,75104,48.854341,2.35763,"[[[2.368512371393433, 48.85573412813671], [2.3...",7,75104,75004
4,5,5ème Ardt,Panthéon,75105,48.844443,2.350715,"[[[2.364433128355796, 48.84614003688701], [2.3...",13,75105,75005
5,6,6ème Ardt,Luxembourg,75106,48.84913,2.332898,"[[[2.344592677496354, 48.85404922924987], [2.3...",6,75106,75006
6,7,7ème Ardt,Palais-Bourbon,75107,48.856174,2.312188,"[[[2.320902853673182, 48.86305739061689], [2.3...",15,75107,75007
7,8,8ème Ardt,Élysée,75108,48.872721,2.312554,"[[[2.325836254471964, 48.86956219698904], [2.3...",2,75108,75008
8,9,9ème Ardt,Opéra,75109,48.877164,2.337458,"[[[2.339776543083362, 48.88202934722508], [2.3...",12,75109,75009
9,10,10ème Ardt,Entrepôt,75110,48.87613,2.360728,"[[[2.364685889119478, 48.884369043407936], [2....",18,75110,75010


<strong>Step 3:</strong> Loading Paris Hotels Data in Data Frame

In [5]:
def _fetch_hotels(name_filter):
    """Return Tour-pedia accommodations in Paris whose name matches name_filter.

    Parameters
    ----------
    name_filter : str
        Value sent as the ``name`` query parameter of the getPlaces endpoint.

    Returns
    -------
    pandas.DataFrame
        The columns id, name, address, lat and lng of the returned places.

    Raises
    ------
    requests.HTTPError
        If the service answers with an HTTP error status.
    """
    url = 'http://tour-pedia.org/api/getPlaces?category=accommodation&location=Paris&name={}'.format(name_filter)
    response = requests.get(url)
    response.raise_for_status()
    places = pd.DataFrame.from_dict(json.loads(response.content.decode()), orient='columns')
    return places[['id', 'name', 'address', 'lat', 'lng']]


# The same query is issued with both spellings, as in the original cell.
# accommodations with the word "hotel"
dfHotelsA = _fetch_hotels('hotel')
# accommodations with the word "Hotel"
dfHotelsB = _fetch_hotels('Hotel')

# Merge both result sets into one frame. A place returned by both queries is
# kept once (matched on its id), and the rows are renumbered so that index
# labels are unique.
dfHotels = (
    pd.concat([dfHotelsA, dfHotelsB])
    .drop_duplicates(subset='id')
    .reset_index(drop=True)
)

dfHotels.head()

Unnamed: 0,id,name,address,lat,lng
0,230983,Adagio Aparthotel Paris XV,"Paris, France 20, rue d'Oradour-sur-Glane...",48.831309,2.282343
1,85547,Adagio Bercy Aparthotel,1-7 Cour du Minervois,48.831971,2.38686
2,85972,Adagio City Aparthotel Paris Bercy,1-7 Cour Du Minervois,48.829365,2.426541
3,231401,Amhotel,"96 Avenue de Choisy, Paris, France",48.825015,2.360983
4,231533,Goldhotel,"Paris, France 67, Boulevard De Clichy, 09...",48.883858,2.331587


In [6]:
# Overall look at the hotels frame: displays its (rows, columns) shape.
dfHotels.shape


(758, 5)

<strong>Step 4:</strong> Loading Paris Events Data in Data Frame

In [7]:
from numpy import loadtxt
from urllib.request import urlopen

url = 'https://opendata.paris.fr/explore/dataset/que-faire-a-paris-/download/?format=csv&timezone=America/Guatemala&use_labels_for_header=true'
data = pd.read_csv(url,sep=';')

# Columns of the source CSV that the notebook keeps, in display order.
source_columns = ['ID', 'Titre', 'Catégorie', 'Mots clés', 'Date de début', 'Date de fin',
                  'Adresse du lieu', 'Code postal', 'Coordonnées géographiques', 'Type de prix']

# Take an explicit copy: adding columns to a bare slice of `data` is what
# triggered pandas' SettingWithCopyWarning in the previous version.
dfEvents = data[source_columns].copy()
dfEvents.columns = ['Activity Id',
                    'Title',
                    'Category',
                    'Key Words',
                    'Start',
                    'End',
                    'Adress',
                    'Postal code',
                    'Coord',
                    'Price type']

# 'Coord' holds "lat,lng" text; split it into two columns (values stay strings).
dfEvents[['lat','lng']] = dfEvents['Coord'].str.split(",", expand=True)
dfEvents.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


Unnamed: 0,Activity Id,Title,Category,Key Words,Start,End,Adress,Postal code,Coord,Price type,lat,lng
0,88773,Body Of Light • Dead Mascot • Boy Scoot Hopkin...,Concerts -> Rock,Musique,2019-11-05T13:00:00-06:00,2019-11-05T16:30:00-06:00,9 Rue Biscornet,75012,"48.8503272,2.3697085",gratuit,48.8503272,2.3697085
1,90673,KIM GORDON & DIMITRI CHAMBLAS,Spectacles -> Autre spectacle,,2019-10-20T09:00:00-06:00,2019-10-20T11:00:00-06:00,34 avenue de New-York,75016,"48.8631449861,2.29461939773",gratuit,48.8631449861,2.29461939773
2,83552,C'est vous qui faites l'atelier !,Animations -> Atelier / Cours,Bibliothèques;En famille,2019-09-28T08:00:00-06:00,2019-12-14T08:30:00-06:00,10 passage de la Canopée,75001,"48.8625776973,2.34695809946",gratuit,48.8625776973,2.34695809946
3,90561,La Petite Histoire... / ... de Ludwig van Beet...,Animations -> Conférence / Débat,Musique,2020-02-26T03:30:00-06:00,2020-02-26T05:30:00-06:00,221 Avenue Jean Jaurès,75019,"48.889174,2.3936148",payant,48.889174,2.3936148
4,90550,La Petite Histoire... / ... du style classique,Animations -> Conférence / Débat,,2020-02-05T03:30:00-06:00,2020-02-05T05:30:00-06:00,221 Avenue Jean Jaurès,75019,"48.889174,2.3936148",payant,48.889174,2.3936148


In [8]:
# Displays the (rows, columns) shape of the events frame.
dfEvents.shape

(4062, 12)

<strong>Step 5:</strong> Loading Paris Foursquare Venues Data in Data Frame

In [9]:
# Non-null value count for each column of the events frame.
dfEvents.count()

Activity Id    4062
Title          4062
Category       4062
Key Words      3410
Start          4062
End            4062
Adress         4062
Postal code    4062
Coord          3789
Price type     4062
lat            3789
lng            3789
dtype: int64

In [10]:
# Number of Paris events recorded under each category.
events_by_category = dfEvents.groupby('Category')
events_by_category[['Activity Id']].count()

Unnamed: 0_level_0,Activity Id
Category,Unnamed: 1_level_1
Animations -> Atelier / Cours,369
Animations -> Autre animation,53
Animations -> Balade,16
Animations -> Conférence / Débat,403
Animations -> Lecture / Rencontre,149
Animations -> Loisirs / Jeux,70
Animations -> Stage,90
Animations -> Visite guidée,92
Concerts -> Autre concert,168
Concerts -> Chanson française,30


In [11]:
import os
import warnings

# Foursquare credentials are read from the environment so that they are never
# stored in the notebook. Keys that were previously committed here should be
# treated as compromised and regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20190927'  # value sent as the `v` query parameter
LIMIT = 100  # value sent as the `limit` query parameter

if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        'FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
        'Foursquare requests will not be authenticated.'
    )

def getNearbyVenues(names, latitudes, longitudes, radius=500, verbose=True, timeout=30):
    """Collect the venues Foursquare returns around each given location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences; one request is made per (name, lat, lng) triple.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.
    verbose : bool, default True
        When True, print each location before it is queried.
    timeout : float, default 30
        Seconds passed to ``requests.get`` as the request timeout.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns Hotel, Hotel Latitude,
        Hotel Longitude, Venue, Venue Latitude, Venue Longitude, Address and
        Venue Category. The frame is empty (but still has these columns) when
        nothing was returned.
    """
    column_names = ['Hotel',
                    'Hotel Latitude',
                    'Hotel Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Address',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        if verbose:
            print(name," lat ", lat, "lon ", lng)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; a timeout keeps one stalled call from hanging the whole loop
        parcial = requests.get(url, timeout=timeout).json().get("response", {})

        # Responses without a 'groups' entry (or with an empty one) carry no venues.
        groups = parcial.get('groups')
        if not groups:
            continue

        # keep only the relevant information for each nearby venue
        for item in groups[0]['items']:
            venue = item['venue']
            location = venue['location']
            # A venue may come without any category; record None instead of raising.
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                location['lat'],
                location['lng'],
                location.get('formattedAddress', []),
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor keeps the schema even when
    # no rows were collected; assigning them afterwards raised on an empty frame.
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)

    return(nearby_venues)

In [12]:
# One Foursquare lookup per hotel. While experimenting, slice each column
# (for example with .iloc[0:10]) to limit the number of requests.
hotel_lookup = dict(
    names=dfHotels['name'],
    latitudes=dfHotels['lat'],
    longitudes=dfHotels['lng'],
)
paris_venues = getNearbyVenues(**hotel_lookup)

paris_venues.head()

Adagio Aparthotel Paris XV  lat  48.831309407264 lon  2.2823426127434
Adagio Bercy Aparthotel  lat  48.831971 lon  2.38686
Adagio City Aparthotel Paris Bercy  lat  48.829365 lon  2.426541
Amhotel  lat  48.825015 lon  2.360983
Goldhotel  lat  48.883857719992 lon  2.3315866291523
Green hotels Confort Paris 13  lat  48.827355983946 lon  2.3719847202301
Interhotel Ajiel  lat  48.835735 lon  2.301974
L'hoteliere Garibaldi  lat  48.857967 lon  2.388979
Littlehotel  lat  48.870533619904 lon  2.3603211343288
Newhotel Candide  lat  48.858351706531 lon  2.3817425966263
Newhotel Lafayette  lat  48.876225671496 lon  2.3423040658236
Newhotel Opéra  lat  48.879702064729 lon  2.3286931961775
Newhotel Roblin  lat  48.871333263026 lon  2.323926910758
Newhotel Saint Lazare  lat  48.880176119677 lon  2.3269739001989
Quality hotel  lat  48.823603 lon  2.324225
Residhotel Imperial Rennequin  lat  48.88207934994 lon  2.2976902127266
Residhotel Paris Cadet  lat  48.880108 lon  2.337289
Starhotels Castille  l

Crystal Hotel  lat  48.854169224648 lon  2.3327735066414
Daumesnil Hotel  lat  48.839048 lon  2.392555
De Seze Hotel  lat  48.87053 lon  2.326324
Des Hotels et des Iles  lat  48.8713 lon  2.306907
Des Trois Couronnes Hotel Paris  lat  48.877696 lon  2.293423
Dolbeau Hotel Management  lat  48.854569 lon  2.30472
Dupleix Hotel  lat  48.850418 lon  2.291988
Economique Hotel Nice Arenas  lat  48.854529 lon  2.415427
Edouard VII Hotel  lat  48.868291 lon  2.333096
Elysees Niel Hotel  lat  48.879628420441 lon  2.2961305081844
Emeraude Hotel Louvre Montana  lat  48.864449209533 lon  2.3317375034094
Emeraude Hotel Madeleine Plaza  lat  48.871016 lon  2.324825
Est Hotel  lat  48.872940008662 lon  2.3586273193359
Etap Hotel Porte D' Orleans  lat  48.819498 lon  2.326475
Etoile Park Hotel  lat  48.876117377926 lon  2.2948850690227
Europe Hotel Paris Eiffel  lat  48.849568090961 lon  2.2969660162926
Européenne d'Hotellerie  lat  48.886088 lon  2.390828
Eurostars Panorama Hotel  lat  48.87622082034

Hotel Delarc  lat  48.870475399387 lon  2.3077592253685
Hotel Des Archives Paris  lat  48.863566 lon  2.360844
Hotel Des Arenes  lat  48.845091860517 lon  2.3523194342852
Hotel Des Arts  lat  48.88621 lon  2.335241
Hotel Des Arts Montmartre  lat  48.8776204 lon  2.3447572
Hotel Des Carmes  lat  48.849191 lon  2.347671
Hotel Des Champs Elysees  lat  48.872294 lon  2.309002
Hotel Des Deux Continents  lat  48.855253262203 lon  2.3344787210226
Hotel Des Saints Peres  lat  48.853330921474 lon  2.3297707736492
Hotel Des Tuileries  lat  48.866080436933 lon  2.3315289616585
Hotel Design Sorbonne  lat  48.848105328989 lon  2.3425367474556
Hotel Devillas  lat  48.839974417666 lon  2.3614047467709
Hotel Du Cadran  lat  48.856525663802 lon  2.3059715330601
Hotel Du Casino  lat  48.879942 lon  2.329001
Hotel Du Commerce  lat  48.848907 lon  2.348515
Hotel Du Globe  lat  48.872078 lon  2.371863
Hotel Du Marche  lat  48.870951 lon  2.357946
Hotel Du Ministere  lat  48.871402 lon  2.319189
Hotel Du Mé

Hotel Napa  lat  48.829289445005 lon  2.461118574871
Hotel New Parnasse  lat  48.839495127095 lon  2.3228627443314
Hotel Notre-Dame Saint Michel  lat  48.853109 lon  2.346504
Hotel Nouvelle France  lat  48.855713 lon  2.374982
Hotel O  lat  48.865103 lon  2.341862
Hotel Oasis Montparnasse  lat  48.8337 lon  2.317864
Hotel Oberkampf  lat  48.866709 lon  2.381346
Hotel Observatoire Luxembourg  lat  48.843676187011 lon  2.3390471935272
Hotel Oceanic  lat  48.875008 lon  2.322414
Hotel Opéra LaFayette Paris  lat  48.876317 lon  2.346032
Hotel Original  lat  48.854815 lon  2.368872
Hotel Oscar  lat  48.869383507118 lon  2.3047301172119
Hotel Pacific  lat  48.872562474385 lon  2.3553597182035
Hotel Paix Republique  lat  48.868207490921 lon  2.3617476224354
Hotel Paradis  lat  48.873807532822 lon  2.3499020934105
Hotel Parc Saint Severin  lat  48.851900037893 lon  2.3447663336992
Hotel Paris Bercy  lat  48.836484769501 lon  2.3939669876745
Hotel Paris Bercy Rive Gauche  lat  48.82352674579 lo

Hotel des Ecrivains (ex Hotel des Arts)  lat  48.8333 lon  2.356061
Hotel des Maraîchers  lat  48.853435 lon  2.40641
Hotel des Olympiades  lat  48.893665 lon  2.347118
Hotel des Vosges  lat  48.86777797054 lon  2.3829084633689
Hotel du Bresil  lat  48.846499 lon  2.341387
Hotel du Centre, Paris  lat  48.847045 lon  2.377996
Hotel du Chemin Vert  lat  48.860954551539 lon  2.3806321620941
Hotel du Collectionneur Arc de Triomphe  lat  48.876983327957 lon  2.3069787025452
Hotel du College de France  lat  48.849897 lon  2.346098
Hotel du Delta  lat  48.882646 lon  2.346643
Hotel du Dragon  lat  48.852803224488 lon  2.3303139209747
Hotel du Globe  lat  48.851697 lon  2.337485
Hotel du Lys  lat  48.852239341461 lon  2.342165261507
Hotel du Mont Blanc  lat  48.853151346285 lon  2.3451425135136
Hotel du Mont-Louis  lat  48.856514 lon  2.383419
Hotel du Nord et de l'Est  lat  48.865879293324 lon  2.3675704747438
Hotel du Petit Moulin  lat  48.861622005072 lon  2.3620478063822
Hotel du Square  l

Seven Hotel Paris  lat  48.83900568107 lon  2.3457272350788
Shangri-La Hotel, Paris  lat  48.863970153203 lon  2.2932967543602
Sibour Hotel  lat  48.875131 lon  2.358393
Sister Hotels  lat  48.876979 lon  2.294944
Soc Exploit Residences Hotelieres Rail  lat  48.838778 lon  2.316299
Soc Hotel Du Parc Fleuri  lat  48.825478 lon  2.341499
Soc Hotel de la Renaissance  lat  48.847842 lon  2.387534
Soc Hotel la Boetie  lat  48.872165 lon  2.309317
Soc Hotel la Cite Rougemont  lat  48.871746 lon  2.346012
Soc Hoteliere Des Abbesses  lat  48.886407 lon  2.312564
Soc Parmentier Hotel  lat  48.862254 lon  2.377409
Societe Hoteliere Anya  lat  48.856757 lon  2.381968
Societe Hoteliere Bibliotheque  lat  48.829175 lon  2.37445
Societe Hoteliere Duboscq  lat  48.854284 lon  2.30731
Societe Hoteliere Et Maritime  lat  48.871716 lon  2.327257
Societe Hoteliere de la Porte D'asnier  lat  48.893068 lon  2.302472
Société Hotelière Ouest Paris  lat  48.879332 lon  2.356741
Société de Gestion Hotelière  l

Unnamed: 0,Hotel,Hotel Latitude,Hotel Longitude,Venue,Venue Latitude,Venue Longitude,Address,Venue Category
0,Adagio Aparthotel Paris XV,48.831309,2.282343,Sushiken,48.829512,2.283538,"[12 rue Ernest Renan, 92130 Issy-les-Moulineau...",Japanese Restaurant
1,Adagio Aparthotel Paris XV,48.831309,2.282343,OKKO HOTELS Paris Porte de Versailles,48.832717,2.278323,"[2 rue du Colonel Avia, 75015 Paris, France]",Hotel
2,Adagio Aparthotel Paris XV,48.831309,2.282343,Le Jardin Suspendu,48.832216,2.282332,"[40 rue d'Oradour-sur-Glane, 75015 Paris, France]",Roof Deck
3,Adagio Aparthotel Paris XV,48.831309,2.282343,Big Fernand Issy,48.8286,2.28186,"[30 rue Ernest Renan (Rue Michelet), 92130 Iss...",Burger Joint
4,Adagio Aparthotel Paris XV,48.831309,2.282343,Hôtel Océania Paris Porte de Versailles,48.83045,2.284403,"[52 rue d'Oradour-sur-Glâne, 75015 Paris, France]",Hotel


Obtaining Postal Codes for all Venues

In [13]:
def _postal_code_from_address(parts):
    """Return the 5-digit postal code found in an address list, or None.

    `parts` is one value of the 'Address' column. In the recorded output these
    are lists such as [street, '75015 Paris', 'France'].
    """
    if not isinstance(parts, (list, tuple)):
        return None
    # Scan from the end so that a street line is only considered last.
    for part in reversed(parts):
        if isinstance(part, str) and len(part) >= 5 and part[:5].isdigit():
            return part[:5]
    return None


# Work on an explicit copy so that paris_venues is left untouched.
paris_v2 = paris_venues.copy()

# The previous version expanded every address into exactly three columns,
# which fails or mislabels the parts whenever an address has a different
# number of elements. Take the first element as the street line and look the
# postal code up explicitly instead.
paris_v2['phys_add'] = paris_v2['Address'].str[0]
paris_v2['postal_code'] = paris_v2['Address'].map(_postal_code_from_address)
paris_v2 = paris_v2.drop(columns=['Address'])
paris_v2

Unnamed: 0,Hotel,Hotel Latitude,Hotel Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,phys_add,postal_code
0,Adagio Aparthotel Paris XV,48.831309,2.282343,Sushiken,48.829512,2.283538,Japanese Restaurant,12 rue Ernest Renan,92130
1,Adagio Aparthotel Paris XV,48.831309,2.282343,OKKO HOTELS Paris Porte de Versailles,48.832717,2.278323,Hotel,2 rue du Colonel Avia,75015
2,Adagio Aparthotel Paris XV,48.831309,2.282343,Le Jardin Suspendu,48.832216,2.282332,Roof Deck,40 rue d'Oradour-sur-Glane,75015
3,Adagio Aparthotel Paris XV,48.831309,2.282343,Big Fernand Issy,48.828600,2.281860,Burger Joint,30 rue Ernest Renan (Rue Michelet),92130
4,Adagio Aparthotel Paris XV,48.831309,2.282343,Hôtel Océania Paris Porte de Versailles,48.830450,2.284403,Hotel,52 rue d'Oradour-sur-Glâne,75015
5,Adagio Aparthotel Paris XV,48.831309,2.282343,Adagio Porte de Versailles,48.830724,2.280843,Hotel,16 rue Eliane Jeannin Garreau,92130
6,Adagio Aparthotel Paris XV,48.831309,2.282343,La Manufacture,48.828248,2.281975,French Restaurant,20 esplanade de la Manufacture,92130
7,Adagio Aparthotel Paris XV,48.831309,2.282343,Hôtel Porte de Versailles (PVH),48.835016,2.282371,Hotel,11 boulevard Victor,75015
8,Adagio Aparthotel Paris XV,48.831309,2.282343,Forest Hill Aquaboulevard,48.831278,2.276187,Gym / Fitness Center,4 rue Louis Armand,75015
9,Adagio Aparthotel Paris XV,48.831309,2.282343,Okayama,48.829377,2.283284,Japanese Restaurant,16 Rue Ernest Renan,92130


In [14]:
# Display the venues ordered by postal code, then venue name, then venue latitude.
# sort_values returns a new frame; paris_v2 itself is not reordered.
paris_v2.sort_values(by=['postal_code', 'Venue', 'Venue Latitude'])


Unnamed: 0,Hotel,Hotel Latitude,Hotel Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,phys_add,postal_code
16555,Hotel Bac Saint-Germain,48.855719,2.325245,Hôtel Le Petit Chomel,48.852066,2.325292,Hotel,21 Avenue Jean Jaurès (Boulevard Raspail),01000
21910,Hotel De Nevers Saint Germain,48.854340,2.324198,Hôtel Le Petit Chomel,48.852066,2.325292,Hotel,21 Avenue Jean Jaurès (Boulevard Raspail),01000
22415,Hotel De Suede Saint Germain,48.852568,2.319117,Hôtel Le Petit Chomel,48.852066,2.325292,Hotel,21 Avenue Jean Jaurès (Boulevard Raspail),01000
23652,Hotel Des Saints Peres,48.853331,2.329771,Hôtel Le Petit Chomel,48.852066,2.325292,Hotel,21 Avenue Jean Jaurès (Boulevard Raspail),01000
31151,Hotel Lindbergh Paris,48.852189,2.326082,Hôtel Le Petit Chomel,48.852066,2.325292,Hotel,21 Avenue Jean Jaurès (Boulevard Raspail),01000
33115,Hotel Matignon,48.854814,2.320650,Hôtel Le Petit Chomel,48.852066,2.325292,Hotel,21 Avenue Jean Jaurès (Boulevard Raspail),01000
41511,Hotel Sevres Saint Germain,48.849264,2.324981,Hôtel Le Petit Chomel,48.852066,2.325292,Hotel,21 Avenue Jean Jaurès (Boulevard Raspail),01000
45734,Hotel de Suède,48.852587,2.319328,Hôtel Le Petit Chomel,48.852066,2.325292,Hotel,21 Avenue Jean Jaurès (Boulevard Raspail),01000
45852,Hotel de l'Abbaye,48.850040,2.330561,Hôtel Le Petit Chomel,48.852066,2.325292,Hotel,21 Avenue Jean Jaurès (Boulevard Raspail),01000
49460,Hoteliere Bocraz,48.848652,2.324956,Hôtel Le Petit Chomel,48.852066,2.325292,Hotel,21 Avenue Jean Jaurès (Boulevard Raspail),01000


Filtering Paris venues: removing all hotels, since they are already covered by dfHotels

In [15]:
# Drop every venue categorised as 'Hotel', then show the remaining shape.
is_hotel = paris_v2['Venue Category'] == 'Hotel'
paris_v2 = paris_v2[~is_hotel]
paris_v2.shape

(55338, 9)

In [16]:
# Keep only venues whose postal code is one of the 20 codes '75001' ... '75020'.
paris_codes = ['750{:02d}'.format(number) for number in range(1, 21)]

paris_v2 = paris_v2[paris_v2['postal_code'].isin(paris_codes)]

# Delete duplicate venue rows, keeping the last occurrence of each.
paris_v2 = paris_v2.drop_duplicates(keep='last')

# Show the number of remaining rows per postal code. The previous version
# evaluated this (and .shape) in the middle of the cell, where the values
# were discarded without being displayed.
paris_v2.groupby('postal_code').size()

In [17]:

# Venue-only view: drop the hotel columns, then collapse rows that are now identical.
venue_columns = ['Venue', 'Venue Latitude', 'Venue Longitude',
                 'Venue Category', 'phys_add', 'postal_code']
paris_v3 = paris_v2.loc[:, venue_columns].drop_duplicates(keep='last')
print(paris_v3.shape)


(6661, 6)


In [18]:
# Display the de-duplicated venues ordered by postal code, venue name and latitude.
# sort_values returns a new frame; paris_v3 itself is not reordered.
paris_v3.sort_values(by=['postal_code', 'Venue', 'Venue Latitude'])

Unnamed: 0,Venue,Venue Latitude,Venue Longitude,Venue Category,phys_add,postal_code
57498,A la Civette,48.863188,2.335915,Smoke Shop,157 rue Saint-Honoré,75001
54889,Acne Studios,48.865448,2.338579,Clothing Store,124 galerie Valois,75001
57438,Aesop,48.864158,2.333601,Cosmetics Shop,256 rue Saint Honoré,75001
57510,Aki,48.866147,2.335414,Japanese Restaurant,11 bis rue Sainte-Anne,75001
54467,Alexander McQueen,48.867311,2.326958,Boutique,372 rue Saint-Honoré,75001
57079,Amorino,48.864215,2.324462,Ice Cream Shop,Jardin des Tuileries (Place de la Concorde),75001
57428,Angelina,48.865090,2.328443,Tea Room,226 rue de Rivoli,75001
57509,Anticafé Louvre,48.864259,2.336109,Café,10 rue de Richelieu,75001
52413,Ar Poul Gwen,48.863488,2.350080,Creperie,11 rue Étienne Marcel,75001
24895,Arteum,48.861403,2.334126,Furniture / Home Store,Carrousel du Louvre (99 rue de Rivoli),75001


In [19]:
# Number of venues recorded under each venue category.
venues_by_category = paris_v3.groupby('Venue Category')
venues_by_category[['Venue']].count()

Unnamed: 0_level_0,Venue
Venue Category,Unnamed: 1_level_1
Accessories Store,7
Advertising Agency,1
Afghan Restaurant,2
African Restaurant,14
Alsatian Restaurant,2
American Restaurant,18
Antique Shop,3
Aquarium,1
Arcade,1
Arepa Restaurant,1


In [20]:
# Create a folium map centred on the coordinates below at zoom level 14 and display it.
map_paris = folium.Map(location=[48.864716, 2.349014], zoom_start=14)
map_paris

<strong>Step 6:</strong> Display Boroughs centers

In [21]:
# Start from a fresh map and collect the borough markers in one feature group.
map_paris = folium.Map(location=[48.864716, 2.349014], zoom_start=12)
incidents = folium.map.FeatureGroup()

# Styling shared by every borough-centre marker: a filled red circle.
borough_marker_style = dict(
    radius=8,  # marker size
    color='red',
    fill=True,
    fill_color='red',
    fill_opacity=0.6,
)

for lat, lng in zip(dfBorough['Latitude'], dfBorough['Longitude']):
    borough_marker = folium.features.CircleMarker([lat, lng], **borough_marker_style)
    incidents.add_child(borough_marker)

map_paris.add_child(incidents)

map_paris



In [22]:


# Add one small blue dot per hotel on top of the existing map.
incidentsHotels = folium.map.FeatureGroup()

# Styling shared by every hotel marker: a filled blue circle.
hotel_marker_style = dict(
    radius=1,  # marker size
    color='blue',
    fill=True,
    fill_color='blue',
    fill_opacity=0.6,
)

for lat, lng in zip(dfHotels['lat'], dfHotels['lng']):
    hotel_marker = folium.features.CircleMarker([lat, lng], **hotel_marker_style)
    incidentsHotels.add_child(hotel_marker)

map_paris.add_child(incidentsHotels)
map_paris



Narrowing down results

In [23]:
# Number of event rows in each category.
dfEvents.groupby('Category').size()


Category
Animations -> Atelier / Cours              369
Animations -> Autre animation               53
Animations -> Balade                        16
Animations -> Conférence / Débat           403
Animations -> Lecture / Rencontre          149
Animations -> Loisirs / Jeux                70
Animations -> Stage                         90
Animations -> Visite guidée                 92
Concerts -> Autre concert                  168
Concerts -> Chanson française               30
Concerts -> Classique                      473
Concerts -> Folk                            12
Concerts -> Hip-Hop                         59
Concerts -> Jazz                           362
Concerts -> Musiques du Monde              170
Concerts -> Pop / Variété                   33
Concerts -> Reggae                           6
Concerts -> Rock                           217
Concerts -> Soul / Funk                     11
Concerts -> Électronique                    80
Expositions -> Art Contemporain             65
Expo

Events Analysis and Clustering

In [24]:
# One-hot encode the event categories, keyed by borough postal code.

# Keep only the events held in one of the Paris boroughs. The comparison is
# done on numbers: paris_codes holds strings, while the clustering cell joins
# these codes against integer postal codes, so matching strings directly only
# works if pandas silently converts one side.
# NOTE(review): assumes every Paris postal code in the feed is purely numeric - confirm.
paris_codes_numeric = [int(code) for code in paris_codes]
event_postcodes = pd.to_numeric(dfEvents['Postal code'], errors='coerce')
in_paris = event_postcodes.isin(paris_codes_numeric)
dfEvents = dfEvents[in_paris]

# one hot encoding: one indicator column per event category
to_onehot_ev = pd.get_dummies(dfEvents[['Category']], prefix="", prefix_sep="")

# Put the borough's postal code (as an integer) in the first column.
to_onehot_ev.insert(0, 'Borough Name', event_postcodes[in_paris].astype(int))

to_onehot_ev.head()



Unnamed: 0,Borough Name,Animations -> Atelier / Cours,Animations -> Autre animation,Animations -> Balade,Animations -> Conférence / Débat,Animations -> Lecture / Rencontre,Animations -> Loisirs / Jeux,Animations -> Stage,Animations -> Visite guidée,Concerts -> Autre concert,...,Spectacles -> Opéra / Musical,Spectacles -> Projection,Spectacles -> Théâtre,Événements -> Autre événement,Événements -> Brocante / Marché,Événements -> Festival / Cycle,Événements -> Fête / Parade,Événements -> Salon,Événements -> Soirée / Bal,Événements -> Événement sportif
0,75012,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,75016,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,75001,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,75019,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,75019,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
# Average every indicator column per borough, i.e. the share of each event
# category among that borough's events, then turn the group key back into a column.
borough_means = to_onehot_ev.groupby('Borough Name').mean()
events_grouped = borough_means.reset_index()
print(events_grouped.shape)
events_grouped.head()

(20, 45)


Unnamed: 0,Borough Name,Animations -> Atelier / Cours,Animations -> Autre animation,Animations -> Balade,Animations -> Conférence / Débat,Animations -> Lecture / Rencontre,Animations -> Loisirs / Jeux,Animations -> Stage,Animations -> Visite guidée,Concerts -> Autre concert,...,Spectacles -> Opéra / Musical,Spectacles -> Projection,Spectacles -> Théâtre,Événements -> Autre événement,Événements -> Brocante / Marché,Événements -> Festival / Cycle,Événements -> Fête / Parade,Événements -> Salon,Événements -> Soirée / Bal,Événements -> Événement sportif
0,75001,0.087097,0.003226,0.0,0.03871,0.009677,0.019355,0.016129,0.019355,0.003226,...,0.0,0.012903,0.009677,0.0,0.0,0.009677,0.0,0.009677,0.009677,0.0
1,75002,0.147059,0.0,0.029412,0.088235,0.029412,0.058824,0.058824,0.117647,0.0,...,0.0,0.058824,0.0,0.0,0.029412,0.0,0.0,0.0,0.029412,0.0
2,75003,0.125786,0.006289,0.0,0.220126,0.025157,0.037736,0.012579,0.031447,0.006289,...,0.0,0.056604,0.006289,0.0,0.006289,0.012579,0.0,0.006289,0.0,0.0
3,75004,0.074074,0.012346,0.0,0.098765,0.117284,0.024691,0.012346,0.049383,0.006173,...,0.006173,0.179012,0.111111,0.024691,0.0,0.018519,0.0,0.006173,0.0,0.012346
4,75005,0.151261,0.058824,0.008403,0.134454,0.07563,0.016807,0.008403,0.084034,0.016807,...,0.0,0.042017,0.10084,0.033613,0.0,0.02521,0.008403,0.0,0.0,0.0


In [26]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in `row`, ignoring its first entry.

    The first element of `row` is skipped, the remaining values are sorted in
    descending order, and the index labels of the first `num_top_venues` of
    them are returned as an array.
    """
    without_first = row.iloc[1:]
    ranked_labels = without_first.sort_values(ascending=False).index.values
    return ranked_labels[:num_top_venues]

In [27]:
num_top_events = 10

indicators = ['st', 'nd', 'rd']


def _ordinal(rank):
    """Return `rank` followed by its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st, ...)."""
    # 10-20 always take 'th'; otherwise the last digit decides.
    if 10 <= rank % 100 <= 20 or not 1 <= rank % 10 <= 3:
        return '{}th'.format(rank)
    return '{}{}'.format(rank, indicators[rank % 10 - 1])


# Column headers: the borough key followed by one column per rank. The suffix
# is now computed explicitly instead of relying on a bare `except:` to catch
# the IndexError raised past the third rank.
columns = ['Borough Name'] + [
    '{} Most Common Event'.format(_ordinal(rank))
    for rank in range(1, num_top_events + 1)
]

# Top categories for each row of events_grouped, in row order.
top_events_per_borough = [
    return_most_common_venues(events_grouped.iloc[position, :], num_top_events)
    for position in range(events_grouped.shape[0])
]

# Build the table in one step rather than filling an empty frame row by row.
neighborhoods_events_sorted = pd.DataFrame(
    top_events_per_borough,
    columns=columns[1:],
    index=events_grouped.index,
)
neighborhoods_events_sorted.insert(0, 'Borough Name', events_grouped['Borough Name'])

neighborhoods_events_sorted.head()

Unnamed: 0,Borough Name,1st Most Common Event,2nd Most Common Event,3rd Most Common Event,4th Most Common Event,5th Most Common Event,6th Most Common Event,7th Most Common Event,8th Most Common Event,9th Most Common Event,10th Most Common Event
0,75001,Concerts -> Jazz,Animations -> Atelier / Cours,Animations -> Conférence / Débat,Animations -> Loisirs / Jeux,Animations -> Visite guidée,Concerts -> Musiques du Monde,Animations -> Stage,Spectacles -> Projection,Spectacles -> Théâtre,Animations -> Lecture / Rencontre
1,75002,Animations -> Atelier / Cours,Animations -> Visite guidée,Animations -> Conférence / Débat,Spectacles -> Autre spectacle,Expositions -> Histoire / Civilisations,Animations -> Loisirs / Jeux,Spectacles -> Projection,Animations -> Stage,Spectacles -> Jeune public,Concerts -> Électronique
2,75003,Animations -> Conférence / Débat,Animations -> Atelier / Cours,Spectacles -> Jeune public,Expositions -> Photographie,Spectacles -> Projection,Concerts -> Rock,Expositions -> Art Contemporain,Expositions -> Sciences / Techniques,Animations -> Loisirs / Jeux,Concerts -> Électronique
3,75004,Spectacles -> Projection,Animations -> Lecture / Rencontre,Spectacles -> Théâtre,Animations -> Conférence / Débat,Animations -> Atelier / Cours,Animations -> Visite guidée,Concerts -> Classique,Expositions -> Histoire / Civilisations,Expositions -> Art Contemporain,Animations -> Loisirs / Jeux
4,75005,Animations -> Atelier / Cours,Animations -> Conférence / Débat,Spectacles -> Théâtre,Animations -> Visite guidée,Animations -> Lecture / Rencontre,Animations -> Autre animation,Expositions -> Photographie,Spectacles -> Projection,Concerts -> Classique,Événements -> Autre événement


In [28]:
# Display the full table of most common event categories per borough.
neighborhoods_events_sorted

Unnamed: 0,Borough Name,1st Most Common Event,2nd Most Common Event,3rd Most Common Event,4th Most Common Event,5th Most Common Event,6th Most Common Event,7th Most Common Event,8th Most Common Event,9th Most Common Event,10th Most Common Event
0,75001,Concerts -> Jazz,Animations -> Atelier / Cours,Animations -> Conférence / Débat,Animations -> Loisirs / Jeux,Animations -> Visite guidée,Concerts -> Musiques du Monde,Animations -> Stage,Spectacles -> Projection,Spectacles -> Théâtre,Animations -> Lecture / Rencontre
1,75002,Animations -> Atelier / Cours,Animations -> Visite guidée,Animations -> Conférence / Débat,Spectacles -> Autre spectacle,Expositions -> Histoire / Civilisations,Animations -> Loisirs / Jeux,Spectacles -> Projection,Animations -> Stage,Spectacles -> Jeune public,Concerts -> Électronique
2,75003,Animations -> Conférence / Débat,Animations -> Atelier / Cours,Spectacles -> Jeune public,Expositions -> Photographie,Spectacles -> Projection,Concerts -> Rock,Expositions -> Art Contemporain,Expositions -> Sciences / Techniques,Animations -> Loisirs / Jeux,Concerts -> Électronique
3,75004,Spectacles -> Projection,Animations -> Lecture / Rencontre,Spectacles -> Théâtre,Animations -> Conférence / Débat,Animations -> Atelier / Cours,Animations -> Visite guidée,Concerts -> Classique,Expositions -> Histoire / Civilisations,Expositions -> Art Contemporain,Animations -> Loisirs / Jeux
4,75005,Animations -> Atelier / Cours,Animations -> Conférence / Débat,Spectacles -> Théâtre,Animations -> Visite guidée,Animations -> Lecture / Rencontre,Animations -> Autre animation,Expositions -> Photographie,Spectacles -> Projection,Concerts -> Classique,Événements -> Autre événement
5,75006,Animations -> Conférence / Débat,Animations -> Lecture / Rencontre,Animations -> Atelier / Cours,Animations -> Visite guidée,Expositions -> Art Contemporain,Expositions -> Beaux-Arts,Expositions -> Photographie,Animations -> Stage,Concerts -> Classique,Spectacles -> Projection
6,75007,Animations -> Atelier / Cours,Animations -> Conférence / Débat,Expositions -> Art Contemporain,Concerts -> Classique,Concerts -> Électronique,Expositions -> Beaux-Arts,Expositions -> Histoire / Civilisations,Animations -> Loisirs / Jeux,Animations -> Lecture / Rencontre,Animations -> Stage
7,75008,Concerts -> Classique,Animations -> Conférence / Débat,Spectacles -> Projection,Expositions -> Beaux-Arts,Concerts -> Jazz,Expositions -> Art Contemporain,Animations -> Lecture / Rencontre,Événements -> Festival / Cycle,Spectacles -> Théâtre,Animations -> Atelier / Cours
8,75009,Animations -> Atelier / Cours,Animations -> Loisirs / Jeux,Animations -> Lecture / Rencontre,Animations -> Autre animation,Animations -> Conférence / Débat,Concerts -> Hip-Hop,Événements -> Soirée / Bal,Expositions -> Photographie,Spectacles -> Autre spectacle,Expositions -> Art Contemporain
9,75010,Animations -> Atelier / Cours,Concerts -> Autre concert,Concerts -> Jazz,Animations -> Lecture / Rencontre,Concerts -> Rock,Spectacles -> Projection,Expositions -> Photographie,Événements -> Soirée / Bal,Animations -> Conférence / Débat,Concerts -> Musiques du Monde


Clustering Events

In [29]:
from sklearn.cluster import KMeans

# Number of k-means clusters used to group the boroughs by event profile.
kclusters = 7

# Feature matrix: every column of events_grouped except the borough identifier.
# The axis is named explicitly; passing it positionally is rejected by newer pandas.
events_grouped_clustering = events_grouped.drop(columns='Borough Name')

# run k-means clustering (random_state is fixed so the labels are repeatable)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(events_grouped_clustering)

# Put the cluster label in front of the ranked-events table. A 'Cluster Labels'
# column left over from a previous run of this cell is dropped first, because
# DataFrame.insert raises ValueError when the column already exists.
# NOTE(review): assumes neighborhoods_events_sorted has the same row order as
# events_grouped -- confirm against the cell that builds it.
neighborhoods_events_sorted = neighborhoods_events_sorted.drop(columns='Cluster Labels', errors='ignore')
neighborhoods_events_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Start from the borough table and give its postal-code column the key name
# used by the events table.
paris_merged = result.rename(columns={'Postal Code': 'Borough Name'})

# add the cluster label and ranked events to every borough row (which carries latitude/longitude)
paris_merged = paris_merged.join(neighborhoods_events_sorted.set_index('Borough Name'), on='Borough Name')

paris_merged

Unnamed: 0,Borough Id,Short name,Long name,INSEE,Latitude,Longitude,Polygon,index,INSEE.1,Borough Name,...,1st Most Common Event,2nd Most Common Event,3rd Most Common Event,4th Most Common Event,5th Most Common Event,6th Most Common Event,7th Most Common Event,8th Most Common Event,9th Most Common Event,10th Most Common Event
0,1,1er Ardt,Louvre,75101,48.862563,2.336443,"[[[2.328007329038849, 48.86991742140715], [2.3...",8,75101,75001,...,Concerts -> Jazz,Animations -> Atelier / Cours,Animations -> Conférence / Débat,Animations -> Loisirs / Jeux,Animations -> Visite guidée,Concerts -> Musiques du Monde,Animations -> Stage,Spectacles -> Projection,Spectacles -> Théâtre,Animations -> Lecture / Rencontre
1,2,2ème Ardt,Bourse,75102,48.868279,2.342803,"[[[2.351518483670821, 48.8644258050741], [2.35...",9,75102,75002,...,Animations -> Atelier / Cours,Animations -> Visite guidée,Animations -> Conférence / Débat,Spectacles -> Autre spectacle,Expositions -> Histoire / Civilisations,Animations -> Loisirs / Jeux,Spectacles -> Projection,Animations -> Stage,Spectacles -> Jeune public,Concerts -> Électronique
2,3,3ème Ardt,Temple,75103,48.862872,2.360001,"[[[2.363828096062925, 48.86750443060333], [2.3...",5,75103,75003,...,Animations -> Conférence / Débat,Animations -> Atelier / Cours,Spectacles -> Jeune public,Expositions -> Photographie,Spectacles -> Projection,Concerts -> Rock,Expositions -> Art Contemporain,Expositions -> Sciences / Techniques,Animations -> Loisirs / Jeux,Concerts -> Électronique
3,4,4ème Ardt,Hôtel-de-Ville,75104,48.854341,2.35763,"[[[2.368512371393433, 48.85573412813671], [2.3...",7,75104,75004,...,Spectacles -> Projection,Animations -> Lecture / Rencontre,Spectacles -> Théâtre,Animations -> Conférence / Débat,Animations -> Atelier / Cours,Animations -> Visite guidée,Concerts -> Classique,Expositions -> Histoire / Civilisations,Expositions -> Art Contemporain,Animations -> Loisirs / Jeux
4,5,5ème Ardt,Panthéon,75105,48.844443,2.350715,"[[[2.364433128355796, 48.84614003688701], [2.3...",13,75105,75005,...,Animations -> Atelier / Cours,Animations -> Conférence / Débat,Spectacles -> Théâtre,Animations -> Visite guidée,Animations -> Lecture / Rencontre,Animations -> Autre animation,Expositions -> Photographie,Spectacles -> Projection,Concerts -> Classique,Événements -> Autre événement
5,6,6ème Ardt,Luxembourg,75106,48.84913,2.332898,"[[[2.344592677496354, 48.85404922924987], [2.3...",6,75106,75006,...,Animations -> Conférence / Débat,Animations -> Lecture / Rencontre,Animations -> Atelier / Cours,Animations -> Visite guidée,Expositions -> Art Contemporain,Expositions -> Beaux-Arts,Expositions -> Photographie,Animations -> Stage,Concerts -> Classique,Spectacles -> Projection
6,7,7ème Ardt,Palais-Bourbon,75107,48.856174,2.312188,"[[[2.320902853673182, 48.86305739061689], [2.3...",15,75107,75007,...,Animations -> Atelier / Cours,Animations -> Conférence / Débat,Expositions -> Art Contemporain,Concerts -> Classique,Concerts -> Électronique,Expositions -> Beaux-Arts,Expositions -> Histoire / Civilisations,Animations -> Loisirs / Jeux,Animations -> Lecture / Rencontre,Animations -> Stage
7,8,8ème Ardt,Élysée,75108,48.872721,2.312554,"[[[2.325836254471964, 48.86956219698904], [2.3...",2,75108,75008,...,Concerts -> Classique,Animations -> Conférence / Débat,Spectacles -> Projection,Expositions -> Beaux-Arts,Concerts -> Jazz,Expositions -> Art Contemporain,Animations -> Lecture / Rencontre,Événements -> Festival / Cycle,Spectacles -> Théâtre,Animations -> Atelier / Cours
8,9,9ème Ardt,Opéra,75109,48.877164,2.337458,"[[[2.339776543083362, 48.88202934722508], [2.3...",12,75109,75009,...,Animations -> Atelier / Cours,Animations -> Loisirs / Jeux,Animations -> Lecture / Rencontre,Animations -> Autre animation,Animations -> Conférence / Débat,Concerts -> Hip-Hop,Événements -> Soirée / Bal,Expositions -> Photographie,Spectacles -> Autre spectacle,Expositions -> Art Contemporain
9,10,10ème Ardt,Entrepôt,75110,48.87613,2.360728,"[[[2.364685889119478, 48.884369043407936], [2....",18,75110,75010,...,Animations -> Atelier / Cours,Concerts -> Autre concert,Concerts -> Jazz,Animations -> Lecture / Rencontre,Concerts -> Rock,Spectacles -> Projection,Expositions -> Photographie,Événements -> Soirée / Bal,Animations -> Conférence / Débat,Concerts -> Musiques du Monde


In [30]:
# Borough short names next to their event-cluster label, ordered by cluster.
paris_merged.loc[:, ['Short name', 'Cluster Labels']].sort_values(by='Cluster Labels')

Unnamed: 0,Short name,Cluster Labels
0,1er Ardt,0
17,18ème Ardt,1
13,14ème Ardt,1
10,11ème Ardt,1
8,9ème Ardt,1
6,7ème Ardt,1
9,10ème Ardt,1
19,20ème Ardt,1
7,8ème Ardt,2
18,19ème Ardt,2


Mapping Event Clusters

In [31]:
import matplotlib.cm as cm
import matplotlib.colors as colors
# create the base map
map_clusters = folium.Map(location=[48.864716, 2.349014], zoom_start=12)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add one circle marker per borough, coloured by its event cluster
for lat, lon, poi, cluster in zip(paris_merged['Latitude'], paris_merged['Longitude'],
                                  paris_merged['Borough Name'], paris_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # The palette is indexed with cluster-1, so label 0 wraps around to the
    # last colour (index -1); every label still gets its own colour.
    marker_color = rainbow[cluster-1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [32]:
# Per-borough summary: identifier, event cluster, short name and the ten
# most common event categories.
eventsSummary = pd.DataFrame(
    paris_merged[[
        'Borough Name',
        'Cluster Labels',
        'Short name',
        '1st Most Common Event',
        '2nd Most Common Event',
        '3rd Most Common Event',
        '4th Most Common Event',
        '5th Most Common Event',
        '6th Most Common Event',
        '7th Most Common Event',
        '8th Most Common Event',
        '9th Most Common Event',
        '10th Most Common Event',
    ]]
)
# Show the summary grouped by cluster label.
eventsSummary.sort_values(by='Cluster Labels')

Unnamed: 0,Borough Name,Cluster Labels,Short name,1st Most Common Event,2nd Most Common Event,3rd Most Common Event,4th Most Common Event,5th Most Common Event,6th Most Common Event,7th Most Common Event,8th Most Common Event,9th Most Common Event,10th Most Common Event
0,75001,0,1er Ardt,Concerts -> Jazz,Animations -> Atelier / Cours,Animations -> Conférence / Débat,Animations -> Loisirs / Jeux,Animations -> Visite guidée,Concerts -> Musiques du Monde,Animations -> Stage,Spectacles -> Projection,Spectacles -> Théâtre,Animations -> Lecture / Rencontre
17,75018,1,18ème Ardt,Animations -> Atelier / Cours,Spectacles -> Théâtre,Concerts -> Rock,Concerts -> Électronique,Concerts -> Pop / Variété,Concerts -> Musiques du Monde,Concerts -> Autre concert,Spectacles -> Jeune public,Animations -> Visite guidée,Spectacles -> Projection
13,75014,1,14ème Ardt,Concerts -> Jazz,Concerts -> Autre concert,Animations -> Atelier / Cours,Animations -> Lecture / Rencontre,Spectacles -> Projection,Concerts -> Musiques du Monde,Concerts -> Rock,Animations -> Loisirs / Jeux,Animations -> Conférence / Débat,Événements -> Festival / Cycle
10,75011,1,11ème Ardt,Concerts -> Autre concert,Animations -> Conférence / Débat,Animations -> Atelier / Cours,Spectacles -> Théâtre,Concerts -> Musiques du Monde,Concerts -> Rock,Concerts -> Jazz,Animations -> Stage,Concerts -> Électronique,Événements -> Autre événement
8,75009,1,9ème Ardt,Animations -> Atelier / Cours,Animations -> Loisirs / Jeux,Animations -> Lecture / Rencontre,Animations -> Autre animation,Animations -> Conférence / Débat,Concerts -> Hip-Hop,Événements -> Soirée / Bal,Expositions -> Photographie,Spectacles -> Autre spectacle,Expositions -> Art Contemporain
6,75007,1,7ème Ardt,Animations -> Atelier / Cours,Animations -> Conférence / Débat,Expositions -> Art Contemporain,Concerts -> Classique,Concerts -> Électronique,Expositions -> Beaux-Arts,Expositions -> Histoire / Civilisations,Animations -> Loisirs / Jeux,Animations -> Lecture / Rencontre,Animations -> Stage
9,75010,1,10ème Ardt,Animations -> Atelier / Cours,Concerts -> Autre concert,Concerts -> Jazz,Animations -> Lecture / Rencontre,Concerts -> Rock,Spectacles -> Projection,Expositions -> Photographie,Événements -> Soirée / Bal,Animations -> Conférence / Débat,Concerts -> Musiques du Monde
19,75020,1,20ème Ardt,Concerts -> Musiques du Monde,Animations -> Atelier / Cours,Concerts -> Jazz,Animations -> Lecture / Rencontre,Concerts -> Autre concert,Événements -> Soirée / Bal,Concerts -> Rock,Concerts -> Hip-Hop,Spectacles -> Théâtre,Spectacles -> Projection
7,75008,2,8ème Ardt,Concerts -> Classique,Animations -> Conférence / Débat,Spectacles -> Projection,Expositions -> Beaux-Arts,Concerts -> Jazz,Expositions -> Art Contemporain,Animations -> Lecture / Rencontre,Événements -> Festival / Cycle,Spectacles -> Théâtre,Animations -> Atelier / Cours
18,75019,2,19ème Ardt,Concerts -> Classique,Animations -> Conférence / Débat,Concerts -> Rock,Concerts -> Jazz,Animations -> Atelier / Cours,Événements -> Soirée / Bal,Concerts -> Musiques du Monde,Concerts -> Autre concert,Concerts -> Électronique,Concerts -> Hip-Hop


Venues Analysis and Clustering

In [33]:
# One-hot encode the venue categories.

# Keep only the venues whose postal code is listed in paris_codes.
paris_v3 = paris_v3.loc[paris_v3['postal_code'].isin(paris_codes)]

# One indicator column per venue category; the empty prefix keeps the bare
# category name as the column name.
to_onehot_ve = pd.get_dummies(
    paris_v3[['Venue Category']],
    prefix="",
    prefix_sep="",
)

# Put the borough identifier (the postal code) back next to the indicators.
to_onehot_ve['Borough Name'] = paris_v3['postal_code']

# Reorder so that the last column comes first and the rest follow in order.
fixed_columns = list(to_onehot_ve.columns[-1:]) + list(to_onehot_ve.columns[:-1])
to_onehot_ve = to_onehot_ve[fixed_columns]

to_onehot_ve.head()


Unnamed: 0,Borough Name,Accessories Store,Advertising Agency,Afghan Restaurant,African Restaurant,Alsatian Restaurant,American Restaurant,Antique Shop,Aquarium,Arcade,...,Video Game Store,Vietnamese Restaurant,Vineyard,Water Park,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
50,75013,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
66,75013,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
67,75013,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
69,75012,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
123,75013,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [34]:
# Average the one-hot indicators per borough: each cell becomes the share of
# that borough's venues falling in the category.
venues_grouped = (
    to_onehot_ve
    .groupby('Borough Name')
    .mean()
    .reset_index()
)
# (rows, columns) of the per-borough frequency table
print(venues_grouped.shape)
venues_grouped.head()

(20, 337)


Unnamed: 0,Borough Name,Accessories Store,Advertising Agency,Afghan Restaurant,African Restaurant,Alsatian Restaurant,American Restaurant,Antique Shop,Aquarium,Arcade,...,Video Game Store,Vietnamese Restaurant,Vineyard,Water Park,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,75001,0.006515,0.0,0.0,0.0,0.0,0.003257,0.003257,0.0,0.0,...,0.0,0.003257,0.0,0.0,0.016287,0.003257,0.003257,0.0,0.0,0.0
1,75002,0.0,0.0,0.0,0.00463,0.0,0.0,0.00463,0.0,0.00463,...,0.0,0.00463,0.0,0.0,0.027778,0.009259,0.009259,0.00463,0.0,0.0
2,75003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0181,0.0,0.0,0.022624,0.004525,0.0,0.004525,0.0,0.0
3,75004,0.0,0.0,0.0,0.0,0.003676,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.014706,0.0,0.003676,0.0,0.0,0.0
4,75005,0.0,0.0,0.0,0.003185,0.0,0.0,0.0,0.0,0.0,...,0.0,0.009554,0.0,0.0,0.015924,0.003185,0.0,0.0,0.003185,0.006369


In [35]:
# How many of the most common venue categories to keep per borough.
num_top_venues = 10

# Ordinal suffixes for ranks 1-3; every later rank uses 'th'.
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Borough Name']
for ind in range(num_top_venues):
    # Explicit bounds check instead of a bare `except:`, which would also
    # have hidden any unrelated error raised while building the label.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# create a new dataframe with one row per borough in venues_grouped
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Borough Name'] = venues_grouped['Borough Name']

# Fill the ranking columns row by row.
# NOTE(review): return_most_common_venues is defined elsewhere in the notebook;
# presumably it returns the num_top_venues highest-scoring category names for
# the given row -- confirm.
for ind in range(venues_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(venues_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Borough Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,75001,French Restaurant,Café,Japanese Restaurant,Plaza,Italian Restaurant,Coffee Shop,Clothing Store,Boutique,Bakery,Tea Room
1,75002,French Restaurant,Cocktail Bar,Japanese Restaurant,Bakery,Italian Restaurant,Wine Bar,Thai Restaurant,Burger Joint,Bar,Bistro
2,75003,French Restaurant,Coffee Shop,Art Gallery,Café,Bakery,Cocktail Bar,Japanese Restaurant,Pizza Place,Burger Joint,Clothing Store
3,75004,French Restaurant,Ice Cream Shop,Italian Restaurant,Plaza,Cocktail Bar,Seafood Restaurant,Clothing Store,Art Gallery,Bakery,Cosmetics Shop
4,75005,French Restaurant,Bar,Bakery,Café,Plaza,Creperie,Coffee Shop,Italian Restaurant,Indie Movie Theater,Science Museum


Clustering Venues

In [36]:
from sklearn.cluster import KMeans

# Number of k-means clusters used to group the boroughs by venue profile.
kclusters = 7

# Feature matrix: every column of venues_grouped except the borough identifier.
# The axis is named explicitly; passing it positionally is rejected by newer pandas.
venues_grouped_clustering = venues_grouped.drop(columns='Borough Name')

# run k-means clustering (random_state is fixed so the labels are repeatable)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(venues_grouped_clustering)

# Put the cluster label in front of the ranked-venues table. A 'Cluster Labels'
# column left over from a previous run of this cell is dropped first, because
# DataFrame.insert raises ValueError when the column already exists.
neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Start from the borough table and give its postal-code column the key name
# used by the venues table.
parisv_merged = result.rename(columns={'Postal Code': 'Borough Name'})

# converting borough to int so the join key is an integer column
# NOTE(review): presumably this matches the dtype of the key in `result` -- confirm.
neighborhoods_venues_sorted['Borough Name'] = neighborhoods_venues_sorted['Borough Name'].astype(str).astype(int)

# add the cluster label and ranked venues to every borough row (which carries latitude/longitude)
parisv_merged = parisv_merged.join(neighborhoods_venues_sorted.set_index('Borough Name'), on='Borough Name')

parisv_merged

Unnamed: 0,Borough Id,Short name,Long name,INSEE,Latitude,Longitude,Polygon,index,INSEE.1,Borough Name,...,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,1er Ardt,Louvre,75101,48.862563,2.336443,"[[[2.328007329038849, 48.86991742140715], [2.3...",8,75101,75001,...,French Restaurant,Café,Japanese Restaurant,Plaza,Italian Restaurant,Coffee Shop,Clothing Store,Boutique,Bakery,Tea Room
1,2,2ème Ardt,Bourse,75102,48.868279,2.342803,"[[[2.351518483670821, 48.8644258050741], [2.35...",9,75102,75002,...,French Restaurant,Cocktail Bar,Japanese Restaurant,Bakery,Italian Restaurant,Wine Bar,Thai Restaurant,Burger Joint,Bar,Bistro
2,3,3ème Ardt,Temple,75103,48.862872,2.360001,"[[[2.363828096062925, 48.86750443060333], [2.3...",5,75103,75003,...,French Restaurant,Coffee Shop,Art Gallery,Café,Bakery,Cocktail Bar,Japanese Restaurant,Pizza Place,Burger Joint,Clothing Store
3,4,4ème Ardt,Hôtel-de-Ville,75104,48.854341,2.35763,"[[[2.368512371393433, 48.85573412813671], [2.3...",7,75104,75004,...,French Restaurant,Ice Cream Shop,Italian Restaurant,Plaza,Cocktail Bar,Seafood Restaurant,Clothing Store,Art Gallery,Bakery,Cosmetics Shop
4,5,5ème Ardt,Panthéon,75105,48.844443,2.350715,"[[[2.364433128355796, 48.84614003688701], [2.3...",13,75105,75005,...,French Restaurant,Bar,Bakery,Café,Plaza,Creperie,Coffee Shop,Italian Restaurant,Indie Movie Theater,Science Museum
5,6,6ème Ardt,Luxembourg,75106,48.84913,2.332898,"[[[2.344592677496354, 48.85404922924987], [2.3...",6,75106,75006,...,French Restaurant,Café,Italian Restaurant,Bistro,Bakery,Wine Bar,Japanese Restaurant,Chocolate Shop,Bookstore,Bar
6,7,7ème Ardt,Palais-Bourbon,75107,48.856174,2.312188,"[[[2.320902853673182, 48.86305739061689], [2.3...",15,75107,75007,...,French Restaurant,Italian Restaurant,Café,Plaza,Bakery,Coffee Shop,Restaurant,Bistro,Japanese Restaurant,Brasserie
7,8,8ème Ardt,Élysée,75108,48.872721,2.312554,"[[[2.325836254471964, 48.86956219698904], [2.3...",2,75108,75008,...,French Restaurant,Boutique,Italian Restaurant,Clothing Store,Bakery,Salad Place,Café,Sandwich Place,Bistro,Japanese Restaurant
8,9,9ème Ardt,Opéra,75109,48.877164,2.337458,"[[[2.339776543083362, 48.88202934722508], [2.3...",12,75109,75009,...,French Restaurant,Italian Restaurant,Bistro,Coffee Shop,Theater,Bar,Restaurant,Cocktail Bar,Japanese Restaurant,Pizza Place
9,10,10ème Ardt,Entrepôt,75110,48.87613,2.360728,"[[[2.364685889119478, 48.884369043407936], [2....",18,75110,75010,...,French Restaurant,Bar,Indian Restaurant,Coffee Shop,Bistro,Café,Italian Restaurant,Restaurant,Pizza Place,Bakery


In [37]:
# Column dtypes of the ranked-venues table. Only the last expression of a cell
# is rendered, so the discarded `parisv_merged.dtypes` lookup that used to
# precede this line was removed.
neighborhoods_venues_sorted.dtypes

Cluster Labels             int32
Borough Name               int64
1st Most Common Venue     object
2nd Most Common Venue     object
3rd Most Common Venue     object
4th Most Common Venue     object
5th Most Common Venue     object
6th Most Common Venue     object
7th Most Common Venue     object
8th Most Common Venue     object
9th Most Common Venue     object
10th Most Common Venue    object
dtype: object

In [38]:
# Borough short names next to their venue-cluster label, ordered by cluster.
parisv_merged.loc[:, ['Short name', 'Cluster Labels']].sort_values(by='Cluster Labels')

Unnamed: 0,Short name,Cluster Labels
15,16ème Ardt,0
6,7ème Ardt,0
19,20ème Ardt,1
18,19ème Ardt,1
10,11ème Ardt,1
17,18ème Ardt,2
16,17ème Ardt,2
4,5ème Ardt,2
14,15ème Ardt,2
13,14ème Ardt,2


In [39]:
import matplotlib.cm as cm
import matplotlib.colors as colors
def _add_cluster_markers(cluster_map, boroughs, palette, tag, lon_offset=0.0):
    """Add one circle marker per row of ``boroughs`` to ``cluster_map``.

    Parameters
    ----------
    cluster_map : folium.Map
        Map that receives the markers (modified in place).
    boroughs : pandas.DataFrame
        Must provide the columns 'Latitude', 'Longitude', 'Borough Name',
        'Cluster Labels' and 'Short name'.
    palette : list of str
        Hex colours; a row with label ``c`` is drawn with ``palette[c - 1]``,
        so label 0 wraps around to the last colour.
    tag : str
        Word placed between the borough identifier and the cluster label in
        the popup text.
    lon_offset : float, optional
        Value added to every longitude, used to draw a second set of markers
        beside the first instead of on top of it.
    """
    rows = zip(boroughs['Latitude'], boroughs['Longitude'], boroughs['Borough Name'],
               boroughs['Cluster Labels'], boroughs['Short name'])
    for lat, lon, poi, cluster, name in rows:
        label = folium.Popup(str(poi) + ' ' + tag + ' ' + str(cluster) + ' ' + name, parse_html=True)
        marker_color = palette[cluster-1]
        folium.CircleMarker(
            [lat, lon + lon_offset],
            radius=5,
            popup=label,
            color=marker_color,
            fill=True,
            fill_color=marker_color,
            fill_opacity=0.7).add_to(cluster_map)


# create the base map
map_clusters = folium.Map(location=[48.864716, 2.349014], zoom_start=12)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# venue clusters at each borough's own coordinates
_add_cluster_markers(map_clusters, parisv_merged, rainbow, 'Cluster')

# event clusters shifted slightly in longitude so both markers of a borough stay visible
_add_cluster_markers(map_clusters, paris_merged, rainbow, 'Events', lon_offset=0.005)

map_clusters

In [40]:
# Per-borough summary: identifier, venue cluster, short name and the ten
# most common venue categories.
venuesSummary = pd.DataFrame(
    parisv_merged[[
        'Borough Name',
        'Cluster Labels',
        'Short name',
        '1st Most Common Venue',
        '2nd Most Common Venue',
        '3rd Most Common Venue',
        '4th Most Common Venue',
        '5th Most Common Venue',
        '6th Most Common Venue',
        '7th Most Common Venue',
        '8th Most Common Venue',
        '9th Most Common Venue',
        '10th Most Common Venue',
    ]]
)
# Show the summary grouped by cluster label.
venuesSummary.sort_values(by='Cluster Labels')

Unnamed: 0,Borough Name,Cluster Labels,Short name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,75016,0,16ème Ardt,French Restaurant,Italian Restaurant,Café,Plaza,Bakery,Japanese Restaurant,Supermarket,Museum,Pizza Place,Gym / Fitness Center
6,75007,0,7ème Ardt,French Restaurant,Italian Restaurant,Café,Plaza,Bakery,Coffee Shop,Restaurant,Bistro,Japanese Restaurant,Brasserie
19,75020,1,20ème Ardt,Bar,French Restaurant,Supermarket,Plaza,Bistro,Pizza Place,Bakery,Italian Restaurant,Theater,Metro Station
18,75019,1,19ème Ardt,French Restaurant,Bar,Café,Restaurant,Italian Restaurant,Plaza,Bistro,Pizza Place,Supermarket,Multiplex
10,75011,1,11ème Ardt,Bar,French Restaurant,Bistro,Restaurant,Wine Bar,Italian Restaurant,Café,Cocktail Bar,Vietnamese Restaurant,Pizza Place
17,75018,2,18ème Ardt,French Restaurant,Bar,Italian Restaurant,Bistro,Bakery,Plaza,Café,Supermarket,Wine Bar,Pizza Place
16,75017,2,17ème Ardt,French Restaurant,Italian Restaurant,Bar,Bakery,Bistro,Pizza Place,Restaurant,Japanese Restaurant,Gym / Fitness Center,Supermarket
4,75005,2,5ème Ardt,French Restaurant,Bar,Bakery,Café,Plaza,Creperie,Coffee Shop,Italian Restaurant,Indie Movie Theater,Science Museum
14,75015,2,15ème Ardt,French Restaurant,Italian Restaurant,Bakery,Japanese Restaurant,Bistro,Korean Restaurant,Supermarket,Café,Sandwich Place,Pizza Place
13,75014,2,14ème Ardt,French Restaurant,Italian Restaurant,Bar,Café,Bakery,Japanese Restaurant,Bistro,Pizza Place,Vietnamese Restaurant,Creperie
