### 5. The Best Hotel Recommendation

In [1]:
%matplotlib inline
import pandas as pd
from pandas import read_csv
# NOTE(review): `pandas.datetime` was deprecated in pandas 1.0 and removed in 2.0, so this
# line raises ImportError on current pandas. The name is not used in the visible cells --
# confirm nothing else needs it, then import from the standard-library `datetime` instead.
from pandas import datetime
import numpy as np
from matplotlib import pyplot as plt
import math

import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

import missingno as msno

In [2]:
# Load the data from the 'Clean_Data' pickle file.
# NOTE(review): the path is relative, so the file must sit in the kernel's working directory;
# only unpickle files from a trusted source.
data = pd.read_pickle('Clean_Data')

#### 5.1 The Total Number of Positive and Negative Reviews

In [3]:
# Start both per-review indicator columns at 1; later cells zero them out
# where the review text is only a placeholder.
for flag_column in ("pos_count", "neg_count"):
    data[flag_column] = 1

In [4]:
# Set pos_count to 0 for rows whose Positive_Review is the 'No Positive' placeholder;
# every other row keeps its current pos_count value.
# Done with a vectorised Series.mask instead of DataFrame.apply(axis=1), which would call
# a Python lambda once per row.
data["pos_count"] = data["pos_count"].mask(data["Positive_Review"] == 'No Positive', 0)

In [5]:
# Tally how many rows carry each pos_count value.
data["pos_count"].value_counts()

1    479308
0     35904
Name: pos_count, dtype: int64

In [6]:
# Set neg_count to 0 for rows whose Negative_Review is the 'No Negative' placeholder;
# every other row keeps its current neg_count value.
# Done with a vectorised Series.mask instead of DataFrame.apply(axis=1), which would call
# a Python lambda once per row.
data["neg_count"] = data["neg_count"].mask(data["Negative_Review"] == 'No Negative', 0)

In [7]:
# Tally how many rows carry each neg_count value.
data["neg_count"].value_counts()

1    387455
0    127757
Name: neg_count, dtype: int64

In [8]:
# Sum the pos_count and neg_count indicators per hotel (result is indexed by Hotel_Name).
# The columns are selected with a list: tuple-style selection on a groupby
# (`gb["a", "b"]`) was deprecated in pandas 1.0 and raises in pandas 2.0.
# The groupby sum already returns a DataFrame, so no extra pd.DataFrame() wrapper is needed.
reviews = data.groupby("Hotel_Name")[["pos_count", "neg_count"]].sum()

In [9]:
# Copy the hotel names from the index into a regular column (appended as the last
# column), then swap the index for a plain 0..n-1 range.
reviews = reviews.assign(Hotel_Name=reviews.index).reset_index(drop=True)

In [10]:
# Preview the first rows of the per-hotel table.
reviews.head()

Unnamed: 0,pos_count,neg_count,Hotel_Name
0,150,113,11 Cadogan Gardens
1,136,123,1K Hotel
2,660,459,25hours Hotel beim MuseumsQuartier
3,103,55,41
4,28,18,45 Park Lane Dorchester Collection


In [11]:
# total = pos_count + neg_count for each hotel.
reviews["total"] = reviews["pos_count"].add(reviews["neg_count"])

In [12]:
# Give every row a weight of 1 and add the weights up per hotel, producing a frame
# with the columns Hotel_Name and count.
data["count"] = 1
count_review = (
    data[["Hotel_Name", "count"]]
    .groupby("Hotel_Name", as_index=False)
    .sum()
)

#### 5.2 Famous Hotels in Europe (based on the number of reviews)

In [13]:
# Attach each hotel's row count to the per-hotel table; the left join keeps every
# row of `reviews`.
reviews = reviews.merge(count_review, how="left", on="Hotel_Name")

In [14]:
# Preview the first rows of the per-hotel table.
reviews.head()

Unnamed: 0,pos_count,neg_count,Hotel_Name,total,count
0,150,113,11 Cadogan Gardens,263,159
1,136,123,1K Hotel,259,148
2,660,459,25hours Hotel beim MuseumsQuartier,1119,689
3,103,55,41,158,103
4,28,18,45 Park Lane Dorchester Collection,46,28


In [15]:
# Print the names of the ten hotels with the largest `count`, one per line.
by_review_count = reviews.sort_values(by="count", ascending=False)
for hotel_name in by_review_count["Hotel_Name"].iloc[:10].values:
    print(hotel_name)

Britannia International Hotel Canary Wharf
Strand Palace Hotel
Park Plaza Westminster Bridge London
Copthorne Tara Hotel London Kensington
DoubleTree by Hilton Hotel London Tower of London
Grand Royale London Hyde Park
Holiday Inn London Kensington
Hilton London Metropole
Millennium Gloucester Hotel London
Intercontinental London The O2


In [16]:
# pos_ratio = pos_count / total, computed in floating point.
pos_as_float = reviews["pos_count"].astype("float")
total_as_float = reviews["total"].astype("float")
reviews["pos_ratio"] = pos_as_float / total_as_float

#### 5.3 The Most Positively Reviewed Hotels Among the Famous Hotels (based on the ratio of positive reviews of the famous hotels)

In [17]:
# The "famous" hotels are the 100 rows with the highest `count`.
famous_hotels = reviews.sort_values("count", ascending=False).iloc[:100]

In [18]:
# Widen the column display limit so long addresses are shown in full.
pd.set_option('display.max_colwidth', 2000)

# Take the first ten names in famous_hotels and list each distinct
# (Hotel_Name, Hotel_Address) pair found for them in the review data.
popular = famous_hotels["Hotel_Name"].values[:10]
is_popular = data['Hotel_Name'].isin(popular)
data.loc[is_popular, ["Hotel_Name", "Hotel_Address"]].drop_duplicates()

Unnamed: 0,Hotel_Name,Hotel_Address
8301,Grand Royale London Hyde Park,1 Inverness Terrace Westminster Borough London W2 3JP United Kingdom
14829,Intercontinental London The O2,1 Waterview Drive Greenwich London SE10 0TW United Kingdom
63942,Britannia International Hotel Canary Wharf,163 Marsh Wall Docklands Tower Hamlets London E14 9SJ United Kingdom
111930,Hilton London Metropole,225 Edgware Road Westminster Borough London W2 1JU United Kingdom
164259,Strand Palace Hotel,372 Strand Westminster Borough London WC2R 0JJ United Kingdom
171770,Millennium Gloucester Hotel London,4 18 Harrington Gardens Kensington and Chelsea London SW7 4LH United Kingdom
236055,DoubleTree by Hilton Hotel London Tower of London,7 Pepys Street City of London London EC3N 4AF United Kingdom
440985,Copthorne Tara Hotel London Kensington,Scarsdale Place Kensington Kensington and Chelsea London W8 5SY United Kingdom
504027,Park Plaza Westminster Bridge London,Westminster Bridge Road Lambeth London SE1 7UT United Kingdom
512645,Holiday Inn London Kensington,Wrights Lane Kensington and Chelsea London W8 5SP United Kingdom


In [19]:
# Print the ten famous hotels with the highest pos_ratio, one name per line.
by_pos_ratio = famous_hotels.sort_values(by="pos_ratio", ascending=False)
for hotel_name in by_pos_ratio["Hotel_Name"].iloc[:10]:
    print(hotel_name)

Hotel Berna
Club Quarters Hotel Lincoln s Inn Fields
Apex Temple Court Hotel
Apex City Of London Hotel
Hotel Esther a
Urban Lodge Hotel
Hilton London Canary Wharf
The Piccadilly London West End
Shangri La Hotel at The Shard London
The Student Hotel Amsterdam City


In [20]:
# Names of the ten famous hotels with the highest pos_ratio, followed by each distinct
# (Hotel_Name, Hotel_Address) pair found for them in the review data.
ranked_by_pos_ratio = famous_hotels.sort_values(by="pos_ratio", ascending=False)
pos = ranked_by_pos_ratio["Hotel_Name"].iloc[:10].values
in_pos = data['Hotel_Name'].isin(pos)
data.loc[in_pos, ["Hotel_Name", "Hotel_Address"]].drop_duplicates()

Unnamed: 0,Hotel_Name,Hotel_Address
971,Apex Temple Court Hotel,1 2 Serjeant s Inn Fleet Street City of London London EC4Y 1LL United Kingdom
147504,Shangri La Hotel at The Shard London,31 St Thomas Street Southwark London SE1 9QU United Kingdom
223782,Club Quarters Hotel Lincoln s Inn Fields,61 Lincoln s Inn Fields Camden London WC2A 3JW United Kingdom
228012,The Piccadilly London West End,65 73 Shaftesbury Avenue Westminster Borough London W1D 6EX United Kingdom
273892,Urban Lodge Hotel,Arlandaweg 10 Westpoort 1043 EW Amsterdam Netherlands
387956,Apex City Of London Hotel,No 1 Seething Lane City of London London EC3N 4AX United Kingdom
450210,Hotel Esther a,Singel 303 309 Amsterdam City Center 1012 WJ Amsterdam Netherlands
454064,Hilton London Canary Wharf,South Quay Marsh Wall Tower Hamlets London E14 9SH United Kingdom
483570,Hotel Berna,Via Napo Torriani 18 Central Station 20124 Milan Italy
509232,The Student Hotel Amsterdam City,Wibautstraat 129 Oost 1091 GL Amsterdam Netherlands


#### 5.4 The Most Consistent Performance Hotels

In [21]:
# Convert the Review_Date column to pandas datetimes.
data["Review_Date"] = pd.to_datetime(data["Review_Date"])

In [22]:
# Rank hotels by how consistent their reviewer scores are.
#
# 1. Compute the mean and standard deviation of Reviewer_Score per hotel. The hotel name
#    is kept as the index (no `as_index=False`) because the last line reads the names
#    from `.index`; pandas versions that honour `as_index=False` for list aggregations
#    would otherwise return integer positions here.
#    The aggregations are named by string ("mean", "std"): passing the np.mean / np.std
#    callables is deprecated in recent pandas, and the strings keep the pandas
#    implementations that those callables were mapped to.
# 2. Keep only hotels whose mean score is above the threshold.
# 3. Show the 20 remaining hotels with the smallest standard deviation.
min_mean_score = 8.9
temp = (
    data.groupby("Hotel_Name")["Reviewer_Score"]
    .agg(["mean", "std"])
    .sort_values("mean", ascending=False)
)
temp = temp[temp["mean"] > min_mean_score]
temp.sort_values("std").index[0:20]

Index(['H10 Casa Mimosa 4 Sup', 'Hotel Casa Camper',
       'H tel de La Tamise Esprit de France', 'Le Narcisse Blanc Spa',
       'Hotel Eiffel Blomet', '45 Park Lane Dorchester Collection', '41',
       'Hotel Stendhal Place Vend me Paris MGallery by Sofitel',
       'H tel D Aubusson', 'Hotel The Serras', 'Hotel Am Stephansplatz',
       'Lansbury Heritage Hotel', 'Covent Garden Hotel', 'The Soho Hotel',
       'Catalonia Magdalenes', 'H tel Saint Paul Rive Gauche',
       'Milestone Hotel Kensington', 'Ritz Paris', 'H tel Fabric',
       'Le 123 S bastopol Astotel'],
      dtype='object', name='Hotel_Name')

In [23]:
# Address and average score of the 20 most consistent hotels.
# The names are taken from `temp` (per-hotel mean/std of Reviewer_Score, already filtered
# on the mean) with the same "lowest std first, take 20" selection shown in the previous
# cell. A hand-pasted copy of that cell's output would silently go stale whenever the
# data or the threshold changes.
lis = temp.sort_values("std").index[0:20].tolist()
data.loc[data['Hotel_Name'].isin(lis), ["Hotel_Name", "Hotel_Address", "Average_Score"]].drop_duplicates()

Unnamed: 0,Hotel_Name,Hotel_Address,Average_Score
11259,Milestone Hotel Kensington,1 Kensington Court Kensington and Chelsea London W8 5DL United Kingdom,9.5
20472,Covent Garden Hotel,10 Monmouth Street Camden London WC2H 9HB United Kingdom,9.4
37337,Lansbury Heritage Hotel,117 Poplar High Street Tower Hamlets London E14 0AE United Kingdom,9.4
41518,Le 123 S bastopol Astotel,123 boulevard S bastopol 2nd arr 75002 Paris France,9.3
54717,Ritz Paris,15 Place Vend me 1st arr 75001 Paris France,9.8
81101,Le Narcisse Blanc Spa,19 Boulevard De La Tour Maubourg 7th arr 75007 Paris France,9.5
111027,Hotel Stendhal Place Vend me Paris MGallery by Sofitel,22 Rue Danielle Casanova 1st arr 75002 Paris France,8.8
147195,H tel Fabric,31 rue de la Folie M ricourt 11th arr 75011 Paris France,9.3
152204,H tel D Aubusson,33 Rue Dauphine 6th arr 75006 Paris France,9.4
176748,The Soho Hotel,4 Richmond Mews Westminster Borough London W1D 3DH United Kingdom,9.5
