In [250]:
import pandas as pd
import numpy as np

# Loading in Data

In [251]:
# Read the full review dataset, draw a 20% random sample with a fixed seed,
# and write that sample to hotel_review_sample.csv without the index column.
hotel_review = pd.read_csv('Hotel_Reviews.csv')

hotel_review_sample = hotel_review.sample(
    frac=0.2,
    random_state=42,
)
hotel_review_sample.to_csv(
    "hotel_review_sample.csv",
    index=False,
)

In [252]:
# Load the price table; the first CSV column becomes the row index.
hotel_prices = pd.read_csv(
    'booking_bcn1.csv',
    index_col=0,
)

Checking to see how many hotel names match

In [253]:
# Build the join key for the review table: lower-case the hotel name and
# collapse every run of whitespace (leading, trailing and internal) into a
# single space. This is the same normalisation the price table's key ends up
# with, so names that differ only in spacing still match.
hotel_review_sample['name_processed'] = (
    hotel_review_sample['Hotel_Name']
    .str.lower()
    .str.split()
    .str.join(' ')
)

# The raw name column is redundant once the normalised key exists.
drop_cols = [
    'Hotel_Name'
]

hotel_review_sample = hotel_review_sample.drop(columns=drop_cols)

In [254]:
# Build the join key for the price table: lower-case the hotel name and
# collapse every run of whitespace (leading, trailing and internal) into a
# single space. The tokens produced by .str.split() are joined back right away
# so the column holds plain strings; a column of Python lists is unhashable and
# cannot be passed to set(), merge() or groupby().
hotel_prices['name_processed'] = (
    hotel_prices['Hotels']
    .str.lower()
    .str.split()
    .str.join(' ')
)

# The raw name column is redundant once the normalised key exists.
drop_cols = [
    'Hotels'
]

hotel_prices = hotel_prices.drop(columns=drop_cols)

In [255]:
def _join_tokens(value):
    """Turn a list of name tokens into one space-separated string; pass anything else through."""
    if isinstance(value, list):
        return ' '.join(value)
    return value


# Make sure the price-side key holds plain strings before it is put in a set.
hotel_prices['name_processed'] = hotel_prices['name_processed'].map(_join_tokens)

# Hotel names present in both tables.
price_names = set(hotel_prices['name_processed'])
review_names = set(hotel_review_sample['name_processed'])
matching_hotels = price_names & review_names

# Report how many distinct names overlap.
num_matches = len(matching_hotels)
print(f"Number of matching hotel names: {num_matches}")


Number of matching hotel names: 96


Getting rid of columns we don't need

In [256]:
# Show the column labels currently present in the review sample.
hotel_review_sample.columns

Index(['Hotel_Address', 'Additional_Number_of_Scoring', 'Review_Date',
       'Average_Score', 'Reviewer_Nationality', 'Negative_Review',
       'Review_Total_Negative_Word_Counts', 'Total_Number_of_Reviews',
       'Positive_Review', 'Review_Total_Positive_Word_Counts',
       'Total_Number_of_Reviews_Reviewer_Has_Given', 'Reviewer_Score', 'Tags',
       'days_since_review', 'lat', 'lng', 'name_processed'],
      dtype='object')

In [257]:
# Remove the address, date, aggregate-score, nationality, tag and geo columns
# from the review sample.
drop_cols = [
    'Hotel_Address', 'Additional_Number_of_Scoring', 'Review_Date',
    'Average_Score', 'Reviewer_Nationality', 'Total_Number_of_Reviews',
    'Tags', 'days_since_review', 'lat', 'lng',
]
hotel_review_sample = hotel_review_sample.drop(drop_cols, axis='columns')

In [258]:
# Attach the price-table columns to each review by matching on the normalised
# hotel name. The inner join keeps only rows whose key exists in both tables.
# NOTE(review): if hotel_prices contains repeated name_processed values, every
# matching review row is duplicated once per repeat; confirm the key is unique.
merged_df = pd.merge(
    hotel_review_sample,
    hotel_prices,
    how="inner",
    on="name_processed",
)


In [259]:
# Show the column labels of the merged review/price frame.
merged_df.columns

Index(['Negative_Review', 'Review_Total_Negative_Word_Counts',
       'Positive_Review', 'Review_Total_Positive_Word_Counts',
       'Total_Number_of_Reviews_Reviewer_Has_Given', 'Reviewer_Score',
       'name_processed', 'Prices', 'Descriptions', 'Full_Descriptions'],
      dtype='object')

In [260]:
# Per-column count of missing values in the merged frame.
null_counts = merged_df.isna().sum()
print(null_counts)


Negative_Review                               0
Review_Total_Negative_Word_Counts             0
Positive_Review                               0
Review_Total_Positive_Word_Counts             0
Total_Number_of_Reviews_Reviewer_Has_Given    0
Reviewer_Score                                0
name_processed                                0
Prices                                        0
Descriptions                                  0
Full_Descriptions                             0
dtype: int64


In [261]:
# Display the merged review/price frame.
merged_df

Unnamed: 0,Negative_Review,Review_Total_Negative_Word_Counts,Positive_Review,Review_Total_Positive_Word_Counts,Total_Number_of_Reviews_Reviewer_Has_Given,Reviewer_Score,name_processed,Prices,Descriptions,Full_Descriptions
0,Nothing to dislike,4,Location Culture bars and restaurants Transpo...,13,2,9.6,room mate anna,"€ 1,418",1 double bed,Featuring a seasonal rooftop plunge pool with ...
1,No Negative,0,The hotel was in a great spot near to everyth...,29,7,8.8,hotel barcelona center,"€ 1,584",Multiple bed types,Barcelona Center is 400 metres from Barcelona’...
2,Due to the location and it being a very touri...,31,We had such a great experience here The staff...,138,1,10.0,hotel barcelona catedral,"€ 2,259",1 extra-large double bed,There is a gym and a rooftop chill-out terrace...
3,No Negative,0,Free coffee if wanted during breakfast withou...,41,1,7.9,petit palace boqueria garden,"€ 2,068",Beds: 1 double or 2 singles,The Petit Palace Boqueria Garden is set in a h...
4,Semi transparent wall of bathroom It cause un...,14,Easy to reach from and to airport by metro wi...,41,4,7.9,hotel ronda lesseps,"€ 1,352",1 double bed,The Hotel Ronda Lesseps is a quiet and familia...
...,...,...,...,...,...,...,...,...,...,...
6003,There was a faint sewage smell in the bathroo...,24,Lovely studio and apartment hotel with large ...,43,13,9.6,capri by fraser barcelona,"€ 1,347",Beds: 1 double or 2 singles,This motorcycle-friendly hotel is 3 Metro stop...
6004,The streets were very noisy from 3 6am No cof...,18,Great location,4,10,7.9,petit palace boqueria garden,"€ 2,068",Beds: 1 double or 2 singles,The Petit Palace Boqueria Garden is set in a h...
6005,The room was ready early when we arrived afte...,21,Very nice hotel and extremely helpful and fri...,10,1,10.0,hotel villa emilia,"€ 1,607",Beds: 1 double or 2 singles,This stylish design hotel is located 150 metre...
6006,No Negative,0,Staff were lovely very useful The shower was ...,29,15,9.2,olivia balmes hotel,"€ 1,904",Beds: 1 double or 2 singles,"Offering an outdoor swimming pool, Olivia Balm..."


In [262]:
# Convert price strings such as "€ 1,418" into floats (1418.0).
# The dtype guard makes the cell safe to re-run: once Prices is numeric the
# .str accessor would raise AttributeError, so the conversion is skipped.
if not pd.api.types.is_numeric_dtype(merged_df['Prices']):
    merged_df['Prices'] = (
        merged_df['Prices']
        # Both replacements are plain literals, so regex matching is not needed.
        .str.replace('€', '', regex=False)   # drop the currency symbol
        .str.replace(',', '', regex=False)   # drop the thousands separator
        .str.strip()                         # remove the whitespace left behind
        .astype(float)
    )

In [263]:
# Display the merged frame again to check the converted Prices column.
merged_df

Unnamed: 0,Negative_Review,Review_Total_Negative_Word_Counts,Positive_Review,Review_Total_Positive_Word_Counts,Total_Number_of_Reviews_Reviewer_Has_Given,Reviewer_Score,name_processed,Prices,Descriptions,Full_Descriptions
0,Nothing to dislike,4,Location Culture bars and restaurants Transpo...,13,2,9.6,room mate anna,1418.0,1 double bed,Featuring a seasonal rooftop plunge pool with ...
1,No Negative,0,The hotel was in a great spot near to everyth...,29,7,8.8,hotel barcelona center,1584.0,Multiple bed types,Barcelona Center is 400 metres from Barcelona’...
2,Due to the location and it being a very touri...,31,We had such a great experience here The staff...,138,1,10.0,hotel barcelona catedral,2259.0,1 extra-large double bed,There is a gym and a rooftop chill-out terrace...
3,No Negative,0,Free coffee if wanted during breakfast withou...,41,1,7.9,petit palace boqueria garden,2068.0,Beds: 1 double or 2 singles,The Petit Palace Boqueria Garden is set in a h...
4,Semi transparent wall of bathroom It cause un...,14,Easy to reach from and to airport by metro wi...,41,4,7.9,hotel ronda lesseps,1352.0,1 double bed,The Hotel Ronda Lesseps is a quiet and familia...
...,...,...,...,...,...,...,...,...,...,...
6003,There was a faint sewage smell in the bathroo...,24,Lovely studio and apartment hotel with large ...,43,13,9.6,capri by fraser barcelona,1347.0,Beds: 1 double or 2 singles,This motorcycle-friendly hotel is 3 Metro stop...
6004,The streets were very noisy from 3 6am No cof...,18,Great location,4,10,7.9,petit palace boqueria garden,2068.0,Beds: 1 double or 2 singles,The Petit Palace Boqueria Garden is set in a h...
6005,The room was ready early when we arrived afte...,21,Very nice hotel and extremely helpful and fri...,10,1,10.0,hotel villa emilia,1607.0,Beds: 1 double or 2 singles,This stylish design hotel is located 150 metre...
6006,No Negative,0,Staff were lovely very useful The shower was ...,29,15,9.2,olivia balmes hotel,1904.0,Beds: 1 double or 2 singles,"Offering an outdoor swimming pool, Olivia Balm..."
