In [2]:
import pandas as pd
from textblob import TextBlob

data = pd.read_csv('cleaned_hotelreviews.csv')

In [3]:
# I must fist ensure that reviews are strings because TextBlob only work with strings
data['Positive_Review'] = data['Positive_Review'].astype(str)
data['Negative_Review'] = data['Negative_Review'].astype(str)

In [4]:
# To analyze the text for polarity (sentiment) and subjectivity
data['Positive_Sentiment'] = data['Positive_Review'].apply(lambda x: TextBlob(x).sentiment.polarity)
data['Negative_Sentiment'] = data['Negative_Review'].apply(lambda x: TextBlob(x).sentiment.polarity)

In [5]:
# To see the sentiment polarity
print(data[['Hotel_Name', 'Positive_Review', 'Positive_Sentiment', 'Negative_Review', 'Negative_Sentiment']].head())

                         Hotel_Name  \
0  The Park Grand London Paddington   
1  The Park Grand London Paddington   
2  The Park Grand London Paddington   
3  The Park Grand London Paddington   
4  The Park Grand London Paddington   

                                     Positive_Review  Positive_Sentiment  \
0                               the size of the room            0.000000   
1                                    nothing special            0.357143   
2  the front desk and entrance was very nice and ...            0.780000   
3  they transfer me to the park grand london kens...            0.275000   
4  very handy location to paddington station and ...            0.354551   

                                     Negative_Review  Negative_Sentiment  
0  the first night we were given an extremely sma...            0.050000  
1  they stolen 800 from my money from the safe bo...            0.233333  
2  the room was appalling i had originally reques...            0.111224  
3  the c

In [6]:
# Aggregating sentiment scores by hotel
hotel_sentiments = data.groupby('Hotel_Name')[['Positive_Sentiment', 'Negative_Sentiment']].mean()

print(hotel_sentiments.sort_values(by='Positive_Sentiment', ascending=False).head(5))  # Top positive hotels
print(hotel_sentiments.sort_values(by='Negative_Sentiment', ascending=False).head(5))  # Top negative hotels

                                                   Positive_Sentiment  \
Hotel_Name                                                              
Intercontinental London The O2                               0.469307   
Hotel Esther a                                               0.444059   
The Tower A Guoman Hotel                                     0.434519   
Mondrian London                                              0.433861   
DoubleTree by Hilton Hotel London Tower of London            0.431256   

                                                   Negative_Sentiment  
Hotel_Name                                                             
Intercontinental London The O2                               0.032244  
Hotel Esther a                                               0.036016  
The Tower A Guoman Hotel                                     0.014068  
Mondrian London                                              0.040852  
DoubleTree by Hilton Hotel London Tower of London       

In [9]:
# Calculating the hotel scores. Absolute value was used for negative sentiments because higher values are worse in the context of negative comments, while high values
# are better for positive comments

hotel_sentiments['Hotel_Score'] = (hotel_sentiments['Positive_Sentiment'] - abs(hotel_sentiments['Negative_Sentiment'])) * 50 + 50

# Ensuring that the scores are within 0-100 bounds
hotel_sentiments['Hotel_Score'] = hotel_sentiments['Hotel_Score'].clip(0, 100)
data['Hotel_Score'] = data['Hotel_Name'].map(hotel_sentiments['Hotel_Score'])

# Sorting the hotels by their score (desc)
hotel_sentiments = hotel_sentiments.sort_values(by='Hotel_Score', ascending=False)

# Resetting the index to include 'Hotel_Name' as a column in hotel_sentiments
hotel_sentiments = hotel_sentiments.reset_index()

# Adding the 'Hotel_Score' column to the original data
data['Hotel_Score'] = data['Hotel_Name'].map(hotel_sentiments.set_index('Hotel_Name')['Hotel_Score'])

# Displaying the top 10 hotels by score
print(hotel_sentiments[['Hotel_Name', 'Hotel_Score']].head(10))
print(hotel_sentiments[['Hotel_Name', 'Hotel_Score']].tail(10))

                                          Hotel_Name  Hotel_Score
0                     Intercontinental London The O2    71.853144
1  DoubleTree by Hilton Hotel London Tower of London    71.219423
2                  St James Court A Taj Hotel London    71.041401
3                           The Tower A Guoman Hotel    71.022560
4                              Hilton London Wembley    70.628802
5                        Park Plaza London Riverbank    70.610807
6    DoubleTree by Hilton London Docklands Riverside    70.547576
7               Park Plaza Westminster Bridge London    70.438930
8                                     Hotel Esther a    70.402132
9                      Park Plaza County Hall London    70.178690
                                    Hotel_Name  Hotel_Score
32            The Park Grand London Paddington    66.836044
33                   Norfolk Towers Paddington    66.767927
34          Millennium Gloucester Hotel London    66.572460
35          Best Western Premier H

In [30]:
data.to_csv('hotelreviews_with_scores.csv', index=False)