In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the TripAdvisor export into a DataFrame.
file_path = '/mnt/data/tripadvisor.csv'
tripadvisor_data = pd.read_csv(file_path)

# Source column -> readable label. Insertion order defines the column order
# of the resulting frame.
# NOTE(review): the pairing of category slots 0/1/2 with Cleanliness / Staff /
# Facilities is taken as given here -- confirm against the dataset schema.
column_labels = {
    "numberOfReviews": "Number of Reviews",
    "categoryReviewScores/0/score": "Cleanliness Score",
    "categoryReviewScores/1/score": "Staff Score",
    "categoryReviewScores/2/score": "Facilities Score",
}
sentiment_columns = list(column_labels)

# The three per-aspect score columns that feed the row-wise average.
score_labels = ["Cleanliness Score", "Staff Score", "Facilities Score"]

# Keep only rows where all four selected columns are present, relabel them,
# then append the mean of the three aspect scores for each row.
sentiment_data = (
    tripadvisor_data[sentiment_columns]
    .dropna()
    .rename(columns=column_labels)
    .assign(**{
        "Average Sentiment Score": lambda frame: frame[score_labels].mean(axis=1),
    })
)


In [None]:

# Perceptual map: review volume (x) against average sentiment score (y),
# one point per row of `sentiment_data`.
fig, ax = plt.subplots(figsize=(10, 7))
ax.scatter(
    sentiment_data['Number of Reviews'],
    sentiment_data['Average Sentiment Score'],
    alpha=0.7,
)
ax.set_title('Perceptual Map of Hotels Based on Sentiment Analysis')
ax.set_xlabel('Number of Reviews')
ax.set_ylabel('Average Sentiment Score')
ax.grid(True)

# Annotate the 10 rows with the highest review volume.
# The label text is the row's index value in `sentiment_data`, not a hotel name.
top_hotels = sentiment_data.nlargest(10, 'Number of Reviews')
for i, row in top_hotels.iterrows():
    # Offset the label in screen points rather than data units: a fixed
    # data-unit shift (e.g. +2 reviews) shrinks to nothing when the x-axis
    # spans a wide range, leaving the text on top of its marker.
    ax.annotate(
        f'Hotel {i}',
        xy=(row['Number of Reviews'], row['Average Sentiment Score']),
        xytext=(5, 0),
        textcoords='offset points',
        fontsize=9,
    )

plt.show()


In [None]:

# Mean of each service-aspect score column, taken over all rows of
# `sentiment_data`.
aspect_score_columns = ['Cleanliness Score', 'Staff Score', 'Facilities Score']
sentiment_trends = sentiment_data[aspect_score_columns].mean()

# Two-column summary table: one row per service aspect, listed in the same
# order as the score columns above.
aspect_names = ["Cleanliness", "Staff Behavior", "Facilities"]
trend_summary = pd.DataFrame({
    "Service Aspect": aspect_names,
    "Average Sentiment Score": sentiment_trends.to_numpy(),
})

# Print a heading line followed by the table.
print("Sentiment Trends Summary:", trend_summary, sep="\n")
