In [2]:
# import packages
import os

import matplotlib.pyplot as plt
import nltk
import pandas as pd
import seaborn as sns
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from scipy.stats import spearmanr


# Importing Data

In [7]:
# Project root folder. Set the DS4002_CS3_DIR environment variable to point at
# your own checkout; when it is unset, the original author's local path is used.
PROJECT_DIR = os.environ.get(
    "DS4002_CS3_DIR",
    "c:\\Users\\ellie\\OneDrive\\Documents\\spring 2025\\ds4002\\DS4002-CS3",
)

# change to main folder so the relative DATA/ paths below resolve
os.chdir(PROJECT_DIR)

# import restaurant data (one row per business, read from DATA/)
restaurant_df = pd.read_csv('DATA/philly_restaurants.csv')

# import review data (read from DATA/)
review_df = pd.read_csv('DATA/philly_reviews.csv')

# VADER Sentiment Analysis

In [None]:
# Download the VADER lexicon (nltk skips the download if it is already present)
nltk.download('vader_lexicon')

# Keep only the columns used below and drop any review with a missing value in them
review_df = review_df.loc[:, ['text', 'review_id', 'business_id']].dropna()

# Create the VADER analyzer that get_sentiment uses
analyzer = SentimentIntensityAnalyzer()

# Function to compute the sentiment score for a single review
def get_sentiment(text):
    """Return the VADER 'compound' polarity score for one review text.

    Uses the module-level ``analyzer`` and returns the value stored under
    the 'compound' key of its ``polarity_scores`` result.
    """
    scores = analyzer.polarity_scores(text)
    return scores['compound']

# Score every review text with get_sentiment
review_df['sentiment_score'] = review_df['text'].map(get_sentiment)

# Average the review scores per business; business_id stays a regular column
grouped = review_df.groupby('business_id', as_index=False)['sentiment_score'].mean()

# Correlation Analysis

In [None]:
# Join per-business mean sentiment onto the restaurant table.
# An outer join keeps business_ids found in only one of the two frames,
# so the columns coming from the other frame are NaN for those rows.
df = pd.merge(restaurant_df, grouped, on='business_id', how='outer')

In [None]:
# Boxplot comparing sentiment scores across restaurant price levels

fig, ax = plt.subplots(figsize=(8, 6))  # explicit Figure/Axes pair

# Draw one box per price level onto the Axes created above
sns.boxplot(data=df, x='price_level', y='sentiment_score', palette='pastel', ax=ax)

# Title and axis labels set in a single call on the Axes object
ax.set(
    title='Sentiment Scores by Restaurant Price Level',
    xlabel='Price Level',
    ylabel='Sentiment Score',
)

plt.show()

In [None]:
# Calculate Spearman correlation (https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.spearmanr.html)
# df is built with an outer merge, so price_level or sentiment_score can be NaN.
# spearmanr's default nan_policy='propagate' would then return nan for both the
# coefficient and the p-value, so keep only rows where both values are present.
paired = df[['price_level', 'sentiment_score']].dropna()
corr, p_value = spearmanr(paired['price_level'], paired['sentiment_score'])

# Print results
print(f"Spearman Correlation: {corr:.3f}")
print(f"P-Value: {p_value:.5f}")