# Restaurant Review Sentiment Analysis
## Multi-Aspect, Multi-Model Comparative Analysis

This notebook analyzes restaurant reviews across three aspects (Food, Service, Ambiance) using two sentiment analysis models (TinyRoBERTa and DistilBERT).

**Dataset Overview:**
- 3 Aspects: Food, Service, Ambiance
- 2 Models: TinyRoBERTa SQuAD2, DistilBERT Base Uncased
- 6 result files in total (3 aspects × 2 models), one per aspect–model pair

**Analysis Goals:**
1. Compare model performance and agreement
2. Identify sentiment patterns across aspects
3. Discover geographic and restaurant-level insights
4. Understand confidence scores and prediction reliability

## 1. Import Libraries and Load Data

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy import stats
from scipy.stats import pearsonr, spearmanr, chi2_contingency
import warnings
from pathlib import Path
warnings.filterwarnings('ignore')

# Set visualization styles
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
%matplotlib inline

# Configure pandas display
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

print("Libraries imported successfully!")

Libraries imported successfully!


In [None]:
# Load the six per-aspect / per-model result files and consolidate them.
#
# Produces three objects used by the rest of the notebook:
#   data        - raw frames exactly as read from disk, keyed as in `files`
#   dataframes  - the same frames tagged with `Aspect` and `Model` columns,
#                 keyed '<aspect>_<model>' in lower case (e.g. 'food_tinyroberta')
#   df_all      - all tagged frames stacked into one long DataFrame
results_dir = Path('../results')

# Result file for every (aspect, model) pair.
files = {
    'Ambiance_BERT': 'ambiance_sentiment_bert_base_cased.csv',
    'Ambiance_TinyRoBERTa': 'ambiance_sentiment_tinyroberta_squad2.csv',
    'Food_BERT': 'food_sentiment_bert_base_cased.csv',
    'Food_TinyRoBERTa': 'food_sentiment_tinyroberta_squad2.csv',
    'Service_BERT': 'service_sentiment_bert_base_cased.csv',
    'Service_TinyRoBERTa': 'service_sentiment_tinyroberta_squad2.csv'
}

# NOTE(review): the BERT result files are named `bert_base_cased`, while the
# notebook text and the downstream cells refer to this model as 'DistilBERT'.
# The label below follows the downstream cells - confirm which model actually
# produced these files.
MODEL_LABELS = {'BERT': 'DistilBERT', 'TinyRoBERTa': 'TinyRoBERTa'}

data = {}
dataframes = {}
for key, filename in files.items():
    filepath = results_dir / filename
    data[key] = pd.read_csv(filepath)
    print(f"✅ {key}: {len(data[key])} rows")

    # 'Food_BERT' -> aspect 'Food', model label 'DistilBERT'
    aspect, model_key = key.split('_', 1)
    model = MODEL_LABELS[model_key]
    # assign() returns a new frame, so the raw frame in `data` stays untouched.
    dataframes[f'{aspect.lower()}_{model.lower()}'] = data[key].assign(Aspect=aspect, Model=model)

print(f"\nTotal datasets loaded: {len(data)}")

# Long-format table used by every cross-aspect / cross-model analysis below.
df_all = pd.concat(dataframes.values(), ignore_index=True)
print(f"Consolidated records: {len(df_all):,}")


NameError: name 'Path' is not defined

## 2. Data Overview and Quality Check

In [None]:
# Preview the first ten rows of the consolidated frame (rendered as the cell output).
print("Sample of consolidated data:")
df_all.head(n=10)

In [None]:
# Report the size, column names and dtypes of the consolidated frame.
record_count, column_count = df_all.shape

print("=" * 80)
print("DATA STRUCTURE SUMMARY")
print("=" * 80)
print(f"\nTotal Records: {record_count:,}")
print(f"Total Columns: {column_count}")
print("\nColumn Names:")
print(df_all.columns.tolist())
print("\nData Types:")
print(df_all.dtypes)

In [None]:
# List every column that contains nulls, worst offender first.
print("\n" + "=" * 80)
print("MISSING VALUES ANALYSIS")
print("=" * 80)

null_counts = df_all.isnull().sum()
missing_summary = pd.DataFrame({
    'Column': df_all.columns,
    'Missing_Count': null_counts,
    'Missing_Percentage': (null_counts / len(df_all) * 100).round(2)
})

# Keep only the columns that actually have gaps.
has_missing = missing_summary['Missing_Count'] > 0
missing_summary = missing_summary[has_missing].sort_values('Missing_Count', ascending=False)

if missing_summary.empty:
    print("✓ No missing values found in the dataset!")
else:
    print(missing_summary.to_string(index=False))

In [None]:
# Row counts per aspect/model pair, plus the marginal totals.
print("\n" + "=" * 80)
print("RECORD COUNT BY ASPECT AND MODEL")
print("=" * 80)

# Long table of counts, then reshaped into an aspect-by-model grid.
summary_table = (
    df_all.groupby(['Aspect', 'Model'])
    .size()
    .reset_index(name='Record_Count')
)
summary_pivot = summary_table.pivot(index='Aspect', columns='Model', values='Record_Count')
print(summary_pivot)

# Marginal totals, largest group first.
for group_col in ('Aspect', 'Model'):
    print(f"\nTotal by {group_col}:")
    print(df_all.groupby(group_col).size().sort_values(ascending=False))

In [None]:
# Descriptive statistics for every numeric column (table rendered as the cell output).
print("\n" + "=" * 80)
print("NUMERICAL COLUMNS STATISTICS")
print("=" * 80)

numeric_frame = df_all.select_dtypes(include=np.number)
numerical_cols = numeric_frame.columns.tolist()
print(f"\nNumerical columns: {numerical_cols}")
print("\nDescriptive Statistics:")
df_all[numerical_cols].describe()

## 3. Sentiment Distribution Analysis

In [None]:
# 2x3 grid of bar charts: label counts for each loaded aspect/model frame.
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
fig.suptitle('Sentiment Label Distribution by Aspect and Model', fontsize=16, fontweight='bold')

# One colour per bar position, lowest label first.
colors = ['#d62728', '#ff7f0e', '#bcbd22', '#2ca02c', '#17becf']

for idx, (key, df) in enumerate(dataframes.items()):
    # Fill the grid row by row.
    row, col = divmod(idx, 3)
    ax = axes[row, col]

    sentiment_counts = df['Sentiment_Label'].value_counts().sort_index()
    sentiment_counts.plot.bar(ax=ax, color=colors[:len(sentiment_counts)])

    ax.set_title(f"{df['Aspect'].iloc[0]} - {df['Model'].iloc[0]}", fontweight='bold')
    ax.set_xlabel('Sentiment Label (Stars)')
    ax.set_ylabel('Count')
    ax.grid(axis='y', alpha=0.3)

    # Print each count just above its bar.
    for bar_pos, bar_count in enumerate(sentiment_counts):
        ax.text(bar_pos, bar_count + 5, str(bar_count), ha='center', va='bottom', fontweight='bold')

plt.tight_layout()
plt.show()

print("Sentiment distribution visualization complete!")

In [None]:
# 2x3 grid of interactive pie charts: label share for each aspect/model frame.
pie_titles = [f"{df['Aspect'].iloc[0]} - {df['Model'].iloc[0]}" for df in dataframes.values()]
pie_specs = [[{'type': 'pie'}] * 3 for _ in range(2)]

fig = make_subplots(rows=2, cols=3, subplot_titles=pie_titles, specs=pie_specs)

colors = ['#d62728', '#ff7f0e', '#bcbd22', '#2ca02c', '#17becf']

for idx, (key, df) in enumerate(dataframes.items()):
    # plotly subplot coordinates are 1-based.
    row, col = (pos + 1 for pos in divmod(idx, 3))

    sentiment_counts = df['Sentiment_Label'].value_counts().sort_index()
    slice_labels = [f"{int(x)} Star" for x in sentiment_counts.index]

    pie_trace = go.Pie(
        labels=slice_labels,
        values=sentiment_counts.values,
        marker=dict(colors=colors[:len(sentiment_counts)]),
        textinfo='label+percent',
        hovertemplate='%{label}<br>Count: %{value}<br>Percentage: %{percent}<extra></extra>',
    )
    fig.add_trace(pie_trace, row=row, col=col)

fig.update_layout(
    title_text="Sentiment Distribution - Pie Charts Overview",
    height=800,
    showlegend=False
)

fig.show()

In [None]:
# Side-by-side label counts for the three aspects (rows of both models pooled).
aspect_comparison = (
    df_all.groupby(['Aspect', 'Sentiment_Label'])
    .size()
    .reset_index(name='Count')
)
# Labels as rows, aspects as columns; combinations that never occur count as 0.
aspect_pivot = (
    aspect_comparison
    .pivot(index='Sentiment_Label', columns='Aspect', values='Count')
    .fillna(0)
)

fig, ax = plt.subplots(figsize=(12, 6))
aspect_pivot.plot.bar(ax=ax, width=0.8)
ax.set_title('Sentiment Distribution Comparison Across Aspects', fontsize=14, fontweight='bold')
ax.set_xlabel('Sentiment Label (Stars)', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
ax.legend(title='Aspect', title_fontsize=12, fontsize=11)
ax.grid(axis='y', alpha=0.3)
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()

print("\nSentiment Count by Aspect:")
print(aspect_pivot)

## 4. Model Comparison

In [None]:
# Pair the TinyRoBERTa and DistilBERT predictions row by row, per aspect.
#
# Result: `model_comparisons`, a dict keyed by capitalised aspect name whose
# values hold both models' columns side by side. Columns present in both
# inputs get the suffixes `_tiny` / `_distil`.

# Identifier columns used to match rows between the two models.
MERGE_KEYS = ['Review_id', 'Restaurant_id']

model_comparisons = {}

for aspect in ['food', 'service', 'ambiance']:
    df_tiny = dataframes[f'{aspect}_tinyroberta'].copy()
    df_distil = dataframes[f'{aspect}_distilbert'].copy()

    # Join only on identifier columns that exist in BOTH frames; requesting a
    # key that one side lacks would make merge() raise KeyError.
    shared_keys = [key for key in MERGE_KEYS
                   if key in df_tiny.columns and key in df_distil.columns]

    if 'Review_id' in shared_keys:
        merged = df_tiny.merge(df_distil,
                               on=shared_keys,
                               suffixes=('_tiny', '_distil'))
    else:
        # No shared review identifier: fall back to pairing rows by position.
        # This is only meaningful when both files list the reviews in the same order.
        if len(df_tiny) != len(df_distil):
            print(f"⚠ {aspect.capitalize()}: row counts differ "
                  f"({len(df_tiny)} vs {len(df_distil)}); positional pairing "
                  f"keeps only the first {min(len(df_tiny), len(df_distil))} rows")
        df_tiny_indexed = df_tiny.reset_index()
        df_distil_indexed = df_distil.reset_index()
        merged = df_tiny_indexed.merge(df_distil_indexed,
                                       left_index=True, right_index=True,
                                       suffixes=('_tiny', '_distil'))

    model_comparisons[aspect.capitalize()] = merged
    print(f"{aspect.capitalize()}: {len(merged)} matching records")

print("\n✓ Model comparison datasets prepared!")

In [None]:
# How often the two models assign the same, or nearly the same, label.
print("=" * 80)
print("MODEL AGREEMENT ANALYSIS")
print("=" * 80)

agreement_results = []

for aspect, df_comp in model_comparisons.items():
    tiny_labels = df_comp['Sentiment_Label_tiny']
    distil_labels = df_comp['Sentiment_Label_distil']
    total = len(df_comp)

    # Rows where both models give the identical label.
    exact_match = tiny_labels.eq(distil_labels).sum()
    # Rows where the labels differ by at most one (includes exact matches).
    within_1_star = tiny_labels.sub(distil_labels).abs().le(1).sum()

    agreement_results.append({
        'Aspect': aspect,
        'Total_Records': total,
        'Exact_Match': exact_match,
        'Exact_Agreement_%': round(exact_match / total * 100, 2),
        'Within_1_Star': within_1_star,
        'Within_1_Star_%': round(within_1_star / total * 100, 2)
    })

agreement_df = pd.DataFrame(agreement_results)
print(agreement_df.to_string(index=False))

In [None]:
# Linear (Pearson) and rank (Spearman) correlation between the two models' labels.
print("\n" + "=" * 80)
print("CORRELATION ANALYSIS BETWEEN MODELS")
print("=" * 80)

correlation_results = []

for aspect, df_comp in model_comparisons.items():
    tiny_labels = df_comp['Sentiment_Label_tiny']
    distil_labels = df_comp['Sentiment_Label_distil']

    pearson_corr, pearson_p = pearsonr(tiny_labels, distil_labels)
    spearman_corr, spearman_p = spearmanr(tiny_labels, distil_labels)

    # p-values are stored pre-formatted in scientific notation.
    correlation_results.append({
        'Aspect': aspect,
        'Pearson_Correlation': round(pearson_corr, 4),
        'Pearson_p_value': f"{pearson_p:.2e}",
        'Spearman_Correlation': round(spearman_corr, 4),
        'Spearman_p_value': f"{spearman_p:.2e}"
    })

correlation_df = pd.DataFrame(correlation_results)
print(correlation_df.to_string(index=False))

In [None]:
# One heatmap per aspect: TinyRoBERTa label (rows) against DistilBERT label (columns).
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
fig.suptitle('Model Prediction Comparison - Confusion Matrices', fontsize=16, fontweight='bold')

for idx, (aspect, df_comp) in enumerate(model_comparisons.items()):
    ax = axes[idx]

    # Cross-tabulate the two models' labels into a count matrix.
    confusion = pd.crosstab(
        index=df_comp['Sentiment_Label_tiny'],
        columns=df_comp['Sentiment_Label_distil'],
        rownames=['TinyRoBERTa'],
        colnames=['DistilBERT'],
    )

    sns.heatmap(confusion, annot=True, fmt='d', cmap='YlOrRd', ax=ax,
                cbar_kws={'label': 'Count'})
    ax.set_title(aspect, fontweight='bold')
    ax.set_xlabel('DistilBERT Prediction', fontsize=11)
    ax.set_ylabel('TinyRoBERTa Prediction', fontsize=11)

plt.tight_layout()
plt.show()

In [None]:
# Interactive scatter per aspect: TinyRoBERTa label (x) against DistilBERT label (y).
comparison_aspects = list(model_comparisons.keys())
fig = make_subplots(
    rows=1, cols=3,
    subplot_titles=comparison_aspects,
    specs=[[{'type': 'scatter'}] * 3]
)

colors_map = {'Food': 'red', 'Service': 'blue', 'Ambiance': 'green'}

for idx, (aspect, df_comp) in enumerate(model_comparisons.items()):
    panel_col = idx + 1  # plotly subplot columns are 1-based
    tiny_labels = df_comp['Sentiment_Label_tiny']
    distil_labels = df_comp['Sentiment_Label_distil']

    hover_text = [f"Tiny: {t}<br>Distil: {d}" for t, d in zip(tiny_labels, distil_labels)]

    # One marker per paired prediction.
    prediction_points = go.Scatter(
        x=tiny_labels,
        y=distil_labels,
        mode='markers',
        marker=dict(size=5, color=colors_map[aspect], opacity=0.5),
        name=aspect,
        text=hover_text,
        hovertemplate='%{text}<extra></extra>'
    )
    fig.add_trace(prediction_points, row=1, col=panel_col)

    # Dashed diagonal from (1, 1) to (5, 5): points on it are perfect agreement.
    agreement_line = go.Scatter(
        x=[1, 5],
        y=[1, 5],
        mode='lines',
        line=dict(color='black', dash='dash'),
        showlegend=False
    )
    fig.add_trace(agreement_line, row=1, col=panel_col)

# Every panel gets an x-axis title; only the left-most panel gets a y-axis title.
for panel_col in (1, 2, 3):
    fig.update_xaxes(title_text="TinyRoBERTa Prediction", row=1, col=panel_col)
fig.update_yaxes(title_text="DistilBERT Prediction", row=1, col=1)

fig.update_layout(
    title_text="Model Prediction Comparison - Scatter Plots",
    height=500,
    showlegend=True
)

fig.show()

## 5. Aspect-Based Analysis

In [None]:
# Summary statistics of the sentiment label for every aspect/model pair.
summary_functions = ['mean', 'median', 'std', 'min', 'max', 'count']
aspect_stats = (
    df_all.groupby(['Aspect', 'Model'])['Sentiment_Label']
    .agg(summary_functions)
    .round(3)
)

print("=" * 80)
print("SENTIMENT STATISTICS BY ASPECT AND MODEL")
print("=" * 80)
print(aspect_stats)

In [None]:
# Mean sentiment label per aspect, one bar per model.
aspect_avg = (
    df_all.groupby(['Aspect', 'Model'])['Sentiment_Label']
    .mean()
    .reset_index()
)
aspect_avg_pivot = aspect_avg.pivot(index='Aspect', columns='Model', values='Sentiment_Label')

fig, ax = plt.subplots(figsize=(10, 6))
aspect_avg_pivot.plot.bar(ax=ax, width=0.7)

ax.set_title('Average Sentiment Rating by Aspect and Model', fontsize=14, fontweight='bold')
ax.set_xlabel('Aspect', fontsize=12)
ax.set_ylabel('Average Sentiment (1-5 Stars)', fontsize=12)
ax.legend(title='Model', title_fontsize=11, fontsize=10)
ax.grid(axis='y', alpha=0.3)
ax.set_ylim(0, 5)
plt.xticks(rotation=0)

# Annotate every bar with its mean, two decimals.
for bar_group in ax.containers:
    ax.bar_label(bar_group, fmt='%.2f', padding=3)

plt.tight_layout()
plt.show()

In [None]:
# Histogram of sentiment labels per aspect (rows of both models pooled),
# annotated with the mean and median label.
fig, axes = plt.subplots(1, 3, figsize=(18, 5), sharey=True)
fig.suptitle('Sentiment Distribution by Aspect (Both Models Combined)', fontsize=16, fontweight='bold')

aspects = ['Food', 'Service', 'Ambiance']
colors_list = ['#e74c3c', '#3498db', '#2ecc71']
# Bin edges centred on the integer labels 1..5.
star_bin_edges = [0.5, 1.5, 2.5, 3.5, 4.5, 5.5]

for idx, aspect in enumerate(aspects):
    ax = axes[idx]
    # A dedicated name is used here: `data` is the dict of raw frames built by
    # the loading cell and must not be overwritten. Missing labels are dropped
    # so they cannot reach the histogram.
    aspect_labels = df_all.loc[df_all['Aspect'] == aspect, 'Sentiment_Label'].dropna()

    ax.hist(aspect_labels, bins=star_bin_edges,
            edgecolor='black', alpha=0.7, color=colors_list[idx])
    ax.set_title(aspect, fontweight='bold', fontsize=13)
    ax.set_xlabel('Sentiment Label (Stars)', fontsize=11)
    ax.set_xticks([1, 2, 3, 4, 5])
    ax.grid(axis='y', alpha=0.3)

    # Summary box in the top-right corner (axes-relative coordinates).
    mean_val = aspect_labels.mean()
    median_val = aspect_labels.median()
    ax.text(0.98, 0.97, f'Mean: {mean_val:.2f}\nMedian: {median_val:.1f}',
            transform=ax.transAxes, fontsize=10,
            verticalalignment='top', horizontalalignment='right',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

# The y axis is shared, so only the first panel is labelled.
axes[0].set_ylabel('Frequency', fontsize=11)
plt.tight_layout()
plt.show()

In [None]:
# Box plot of sentiment labels for every aspect/model pair (six boxes).
fig, ax = plt.subplots(figsize=(12, 6))

data_for_box = []
labels_for_box = []

for aspect in ['Food', 'Service', 'Ambiance']:
    for model in ['TinyRoBERTa', 'DistilBERT']:
        in_group = (df_all['Aspect'] == aspect) & (df_all['Model'] == model)
        # A dedicated name is used here: `data` is the dict of raw frames from
        # the loading cell. NaN labels are dropped because a NaN in the input
        # makes matplotlib's box statistics NaN (empty box).
        group_labels = df_all.loc[in_group, 'Sentiment_Label'].dropna()
        data_for_box.append(group_labels)
        labels_for_box.append(f"{aspect}\n{model}")

# NOTE(review): Matplotlib 3.9 renamed boxplot's `labels` parameter to
# `tick_labels`; `labels` is kept so the cell also runs on older releases.
bp = ax.boxplot(data_for_box, labels=labels_for_box, patch_artist=True,
                showmeans=True, meanline=True)

# Two shades per aspect, in the same order as the boxes.
colors = ['#ffcccc', '#ffaaaa', '#ccddff', '#aaccff', '#ccffcc', '#aaffaa']
for patch, color in zip(bp['boxes'], colors):
    patch.set_facecolor(color)

ax.set_title('Sentiment Distribution Box Plots by Aspect and Model', fontsize=14, fontweight='bold')
ax.set_ylabel('Sentiment Label (Stars)', fontsize=12)
ax.set_xlabel('Aspect and Model', fontsize=12)
ax.grid(axis='y', alpha=0.3)
ax.set_ylim([0, 6])
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
# Radar chart: share of each star level (in percent) per aspect.
categories = ['1 Star', '2 Stars', '3 Stars', '4 Stars', '5 Stars']
star_levels = [1, 2, 3, 4, 5]

fig = go.Figure()

# Largest plotted percentage across all aspects; becomes the radial axis limit.
# It is taken from the plotted values themselves so the axis can never clip a
# trace (the row count and the non-null label count may differ).
max_pct = 0.0

for aspect in ['Food', 'Service', 'Ambiance']:
    # A dedicated name is used here: `data` is the dict of raw frames from the
    # loading cell and must not be overwritten.
    label_counts = df_all.loc[df_all['Aspect'] == aspect, 'Sentiment_Label'].value_counts().sort_index()
    # Normalise to percentages; star levels that never occur are plotted as 0.
    label_pct = (label_counts / label_counts.sum() * 100).reindex(star_levels, fill_value=0)
    max_pct = max(max_pct, float(label_pct.max()))

    fig.add_trace(go.Scatterpolar(
        r=label_pct.values,
        theta=categories,
        fill='toself',
        name=aspect
    ))

fig.update_layout(
    polar=dict(
        radialaxis=dict(
            visible=True,
            range=[0, max_pct]
        )
    ),
    showlegend=True,
    title="Aspect Sentiment Distribution Comparison (Radar Chart)"
)

fig.show()

## 6. Geographic Patterns

In [None]:
# Decide whether the geographic cells below can run: they need a `City` column.
city_analysis = 'City' in df_all.columns

if not city_analysis:
    print("⚠ City column not found in dataset")
    print("Available columns:", df_all.columns.tolist())
else:
    print("✓ City column found!")
    print(f"\nUnique cities: {df_all['City'].nunique()}")
    print("\nTop 10 cities by review count:")
    print(df_all['City'].value_counts().head(10))

In [None]:
if not city_analysis:
    print("Skipping geographic analysis - City column not available")
else:
    # Mean label and row count per city, rounded to two decimals.
    city_sentiment = (
        df_all.groupby('City')['Sentiment_Label']
        .agg(Avg_Sentiment='mean', Review_Count='count')
        .round(2)
    )
    # Keep cities with at least 30 rows, then the 20 highest averages.
    # NOTE(review): the count is over rows of df_all, which may hold several
    # rows per review (one per aspect/model) - confirm the intended threshold.
    enough_rows = city_sentiment['Review_Count'] >= 30
    city_sentiment = (
        city_sentiment[enough_rows]
        .sort_values('Avg_Sentiment', ascending=False)
        .head(20)
    )

    print("=" * 80)
    print("TOP 20 CITIES BY AVERAGE SENTIMENT (min 30 reviews)")
    print("=" * 80)
    print(city_sentiment)

    # Horizontal bar chart, one bar per city.
    bar_positions = range(len(city_sentiment))
    fig, ax = plt.subplots(figsize=(14, 8))
    bars = ax.barh(bar_positions, city_sentiment['Avg_Sentiment'], color='skyblue', edgecolor='navy')
    ax.set_yticks(bar_positions)
    ax.set_yticklabels(city_sentiment.index)
    ax.set_xlabel('Average Sentiment Rating', fontsize=12)
    ax.set_title('Top 20 Cities by Average Sentiment Rating', fontsize=14, fontweight='bold')
    ax.grid(axis='x', alpha=0.3)

    # Label each bar with "<average> (<row count>)".
    city_values = zip(city_sentiment['Avg_Sentiment'], city_sentiment['Review_Count'])
    for bar_pos, (avg_sentiment, review_count) in enumerate(city_values):
        ax.text(avg_sentiment + 0.02, bar_pos, f"{avg_sentiment:.2f} ({int(review_count)})",
                va='center', fontsize=9)

    plt.tight_layout()
    plt.show()

In [None]:
if city_analysis:
    # Label distribution for the ten cities with the most rows, on a 2x5 grid.
    top_cities = df_all['City'].value_counts().head(10).index

    fig, axes = plt.subplots(2, 5, figsize=(20, 8))
    fig.suptitle('Sentiment Distribution in Top 10 Cities', fontsize=16, fontweight='bold')

    for idx, city in enumerate(top_cities):
        # Fill the grid row by row.
        row, col = divmod(idx, 5)
        ax = axes[row, col]

        city_data = df_all.loc[df_all['City'] == city, 'Sentiment_Label']
        counts = city_data.value_counts().sort_index()

        ax.bar(counts.index, counts.values, color='coral', edgecolor='black', alpha=0.7)
        ax.set_title(city, fontweight='bold')
        ax.set_xlabel('Rating')
        ax.set_ylabel('Count')
        ax.set_xticks([1, 2, 3, 4, 5])
        ax.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    plt.show()

In [None]:
if city_analysis:
    # Heatmap of the mean sentiment label per city and aspect, restricted to
    # the 15 cities with the most rows.
    city_aspect = df_all.groupby(['City', 'Aspect'])['Sentiment_Label'].mean().reset_index()
    city_aspect_pivot = city_aspect.pivot(index='City', columns='Aspect', values='Sentiment_Label')

    # Keep cities with at least 50 rows, then take the 15 largest by row count.
    # groupby() returns the cities in sorted (alphabetical) order, so a plain
    # .head(15) would pick the alphabetically first cities rather than the
    # "Top 15" that the figure title promises.
    city_counts = df_all.groupby('City').size()
    cities_to_show = city_counts[city_counts >= 50].nlargest(15).index
    city_aspect_pivot = city_aspect_pivot.loc[cities_to_show]

    # Diverging colour scale centred on the neutral label 3.
    fig, ax = plt.subplots(figsize=(10, 10))
    sns.heatmap(city_aspect_pivot, annot=True, fmt='.2f', cmap='RdYlGn',
                center=3, vmin=1, vmax=5, ax=ax, cbar_kws={'label': 'Average Sentiment'})
    ax.set_title('Average Sentiment by City and Aspect (Top 15 Cities)', fontsize=14, fontweight='bold')
    ax.set_xlabel('Aspect', fontsize=12)
    ax.set_ylabel('City', fontsize=12)
    plt.tight_layout()
    plt.show()

## 7. Aspect Similarity vs Sentiment Correlation

In [None]:
# List every column whose name mentions "similarity" or "score".
score_keywords = ('similarity', 'score')
similarity_cols = [col for col in df_all.columns
                   if any(keyword in col.lower() for keyword in score_keywords)]
print("Columns related to similarity/scores:")
print(similarity_cols)

# First column whose name contains both "aspect" and "sim" (None when absent).
aspect_similarity_col = next(
    (col for col in df_all.columns
     if 'aspect' in col.lower() and 'sim' in col.lower()),
    None,
)

if not aspect_similarity_col:
    print("\n⚠ Aspect similarity column not found. Available columns:")
    print(df_all.columns.tolist())
else:
    print(f"\n✓ Found aspect similarity column: {aspect_similarity_col}")

In [None]:
# Relationship between the aspect-similarity score and the sentiment label:
# Pearson correlation, a per-aspect scatter plot, and box plots of the label
# over five equal-width similarity bins.

# Prefer the canonical column name; otherwise use the first column whose name
# mentions "similarity".
similarity_candidates = [col for col in df_all.columns if 'similarity' in col.lower()]
if 'Aspect_Similarity_Score' in df_all.columns:
    similarity_col = 'Aspect_Similarity_Score'
elif similarity_candidates:
    similarity_col = similarity_candidates[0]
else:
    similarity_col = None

if similarity_col:
    print(f"Analyzing: {similarity_col}")

    # Correlate only rows where BOTH values are present; a NaN on either side
    # would otherwise turn the coefficient into NaN.
    paired = df_all[[similarity_col, 'Sentiment_Label']].dropna()
    correlation, p_value = pearsonr(paired[similarity_col], paired['Sentiment_Label'])

    print(f"\nPearson Correlation: {correlation:.4f}")
    print(f"P-value: {p_value:.2e}")

    # Scatter plot, one colour per aspect.
    fig, ax = plt.subplots(figsize=(12, 6))

    for aspect in ['Food', 'Service', 'Ambiance']:
        # A dedicated name is used here: `data` is the dict of raw frames from
        # the loading cell and must not be overwritten.
        aspect_rows = df_all[df_all['Aspect'] == aspect]
        ax.scatter(aspect_rows[similarity_col], aspect_rows['Sentiment_Label'],
                   alpha=0.3, label=aspect, s=20)

    ax.set_xlabel(similarity_col, fontsize=12)
    ax.set_ylabel('Sentiment Label', fontsize=12)
    ax.set_title(f'Relationship between {similarity_col} and Sentiment\n(Correlation: {correlation:.4f})',
                 fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Box plot: sentiment by similarity bin. The bin column is stored on df_all.
    df_all['Similarity_Bin'] = pd.cut(df_all[similarity_col], bins=5, labels=['Very Low', 'Low', 'Medium', 'High', 'Very High'])

    fig, ax = plt.subplots(figsize=(12, 6))
    df_all.boxplot(column='Sentiment_Label', by='Similarity_Bin', ax=ax)
    ax.set_title('Sentiment Distribution by Aspect Similarity Level', fontsize=14, fontweight='bold')
    ax.set_xlabel('Aspect Similarity Level', fontsize=12)
    ax.set_ylabel('Sentiment Label', fontsize=12)
    plt.suptitle('')  # Remove the automatic "Boxplot grouped by ..." super-title
    plt.tight_layout()
    plt.show()
else:
    print("Skipping similarity analysis - no similarity column found")

## 8. Text Length and Sentiment Relationship

In [None]:
# Add `Review_Length` (characters) and `Word_Count` (whitespace-separated
# tokens) columns to df_all. Rows without review text get NaN in both columns.

# Locate the review text column: `Review_Text` if present, otherwise the first
# non-numeric, non-identifier column whose name mentions "text" or "review".
text_col = None
if 'Review_Text' in df_all.columns:
    text_col = 'Review_Text'
else:
    for col in df_all.columns:
        col_name = col.lower()
        mentions_text = 'text' in col_name or 'review' in col_name
        # Skip identifier columns such as `Review_id` and numeric columns such
        # as the `Review_Length` this cell itself creates: both match on name
        # but do not hold review text.
        looks_like_id = col_name.endswith('id')
        is_numeric = pd.api.types.is_numeric_dtype(df_all[col])
        if mentions_text and not looks_like_id and not is_numeric:
            text_col = col
            break

if text_col is None:
    print("⚠ No text column found")
else:
    if text_col == 'Review_Text':
        print("✓ Text length metrics calculated!")
    else:
        print(f"✓ Using column: {text_col}")

    # The nullable string dtype keeps missing reviews missing; a plain
    # astype(str) would turn them into the literal text 'nan' (3 characters,
    # 1 word) and distort the statistics below.
    review_text = df_all[text_col].astype('string')
    df_all['Review_Length'] = review_text.str.len().astype('float64')
    df_all['Word_Count'] = review_text.str.split().str.len().astype('float64')

    print("\nText Length Statistics:")
    print(df_all[['Review_Length', 'Word_Count']].describe())

In [None]:
if 'Review_Length' in df_all.columns:
    # Scatter plots of the sentiment label against review length (characters
    # and words), followed by the per-aspect Pearson correlations.
    aspect_names = ['Food', 'Service', 'Ambiance']
    # (column, x-axis label, panel title) for the two panels.
    length_panels = [
        ('Review_Length', 'Review Length (Characters)', 'Review Length vs Sentiment'),
        ('Word_Count', 'Word Count', 'Word Count vs Sentiment'),
    ]

    fig, axes = plt.subplots(1, 2, figsize=(16, 6))

    for panel_ax, (length_col, x_label, panel_title) in zip(axes, length_panels):
        for aspect in aspect_names:
            # A dedicated name is used here: `data` is the dict of raw frames
            # from the loading cell and must not be overwritten.
            aspect_rows = df_all[df_all['Aspect'] == aspect]
            panel_ax.scatter(aspect_rows[length_col], aspect_rows['Sentiment_Label'],
                             alpha=0.3, label=aspect, s=15)

        panel_ax.set_xlabel(x_label, fontsize=12)
        panel_ax.set_ylabel('Sentiment Label', fontsize=12)
        panel_ax.set_title(panel_title, fontsize=13, fontweight='bold')
        panel_ax.legend()
        panel_ax.grid(alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Calculate correlations
    print("\n" + "=" * 80)
    print("CORRELATION: TEXT LENGTH AND SENTIMENT")
    print("=" * 80)

    for aspect in aspect_names:
        aspect_rows = df_all[df_all['Aspect'] == aspect]
        # Use only rows where both values are present so a NaN on either side
        # cannot turn the coefficient into NaN.
        length_pairs = aspect_rows[['Review_Length', 'Sentiment_Label']].dropna()
        word_pairs = aspect_rows[['Word_Count', 'Sentiment_Label']].dropna()
        corr_len, p_len = pearsonr(length_pairs['Review_Length'], length_pairs['Sentiment_Label'])
        corr_word, p_word = pearsonr(word_pairs['Word_Count'], word_pairs['Sentiment_Label'])

        print(f"\n{aspect}:")
        print(f"  Review Length correlation: {corr_len:.4f} (p={p_len:.4f})")
        print(f"  Word Count correlation: {corr_word:.4f} (p={p_word:.4f})")

In [None]:
if 'Review_Length' in df_all.columns:
    # Box plots of the sentiment label within each review-length quartile.
    quartile_names = ['Q1 (Short)', 'Q2', 'Q3', 'Q4 (Long)']
    # The quartile assignment is stored on df_all as `Length_Quartile`.
    df_all['Length_Quartile'] = pd.qcut(df_all['Review_Length'], q=4, labels=quartile_names)

    fig, ax = plt.subplots(figsize=(12, 6))

    # One array of labels per quartile, shortest reviews first.
    data_for_box = []
    for quartile in quartile_names:
        in_quartile = df_all['Length_Quartile'] == quartile
        data_for_box.append(df_all[in_quartile]['Sentiment_Label'].values)

    bp = ax.boxplot(data_for_box, labels=quartile_names,
                    patch_artist=True, showmeans=True)

    colors = ['#ff9999', '#ffcc99', '#99ccff', '#99ff99']
    for box_patch, box_color in zip(bp['boxes'], colors):
        box_patch.set_facecolor(box_color)

    ax.set_title('Sentiment Distribution by Review Length Quartile', fontsize=14, fontweight='bold')
    ax.set_xlabel('Review Length Quartile', fontsize=12)
    ax.set_ylabel('Sentiment Label', fontsize=12)
    ax.grid(axis='y', alpha=0.3)
    plt.tight_layout()
    plt.show()

## 9. Confidence Score Analysis

In [None]:
# Find the confidence column: the first column whose name mentions "confidence".
# `confidence_col` stays None when there is none, which disables the cells below.
confidence_cols = [col for col in df_all.columns if 'confidence' in col.lower()]
print("Confidence-related columns:")
print(confidence_cols)

confidence_col = confidence_cols[0] if confidence_cols else None

if confidence_col is None:
    print("\n⚠ No confidence column found")
else:
    print(f"\n✓ Using confidence column: {confidence_col}")

    print("\nConfidence Score Statistics:")
    print(df_all[confidence_col].describe())

In [None]:
if confidence_col:
    # Histogram of confidence scores per model, with mean and median marked.
    fig, axes = plt.subplots(1, 2, figsize=(16, 5))

    for idx, model in enumerate(['TinyRoBERTa', 'DistilBERT']):
        # A dedicated name is used here: `data` is the dict of raw frames from
        # the loading cell. Missing scores are dropped because a NaN makes the
        # automatically detected histogram range non-finite.
        model_confidence = df_all.loc[df_all['Model'] == model, confidence_col].dropna()
        mean_confidence = model_confidence.mean()
        median_confidence = model_confidence.median()

        axes[idx].hist(model_confidence, bins=50, edgecolor='black', alpha=0.7, color='steelblue')
        axes[idx].set_title(f'{model} Confidence Score Distribution', fontweight='bold', fontsize=13)
        axes[idx].set_xlabel('Confidence Score', fontsize=11)
        axes[idx].set_ylabel('Frequency', fontsize=11)
        axes[idx].axvline(mean_confidence, color='red', linestyle='--', linewidth=2, label=f'Mean: {mean_confidence:.3f}')
        axes[idx].axvline(median_confidence, color='green', linestyle='--', linewidth=2, label=f'Median: {median_confidence:.3f}')
        axes[idx].legend()
        axes[idx].grid(alpha=0.3)

    plt.tight_layout()
    plt.show()

In [None]:
if confidence_col:
    # Box plot of confidence scores per sentiment label, plus a summary table.
    fig, ax = plt.subplots(figsize=(12, 6))

    # Missing labels are excluded: a NaN label cannot be formatted with int().
    sentiment_labels = sorted(df_all['Sentiment_Label'].dropna().unique())
    # Missing confidences are excluded: a NaN in the input makes matplotlib's
    # box statistics NaN (empty box).
    data_by_sentiment = [
        df_all.loc[df_all['Sentiment_Label'] == label, confidence_col].dropna().values
        for label in sentiment_labels
    ]

    bp = ax.boxplot(data_by_sentiment, labels=[f'{int(x)} Star' for x in sentiment_labels],
                    patch_artist=True, showmeans=True)

    colors = ['#d62728', '#ff7f0e', '#bcbd22', '#2ca02c', '#17becf']
    for patch, color in zip(bp['boxes'], colors[:len(sentiment_labels)]):
        patch.set_facecolor(color)

    ax.set_title('Confidence Score Distribution by Sentiment Label', fontsize=14, fontweight='bold')
    ax.set_xlabel('Sentiment Label', fontsize=12)
    ax.set_ylabel('Confidence Score', fontsize=12)
    ax.grid(axis='y', alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Mean, standard deviation and count of the confidence score per label.
    print("\n" + "=" * 80)
    print("AVERAGE CONFIDENCE BY SENTIMENT LABEL")
    print("=" * 80)
    conf_by_sentiment = df_all.groupby('Sentiment_Label')[confidence_col].agg(['mean', 'std', 'count']).round(4)
    print(conf_by_sentiment)

In [None]:
if confidence_col:
    # Low-confidence predictions: rows strictly below the 10th percentile.
    low_confidence_threshold = df_all[confidence_col].quantile(0.1)
    print(f"Low confidence threshold (10th percentile): {low_confidence_threshold:.4f}")

    is_low_confidence = df_all[confidence_col] < low_confidence_threshold
    low_conf = df_all[is_low_confidence]
    low_conf_share = len(low_conf) / len(df_all) * 100
    print(f"\nNumber of low confidence predictions: {len(low_conf)} ({low_conf_share:.2f}%)")

    # Where those rows come from, by aspect and model.
    low_conf_counts = low_conf.groupby(['Aspect', 'Model']).size()
    print("\nLow Confidence Distribution by Aspect and Model:")
    print(low_conf_counts)

    # Same counts as a grouped bar chart.
    low_conf_dist = low_conf_counts.reset_index(name='Count')

    fig = px.bar(
        low_conf_dist,
        x='Aspect',
        y='Count',
        color='Model',
        barmode='group',
        title=f'Low Confidence Predictions Distribution (Confidence < {low_confidence_threshold:.4f})',
        labels={'Count': 'Number of Predictions'},
        color_discrete_map={'TinyRoBERTa': '#ff6b6b', 'DistilBERT': '#4ecdc4'},
    )
    fig.show()

In [None]:
if confidence_col:
    # Scatter of confidence score against sentiment label, one facet per model,
    # coloured by aspect. A random subset is plotted to keep the figure light.
    scatter_sample_size = 5000
    scatter_random_state = 42

    # Never request more rows than exist (sample() raises ValueError when n
    # exceeds the population), and seed the draw so a re-run reproduces the
    # same figure.
    scatter_sample = df_all.sample(n=min(scatter_sample_size, len(df_all)),
                                   random_state=scatter_random_state)

    fig = px.scatter(scatter_sample,
                     x=confidence_col,
                     y='Sentiment_Label',
                     color='Aspect',
                     facet_col='Model',
                     opacity=0.5,
                     title='Confidence Score vs Sentiment Label',
                     labels={confidence_col: 'Confidence Score', 'Sentiment_Label': 'Sentiment'},
                     color_discrete_map={'Food': '#e74c3c', 'Service': '#3498db', 'Ambiance': '#2ecc71'})
    fig.update_yaxes(dtick=1)  # one tick per sentiment label
    fig.show()

## 10. Language Distribution

In [None]:
# Language breakdown; runs only when a `Language` / `language` column exists.
has_language_column = any(name in df_all.columns for name in ('Language', 'language'))

if not has_language_column:
    print("=" * 80)
    print("LANGUAGE ANALYSIS")
    print("=" * 80)
    print("⚠ Language column not found in dataset")
    print("Note: Current dataset appears to be monolingual (English)")
    print("\nFramework is ready for multi-language analysis when data becomes available.")
else:
    # The capitalised spelling wins when both are present.
    lang_col = 'Language' if 'Language' in df_all.columns else 'language'

    print("=" * 80)
    print("LANGUAGE DISTRIBUTION")
    print("=" * 80)

    print("\nLanguage counts:")
    lang_counts = df_all[lang_col].value_counts()
    print(lang_counts)

    # Bar chart of row counts per language.
    fig, ax = plt.subplots(figsize=(10, 6))
    lang_counts.plot.bar(ax=ax, color='teal', edgecolor='black')
    ax.set_title('Language Distribution in Dataset', fontsize=14, fontweight='bold')
    ax.set_xlabel('Language', fontsize=12)
    ax.set_ylabel('Count', fontsize=12)
    ax.grid(axis='y', alpha=0.3)

    # Print each count just above its bar.
    for bar_pos, bar_count in enumerate(lang_counts):
        ax.text(bar_pos, bar_count + 10, str(bar_count), ha='center', va='bottom', fontweight='bold')

    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

    # Per-language sentiment statistics only make sense with 2+ languages.
    if len(lang_counts) > 1:
        print("\n" + "=" * 80)
        print("SENTIMENT STATISTICS BY LANGUAGE")
        print("=" * 80)
        lang_sentiment = (
            df_all.groupby(lang_col)['Sentiment_Label']
            .agg(['mean', 'std', 'count'])
            .round(3)
        )
        print(lang_sentiment)

## 11. Restaurant-Level Insights

In [None]:
# Locate the restaurant identifier column: the first known spelling that is
# present in df_all wins; restaurant_col stays None when none of them exists.
known_id_spellings = ('Restaurant_id', 'restaurant_id', 'Restaurant_ID', 'RestaurantID')
restaurant_col = next(
    (name for name in known_id_spellings if name in df_all.columns),
    None,
)

if not restaurant_col:
    print("⚠ Restaurant identifier column not found")
    print("Available columns:", df_all.columns.tolist())
else:
    print(f"✓ Restaurant identifier found: {restaurant_col}")
    print(f"Total unique restaurants: {df_all[restaurant_col].nunique()}")

    # Row counts per restaurant, most frequent first
    reviews_per_restaurant = df_all[restaurant_col].value_counts()
    print("\nRestaurants with most reviews:")
    print(reviews_per_restaurant.head(10))

In [None]:
if restaurant_col:
    # Mean / std / row count of the sentiment label for every
    # (restaurant, aspect) pair.
    # NOTE(review): if df_all stacks the predictions of both models, then
    # Review_Count counts rows rather than distinct reviews - confirm.
    restaurant_sentiment = (
        df_all.groupby([restaurant_col, 'Aspect'])['Sentiment_Label']
        .agg(['mean', 'std', 'count'])
        .round(3)
    )
    restaurant_sentiment.columns = ['Avg_Sentiment', 'Std_Dev', 'Review_Count']

    # Filter restaurants with at least 5 reviews per aspect
    restaurant_sentiment = restaurant_sentiment[restaurant_sentiment['Review_Count'] >= 5]

    print("=" * 80)
    print("RESTAURANT PERFORMANCE SUMMARY (min 5 reviews per aspect)")
    print("=" * 80)
    print(f"\nTotal qualifying restaurants: {restaurant_sentiment.index.get_level_values(0).nunique()}")

    # Aspects that still have at least one row after the filter. Selecting an
    # aspect that is absent with .xs() raises KeyError, so it is checked first.
    aspects_present = set(restaurant_sentiment.index.get_level_values('Aspect'))

    # Top performers by aspect
    for aspect in ['Food', 'Service', 'Ambiance']:
        print(f"\n{'='*80}")
        print(f"TOP 10 RESTAURANTS - {aspect.upper()}")
        print(f"{'='*80}")
        if aspect not in aspects_present:
            print(f"No restaurants with at least 5 rows for aspect '{aspect}'")
            continue
        aspect_data = (
            restaurant_sentiment.xs(aspect, level='Aspect')
            .sort_values('Avg_Sentiment', ascending=False)
            .head(10)
        )
        print(aspect_data)

In [None]:
if restaurant_col:
    # Restaurant-wide ranking: every aspect is pooled, restaurants with fewer
    # than 15 rows are dropped, and the rest are ordered best-first.
    overall_restaurant = (
        df_all.groupby(restaurant_col)['Sentiment_Label']
        .agg(['mean', 'std', 'count'])
        .round(3)
        .rename(columns={'mean': 'Avg_Sentiment', 'std': 'Std_Dev', 'count': 'Review_Count'})
        .loc[lambda ranked: ranked['Review_Count'] >= 15]
        .sort_values('Avg_Sentiment', ascending=False)
    )

    banner = "=" * 80
    ranking_views = (
        ("OVERALL TOP 20 RESTAURANTS", overall_restaurant.head(20)),
        ("OVERALL BOTTOM 20 RESTAURANTS", overall_restaurant.tail(20)),
    )
    for heading, ranking_slice in ranking_views:
        print("\n" + banner)
        print(heading)
        print(banner)
        print(ranking_slice)

In [None]:
if restaurant_col:
    # Horizontal bar chart of the 15 best-ranked restaurants
    top_15 = overall_restaurant.head(15)
    bar_positions = range(len(top_15))

    fig, ax = plt.subplots(figsize=(12, 8))
    bars = ax.barh(bar_positions, top_15['Avg_Sentiment'], color='gold', edgecolor='darkgoldenrod')
    ax.set_yticks(bar_positions)
    ax.set_yticklabels([f"Restaurant {rid}" for rid in top_15.index])
    ax.set_xlabel('Average Sentiment Rating', fontsize=12)
    ax.set_title('Top 15 Restaurants by Average Sentiment', fontsize=14, fontweight='bold')
    ax.grid(axis='x', alpha=0.3)
    ax.set_xlim([0, 5])

    # Annotate every bar with its average and the number of rows behind it
    score_and_count = zip(top_15['Avg_Sentiment'], top_15['Review_Count'])
    for bar_index, (avg_score, n_rows) in enumerate(score_and_count):
        annotation = f"{avg_score:.2f} ({int(n_rows)} reviews)"
        ax.text(avg_score + 0.05, bar_index, annotation, va='center', fontsize=9)

    plt.tight_layout()
    plt.show()

In [None]:
if restaurant_col:
    # Per-aspect mean sentiment for the ten best-ranked restaurants
    top_10_ids = overall_restaurant.head(10).index
    top_10_rows = df_all[df_all[restaurant_col].isin(top_10_ids)]

    aspect_breakdown = (
        top_10_rows.groupby([restaurant_col, 'Aspect'])['Sentiment_Label']
        .mean()
        .reset_index()
    )
    # Wide layout: one row per restaurant, one column per aspect
    aspect_breakdown_pivot = aspect_breakdown.pivot(
        index=restaurant_col,
        columns='Aspect',
        values='Sentiment_Label',
    )

    # Heatmap on the fixed 1-5 label scale, colours centred on 3
    fig, ax = plt.subplots(figsize=(10, 8))
    heatmap_options = dict(
        annot=True,
        fmt='.2f',
        cmap='RdYlGn',
        center=3,
        vmin=1,
        vmax=5,
        cbar_kws={'label': 'Average Sentiment'},
    )
    sns.heatmap(aspect_breakdown_pivot, ax=ax, **heatmap_options)
    ax.set_title('Aspect-Level Performance of Top 10 Restaurants', fontsize=14, fontweight='bold')
    ax.set_xlabel('Aspect', fontsize=12)
    ax.set_ylabel('Restaurant ID', fontsize=12)
    plt.tight_layout()
    plt.show()

## 12. Word Cloud and Topic Analysis

In [None]:
# Optional dependency: the word-cloud cells below are skipped when the
# `wordcloud` package cannot be imported.
# To install it, run in a separate cell:  %pip install wordcloud

try:
    from wordcloud import WordCloud
    import re
except ImportError:
    print("⚠ WordCloud library not available. Install with: pip install wordcloud")
    wordcloud_available = False
else:
    wordcloud_available = True
    print("✓ WordCloud library available")

In [None]:
if wordcloud_available and 'Review_Text' in df_all.columns:
    def clean_text(text):
        """Lower-case ``text`` and remove every character that is not an
        ASCII letter or whitespace.

        The argument is passed through ``str()`` first, so non-string values
        are accepted. Returns the cleaned string.
        """
        text = str(text).lower()
        text = re.sub(r'[^a-zA-Z\s]', '', text)
        return text

    def _draw_wordcloud(ax, text, colormap, title):
        """Render a word cloud of ``text`` on ``ax`` and return it.

        WordCloud.generate raises ValueError when the text contains no usable
        word; in that case a placeholder message is drawn instead and ``None``
        is returned, so one empty group does not abort the whole figure.
        """
        try:
            cloud = WordCloud(width=800, height=400, background_color='white',
                              colormap=colormap, max_words=100).generate(text)
        except ValueError:
            cloud = None
            ax.text(0.5, 0.5, 'No words available for this group',
                    ha='center', va='center', fontsize=12, transform=ax.transAxes)
        else:
            ax.imshow(cloud, interpolation='bilinear')
        ax.set_title(title, fontsize=14, fontweight='bold')
        ax.axis('off')
        return cloud

    # Generate word clouds for positive and negative reviews.
    # Missing review text is dropped first: str(NaN) would otherwise inject
    # the literal word "nan" into the clouds.
    review_text = df_all['Review_Text']
    positive_reviews = review_text[df_all['Sentiment_Label'] >= 4].dropna().apply(clean_text)
    negative_reviews = review_text[df_all['Sentiment_Label'] <= 2].dropna().apply(clean_text)

    positive_text = ' '.join(positive_reviews)
    negative_text = ' '.join(negative_reviews)

    # Create word clouds
    fig, axes = plt.subplots(1, 2, figsize=(18, 8))

    wordcloud_pos = _draw_wordcloud(axes[0], positive_text, 'Greens',
                                    'Positive Reviews (4-5 Stars)\nMost Common Words')
    wordcloud_neg = _draw_wordcloud(axes[1], negative_text, 'Reds',
                                    'Negative Reviews (1-2 Stars)\nMost Common Words')

    plt.tight_layout()
    plt.show()

    print("✓ Word clouds generated successfully!")
elif not wordcloud_available:
    print("Skipping word cloud generation - library not available")
else:
    print("Skipping word cloud generation - Review_Text column not found")

In [None]:
if wordcloud_available and 'Review_Text' in df_all.columns:
    # Aspect-specific word clouds: one row per aspect, positive reviews in the
    # left column and negative reviews in the right column.
    # Relies on clean_text() defined in the previous word-cloud cell.
    fig, axes = plt.subplots(3, 2, figsize=(18, 20))
    fig.suptitle('Aspect-Specific Word Clouds (Positive vs Negative)', fontsize=16, fontweight='bold', y=0.995)

    for idx, aspect in enumerate(['Food', 'Service', 'Ambiance']):
        aspect_data = df_all[df_all['Aspect'] == aspect]

        # (grid column, row selector, colormap, title suffix)
        polarity_panels = [
            (0, aspect_data['Sentiment_Label'] >= 4, 'Greens', 'Positive (4-5 Stars)'),
            (1, aspect_data['Sentiment_Label'] <= 2, 'Reds', 'Negative (1-2 Stars)'),
        ]

        for col_idx, selector, colormap, label in polarity_panels:
            panel = axes[idx, col_idx]

            # Drop missing text first: str(NaN) would otherwise add the
            # literal word "nan" to the cloud.
            panel_text = ' '.join(
                aspect_data.loc[selector, 'Review_Text'].dropna().apply(clean_text)
            )

            try:
                cloud = WordCloud(width=700, height=350, background_color='white',
                                  colormap=colormap, max_words=80).generate(panel_text)
            except ValueError:
                # WordCloud.generate raises ValueError when no usable word is
                # left (e.g. an aspect without negative reviews); show a
                # placeholder rather than aborting the whole figure.
                panel.text(0.5, 0.5, 'No words available for this group',
                           ha='center', va='center', fontsize=12, transform=panel.transAxes)
            else:
                panel.imshow(cloud, interpolation='bilinear')

            panel.set_title(f'{aspect} - {label}', fontsize=12, fontweight='bold')
            panel.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
if 'Review_Text' in df_all.columns:
    # Most common words analysis using simple counting
    from collections import Counter
    import string

    # Common stop words
    stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
                      'of', 'with', 'by', 'from', 'as', 'is', 'was', 'are', 'were', 'been',
                      'be', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
                      'should', 'may', 'might', 'must', 'can', 'this', 'that', 'these', 'those',
                      'i', 'you', 'he', 'she', 'it', 'we', 'they', 'my', 'your', 'his', 'her',
                      'its', 'our', 'their', 'me', 'him', 'us', 'them'])

    # Built once instead of once per review: deletes all ASCII punctuation.
    punctuation_table = str.maketrans('', '', string.punctuation)

    def extract_words(text_series):
        """Count the words occurring in an iterable of review texts.

        Each entry is lower-cased, stripped of ASCII punctuation and split on
        whitespace; stop words and words of two characters or fewer are
        ignored. Missing entries (None / NaN) are skipped so that they are not
        counted as the literal word "nan".

        Parameters
        ----------
        text_series : iterable of scalar values (e.g. a pandas Series)

        Returns
        -------
        collections.Counter mapping word -> number of occurrences
        """
        counts = Counter()
        for text in text_series:
            if pd.isna(text):
                continue
            cleaned = str(text).lower().translate(punctuation_table)
            counts.update(
                word for word in cleaned.split()
                if word not in stop_words and len(word) > 2
            )
        return counts

    print("=" * 80)
    print("TOP 20 WORDS IN POSITIVE VS NEGATIVE REVIEWS")
    print("=" * 80)

    # Positive reviews
    pos_words = extract_words(df_all[df_all['Sentiment_Label'] >= 4]['Review_Text'])
    print("\nPOSITIVE REVIEWS (4-5 Stars):")
    for word, count in pos_words.most_common(20):
        # '.<20' pads the word with dots to a width of 20 characters
        print(f"  {word:.<20} {count}")

    # Negative reviews
    neg_words = extract_words(df_all[df_all['Sentiment_Label'] <= 2]['Review_Text'])
    print("\nNEGATIVE REVIEWS (1-2 Stars):")
    for word, count in neg_words.most_common(20):
        print(f"  {word:.<20} {count}")

## 13. Statistical Testing

In [None]:
# Two-sample t-test per aspect: do the two models assign different mean
# sentiment labels?
# NOTE(review): ttest_ind treats the two samples as independent. If both
# models scored the same reviews, a paired test may be more appropriate - confirm.
print("=" * 80)
print("T-TEST: MODEL COMPARISON")
print("=" * 80)

for aspect in ['Food', 'Service', 'Ambiance']:
    aspect_rows = df_all[df_all['Aspect'] == aspect]
    tiny_data = aspect_rows.loc[aspect_rows['Model'] == 'TinyRoBERTa', 'Sentiment_Label']
    distil_data = aspect_rows.loc[aspect_rows['Model'] == 'DistilBERT', 'Sentiment_Label']

    t_stat, p_value = stats.ttest_ind(tiny_data, distil_data)

    if p_value < 0.05:
        verdict = 'Yes (p < 0.05)'
    else:
        verdict = 'No (p >= 0.05)'

    print(f"\n{aspect}:")
    print(f"  TinyRoBERTa mean: {tiny_data.mean():.3f} (std: {tiny_data.std():.3f})")
    print(f"  DistilBERT mean: {distil_data.mean():.3f} (std: {distil_data.std():.3f})")
    print(f"  t-statistic: {t_stat:.4f}")
    print(f"  p-value: {p_value:.4f}")
    print(f"  Significant? {verdict}")

In [None]:
# One-way ANOVA: does the mean sentiment label differ between the aspects?
print("\n" + "=" * 80)
print("ANOVA: ASPECT COMPARISON")
print("=" * 80)

# Sentiment labels of every row, split by aspect
labels_by_aspect = {
    aspect_name: df_all.loc[df_all['Aspect'] == aspect_name, 'Sentiment_Label']
    for aspect_name in ('Food', 'Service', 'Ambiance')
}
food_data = labels_by_aspect['Food']
service_data = labels_by_aspect['Service']
ambiance_data = labels_by_aspect['Ambiance']

f_stat, p_value = stats.f_oneway(food_data, service_data, ambiance_data)

# Blank line, then one summary line per aspect
print()
for aspect_name, labels in labels_by_aspect.items():
    print(f"{aspect_name} mean: {labels.mean():.3f} (std: {labels.std():.3f})")

is_significant = p_value < 0.05

print(f"\nF-statistic: {f_stat:.4f}")
print(f"p-value: {p_value:.4f}")
print(f"Significant? {'Yes (p < 0.05)' if is_significant else 'No (p >= 0.05)'}")

print(
    "\n✓ There are statistically significant differences in sentiment across aspects"
    if is_significant
    else "\n✗ No statistically significant differences in sentiment across aspects"
)