In [13]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score


In [14]:
sentiment_path = "data hybrid/sentiment_scores.csv" 
content_path = "data hybrid/content_scores.csv"
item_cf_path = "data hybrid/item_cf_scores.csv"

sentiment_scores = pd.read_csv(sentiment_path) 
content_scores = pd.read_csv(content_path)  
item_cf_scores = pd.read_csv(item_cf_path) 

print(sentiment_scores.head())
print(content_scores.head())
print(item_cf_scores.head())

                       Restaurant  Sentiment_Score
0               10 Downing Street             0.86
1                        13 Dhaba             0.67
2  3B's - Buddies, Bar & Barbecue             0.94
3       AB's - Absolute Barbecues             0.98
4               Absolute Sizzlers             0.68
          Restaurant             Similar_Restaurant  Content_Score
0  10 Downing Street                          SKYHY       0.675805
1  10 Downing Street         Mustang Terrace Lounge       0.672156
2  10 Downing Street  The Lal Street - Bar Exchange       0.659896
3  10 Downing Street     La La Land - Bar & Kitchen       0.637958
4  10 Downing Street     Over The Moon Brew Company       0.631628
          Restaurant                    Similar_Restaurant  Item_CF_Score
0  10 Downing Street                    Hunger Maggi Point       0.232301
1  10 Downing Street                         Gal Punjab Di       0.172412
2  10 Downing Street  Collage - Hyatt Hyderabad Gachibowli       0.136

hybird model of sentiment analysis + content-based filtering

In [15]:


# Normalize restaurant names for consistency
for df in [sentiment_scores, content_scores]:
    df["Restaurant"] = df["Restaurant"].str.strip().str.lower()
    if "Similar_Restaurant" in df.columns:
        df["Similar_Restaurant"] = df["Similar_Restaurant"].str.strip().str.lower()

# Merge content-based filtering scores with sentiment scores of the similar restaurant
hybrid_model = content_scores.merge(sentiment_scores, left_on="Similar_Restaurant", right_on="Restaurant", how="left")

# Drop duplicate Restaurant column
hybrid_model = hybrid_model.drop(columns=["Restaurant_y"]).rename(columns={"Restaurant_x": "Restaurant"})

# Normalize similarity and sentiment scores using MinMaxScaler
scaler = MinMaxScaler()
hybrid_model[["Content_Score", "Sentiment_Score"]] = scaler.fit_transform(
    hybrid_model[["Content_Score", "Sentiment_Score"]])

# Compute final hybrid score (weighted sum of content and sentiment scores)
hybrid_model["Hybrid_Score"] = (
    0.7 * hybrid_model["Content_Score"] + 0.3 * hybrid_model["Sentiment_Score"]
)

# Sort recommendations by Hybrid Score
hybrid_model = hybrid_model.sort_values(by=["Restaurant", "Hybrid_Score"], ascending=[True, False])

# Display the first few rows
print(hybrid_model.head())

# Save the hybrid model recommendations


           Restaurant             Similar_Restaurant  Content_Score  \
4   10 downing street     over the moon brew company       0.722478   
2   10 downing street  the lal street - bar exchange       0.757282   
0   10 downing street                          skyhy       0.776870   
1   10 downing street         mustang terrace lounge       0.772377   
10  10 downing street           prism club & kitchen       0.651262   

    Sentiment_Score  Hybrid_Score  
4          0.907692      0.778042  
2          0.646154      0.723944  
0          0.600000      0.723809  
1          0.569231      0.711433  
10         0.830769      0.705114  


In [16]:
# Function to evaluate recommendation system
def evaluate_recommendation_system(hybrid_df, actual_interactions, top_k=5):
    precision_list, recall_list, f1_list = [], []

    for user, actual_restaurants in actual_interactions.items():
        # Get top-k recommended restaurants for the user
        recommended_restaurants = hybrid_df[hybrid_df["Restaurant"].isin(actual_restaurants)].nlargest(top_k, "Hybrid_Score")["Similar_Restaurant"].tolist()

        if not recommended_restaurants:  # If no recommendations exist, set precision, recall, and f1 to 0
            precision_list.append(0)
            recall_list.append(0)
            f1_list.append(0)
            continue

        # Compute evaluation metrics
        relevant_recommendations = set(recommended_restaurants).intersection(set(actual_restaurants))
        precision = len(relevant_recommendations) / len(recommended_restaurants) if recommended_restaurants else 0
        recall = len(relevant_recommendations) / len(actual_restaurants) if actual_restaurants else 0
        f1 = (2 * precision * recall) / (precision + recall) if precision + recall > 0 else 0

        precision_list.append(precision)
        recall_list.append(recall)
        f1_list.append(f1)

    return np.mean(precision_list), np.mean(recall_list), np.mean(f1_list)  # Ensure all three values are returned

# Re-run evaluation
precision, recall, f1 = evaluate_recommendation_system(hybrid_model, actual_interactions, top_k=5)

# Compute AUC (if applicable)
def evaluate_auc(recommended_items, actual_items):
    y_true = []
    y_scores = []

    for rec, actual in zip(recommended_items, actual_items):
        for item in rec:
            y_true.append(1 if item in actual else 0)
            y_scores.append(1)  # Assume all recommendations have score=1

    return roc_auc_score(y_true, y_scores) if len(set(y_true)) > 1 else 0  # Ensure AUC is computed properly

recommended_items = [hybrid_model.groupby("Restaurant").head(5)["Similar_Restaurant"].tolist()]
actual_items = [list(set(sum(actual_interactions.values(), [])))]  # Flatten actual interactions

auc_score = evaluate_auc(recommended_items, actual_items)

# Display evaluation results
evaluation_results = pd.DataFrame({
    "Metric": ["Precision", "Recall", "F1-Score", "AUC"],
    "Score": [precision, recall, f1, auc_score]
})

print(evaluation_results)


ValueError: not enough values to unpack (expected 3, got 2)