In [1]:
import pandas as pd
from pymongo import MongoClient
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.impute import KNNImputer
import numpy as np
import pickle

In [2]:
# Open a client against the local MongoDB instance, then take handles to the
# 'cafe_db' database and its 'cafe_data' collection.
client = MongoClient("mongodb://localhost:27017/")
db = client.get_database('cafe_db')
collection = db.get_collection('cafe_data')

In [3]:
# Materialise every document returned by the collection (one dict per
# document), then build the working DataFrame from those records.
data = [document for document in collection.find()]
df = pd.DataFrame(data=data)

In [4]:
def _join_review_descriptions(reviews):
    """Collapse one café's reviews into a single space-separated string.

    Args:
        reviews: The raw ``reviews`` cell. Expected to be a list of dicts that
            each carry a ``'description'`` string; may also be an
            already-flattened string or a missing value.

    Returns:
        The descriptions joined by single spaces. A string input is returned
        unchanged; anything that is neither a list nor a string yields ``''``.
    """
    if isinstance(reviews, str):
        # Already flattened by an earlier run of this cell. Returning it as-is
        # keeps the cell idempotent instead of blanking the whole column.
        return reviews
    if not isinstance(reviews, list):
        return ''
    # Skip malformed entries (non-dict, or no string description) rather than
    # letting one bad review abort the whole column transform.
    return ' '.join(
        review['description']
        for review in reviews
        if isinstance(review, dict) and isinstance(review.get('description'), str)
    )


df['reviews'] = df['reviews'].apply(_join_review_descriptions)

In [5]:
# Fill missing values in the price and 0/1 flag columns from the 3 nearest rows.
imputer = KNNImputer(n_neighbors=3)
numeric_cols = ['price_for_two', 'ac', 'dine_in', 'serves_nonveg', 'live_music']
df[numeric_cols] = imputer.fit_transform(df[numeric_cols])

# KNN imputation averages the neighbours' values, so an imputed flag can come
# out fractional (e.g. 0.33 or 0.67). The recommendation filters compare these
# columns to 0/1 with == / !=, so snap them back to whole numbers; the price
# column is left as imputed.
flag_cols = ['ac', 'dine_in', 'serves_nonveg', 'live_music']
df[flag_cols] = df[flag_cols].round()

Content-Based Filtering

In [6]:
# Turn each café's flattened review text into a TF-IDF row vector, then take
# the dot product of every row with every other row to get a square
# café-by-café similarity matrix.
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(df.loc[:, 'reviews'])
cosine_sim = linear_kernel(X=tfidf_matrix, Y=tfidf_matrix)

In [7]:
def get_content_recommendations(name, cosine_sim=cosine_sim):
    """Rank every other café by review-text similarity to ``name``.

    Args:
        name: Café name to look up in ``df['name']``. When several rows share
            the name, the first one is used as the query row.
        cosine_sim: Square pairwise-similarity matrix whose rows and columns
            follow the row order of ``df``. The default is bound to the
            matrix that exists when this cell is executed.

    Returns:
        A list of café names ordered from most to least similar to the query
        row (ties keep ``df`` order), with the query row itself excluded, or
        ``None`` when ``name`` does not occur in ``df``.
    """
    matches = np.flatnonzero((df['name'] == name).to_numpy())
    if matches.size == 0:
        return None  # Café not in the list

    # Use the row *position*, not the index label: cosine_sim is addressed
    # positionally, and labels only coincide with positions while df keeps a
    # default RangeIndex.
    query_pos = int(matches[0])

    ranked = sorted(
        enumerate(cosine_sim[query_pos]),
        key=lambda pair: pair[1],
        reverse=True,
    )
    # Exclude the query row by position. Dropping "the first result" instead
    # is wrong whenever scores tie (identical reviews, or an all-zero vector
    # for a café with no review text): a different café would be dropped and
    # the query café would be recommended to itself.
    cafe_indices = [pos for pos, _ in ranked if pos != query_pos]
    return df['name'].iloc[cafe_indices].tolist()

Hybrid Recommendation Logic

In [8]:
def recommend_cafe(cafe_name, budget=None, ac=None, dine_in=None, veg=None, live_music=None, locality=None):
    """Recommend cafés related to ``cafe_name`` that satisfy the given preferences.

    Candidates are the content-based recommendations for ``cafe_name`` (in
    similarity order), followed by any café that shares its locality or
    cuisines and is not already listed. A preference left as ``None`` is
    ignored; otherwise a candidate is kept only if it matches.

    Args:
        cafe_name: Name of the reference café; must occur in ``df['name']``.
        budget: Maximum acceptable ``price_for_two``.
        ac: Required value of the ``ac`` column.
        dine_in: Required value of the ``dine_in`` column.
        veg: Truthy keeps cafés whose ``serves_nonveg`` is 0; falsy (but not
            ``None``) keeps cafés whose ``serves_nonveg`` is 1.
        live_music: Required value of the ``live_music`` column.
        locality: Required value of the ``locality`` column.

    Returns:
        A list with one string per matching café: the café name followed by a
        parenthesised note for each preference that was supplied. The list is
        ``["No cafes match your criteria."]`` when nothing matches. If
        ``cafe_name`` is unknown, a single "not found" message string is
        returned instead of a list.
    """
    # Step 1: content-based ranking (None means the café is unknown).
    content_recommendations = get_content_recommendations(cafe_name)
    if content_recommendations is None:
        return f"{cafe_name} not found in the list."

    # Step 2: cafés sharing the reference café's locality or cuisines.
    # Budget is deliberately not part of this OR: with no budget the price
    # test matched every row, and budget is enforced as a hard filter below.
    reference = df[df['name'] == cafe_name].iloc[0]
    similar_cafes = df[
        (df['locality'] == reference['locality']) |
        (df['cuisines'] == reference['cuisines'])
    ]

    # Merge both sources, first occurrence wins. dict.fromkeys keeps insertion
    # order, so the output stays ranked by similarity and is reproducible; a
    # set would iterate in hash order, which differs between interpreter runs.
    candidates = dict.fromkeys([*content_recommendations, *similar_cafes['name']])
    # The reference café always matches its own locality; never recommend it back.
    candidates.pop(cafe_name, None)

    # Equality preferences that were actually supplied, as column -> wanted value.
    required = {
        'ac': ac,
        'dine_in': dine_in,
        'serves_nonveg': None if veg is None else (0 if veg else 1),
        'live_music': live_music,
        'locality': locality,
    }
    required = {column: wanted for column, wanted in required.items() if wanted is not None}

    # One row per name (the first occurrence), indexed by name, so each
    # candidate is a direct lookup instead of a scan over the whole frame.
    rows_by_name = df.drop_duplicates(subset='name').set_index('name')

    results = []
    for cafe in candidates:
        row = rows_by_name.loc[cafe]

        # Hard filters: skip the café as soon as one preference is not met.
        if budget is not None and row['price_for_two'] > budget:
            continue
        if any(row[column] != wanted for column, wanted in required.items()):
            continue

        # One note per supplied preference, in a fixed order.
        reason_parts = []
        if budget is not None:
            reason_parts.append(f"(budget: {row['price_for_two']})")
        if ac is not None:
            reason_parts.append(f"(AC: {'Yes' if row['ac'] == 1 else 'No'})")
        if dine_in is not None:
            reason_parts.append(f"(Dine-in: {'Yes' if row['dine_in'] == 1 else 'No'})")
        if veg is not None:
            reason_parts.append(f"(Veg: {'Yes' if veg else 'No'})")
        if live_music is not None:
            reason_parts.append(f"(Live music: {'Yes' if row['live_music'] == 1 else 'No'})")
        if locality is not None:
            reason_parts.append(f"(Locality: {row['locality']})")

        reason_str = ' '.join(reason_parts)
        # Without any preference there is no note; avoid a dangling trailing space.
        results.append(f"{cafe} {reason_str}" if reason_str else f"{cafe}")

    return results if results else ["No cafes match your criteria."]

In [12]:
# Example query: recommendations for one café, keeping only results whose
# locality is 'Juhu'.
cafe_name = "Italian Barista Academy Coffees By Sahil Thakkar"
recommended_cafes = recommend_cafe(cafe_name=cafe_name, locality='Juhu')
print("Recommended Cafes:", recommended_cafes)

Recommended Cafes: ['Poetry By Love And Cheesecake (Locality: Juhu)', 'Kitchen Garden by Suzette (Locality: Juhu)']


In [10]:
# Serialise the current state of df to 'cafes.pkl' in the working directory.
df.to_pickle(path='cafes.pkl')