In [2]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Read the three cleaned CSV exports in one pass. The order of the paths below
# matches the order of the unpacking targets (restaurants, dishes, reviews).
restaurants_df, dishes_df, reviews_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../dataset_new/restaurant_dataset_clean.csv',
        '../dataset_new/dishes_dataset_clean.csv',
        '../dataset_new/reviews_dataset_clean.csv',
    )
)


In [4]:
# Load the sentence-embedding model once; every encode() call in this notebook
# (restaurant, dish and review texts) goes through this single instance.
model = SentenceTransformer('all-MiniLM-L6-v2')

In [20]:
def build_restaurant_text(rid, max_dishes=3, max_reviews=2):
    """Compose the descriptive text that represents one restaurant.

    The text starts with a summary sentence built from the restaurant's row in
    ``restaurants_df``, followed (when present) by its highest-rated dishes
    from ``dishes_df`` and its highest-rated reviews from ``reviews_df``.

    Args:
        rid: Value matched against the ``restaurant_id`` column of all three
            frames.
        max_dishes: Maximum number of dishes (by descending ``rating``) to
            include.
        max_reviews: Maximum number of reviews (by descending ``rating``) to
            include.

    Returns:
        A ``(text, dish_ids, review_ids)`` tuple: the composed string, and the
        ``dish_id`` / ``review_id`` values of the rows that were included.

    Raises:
        IndexError: If no row of ``restaurants_df`` has ``restaurant_id == rid``.
    """
    review_fields = ['ambiance','occasion','dietary','features','sentiment']

    # First matching row; .iloc[0] raises IndexError when nothing matches.
    info = restaurants_df[restaurants_df['restaurant_id'] == rid].iloc[0]
    parts = [
        f"{info['name']} is a {info['restaurant_type']} restaurant in {info['location']}, {info['city']} "
        f"serving {info['cuisine_category']}. Average price for two: ₹{info['price_for_two']}, "
        f"Votes: {info['votes']}, Open: {info['timing']}."
    ]

    # Highest-rated dishes of this restaurant.
    own_dishes = dishes_df[dishes_df['restaurant_id'] == rid]
    top_dishes = own_dishes.sort_values('rating', ascending=False).head(max_dishes)
    dish_snippets = []
    for _, dish in top_dishes.iterrows():
        dish_snippets.append(
            f"{dish['name']} ({dish['category']}, {dish['type']}, {dish['cuisine']}, ₹{dish['cost']})"
        )
    if dish_snippets:
        parts.append(" Dishes: " + "; ".join(dish_snippets))

    # Highest-rated reviews of this restaurant, each followed by its non-null tags.
    own_reviews = reviews_df[reviews_df['restaurant_id'] == rid]
    top_reviews = own_reviews.sort_values('rating', ascending=False).head(max_reviews)
    review_snippets = []
    for _, review in top_reviews.iterrows():
        tags = ", ".join(
            f"{field}: {review[field]}"
            for field in review_fields
            if pd.notna(review.get(field))
        )
        # The separating space is emitted even when there are no tags.
        review_snippets.append(f"{review['review_text']} {tags}")
    if review_snippets:
        parts.append(" Reviews: " + "; ".join(review_snippets))

    return "".join(parts), top_dishes['dish_id'].tolist(), top_reviews['review_id'].tolist()

In [21]:
# Build one descriptive text per restaurant, embed all of them in a single
# batch (the same way the dish and review texts are encoded), and record
# per-restaurant metadata keyed by restaurant_id.
restaurant_ids = restaurants_df['restaurant_id'].tolist()

# First row per restaurant_id, indexed by id, so the metadata lookup inside the
# loop is a direct index access instead of a full-frame boolean scan.
restaurant_rows = (
    restaurants_df
    .drop_duplicates(subset='restaurant_id', keep='first')
    .set_index('restaurant_id')
)

restaurant_texts = []
restaurant_meta = {}

for rid in restaurant_ids:
    text, dish_ids, review_ids = build_restaurant_text(rid)
    restaurant_texts.append(text)

    row = restaurant_rows.loc[rid]
    restaurant_meta[rid] = {  # keyed by the actual restaurant_id, not by list position
        "restaurant_id": int(rid),
        "location": row['location'],
        "cuisine_category": row['cuisine_category'],
        # Distinct comprehension variable names: the previous `for rid in review_ids`
        # shadowed the loop variable that is also used as the dict key.
        "dish_ids": [int(dish_id) for dish_id in dish_ids if pd.notna(dish_id)],
        "review_ids": [int(review_id) for review_id in review_ids if pd.notna(review_id)],
    }

# One batched encode call; row i of the result corresponds to restaurant_ids[i].
# Values may differ from per-item encoding only by floating-point rounding.
restaurant_embeddings = model.encode(restaurant_texts).astype('float32')

assert restaurant_embeddings.shape[0] == len(restaurant_ids), "one embedding row per restaurant expected"

In [None]:
def _dish_to_text(d):
    """Return the one-line description embedded for a single dish row."""
    return f"{d['name']} ({d['category']}, {d['type']}, {d['cuisine']}, ₹{d['cost']})"


# One text per row of dishes_df, in frame order, encoded as a single batch.
dish_texts = dishes_df.apply(_dish_to_text, axis=1).tolist()
dish_vectors = model.encode(dish_texts).astype('float32')

def _dish_meta_item(dish_row):
    """Return the ``(dish_id, metadata dict)`` pair for one row of dishes_df."""
    dish_key = int(dish_row['dish_id'])
    return dish_key, {
        "dish_id": dish_key,
        "restaurant_id": int(dish_row['restaurant_id']),
        "cuisine": dish_row['cuisine'],
        "category": dish_row['category'],
        "type": dish_row['type'],
        "cost": dish_row['cost'],
    }


# Metadata keyed by dish_id; if a dish_id repeats, the last row wins.
dish_meta = dict(_dish_meta_item(dish_row) for _, dish_row in dishes_df.iterrows())

# ------------------------------
# Generate review embeddings
# ------------------------------
def _review_to_text(r):
    """Return the text embedded for one review row.

    The result is the review body, a single space, then the row's non-null
    attribute tags rendered as ``"field: value"`` and joined with ``", "``.
    """
    # A missing body is NaN (a float), which made `r['review_text'] + " "` raise
    # TypeError. Fall back to an empty body; the f-string below also coerces any
    # other non-str value instead of failing.
    body = r['review_text'] if pd.notna(r['review_text']) else ""
    tags = ", ".join(
        f"{field}: {r[field]}"
        for field in ['ambiance','occasion','dietary','features','sentiment']
        if pd.notna(r[field])
    )
    return f"{body} {tags}"


# One text per row of reviews_df, in frame order, encoded as a single batch.
review_texts = reviews_df.apply(_review_to_text, axis=1).tolist()
review_vectors = model.encode(review_texts).astype('float32')

def _review_meta_item(review_row):
    """Return the ``(review_id, metadata dict)`` pair for one row of reviews_df."""
    review_key = int(review_row['review_id'])
    # dish_id is optional on a review: keep None when the cell is null.
    if pd.notna(review_row['dish_id']):
        linked_dish = int(review_row['dish_id'])
    else:
        linked_dish = None
    return review_key, {
        "review_id": review_key,
        "restaurant_id": int(review_row['restaurant_id']),
        "dish_id": linked_dish,
        "rating": review_row['rating'],
    }


# Metadata keyed by review_id; if a review_id repeats, the last row wins.
review_meta = dict(_review_meta_item(review_row) for _, review_row in reviews_df.iterrows())

In [23]:
# All three indexes share the dimensionality of the restaurant embeddings.
dim = restaurant_embeddings.shape[1]


def _flat_l2_index(vectors):
    """Create an ``IndexFlatL2`` of width ``dim`` and add ``vectors`` to it."""
    index = faiss.IndexFlatL2(dim)
    index.add(vectors)
    return index


# NOTE(review): the metadata dicts are keyed by restaurant/dish/review id, while
# vectors are added here without explicit ids - confirm that whatever queries
# these indexes maps returned row positions back to the right ids.
restaurant_index = _flat_l2_index(restaurant_embeddings)
dish_index = _flat_l2_index(dish_vectors)
review_index = _flat_l2_index(review_vectors)

In [24]:
# Write the three FAISS indexes to the current working directory.
for faiss_index, index_path in (
    (restaurant_index, "restaurant_index.faiss"),
    (dish_index, "dish_index.faiss"),
    (review_index, "review_index.faiss"),
):
    faiss.write_index(faiss_index, index_path)

# Pickle the matching metadata dictionaries next to the indexes.
for meta, meta_path in (
    (restaurant_meta, "restaurant_meta.pkl"),
    (dish_meta, "dish_meta.pkl"),
    (review_meta, "review_meta.pkl"),
):
    with open(meta_path, "wb") as f:
        pickle.dump(meta, f)

print("All embeddings and FAISS indexes are ready!")

All embeddings and FAISS indexes are ready!


In [25]:
import pickle

def _load_and_preview(path, heading, limit=3):
    """Unpickle the dict stored at ``path``, print ``heading`` followed by its
    first ``limit`` key/value pairs, and return the loaded dict."""
    with open(path, "rb") as f:
        loaded = pickle.load(f)

    print(heading)
    for shown, (k, v) in enumerate(loaded.items(), start=1):
        print(k, v)
        if shown >= limit:  # stop after the first `limit` entries
            break
    return loaded


# Reload each metadata file written above and show a short sample of it.
restaurant_meta = _load_and_preview("restaurant_meta.pkl", "Restaurant meta sample:")
dish_meta = _load_and_preview("dish_meta.pkl", "\nDish meta sample:")
review_meta = _load_and_preview("review_meta.pkl", "\nReview meta sample:")


Restaurant meta sample:
1 {'restaurant_id': 1, 'location': 'Andheri', 'cuisine_category': 'North Indian, Chinese, Biryani, Continental, American, Fast Food', 'dish_ids': [9, 4, 8], 'review_ids': [1, 2]}
2 {'restaurant_id': 2, 'location': 'Andheri', 'cuisine_category': 'Desserts', 'dish_ids': [12, 13, 20], 'review_ids': [6, 5]}
3 {'restaurant_id': 3, 'location': 'Andheri', 'cuisine_category': 'Chinese', 'dish_ids': [22, 25, 21], 'review_ids': [7, 8]}

Dish meta sample:
1 {'dish_id': 1, 'restaurant_id': 1, 'cuisine': 'Fast Food', 'category': 'Snack', 'type': 'Veg', 'cost': 285}
2 {'dish_id': 2, 'restaurant_id': 1, 'cuisine': 'Continental', 'category': 'Bread', 'type': 'Seafood', 'cost': 455}
3 {'dish_id': 3, 'restaurant_id': 1, 'cuisine': 'Chinese', 'category': 'Starter', 'type': 'Veg', 'cost': 980}

Review meta sample:
1 {'review_id': 1, 'restaurant_id': 1, 'dish_id': 9, 'rating': 5}
2 {'review_id': 2, 'restaurant_id': 1, 'dish_id': 9, 'rating': 5}
3 {'review_id': 3, 'restaurant_id': 1,