In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

df = pd.read_csv('D:\ZomatoX-ML\Data\enhanced_zomato_dataset_clean.csv')
df.head()


Unnamed: 0,Restaurant_Name,Dining_Rating,Delivery_Rating,Dining_Votes,Delivery_Votes,Cuisine,Place_Name,City,Item_Name,Best_Seller,...,Is_Bestseller,Restaurant_Popularity,Avg_Rating_Restaurant,Avg_Price_Restaurant,Avg_Rating_Cuisine,Avg_Price_Cuisine,Avg_Rating_City,Avg_Price_City,Is_Highly_Rated,Is_Expensive
0,Doner King,3.9,4.2,39,0,Fast Food,Malakpet,Hyderabad,Platter Kebab Combo,BESTSELLER,...,1,46,4.05,171.347826,3.81868,255.954894,3.939031,245.130482,1,0
1,Doner King,3.9,4.2,39,0,Fast Food,Malakpet,Hyderabad,Chicken Rumali Shawarma,BESTSELLER,...,1,46,4.05,171.347826,3.81868,255.954894,3.939031,245.130482,1,0
2,Doner King,3.9,4.2,39,0,Fast Food,Malakpet,Hyderabad,Chicken Tandoori Salad,NONE,...,1,46,4.05,171.347826,3.81868,255.954894,3.939031,245.130482,1,0
3,Doner King,3.9,4.2,39,0,Fast Food,Malakpet,Hyderabad,Chicken BBQ Salad,BESTSELLER,...,1,46,4.05,171.347826,3.81868,255.954894,3.939031,245.130482,1,0
4,Doner King,3.9,4.2,39,0,Fast Food,Malakpet,Hyderabad,Special Doner Wrap Combo,MUST TRY,...,1,46,4.05,171.347826,3.81868,255.954894,3.939031,245.130482,1,0


In [2]:
# Aggregated restaurant-level features
grouped = df.groupby(['Restaurant_Name', 'City', 'Cuisine']).agg({
    'Average_Rating': 'mean',
    'Prices': 'mean',
    'Restaurant_Popularity': 'mean',
    'Is_Highly_Rated': 'mean',
    'Is_Expensive': 'mean'
}).reset_index()

grouped['Cuisine'] = grouped['Cuisine'].astype(str)
grouped.head()


Unnamed: 0,Restaurant_Name,City,Cuisine,Average_Rating,Prices,Restaurant_Popularity,Is_Highly_Rated,Is_Expensive
0,12 To 12 BBQ,Kochi,Biryani,4.0,273.364198,162.0,1.0,0.302469
1,1441 Pizzeria,Mumbai,Desserts,3.911132,349.874667,135.0,0.0,0.637037
2,1944 -The HOCCO Kitchen,Ahmedabad,Shake,4.3,356.283784,222.0,1.0,0.441441
3,4M Biryani House,Hyderabad,Chinese,4.15,211.570312,128.0,1.0,0.171875
4,7 Plates,Bangalore,Seafood,4.061132,241.782123,179.0,1.0,0.296089


In [3]:
# One-hot encode Cuisine & City
encoded = pd.get_dummies(grouped[['City', 'Cuisine']])
numerical = grouped[['Average_Rating', 'Prices', 'Restaurant_Popularity']]
scaled = MinMaxScaler().fit_transform(numerical)

# Combine all
features_df = pd.concat([pd.DataFrame(scaled), encoded], axis=1)


In [4]:
similarity_matrix = cosine_similarity(features_df)


In [5]:
def recommend_by_cuisine_city(cuisine, city, sort_by="rating", top_n=5):
    # Filter dataset
    filtered = df[(df['City'].str.lower() == city.lower()) &
                  (df['Cuisine'].str.lower() == cuisine.lower())]

    if filtered.empty:
        print("❌ No matching restaurants found for this cuisine and city.")
        return

    # Sort logic
    if sort_by == "rating":
        sorted_df = filtered.sort_values(by="Average_Rating", ascending=False)
    elif sort_by == "price":
        sorted_df = filtered.sort_values(by="Prices")
    elif sort_by == "value":
        sorted_df = filtered.sort_values(by="Price_per_Vote")
    else:
        sorted_df = filtered.sort_values(by="Average_Rating", ascending=False)

    # Return top N
    return sorted_df[['Restaurant_Name', 'Place_Name', 'Prices', 'Average_Rating', 'Votes']].head(top_n)


In [6]:
recommend_by_cuisine_city("Italian", "Pune", sort_by="rating", top_n=5)
recommend_by_cuisine_city("North Indian", "Delhi", sort_by="value", top_n=10)
recommend_by_cuisine_city("Chinese", "Bangalore", sort_by="price", top_n=7)


❌ No matching restaurants found for this cuisine and city.
❌ No matching restaurants found for this cuisine and city.
❌ No matching restaurants found for this cuisine and city.


In [7]:
print("🔎 Available Cities:\n", df['City'].unique())
print("\n🍽️ Available Cuisines:\n", df['Cuisine'].unique())


🔎 Available Cities:
 [' Hyderabad' ' Mumbai' ' Chennai' ' Pune' ' Jaipur' ' Kochi' ' Goa'
 ' Bangalore' ' Kolkata' ' Ahmedabad' ' Banaswadi' ' Ulsoor'
 ' Malleshwaram' ' Magrath Road' ' Lucknow' ' New Delhi' ' Raipur']

🍽️ Available Cuisines:
 ['Fast Food' 'Wraps' 'Biryani' 'Chinese' 'Beverages' 'Desserts' 'Shake'
 'Mandi' 'South Indian' 'Kebab' 'Pizza' 'Bakery' 'Ice Cream' 'Mughlai'
 'North Indian' 'Momos' 'Shawarma' 'Burger' 'Turkish' 'Rolls' 'Coffee'
 'Seafood' 'Hyderabadi' 'Salad' 'Sichuan' 'Sandwich' 'Italian' 'Lucknowi'
 'Continental' 'American' 'Mexican' 'Kerala' 'Bbq' 'Rajasthani'
 'Healthy Food' 'Tibetan' 'Street Food' 'Mithai' 'Juices' 'Maharashtrian'
 'Thai' 'Vietnamese' 'Pasta' 'Gujarati' 'Kathiyawadi' 'Andhra' 'Awadhi'
 'Tea']


In [8]:
# Normalize 'City' and 'Cuisine' columns to lowercase and strip whitespaces
df['City'] = df['City'].astype(str).str.strip().str.lower()
df['Cuisine'] = df['Cuisine'].astype(str).str.strip().str.lower()


In [9]:
def recommend_by_cuisine_city(cuisine, city, sort_by="rating", top_n=5):
    # Normalize input
    cuisine = cuisine.strip().lower()
    city = city.strip().lower()

    # Normalize dataset
    df['City'] = df['City'].str.strip().str.lower()
    df['Cuisine'] = df['Cuisine'].str.strip().str.lower()

    # Filter
    filtered = df[(df['City'] == city) & (df['Cuisine'] == cuisine)]

    if filtered.empty:
        print("❌ No matching restaurants found for this cuisine and city.")
        return

    # Sort logic
    if sort_by == "rating":
        sorted_df = filtered.sort_values(by="Average_Rating", ascending=False)
    elif sort_by == "price":
        sorted_df = filtered.sort_values(by="Prices")
    elif sort_by == "value":
        sorted_df = filtered.sort_values(by="Price_per_Vote")
    else:
        sorted_df = filtered.sort_values(by="Average_Rating", ascending=False)

    # Return top N
    return sorted_df[['Restaurant_Name', 'Place_Name', 'Cuisine', 'Prices', 'Average_Rating', 'Votes']].head(top_n)


In [10]:
print("✅ Unique Cities:", df['City'].nunique(), "\n", df['City'].unique()[:20])
print("✅ Unique Cuisines:", df['Cuisine'].nunique(), "\n", df['Cuisine'].unique()[:20])


✅ Unique Cities: 17 
 ['hyderabad' 'mumbai' 'chennai' 'pune' 'jaipur' 'kochi' 'goa' 'bangalore'
 'kolkata' 'ahmedabad' 'banaswadi' 'ulsoor' 'malleshwaram' 'magrath road'
 'lucknow' 'new delhi' 'raipur']
✅ Unique Cuisines: 48 
 ['fast food' 'wraps' 'biryani' 'chinese' 'beverages' 'desserts' 'shake'
 'mandi' 'south indian' 'kebab' 'pizza' 'bakery' 'ice cream' 'mughlai'
 'north indian' 'momos' 'shawarma' 'burger' 'turkish' 'rolls']


In [11]:
recommend_by_cuisine_city("Biryani", "Hyderabad", sort_by="rating", top_n=5)

Unnamed: 0,Restaurant_Name,Place_Name,Cuisine,Prices,Average_Rating,Votes
13926,Sherton Restaurant,Amberpet,biryani,80.0,4.1,131
13842,Sherton Restaurant,Amberpet,biryani,250.0,4.1,0
13844,Sherton Restaurant,Amberpet,biryani,250.0,4.1,5
13845,Sherton Restaurant,Amberpet,biryani,250.0,4.1,0
13846,Sherton Restaurant,Amberpet,biryani,250.0,4.1,0


In [12]:
grouped.to_csv("/kaggle/working/restaurant_profiles.csv", index=False)
