In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load dataset
df = pd.read_csv("C:/Users/shikh/OneDrive/Desktop/Restaurant rating/Data/Dataset.csv")

# Drop irrelevant columns
drop_cols = ['Restaurant ID', 'Address', 'Locality', 'Locality Verbose',
             'Longitude', 'Latitude', 'Rating color', 'Rating text']
df.drop(columns=drop_cols, inplace=True, errors='ignore')

# Drop rows with missing essential info
df.dropna(subset=['Aggregate rating', 'City', 'Cuisines'], inplace=True)

# Fill missing values
df['Price range'].fillna(df['Price range'].mode()[0], inplace=True)
df['Currency'].fillna(df['Currency'].mode()[0], inplace=True)

# Encode categorical values (LabelEncoder for now)
le = LabelEncoder()
for col in ['City', 'Currency']:
    df[col] = le.fit_transform(df[col].astype(str))

df.reset_index(drop=True, inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Price range'].fillna(df['Price range'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Currency'].fillna(df['Currency'].mode()[0], inplace=True)


In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Vectorize 'Cuisines'
tfidf = TfidfVectorizer(stop_words='english')
df['Cuisines'] = df['Cuisines'].fillna('')
tfidf_matrix = tfidf.fit_transform(df['Cuisines'])

# Compute cosine similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Vectorize 'Cuisines'
tfidf = TfidfVectorizer(stop_words='english')
df['Cuisines'] = df['Cuisines'].fillna('')
tfidf_matrix = tfidf.fit_transform(df['Cuisines'])

# Compute cosine similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)


In [6]:
def recommend_restaurants(user_cuisine, user_price, user_city, top_n=5):
    # Filter restaurants based on user preferences
    filtered_df = df[
        (df['Price range'] == user_price) 
    ]
    
    if filtered_df.empty:
        return "No matches found. Try different preferences."
    
    # Use the first matching restaurant as reference
    idx = filtered_df.index[0]

    # Get similarity scores
    sim_scores = list(enumerate(cosine_sim[idx]))
    
    # Sort by similarity
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get indices of top_n most similar restaurants
    top_indices = [i[0] for i in sim_scores[1:top_n+1]]
    
    return df.iloc[top_indices][['Restaurant Name', 'Cuisines', 'Aggregate rating', 'Price range', 'City']]


In [7]:
# Example input preferences
user_cuisine = "North Indian"
user_price = 2
user_city = "New Delhi"

recommendations = recommend_restaurants(user_cuisine, user_price, user_city)
print("Top Restaurant Recommendations:\n")
print(recommendations)


Top Restaurant Recommendations:

              Restaurant Name      Cuisines  Aggregate rating  Price range  \
6812                   Kori's  Cafe, Korean               3.6            2   
14    Sodam Korean Restaurant        Korean               4.3            3   
1624           Hahn's Kitchen        Korean               3.2            3   
1854          Gung The Palace        Korean               4.2            4   
3283         Korea Restaurant        Korean               3.0            2   

      City  
6812    87  
14     111  
1624    50  
1854    50  
3283    87  
