In [21]:
import os

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [22]:
# Location of the restaurant dataset. Set the RESTAURANT_DATASET_PATH environment
# variable to run this notebook on another machine; when it is unset, the
# original local path is used, so existing behaviour is unchanged.
DATASET_PATH = os.environ.get(
    "RESTAURANT_DATASET_PATH",
    r"C:\Users\vaibh\Downloads\Dataset .csv",
)
data = pd.read_csv(DATASET_PATH)

In [23]:
# Overview of the loaded frame: column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9551 entries, 0 to 9550
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Restaurant ID         9551 non-null   int64  
 1   Restaurant Name       9551 non-null   object 
 2   Country Code          9551 non-null   int64  
 3   City                  9551 non-null   object 
 4   Address               9551 non-null   object 
 5   Locality              9551 non-null   object 
 6   Locality Verbose      9551 non-null   object 
 7   Longitude             9551 non-null   float64
 8   Latitude              9551 non-null   float64
 9   Cuisines              9542 non-null   object 
 10  Average Cost for two  9551 non-null   int64  
 11  Currency              9551 non-null   object 
 12  Has Table booking     9551 non-null   object 
 13  Has Online delivery   9551 non-null   object 
 14  Is delivering now     9551 non-null   object 
 15  Switch to order menu 

In [24]:
# Summary statistics of the numeric columns, transposed so that each
# column of the dataset is shown as one row.
data.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Restaurant ID,9551.0,9051128.0,8791521.0,53.0,301962.5,6004089.0,18352290.0,18500650.0
Country Code,9551.0,18.36562,56.75055,1.0,1.0,1.0,1.0,216.0
Longitude,9551.0,64.12657,41.46706,-157.948486,77.081343,77.19196,77.28201,174.8321
Latitude,9551.0,25.85438,11.00794,-41.330428,28.478713,28.57047,28.64276,55.97698
Average Cost for two,9551.0,1199.211,16121.18,0.0,250.0,400.0,700.0,800000.0
Price range,9551.0,1.804837,0.9056088,1.0,1.0,2.0,2.0,4.0
Aggregate rating,9551.0,2.66637,1.516378,0.0,2.5,3.2,3.7,4.9
Votes,9551.0,156.9097,430.1691,0.0,5.0,31.0,131.0,10934.0


In [25]:
# Preprocessing: remove restaurants that have no cuisine information, because
# 'Cuisines' is one of the two text fields the recommendation features are
# built from. data.info() above reports 9542 non-null 'Cuisines' values out of
# 9551 rows.
# Note: this mutates `data` in place and does not reset the index, so the
# surviving rows keep their original index labels.
data.dropna(subset=['Cuisines'], inplace=True)  # Drop rows with missing cuisines

In [26]:
# Overwrites the 'Cuisines' column with its lowercased text; the original
# casing is not kept anywhere else in the frame.
data['Cuisines'] = data['Cuisines'].str.lower()  # Normalize cuisines to lowercase

In [27]:
# Build the text used for matching: the cuisine text followed by a space and
# the lowercased city name, stored in a new 'Features' column.
data['Features'] = data['Cuisines'].str.cat(data['City'].str.lower(), sep=' ')

In [28]:
# Convert text data into feature vectors
# CountVectorizer learns a vocabulary from the 'Features' strings and encodes
# each restaurant as one row of token counts. The fitted `vectorizer` is kept
# because recommend_restaurants() reuses it to encode user queries with the
# same vocabulary, and `feature_matrix` is what those queries are scored against.
vectorizer = CountVectorizer()
feature_matrix = vectorizer.fit_transform(data['Features'])


In [29]:
# Compute similarity between restaurants
# NOTE(review): this is the full pairwise restaurant-to-restaurant matrix (one
# row and one column per restaurant). recommend_restaurants() does not read it;
# it scores the query against `feature_matrix` directly, so within the visible
# cells this result is unused. With roughly 9.5k restaurants a dense pairwise
# matrix is large. TODO confirm nothing else needs it before removing this cell.
similarity_matrix = cosine_similarity(feature_matrix)

In [31]:
# Recommendation function
def recommend_restaurants(preferred_cuisine, city, top_n=5):
    """Recommend the restaurants whose cuisine/city text best matches a query.

    The query is built the same way as the ``Features`` column (lowercased
    cuisine text, a space, the lowercased city), encoded with the module-level
    ``vectorizer`` and scored by cosine similarity against every row of
    ``feature_matrix``.

    Note that ``city`` is part of the text query, not a hard filter:
    restaurants in other cities can still be returned when their features
    share tokens with the query.

    Parameters
    ----------
    preferred_cuisine : str
        Cuisine text to look for, e.g. ``'Japanese'``.
    city : str
        City name to favour, e.g. ``'Makati City'``.
    top_n : int, default 5
        Maximum number of restaurants to return. Values <= 0 yield no
        recommendations.

    Returns
    -------
    pandas.DataFrame or str
        Up to ``top_n`` rows of ``data`` (columns 'Restaurant Name',
        'Cuisines', 'City', 'Aggregate rating'), best match first. Only
        restaurants with a similarity above zero are included. The string
        "No recommendations found." is returned when there is no such
        restaurant or when ``top_n`` is not positive.
    """
    preferred_features = preferred_cuisine.lower() + ' ' + city.lower()
    preferred_vector = vectorizer.transform([preferred_features])

    # Compute similarity with all restaurants
    similarities = cosine_similarity(preferred_vector, feature_matrix).flatten()

    # Rank best-first, then slice from the front. Slicing with `[-top_n:]`
    # would return *every* row for top_n == 0, because `[-0:]` is the whole array.
    ranked_indices = similarities.argsort()[::-1][:max(top_n, 0)]

    # A similarity of zero means the restaurant shares no token with the query,
    # so it is not a match and must not be presented as a recommendation.
    top_indices = ranked_indices[similarities[ranked_indices] > 0]

    if len(top_indices) == 0:
        return "No recommendations found."

    # Fetch recommended restaurants. Rows of feature_matrix follow the row
    # order of `data`, so positional lookup (iloc) is the correct accessor.
    recommendations = data.iloc[top_indices][['Restaurant Name', 'Cuisines', 'City', 'Aggregate rating']]
    return recommendations


In [40]:
# Example usage: ask for Japanese restaurants with 'Makati City' as the city text
user_cuisine, user_city = 'Japanese', 'Makati City'
recommendations = recommend_restaurants(preferred_cuisine=user_cuisine, city=user_city)

# Heading on the first line, the result on the following lines
print("Top Recommended Restaurants:", recommendations, sep="\n")

Top Recommended Restaurants:
                   Restaurant Name                    Cuisines  \
1                 Izakaya Kikufuji                    japanese   
0                 Le Petit Souffle  french, japanese, desserts   
4                      Sambo Kojin            japanese, korean   
480   Fuji Bay Japanese Restaurant             japanese, sushi   
9334                      wagamama             japanese, asian   

                  City  Aggregate rating  
1          Makati City               4.5  
0          Makati City               4.8  
4     Mandaluyong City               4.8  
480         Sioux City               3.7  
9334   Wellington City               3.7  
