In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df = pd.read_csv('/content/Dataset .csv')

In [3]:
# Preprocess the data
def preprocess_data(df):
    # Create features for recommendation
    features_df = df.copy()

    # Convert binary columns to numeric
    binary_columns = ['Has Table booking', 'Has Online delivery', 'Is delivering now']
    for col in binary_columns:
        features_df[col] = (features_df[col] == 'Yes').astype(int)

    # One-hot encode cuisines
    cuisines_encoded = features_df['Cuisines'].str.get_dummies(sep=', ')

    # Scale numerical features
    scaler = StandardScaler()
    numerical_features = ['Average Cost for two', 'Price range', 'Aggregate rating', 'Votes']
    features_df[numerical_features] = scaler.fit_transform(features_df[numerical_features])

    # Combine features
    final_features = pd.concat([
        features_df[numerical_features + binary_columns],
        cuisines_encoded
    ], axis=1)

    return final_features, cuisines_encoded.columns.tolist()


In [4]:
def get_recommendations(user_preferences, features_df, original_df, cuisines_list, top_n=5):
    # Create user vector based on preferences
    user_vector = np.zeros(features_df.shape[1])

    # Set cuisine preferences
    if 'cuisines' in user_preferences:
        for cuisine in user_preferences['cuisines']:
            if cuisine in cuisines_list:
                idx = cuisines_list.index(cuisine)
                user_vector[idx + 7] = 1  # 7 is the number of non-cuisine features

    # Set other preferences
    if 'price_range' in user_preferences:
        user_vector[1] = user_preferences['price_range']  # Normalized price range
    if 'min_rating' in user_preferences:
        user_vector[2] = user_preferences['min_rating']  # Normalized rating

    # Calculate similarity
    similarities = cosine_similarity([user_vector], features_df)[0]

    # Get top recommendations
    top_indices = similarities.argsort()[-top_n:][::-1]
    recommendations = original_df.iloc[top_indices]

    return recommendations[['Restaurant Name', 'Cuisines', 'Price range', 'Aggregate rating', 'Average Cost for two', 'City']]


In [5]:
features_df, cuisines_list = preprocess_data(df)



In [6]:


# Example user preferences
user_preferences = {
    'cuisines': ['Japanese', 'Sushi'],
    'price_range': 3,  # Mid-range
    'min_rating': 4.0
}

# Get recommendations
recommendations = get_recommendations(user_preferences, features_df, df, cuisines_list)
print("\
Top Restaurant Recommendations for Japanese/Sushi lovers with mid-range budget:")
print(recommendations)

Top Restaurant Recommendations for Japanese/Sushi lovers with mid-range budget:
     Restaurant Name         Cuisines  Price range  Aggregate rating  \
3               Ooma  Japanese, Sushi            4               4.9   
297   Corkscrew Cafe              NaN            3               3.9   
9383            Roka  Japanese, Sushi            3               4.6   
247            Osaka  Japanese, Sushi            3               4.2   
328         Dovetail              NaN            3               3.8   

      Average Cost for two              City  
3                     1500  Mandaluyong City  
297                     40       Gainesville  
9383                    60            London  
247                     40         Davenport  
328                     40             Macon  
