In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the restaurant dataset (note: the file-name literal contains a space before ".csv")
df = pd.read_csv('/content/Dataset .csv')

# Keep only the columns used below, drop incomplete rows, and keep a single row
# per restaurant name. Sorting both keys in descending order places the
# highest-rated entry of each name first, which is the one keep='first' retains.
df_filtered = (
    df[['Restaurant ID', 'Restaurant Name', 'Cuisines', 'Price range', 'Aggregate rating', 'Votes']]
    .dropna()
    .sort_values(by=['Restaurant Name', 'Aggregate rating'], ascending=False)
    .drop_duplicates('Restaurant Name', keep='first')
)

# Keep restaurants with an aggregate rating greater than 3.9.
# .copy() gives an independent frame, so the column assignment below does not
# raise SettingWithCopyWarning.
df_filtered = df_filtered[df_filtered['Aggregate rating'] > 3.9].copy()

# Split the comma-separated cuisine string into a list and strip each token:
# splitting "Italian, Mexican" on ',' alone yields ' Mexican', which would be
# counted as a different cuisine from 'Mexican'.
df_filtered['Cuisines'] = (
    df_filtered['Cuisines']
    .str.split(',')
    .map(lambda parts: [part.strip() for part in parts])
)

# One row per (restaurant, cuisine) pair
df_filtered = df_filtered.explode('Cuisines')

# How many of the retained restaurants serve each cuisine
cuisine_counts = df_filtered['Cuisines'].value_counts()

# Restaurant x cuisine indicator table (rows: restaurant names, columns: cuisines)
restaurant_cuisine_matrix = pd.crosstab(df_filtered['Restaurant Name'], df_filtered['Cuisines'])

# Sample of 20 random restaurant names for reference (fixed seed for reproducibility)
sample_restaurants = df_filtered['Restaurant Name'].sample(20, random_state=194)

# Importing Jaccard similarity measures
from sklearn.metrics import jaccard_score
from scipy.spatial.distance import pdist, squareform

# Restaurant for which recommendations are needed
target_restaurant = 'Ooma'

# Recommendation settings
MIN_SIMILARITY = 0.7   # minimum Jaccard similarity a candidate must reach
TOP_N = 5              # maximum number of recommendations to keep

# Fail with a readable message instead of an opaque lookup error when the
# target is not a row of the restaurant-cuisine matrix.
if target_restaurant not in restaurant_cuisine_matrix.index:
    raise KeyError(
        f"{target_restaurant!r} is not present in restaurant_cuisine_matrix; "
        "it may have been removed by the earlier filtering steps"
    )

# pdist(metric='jaccard') returns the Jaccard *distance* (0 = identical cuisine
# sets, 1 = nothing in common). Similarity is its complement, so convert before
# ranking; otherwise the least similar restaurants end up being recommended.
cuisine_flags = restaurant_cuisine_matrix.to_numpy().astype(bool)
jaccard_distance = squareform(pdist(cuisine_flags, metric='jaccard'))
target_position = restaurant_cuisine_matrix.index.get_loc(target_restaurant)

similar_restaurants = pd.DataFrame({
    'Restaurant Name': restaurant_cuisine_matrix.index,
    'simScore': 1.0 - jaccard_distance[target_position]
})

# Keep the most similar restaurants (excluding the target itself) that reach the
# minimum similarity. The result may hold fewer than TOP_N rows, or none at all,
# when few restaurants share enough cuisines with the target.
is_other_restaurant = similar_restaurants['Restaurant Name'] != target_restaurant
is_similar_enough = similar_restaurants['simScore'] >= MIN_SIMILARITY
recommended_restaurants = (
    similar_restaurants[is_other_restaurant & is_similar_enough]
    .sort_values('simScore', ascending=False)
    .head(TOP_N)
)

# Attach each recommended restaurant's aggregate rating and present the
# best-rated ones first.
ratings_by_name = df_filtered[['Restaurant Name', 'Aggregate rating']].drop_duplicates('Restaurant Name')
final_recommendations = (
    recommended_restaurants
    .merge(ratings_by_name, on='Restaurant Name')
    .sort_values('Aggregate rating', ascending=False)
    .drop_duplicates('Restaurant Name', keep='first')
)

# Displaying the final recommendation
final_recommendations


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered.dropna(inplace=True)


Unnamed: 0,Restaurant Name,simScore,Aggregate rating
0,'Ohana,1.0,4.5
1,Pizza Di Rocco,1.0,4.4
2,PitStop BrewPub,1.0,4.4
3,Pirates of Grill,1.0,4.1
4,Pipeline Cafe,1.0,4.0


In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the restaurant dataset (note: the file-name literal contains a space before ".csv")
df = pd.read_csv('/content/Dataset .csv')

# Step 1: Preprocess the Data

# Select the relevant columns and drop rows with any missing value.
# .copy() yields an independent frame, so the column assignment below does not
# raise SettingWithCopyWarning.
df_filtered = (
    df[['Restaurant ID', 'Restaurant Name', 'Cuisines', 'Price range', 'Aggregate rating', 'Votes']]
    .dropna()
    .copy()
)

# Convert 'Cuisines' to lists for one-hot encoding. Each token is stripped:
# splitting "Italian, Mexican" on ',' alone yields ' Mexican', which the encoder
# would learn as a class distinct from 'Mexican'.
df_filtered['Cuisines'] = (
    df_filtered['Cuisines']
    .str.split(',')
    .map(lambda parts: [part.strip() for part in parts])
)

# Step 2: Encode Categorical Variables

# One-hot encode the cuisine lists; one indicator column per cuisine label,
# aligned to df_filtered's index so the concat below matches rows correctly.
mlb = MultiLabelBinarizer()
cuisines_encoded = pd.DataFrame(
    mlb.fit_transform(df_filtered['Cuisines']),
    columns=mlb.classes_,
    index=df_filtered.index,
)

# Append the indicator columns and drop the original list column, which is now
# fully represented by them.
df_filtered = pd.concat([df_filtered, cuisines_encoded], axis=1).drop(columns='Cuisines')

# Step 3: Define the Recommendation Criteria

# Function to get recommendations based on user preferences
def recommend_restaurants(user_preferences, top_n=5):
    """
    Recommend restaurants based on user preferences using content-based filtering.

    Candidates are first restricted to the requested price ranges, then ranked by
    the cosine similarity between a binary cuisine-preference vector and each
    restaurant's one-hot cuisine columns; ties are broken by 'Aggregate rating'
    (highest first). Reads the module-level `df_filtered` and `mlb`.

    Parameters:
        user_preferences (dict): Dictionary containing user's preferred 'Cuisines'
            (iterable of cuisine names) and 'Price range' (iterable of accepted values).
        top_n (int): Number of top recommendations to return.

    Returns:
        pd.DataFrame: Up to `top_n` rows with the columns 'Restaurant Name',
            'Aggregate rating' and 'Price range'. Empty when no restaurant falls
            in the requested price ranges.
    """
    output_columns = ['Restaurant Name', 'Aggregate rating', 'Price range']

    # Restrict to the preferred price ranges. Work on an explicit copy so adding
    # the 'Similarity' column below does not raise SettingWithCopyWarning.
    in_price_range = df_filtered['Price range'].isin(user_preferences['Price range'])
    price_filtered_df = df_filtered[in_price_range].copy()

    # No candidates means nothing to score; return an empty frame with the
    # expected columns.
    if price_filtered_df.empty:
        return price_filtered_df[output_columns].head(top_n)

    # Build the 0/1 preference vector in a single pass over the encoder's labels.
    # Labels are compared after stripping so stray whitespace in either the
    # encoder classes or the caller's list cannot hide a match.
    wanted_cuisines = {str(cuisine).strip() for cuisine in user_preferences['Cuisines']}
    preference_vector = np.array(
        [1.0 if str(label).strip() in wanted_cuisines else 0.0 for label in mlb.classes_]
    )

    # Cosine similarity between the user's preferences and every candidate restaurant
    cosine_sim = cosine_similarity([preference_vector], price_filtered_df[mlb.classes_])
    price_filtered_df['Similarity'] = cosine_sim[0]

    # Sort by similarity score, then by aggregate rating (both descending)
    recommendations = price_filtered_df.sort_values(
        ['Similarity', 'Aggregate rating'], ascending=[False, False]
    )

    # Return the top N recommended restaurants
    return recommendations[output_columns].head(top_n)

# Step 4: Try the recommender with an example preference profile

# Example profile: two preferred cuisines and two accepted price ranges.
# (Price-range meaning is assumed, not verified here: 1 = low, 2 = medium, 3 = high.)
sample_preferences = {'Cuisines': ['Italian', 'Mexican'], 'Price range': [2, 3]}

# Run the recommender with its default top_n and show the result as the cell output
recommendations = recommend_restaurants(user_preferences=sample_preferences)
recommendations


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['Cuisines'] = df_filtered['Cuisines'].str.split(',')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  price_filtered_df['Similarity'] = cosine_sim[0]


Unnamed: 0,Restaurant Name,Aggregate rating,Price range
943,Cafe Parmesan,4.5,3
9358,La Favorita,4.5,3
9378,Bocca Di Lupo,4.5,3
9275,Flying Spaghetti Monster,4.4,3
9395,Jamie's Italian,4.3,3



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

