# Loading Libraries

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Load the dataset

In [2]:
file_path = 'Dataset .csv'
data = pd.read_csv(file_path)


# Preprocess the dataset

Handle missing values

In [3]:

data.fillna('', inplace=True)

# Converting 'Average Cost for two' and 'Aggregate rating' to numeric

In [4]:

data['Average Cost for two'] = pd.to_numeric(data['Average Cost for two'], errors='coerce')
data['Aggregate rating'] = pd.to_numeric(data['Aggregate rating'], errors='coerce')

# Dropping rows with NaN values in numeric columns


In [5]:
data.dropna(subset=['Average Cost for two', 'Aggregate rating'], inplace=True)

# Select only necessary columns


In [6]:
columns_to_keep = ['Restaurant Name', 'Cuisines', 'Average Cost for two', 'Aggregate rating']
data = data[columns_to_keep]

# Encode categorical variables

In [7]:
data_encoded = data.copy()
data_encoded = pd.get_dummies(data_encoded, columns=['Cuisines'])


# Normalize numeric columns

In [8]:
data_encoded['Average Cost for two'] = (data_encoded['Average Cost for two'] - data_encoded['Average Cost for two'].min()) / (data_encoded['Average Cost for two'].max() - data_encoded['Average Cost for two'].min())
data_encoded['Aggregate rating'] = (data_encoded['Aggregate rating'] - data_encoded['Aggregate rating'].min()) / (data_encoded['Aggregate rating'].max() - data_encoded['Aggregate rating'].min())

# Ensure only numeric data is used for cosine similarity

In [9]:
data_encoded = data_encoded.select_dtypes(include=[float, int])


# Implementing content-based filtering

In [10]:
def recommend_restaurants(preferences, data, original_data, top_n=5):
    # Convert preferences to DataFrame
    preferences_df = pd.DataFrame([preferences])
    
    # Normalize preference values
    preferences_df['Average Cost for two'] = (preferences_df['Average Cost for two'] - data['Average Cost for two'].min()) / (data['Average Cost for two'].max() - data['Average Cost for two'].min())
    preferences_df['Aggregate rating'] = (preferences_df['Aggregate rating'] - data['Aggregate rating'].min()) / (data['Aggregate rating'].max() - data['Aggregate rating'].min())
    
    # Encode preferences
    preferences_encoded = pd.get_dummies(preferences_df, columns=['Cuisines'])
    preferences_encoded = preferences_encoded.reindex(columns=data.columns, fill_value=0)
    
    # Ensure only numeric data is used for cosine similarity
    preferences_encoded = preferences_encoded.select_dtypes(include=[float, int])
    
    # Compute cosine similarity
    similarity_matrix = cosine_similarity(data, preferences_encoded)
    
    # Get top N recommendations
    top_indices = similarity_matrix.flatten().argsort()[-top_n:][::-1]
    recommended_restaurants = original_data.iloc[top_indices]
    
    return recommended_restaurants

# Example user preferences

In [11]:
user_preferences = {
    'Cuisines': 'Italian',
    'Average Cost for two': 500,
    'Aggregate rating': 4.5
}

# Get recommendations

In [12]:
recommendations = recommend_restaurants(user_preferences, data_encoded, data)

# Display recommendations

In [13]:
print("Recommended Restaurants:")
print(recommendations[['Restaurant Name', 'Cuisines', 'Average Cost for two', 'Aggregate rating']])

Recommended Restaurants:
            Restaurant Name             Cuisines  Average Cost for two  \
30           Sandubas Caf��      Brazilian, Cafe                    30   
58    Quiosque Chopp Brahma  Bar Food, Brazilian                    70   
6331              Keventers            Beverages                   400   
6332              Muncheezz              Chinese                   300   
6333            Shaketastic            Beverages                   250   

      Aggregate rating  
30                 0.0  
58                 0.0  
6331               0.0  
6332               0.0  
6333               0.0  


# Testing the recommendation system with sample preferences

In [14]:
sample_preferences = {
    'Cuisines': 'Chinese',
    'Average Cost for two': 300,
    'Aggregate rating': 4.0
}

# Getting sample recommendations

In [15]:
sample_recommendations = recommend_restaurants(sample_preferences, data_encoded, data)

# Displaying sample recommendations

In [16]:
print("\nSample Recommendations:")
print(sample_recommendations[['Restaurant Name', 'Cuisines', 'Average Cost for two', 'Aggregate rating']])


Sample Recommendations:
            Restaurant Name             Cuisines  Average Cost for two  \
30           Sandubas Caf��      Brazilian, Cafe                    30   
58    Quiosque Chopp Brahma  Bar Food, Brazilian                    70   
6331              Keventers            Beverages                   400   
6332              Muncheezz              Chinese                   300   
6333            Shaketastic            Beverages                   250   

      Aggregate rating  
30                 0.0  
58                 0.0  
6331               0.0  
6332               0.0  
6333               0.0  
