## Task: Restaurant Recommendation
###### Objective: Create a restaurant recommendation system based on user preferences.

 Steps:
 
 Preprocess the dataset by handling missing values and encoding categorical variables.
 
 Determine the criteria for restaurant recommendations (e.g., cuisine preference, price range).
 
 Implement a content-based filtering approach where users are recommended restaurants similar to their preferred criteria.
 
 Test the recommendation system by providing sample user preferences and evaluating the quality of recommendations.


In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the dataset
df = pd.read_csv('dataset.csv')
df.shape

(9551, 21)

In [3]:
# Preprocessing
## Handle missing values (you might want to handle these differently depending on your dataset)
df.isnull().sum()

Restaurant ID           0
Restaurant Name         0
Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                9
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
dtype: int64

In [19]:
# Separate the data into rows with known Cuisines and unknown Cuisines
data= df # making a copy of the original data set
known_cuisines = data[data['Cuisines'].notna()]
unknown_cuisines = data[data['Cuisines'].isna()]

In [20]:
unknown_cuisines

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
368,17059060,Hillstone,216,Orlando,"215 South Orlando Avenue, Winter Park, FL 32789",Winter Park,"Winter Park, Orlando",-81.36526,28.596682,,...,Dollar($),No,No,No,No,3,4.4,Green,Very Good,1158
418,17142698,Leonard's Bakery,216,Rest of Hawaii,"933 Kapahulu Ave, Honolulu, HI 96816",Kaimuki,"Kaimuki, Rest of Hawaii",-157.813432,21.284586,,...,Dollar($),No,No,No,No,1,4.7,Dark Green,Excellent,707
455,17616465,Tybee Island Social Club,216,Savannah,"1311 Butler Ave, Tybee Island, GA 31328",Tybee Island,"Tybee Island, Savannah",-80.848297,31.99581,,...,Dollar($),No,No,No,No,1,3.9,Yellow,Good,309


In [10]:
# Assuming 'df' is your DataFrame
cuisines_in_albany = df[df['City'] == 'Albany'].sort_values(by='Votes', ascending=False)

# Print the sorted cuisines in Albany
print(cuisines_in_albany['Cuisines'])


99         Seafood, Tapas, Bar Food
94                              NaN
93                          Chinese
90        Pizza, Bar Food, Sandwich
89       Asian, Chinese, Vegetarian
100                  Italian, Pizza
97           Japanese, Steak, Sushi
92     Chinese, Seafood, Vegetarian
88                        Fast Food
96                  American, Steak
95       American, Burger, Sandwich
91           Steak, Tapas, Bar Food
98           Japanese, Steak, Sushi
85                          Mexican
87                              NaN
82             BBQ, Burger, Seafood
84                              NaN
86         Coffee and Tea, Sandwich
83                    American, BBQ
101      American, Breakfast, Diner
Name: Cuisines, dtype: object


In [17]:
# missing at Albany
df.loc[df['Restaurant Name'] == 'Cookie Shoppe', 'Cuisines'] = 'Coffee, Tea, cookie'
df.loc[df['Restaurant Name'] == "Pearly's Famous Country Cookng", 'Cuisines'] = 'American, Breakfast, Diner'
df.loc[df['Restaurant Name'] == "Jimmie's Hot Dogs", 'Cuisines'] = 'Hot Dogs'

In [18]:
df.loc[df['Restaurant Name'] == "Corkscrew Cafe", 'Cuisines'] = 'Coffee and Tea, Sandwich'
df.loc[df['Restaurant Name'] == 'Dovetail', 'Cuisines'] = 'Italian'
df.loc[df['Restaurant Name'] == 'HI Lite Bar & Lounge', 'Cuisines'] = 'American, Breakfast, Diner'
df.loc[df['Restaurant Name'] == 'Dovetail', 'Cuisines'] = 'Italian'


In [16]:
# Assuming 'df' is your DataFrame
cuisines_in_albany = df[df['City'] == 'Miller'].sort_values(by='Votes', ascending=False)

# Print the sorted cuisines in Albany
print(cuisines_in_albany['Cuisines'])


346    NaN
Name: Cuisines, dtype: object


In [6]:
known_cuisines.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

In [8]:
known_cuisines['City'=='Albany']

KeyError: False

In [None]:
filtered_known_cuisines= known_cuisines[[ 'City', 'Votes', 'Cuisines']]

In [None]:
#Encode categorical variables
label_encoder = LabelEncoder()
for column in filtered_known_cuisines.columns:
    if filtered_known_cuisines[column].dtype == type(object):
        filtered_known_cuisines[column] = label_encoder.fit_transform(known_cuisines[column])

In [None]:
filtered_known_cuisines

### Classifier Model

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score, r2_score


In [None]:
# Select the features for the prediction model
# = ['Average Cost for two', 'Has Table booking', 'Has Online delivery', 'Price range']

X = filtered_known_cuisines.drop('Cuisines', axis = 1)
y = filtered_known_cuisines['Cuisines']


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Assuming X is your feature set and y are the labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, 
                             max_depth=100, 
                             min_samples_split=50, 
                             min_samples_leaf=50, 
                             max_features='sqrt', 
                             random_state=42)

# Train the model using the training sets
clf.fit(X_train, y_train)

# Predict the response for test dataset
y_pred = clf.predict(X_test)

# Model Accuracy
print("Accuracy:", accuracy_score(y_test, y_pred))


In [None]:

r2 = r2_score(y_test, y_pred)
print("R_squared :", r2)

In [None]:
# Predict the missing Cuisines in the unknown_cuisines data
unknown_cuisines['Cuisines'] = clf.predict(unknown_cuisines[['City']])

# Transform the predicted numerical Cuisines back to their original categorical values
unknown_cuisines['Cuisines'] = le.inverse_transform(unknown_cuisines['Cuisines'])

# Combine the known_cuisines and unknown_cuisines data back together
data = pd.concat([known_cuisines, unknown_cuisines])
unknown_cuisines

## step3 :

In [None]:
## Encoding categorical variables
label_encoder = LabelEncoder()
categorical_features = ['cuisine', 'price_range']  # Add other categorical features here
for feature in categorical_features:
    df[feature] = label_encoder.fit_transform(df[feature])

# Determine the criteria for restaurant recommendations
## This will depend on the user preferences. For example:
user_preferences = {
    'cuisine': 'Italian',
    'price_range': 'Medium'
}
# Convert user preferences to encoded form
for feature in user_preferences:
    user_preferences[feature] = label_encoder.transform([user_preferences[feature]])

# Implement a content-based filtering approach
## Compute the cosine similarity between user preferences and restaurants
user_vector = list(user_preferences.values())
restaurant_vectors = df[categorical_features].values
similarities = cosine_similarity([user_vector], restaurant_vectors)

# Get the top 5 recommended restaurants
top_5_index = similarities[0].argsort()[-5:][::-1]
recommended_restaurants = df.iloc[top_5_index]

print("Recommended Restaurants:")
print(recommended_restaurants)
