## Task: Restaurant Recommendation
###### Objective: Create a restaurant recommendation system based on user preferences.

 Steps:
 
 Preprocess the dataset by handling missing values and encoding categorical variables.
 
 Determine the criteria for restaurant recommendations (e.g., cuisine preference, price range).
 
 Implement a content-based filtering approach where users are recommended restaurants similar to their preferred criteria.
 
 Test the recommendation system by providing sample user preferences and evaluating the quality of recommendations.


In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Read the restaurant data from the CSV file in the working directory
dataset_path = 'dataset.csv'
df = pd.read_csv(dataset_path)
# Bare last expression so the notebook renders the frame as a table
df

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.584450,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9546,5915730,Naml۱ Gurme,208,��stanbul,"Kemanke�� Karamustafa Pa��a Mahallesi, R۱ht۱m ...",Karak�_y,"Karak�_y, ��stanbul",28.977392,41.022793,Turkish,...,Turkish Lira(TL),No,No,No,No,3,4.1,Green,Very Good,788
9547,5908749,Ceviz A��ac۱,208,��stanbul,"Ko��uyolu Mahallesi, Muhittin ��st�_nda�� Cadd...",Ko��uyolu,"Ko��uyolu, ��stanbul",29.041297,41.009847,"World Cuisine, Patisserie, Cafe",...,Turkish Lira(TL),No,No,No,No,3,4.2,Green,Very Good,1034
9548,5915807,Huqqa,208,��stanbul,"Kuru�_e��me Mahallesi, Muallim Naci Caddesi, N...",Kuru�_e��me,"Kuru�_e��me, ��stanbul",29.034640,41.055817,"Italian, World Cuisine",...,Turkish Lira(TL),No,No,No,No,4,3.7,Yellow,Good,661
9549,5916112,A���k Kahve,208,��stanbul,"Kuru�_e��me Mahallesi, Muallim Naci Caddesi, N...",Kuru�_e��me,"Kuru�_e��me, ��stanbul",29.036019,41.057979,Restaurant Cafe,...,Turkish Lira(TL),No,No,No,No,4,4.0,Green,Very Good,901


In [3]:
# Preprocessing
## Count the missing entries in every column (how to treat them depends on the dataset)
df.isna().sum()

Restaurant ID           0
Restaurant Name         0
Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                9
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
dtype: int64

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, r2_score


In [5]:
# Separate the data into rows with known Cuisines and unknown Cuisines
# Take a real copy: plain assignment (`data = df`) only creates a second name for the same DataFrame.
data = df.copy()
has_cuisine = data['Cuisines'].notna()
# .copy() makes each subset an independent frame, so later column assignments on them
# neither raise SettingWithCopyWarning nor depend on how pandas shares memory with `data`.
known_cuisines = data[has_cuisine].copy()
unknown_cuisines = data[~has_cuisine].copy()

In [6]:
# Inspect the rows whose 'Cuisines' value is missing
unknown_cuisines

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
84,17284105,Cookie Shoppe,216,Albany,"115 N Jackson St, Albany, GA 31701",Albany,"Albany, Albany",-84.154,31.5772,,...,Dollar($),No,No,No,No,1,3.4,Orange,Average,34
87,17284211,Pearly's Famous Country Cookng,216,Albany,"814 N Slappey Blvd, Albany, GA 31701",Albany,"Albany, Albany",-84.1759,31.5882,,...,Dollar($),No,No,No,No,1,3.4,Orange,Average,36
94,17284158,Jimmie's Hot Dogs,216,Albany,"204 S Jackson St, Albany, GA 31701",Albany,"Albany, Albany",-84.1534,31.5751,,...,Dollar($),No,No,No,No,1,3.9,Yellow,Good,160
297,17374552,Corkscrew Cafe,216,Gainesville,"51 W Main St, Dahlonega, GA 30533",Dahlonega,"Dahlonega, Gainesville",-83.9858,34.5318,,...,Dollar($),No,No,No,No,3,3.9,Yellow,Good,209
328,17501439,Dovetail,216,Macon,"543 Cherry St, Macon, GA 31201",Macon,"Macon, Macon",-83.627979,32.83641,,...,Dollar($),No,No,No,No,3,3.8,Yellow,Good,102
346,17606621,HI Lite Bar & Lounge,216,Miller,"109 N Broadway Ave, Miller, SD 57362",Miller,"Miller, Miller",-98.9891,44.5158,,...,Dollar($),No,No,No,No,1,3.4,Orange,Average,11
368,17059060,Hillstone,216,Orlando,"215 South Orlando Avenue, Winter Park, FL 32789",Winter Park,"Winter Park, Orlando",-81.36526,28.596682,,...,Dollar($),No,No,No,No,3,4.4,Green,Very Good,1158
418,17142698,Leonard's Bakery,216,Rest of Hawaii,"933 Kapahulu Ave, Honolulu, HI 96816",Kaimuki,"Kaimuki, Rest of Hawaii",-157.813432,21.284586,,...,Dollar($),No,No,No,No,1,4.7,Dark Green,Excellent,707
455,17616465,Tybee Island Social Club,216,Savannah,"1311 Butler Ave, Tybee Island, GA 31328",Tybee Island,"Tybee Island, Savannah",-80.848297,31.99581,,...,Dollar($),No,No,No,No,1,3.9,Yellow,Good,309


In [7]:
# Columns kept for the cuisine model; 'Cuisines' is the column to be predicted
model_columns = [
    'Average Cost for two',
    'Has Table booking',
    'Has Online delivery',
    'Price range',
    'Aggregate rating',
    'City',
    'Locality Verbose',
    'Cuisines',
]
known_cuisines = known_cuisines.loc[:, model_columns]

In [8]:
# Preview the known-cuisine rows after restricting them to the selected columns
known_cuisines

Unnamed: 0,Average Cost for two,Has Table booking,Has Online delivery,Price range,Aggregate rating,City,Locality Verbose,Cuisines
0,1100,Yes,No,3,4.8,Makati City,"Century City Mall, Poblacion, Makati City, Mak...","French, Japanese, Desserts"
1,1200,Yes,No,3,4.5,Makati City,"Little Tokyo, Legaspi Village, Makati City, Ma...",Japanese
2,4000,Yes,No,4,4.4,Mandaluyong City,"Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...","Seafood, Asian, Filipino, Indian"
3,1500,No,No,4,4.9,Mandaluyong City,"SM Megamall, Ortigas, Mandaluyong City, Mandal...","Japanese, Sushi"
4,1500,Yes,No,4,4.8,Mandaluyong City,"SM Megamall, Ortigas, Mandaluyong City, Mandal...","Japanese, Korean"
...,...,...,...,...,...,...,...,...
9546,80,No,No,3,4.1,��stanbul,"Karak�_y, ��stanbul",Turkish
9547,105,No,No,3,4.2,��stanbul,"Ko��uyolu, ��stanbul","World Cuisine, Patisserie, Cafe"
9548,170,No,No,4,3.7,��stanbul,"Kuru�_e��me, ��stanbul","Italian, World Cuisine"
9549,120,No,No,4,4.0,��stanbul,"Kuru�_e��me, ��stanbul",Restaurant Cafe


In [9]:
#Encode categorical variables
# Work on an explicit copy so the column assignments below never write into a slice of another frame.
known_cuisines = known_cuisines.copy()

label_encoder = LabelEncoder()
# One fitted encoder per column, so every mapping (not only the last one fitted) can be inverted later.
label_encoders = {}
# Pick the non-numeric columns by dtype family. The previous check compared each dtype with
# `type(object)`, which evaluates to `type` and only matched through numpy's dtype coercion.
for column in known_cuisines.select_dtypes(exclude='number').columns:
    label_encoders[column] = LabelEncoder()
    known_cuisines[column] = label_encoders[column].fit_transform(known_cuisines[column])

# Keep `label_encoder` bound to the fitted 'Cuisines' encoder so predicted cuisine codes can be
# decoded with it; falls back to the fresh encoder if 'Cuisines' was not encoded in this run.
label_encoder = label_encoders.get('Cuisines', label_encoder)

In [10]:
# Preview the frame after its text columns were replaced by integer label codes
known_cuisines

Unnamed: 0,Average Cost for two,Has Table booking,Has Online delivery,Price range,Aggregate rating,City,Locality Verbose,Cuisines
0,1100,1,0,3,4.8,73,172,920
1,1200,1,0,3,4.5,73,600,1111
2,4000,1,0,4,4.4,75,314,1671
3,1500,0,0,4,4.9,75,873,1126
4,1500,1,0,4,4.8,75,873,1122
...,...,...,...,...,...,...,...,...
9546,80,0,0,3,4.1,139,522,1813
9547,105,0,0,3,4.2,139,557,1824
9548,170,0,0,4,3.7,139,560,1110
9549,120,0,0,4,4.0,139,560,1657


In [11]:
# Predictor columns used by the cuisine model
features = [
    'Average Cost for two',
    'Has Table booking',
    'Has Online delivery',
    'Price range',
]

# X holds the predictors, y the label-encoded 'Cuisines' target
X = known_cuisines.loc[:, features]
y = known_cuisines.loc[:, 'Cuisines']


In [12]:
# Split the data into training and testing sets (20% held out, fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Decision Tree model
# random_state pins the random feature permutation the tree uses to break ties between
# equally good splits, so re-running the notebook rebuilds the same tree.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)


In [13]:
# Predict label-encoded cuisines for the held-out test features
y_pred = dt.predict(X_test)


In [14]:

# NOTE: R² treats the label-encoded cuisine IDs as ordered numbers, which they are not, so this
# value is not a meaningful quality measure for the classifier; it is kept only for continuity.
r2 = r2_score(y_test, y_pred)
print("R_squared :", r2)

# 'Cuisines' is a categorical target, so score the model by the share of exactly matching labels.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy :", accuracy)

R_squared : -0.5942477545613725


In [None]:
# Predict the missing Cuisines in the unknown_cuisines data
# The tree was fitted on label-encoded inputs, so the raw 'Yes'/'No' flags are converted to the
# codes LabelEncoder assigns (classes are sorted alphabetically: 'No' -> 0, 'Yes' -> 1).
yes_no_codes = {'No': 0, 'Yes': 1}
unknown_features = unknown_cuisines[features].copy()
for column in ('Has Table booking', 'Has Online delivery'):
    if column in unknown_features and not pd.api.types.is_numeric_dtype(unknown_features[column]):
        unknown_features[column] = unknown_features[column].map(yes_no_codes)
predicted_codes = dt.predict(unknown_features)

# Transform the predicted numerical Cuisines back to their original categorical values
# (`label_encoder` was last fitted on the 'Cuisines' column, so it holds the cuisine mapping)
predicted_cuisines = label_encoder.inverse_transform(predicted_codes)
unknown_cuisines = unknown_cuisines.assign(Cuisines=predicted_cuisines)

# Combine the known and imputed rows: fill the predictions into a copy of the original frame so
# every row keeps its readable (un-encoded) columns and its original position.
data = df.copy()
data.loc[unknown_cuisines.index, 'Cuisines'] = predicted_cuisines


## Step 3: Content-based restaurant recommendation

In [None]:
## Content-based filtering on cuisine and price range

def build_restaurant_features(restaurants):
    """Build a numeric content-feature matrix with one row per restaurant.

    'Cuisines' is a comma-separated multi-label field, so it is multi-hot
    encoded (one 0/1 column per cuisine); 'Price range' is one-hot encoded.
    Integer label codes are avoided on purpose: their ordering is arbitrary,
    so cosine similarity computed on them carries no meaning.

    Parameters
    ----------
    restaurants : pandas.DataFrame
        Must contain the 'Cuisines' and 'Price range' columns.

    Returns
    -------
    pandas.DataFrame
        0/1 columns named 'cuisine_<name>' and 'price_<value>', indexed like
        ``restaurants``. Rows with a missing 'Cuisines' value get all-zero
        cuisine columns.
    """
    cuisine_flags = (
        restaurants['Cuisines']
        .fillna('')
        .str.replace(r'\s*,\s*', ',', regex=True)  # tolerate uneven spacing around commas
        .str.strip()
        .str.get_dummies(sep=',')
        .add_prefix('cuisine_')
    )
    price_flags = pd.get_dummies(restaurants['Price range'], prefix='price').astype(int)
    return pd.concat([cuisine_flags, price_flags], axis=1)


def recommend_restaurants(restaurants, preferences, top_n=5):
    """Return the ``top_n`` restaurants most similar to the user's preferences.

    Parameters
    ----------
    restaurants : pandas.DataFrame
        Restaurant data with 'Cuisines', 'Price range', 'Aggregate rating'
        and 'Votes' columns.
    preferences : dict
        ``{'cuisine': <cuisine name>, 'price_range': <price range value>}``.
    top_n : int, default 5
        Number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        The best-matching rows of ``restaurants`` with an added 'Similarity'
        column, ordered from best to worst match.

    Raises
    ------
    ValueError
        If none of the preferences matches a value present in the data.
    """
    restaurant_features = build_restaurant_features(restaurants)

    wanted_columns = [
        'cuisine_' + str(preferences['cuisine']),
        'price_' + str(preferences['price_range']),
    ]
    matched_columns = [name for name in wanted_columns if name in restaurant_features.columns]
    if not matched_columns:
        raise ValueError(
            "None of the user preferences match the dataset: {}".format(preferences)
        )

    # The user vector lives in the same feature space as the restaurants: 1 for each requested
    # feature, 0 everywhere else.
    user_vector = pd.Series(0, index=restaurant_features.columns)
    user_vector.loc[matched_columns] = 1

    similarities = cosine_similarity(
        user_vector.to_numpy().reshape(1, -1),
        restaurant_features.to_numpy(),
    )[0]

    # Many restaurants share the same similarity score, so break ties by rating and then by votes
    # instead of relying on arbitrary row order.
    ranked = restaurants.assign(Similarity=similarities).sort_values(
        ['Similarity', 'Aggregate rating', 'Votes'], ascending=False
    )
    return ranked.head(top_n)


# Determine the criteria for restaurant recommendations
## This will depend on the user preferences. For example:
user_preferences = {
    'cuisine': 'Italian',
    # 'Price range' holds integer codes in this dataset (values such as 1, 3 and 4 appear in the
    # loaded frame), not text labels like 'Medium'.
    # Assumes 2 denotes a mid-range price - TODO confirm against the dataset documentation.
    'price_range': 2,
}

# Get the top 5 recommended restaurants
recommended_restaurants = recommend_restaurants(df, user_preferences, top_n=5)

print("Recommended Restaurants:")
recommended_restaurants[
    ['Restaurant Name', 'City', 'Cuisines', 'Price range', 'Aggregate rating', 'Votes', 'Similarity']
]
