In [217]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error, r2_score

In [218]:
df = pd.read_csv('C:/Users/91742/Downloads/Dataset.csv')

In [219]:
df.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


In [220]:

categorical_cols = ['Cuisines', 'City']
numerical_cols = ['Longitude', 'Latitude']
categorical_transformer = OneHotEncoder(handle_unknown ='ignore')
numerical_transformer = SimpleImputer(strategy = 'mean')


In [221]:
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_cols),
        ('num', numerical_transformer, numerical_cols)
    ],
    remainder='drop'
)

In [222]:
X = df.drop(['Aggregate rating', 'Restaurant Name'], axis = 1)
y = df['Aggregate rating']

In [223]:
X_preprocessed = preprocessor.fit_transform(X)

In [224]:
model = LinearRegression()

In [225]:
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', model)
])

In [226]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [227]:
pipeline.fit(X_train, y_train)

In [228]:
X_test_processed = pipeline.named_steps['preprocessor'].transform(X_test)

In [229]:
y_pred = pipeline.predict(X_test)

In [230]:
mse = mean_squared_error(y_test, y_pred)

In [231]:
r2 = r2_score(y_test, y_pred)

In [232]:
print("Mean Squared Error :", mse)

Mean Squared Error : 1.6468555553376414


In [233]:
print("R-squared :", r2)

R-squared : 0.2764601509202256


# Sample User

In [234]:
user_pref = {
    'City' : 'Makati City',
    'Cuisines' : 'Japanese',
    'Price range' : 3
}

In [237]:
filtered_restaurants = df[
    (df['City'] == user_pref['City'])&
    (df['Cuisines'].str.contains(user_pref['Cuisines'], case = False))&
    (df['Price range'] == user_pref['Price range'])
]

In [240]:
filtered_restaurants['Predicted Rating'] = pipeline.predict(filtered_restaurants[X.columns])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_restaurants['Predicted Rating'] = pipeline.predict(filtered_restaurants[X.columns])


In [246]:
recommended_restaurants = filtered_restaurants.sort_values(by='Predicted Rating', ascending=False)

In [256]:
print("Recommended Restaurants : ", recommended_restaurants[['Restaurant Name', 'Cuisines', 'Price range', 'Predicted Rating']].head())

Recommended Restaurants :      Restaurant Name                    Cuisines  Price range  Predicted Rating
0  Le Petit Souffle  French, Japanese, Desserts            3          4.503999
1  Izakaya Kikufuji                    Japanese            3          4.473559
