In [80]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error, r2_score


In [81]:
df = pd.read_csv('C:/Users/91742/Downloads/Dataset.csv')

In [82]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9551 entries, 0 to 9550
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Restaurant ID         9551 non-null   int64  
 1   Restaurant Name       9551 non-null   object 
 2   Country Code          9551 non-null   int64  
 3   City                  9551 non-null   object 
 4   Address               9551 non-null   object 
 5   Locality              9551 non-null   object 
 6   Locality Verbose      9551 non-null   object 
 7   Longitude             9551 non-null   float64
 8   Latitude              9551 non-null   float64
 9   Cuisines              9542 non-null   object 
 10  Average Cost for two  9551 non-null   int64  
 11  Currency              9551 non-null   object 
 12  Has Table booking     9551 non-null   object 
 13  Has Online delivery   9551 non-null   object 
 14  Is delivering now     9551 non-null   object 
 15  Switch to order menu 

In [83]:

categorical_cols = ['Cuisines', 'City']
numerical_cols = ['Longitude', 'Latitude']
categorical_transformer = OneHotEncoder(handle_unknown ='ignore')
numerical_transformer = SimpleImputer(strategy = 'mean')


In [84]:
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_cols),
        ('num', numerical_transformer, numerical_cols)
    ],
    remainder='drop'
)

In [85]:
X = df.drop(['Aggregate rating', 'Restaurant Name'], axis = 1)
y = df['Aggregate rating']

In [86]:
X_preprocessed = preprocessor.fit_transform(X)

In [87]:
model = LinearRegression()

In [88]:
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', model)
])

In [89]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [90]:
pipeline.fit(X_train, y_train)

In [91]:
X_test_processed = pipeline.named_steps['preprocessor'].transform(X_test)

In [92]:
y_pred = pipeline.predict(X_test)

In [93]:
mse = mean_squared_error(y_test, y_pred)

In [94]:
r2 = r2_score(y_test, y_pred)

In [95]:
print("Mean Squared Error :", mse)

Mean Squared Error : 1.6468555553376414


In [96]:
print("R-squared :", r2)

R-squared : 0.2764601509202256


In [97]:
y_pred 

array([1.75548956, 3.02130778, 2.54422742, ..., 1.66054735, 4.34502313,
       4.69239439])

# Sample User

In [98]:
user_pref = {
    'City' : 'Makati City',
    'Cuisines' : 'Japanese',
    'Price range' : 3
}

In [99]:
filtered_restaurants = df[
    (df['City'] == user_pref['City'])&
    (df['Cuisines'].str.contains(user_pref['Cuisines'], case = False))&
    (df['Price range'] == user_pref['Price range'])
]

In [100]:
filtered_restaurants['Predicted Rating'] = pipeline.predict(filtered_restaurants[X.columns])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_restaurants['Predicted Rating'] = pipeline.predict(filtered_restaurants[X.columns])


# Recommended Restaurants with Predicted Rating

In [101]:
recommended_restaurants = filtered_restaurants.sort_values(by='Predicted Rating', ascending=False)

In [102]:
print("Recommended Restaurants : ") 
print(recommended_restaurants[['Restaurant Name', 'Cuisines', 'Price range', 'Predicted Rating']].head())

Recommended Restaurants : 
    Restaurant Name                    Cuisines  Price range  Predicted Rating
0  Le Petit Souffle  French, Japanese, Desserts            3          4.503999
1  Izakaya Kikufuji                    Japanese            3          4.473559
