In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder


resataurant_df= pd.read_csv('Dataset.csv')  


resataurant_df.fillna(resataurant_df.median(numeric_only=True), inplace=True)
resataurant_df.fillna("Unknown", inplace=True) 


binary_columns = ['Has Table booking', 'Has Online delivery', 'Is delivering now']
for col in binary_columns:
    resataurant_df[col] = resataurant_df[col].map({'Yes': 1, 'No': 0})


categorical_columns = ['Country Code', 'City', 'Cuisines', 'Currency', 'Rating color', 'Rating text']
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    resataurant_df[col] = le.fit_transform(resataurant_df[col])
    label_encoders[col] = le


features = ['Country Code', 'City', 'Cuisines', 'Average Cost for two', 'Has Table booking', 
            'Has Online delivery', 'Is delivering now', 'Price range', 'Votes']
X = resataurant_df[features]
Y= resataurant_df['Aggregate rating']


x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)


model = RandomForestRegressor(random_state=42)
model.fit(x_train, y_train)


y_pred = model.predict(x_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")


importance = model.feature_importances_
feature_importance = pd.DataFrame({'Feature': features, 'Importance': importance})
print(feature_importance.sort_values(by='Importance', ascending=False))


Mean Squared Error: 0.10082387222210552
R-squared: 0.9557034075910414
                Feature  Importance
8                 Votes    0.954267
2              Cuisines    0.015237
3  Average Cost for two    0.009963
0          Country Code    0.008271
1                  City    0.007404
5   Has Online delivery    0.002082
7           Price range    0.001705
4     Has Table booking    0.000910
6     Is delivering now    0.000161


In [2]:
from sklearn.preprocessing import LabelEncoder
import numpy as np


city_encoder = LabelEncoder()
city_encoder.classes_ = np.array(['New Delhi', 'Mumbai', 'Bangalore'])  # Example classes
cuisines_encoder = LabelEncoder()
cuisines_encoder.classes_ = np.array(['North Indian', 'South Indian', 'Chinese'])  # Example classes


test_input = [
    [1,              # Country Code
     "New Delhi",    # City
     "North Indian", # Cuisines
     800,            # Average Cost for two
     1,              # Has Table booking
     1,              # Has Online delivery
     0,              # Is delivering now
     2,              # Price range
     2500]           # Votes
]


test_input[0][1] = city_encoder.transform([test_input[0][1]])[0]
test_input[0][2] = cuisines_encoder.transform([test_input[0][2]])[0]


test_input = np.array(test_input, dtype=float)


predicted_rating = model.predict(test_input)
print("Predicted Aggregate Rating:", predicted_rating[0])


Predicted Aggregate Rating: 4.289999999999999


