In [None]:
# Step 1: Load the dataset
import pandas as pd
import numpy as np

# The CSV is addressed by a relative path, so it is resolved against the
# notebook's current working directory.
dataset_path = 'swiggy_final_versionV2.csv'
data = pd.read_csv(filepath_or_buffer=dataset_path)

In [None]:
# Show the column index of the freshly loaded frame.
column_index = data.columns
print(column_index)

Index(['id', 'name', 'city', 'rating', 'rating_count', 'cost', 'cuisine',
       'lic_no', 'link', 'address', 'menu', 'weekly_avg_salary',
       'no_years_open'],
      dtype='object')


In [None]:
# Step 2: Data Preprocessing
# These columns are dropped before the feature matrix is built.
columns_to_remove = ['id', 'name', 'lic_no', 'link', 'address', 'menu']
data = data.drop(columns=columns_to_remove)
# Convert the cuisine and city columns to pandas' categorical dtype in one call.
data = data.astype({'cuisine': 'category', 'city': 'category'})

In [None]:
# Split the dataset into training and testing sets
from sklearn.model_selection import train_test_split

# Target column.
y = data['rating']
# Feature matrix: everything except the target, with the two categorical
# columns expanded into one-hot indicator columns.
X = pd.get_dummies(data.drop(columns='rating'), columns=['cuisine', 'city'])
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Number of feature columns in the training matrix after one-hot encoding.
print(X_train.shape[1])

2945


In [None]:
# Step 3: Define the decision tree hyperparameters
# (the model trained in this notebook is scikit-learn's DecisionTreeRegressor)
from sklearn.tree import DecisionTreeRegressor

params = {
    # 'squared_error' is the current name of the former 'mse' criterion, which
    # scikit-learn deprecated in 1.0 and removed in 1.2.
    'criterion': 'squared_error',
    'max_depth': 5,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    # Fixed seed: scikit-learn permutes features at every split, so without it
    # the fitted tree can differ between runs when candidate splits tie.
    'random_state': 42,
}

In [None]:

# Step 4: Train the model
# Build the regressor from the hyperparameter dict defined in Step 3,
# then fit it on the training split.
dt = DecisionTreeRegressor(**params)
dt.fit(X=X_train, y=y_train)



In [None]:

# Step 5: Evaluate the model
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Predictions on the held-out test split.
y_pred = dt.predict(X_test)

# RMSE is computed as the square root of the MSE instead of passing
# `squared=False`, which scikit-learn deprecated in 1.4 and removed in 1.6.
rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))

print('MAE:', mean_absolute_error(y_test, y_pred))
print('RMSE:', rmse)
print('R-squared:', r2_score(y_test, y_pred))

MAE: 0.21282522809262056
RMSE: 0.42590335215887926
R-squared: 0.8660202975653544


In [None]:

# Step 6: Predict ratings
# Use the trained decision tree to predict the rating of a restaurant from its features.
new_data = pd.DataFrame({
    'city': ['Abohar'],
    'rating_count': [1.0],
    'cost': [200.0],
    'cuisine': ['Beverages,Pizzas'],
    'weekly_avg_salary': [4529.75],
    'no_years_open': [1],
})
new_data_encoded = pd.get_dummies(new_data, columns=['cuisine', 'city'])

# The reindex below silently discards any indicator column that is not part of
# the training matrix, so report such categories instead of predicting as if
# they had never been supplied.
unseen_columns = new_data_encoded.columns.difference(X_train.columns)
if len(unseen_columns) > 0:
    print('Warning: categories not seen during training are ignored:', list(unseen_columns))

# Align to the training layout: same columns, same order, absent indicators set to 0.
new_data_encoded = new_data_encoded.reindex(columns=X_train.columns, fill_value=0)
rating = dt.predict(new_data_encoded)
print('Predicted Rating:', rating[0])

Predicted Rating: 1.6428380192028247
