In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from imblearn.over_sampling import SMOTE

In [2]:
df = pd.read_csv("Dataset .csv", encoding="latin1")
df["Cuisines"].fillna("Other", inplace=True)
df["Locality"].fillna("Unknown", inplace=True)
df.fillna(df.mean(numeric_only=True), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Cuisines"].fillna("Other", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Locality"].fillna("Unknown", inplace=True)


In [3]:
def map_cuisine(c):
    c = str(c).lower()
    if "indian" in c or "biryani" in c or "mughlai" in c or "south indian" in c or "north indian" in c:
        return "Indian"
    elif "chinese" in c or "thai" in c or "asian" in c:
        return "Chinese"
    elif "italian" in c or "pizza" in c or "pasta" in c:
        return "Italian"
    elif "burger" in c or "fast food" in c or "sandwich" in c or "snack" in c:
        return "Fast Food"
    elif "cafe" in c or "bakery" in c or "dessert" in c or "coffee" in c:
        return "Cafe"
    else:
        return "Other"

df["Cuisine_Group"] = df["Cuisines"].apply(map_cuisine)

In [4]:
def cuisine_from_name(name):
    name = str(name).lower()
    if "pizza" in name or "pasta" in name:
        return "Italian"
    elif "biryani" in name or "indian" in name or "mughlai" in name:
        return "Indian"
    elif "burger" in name or "fast food" in name or "sandwich" in name:
        return "Fast Food"
    elif "cafe" in name or "coffee" in name or "bakery" in name or "dessert" in name:
        return "Cafe"
    elif "chinese" in name or "thai" in name or "asian" in name:
        return "Chinese"
    else:
        return "Other"

df["Cuisine_from_Name"] = df["Restaurant Name"].apply(cuisine_from_name)

In [5]:
top_cuisines = df['Cuisines'].str.get_dummies(sep=',')
top_cuisines = top_cuisines.groupby(top_cuisines.columns, axis=1).sum()  
top_10_cuisines = top_cuisines.sum().sort_values(ascending=False).head(10).index
for cuisine in top_10_cuisines:
    df[f"Cuisine_{cuisine}"] = df['Cuisines'].str.contains(cuisine, case=False).astype(int)

  top_cuisines = top_cuisines.groupby(top_cuisines.columns, axis=1).sum()


In [6]:
le_city = LabelEncoder()
df["City"] = le_city.fit_transform(df["City"])

le_locality = LabelEncoder()
df["Locality"] = le_locality.fit_transform(df["Locality"])

le_name = LabelEncoder()
df["Cuisine_from_Name_Encoded"] = le_name.fit_transform(df["Cuisine_from_Name"])

le_cuisine = LabelEncoder()
df["Cuisine_Group_Encoded"] = le_cuisine.fit_transform(df["Cuisine_Group"])

df["Has Table booking"] = df["Has Table booking"].map({"Yes": 1, "No": 0})
df["Has Online delivery"] = df["Has Online delivery"].map({"Yes": 1, "No": 0})

In [7]:
df["Popularity_Score"] = df["Votes"] * df["Aggregate rating"]

In [8]:
feature_columns = ["Country Code", "City", "Locality", "Longitude", "Latitude",
                   "Average Cost for two", "Has Table booking", "Has Online delivery",
                   "Price range", "Votes", "Aggregate rating", "Popularity_Score",
                   "Cuisine_from_Name_Encoded"] + [f"Cuisine_{c}" for c in top_10_cuisines]

X = df[feature_columns]
y = df["Cuisine_Group_Encoded"]

In [9]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [10]:
sm = SMOTE(random_state=42)
X_train_bal, y_train_bal = sm.fit_resample(X_train, y_train)

In [11]:
gb = GradientBoostingClassifier(
    n_estimators=700,
    max_depth=6,
    learning_rate=0.05,
    random_state=42
)
gb.fit(X_train_bal, y_train_bal)

y_pred = gb.predict(X_test)

In [12]:
accuracy = accuracy_score(y_test, y_pred)
print("Optimized Cuisine Classification Accuracy:", accuracy)

print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)

Optimized Cuisine Classification Accuracy: 0.8560962846677133

Classification Report:

              precision    recall  f1-score   support

           0       0.76      0.86      0.81       215
           1       0.78      0.82      0.80       241
           2       0.89      0.92      0.91       223
           3       0.98      0.93      0.96       898
           4       0.75      0.71      0.73       129
           5       0.57      0.58      0.57       205

    accuracy                           0.86      1911
   macro avg       0.79      0.80      0.80      1911
weighted avg       0.86      0.86      0.86      1911

Confusion Matrix:
 [[185   5   3   4   0  18]
 [  6 198   4   4   4  25]
 [  0   1 205   0   6  11]
 [ 13  21   4 837   6  17]
 [  6   3   6   2  92  20]
 [ 32  25   8   7  14 119]]


In [13]:
rf = RandomForestClassifier(n_estimators=300, max_depth=12, random_state=42, class_weight='balanced')
rf.fit(X_train_bal, y_train_bal)

importance = pd.DataFrame({
    "Feature": X.columns,
    "Importance": rf.feature_importances_
}).sort_values(by="Importance", ascending=False)

importance

Unnamed: 0,Feature,Importance
13,Cuisine_North Indian,0.20287
19,Cuisine_Fast Food,0.155973
17,Cuisine_Chinese,0.145173
12,Cuisine_from_Name_Encoded,0.069891
5,Average Cost for two,0.066241
21,Cuisine_Cafe,0.042724
20,Cuisine_Bakery,0.039036
14,Cuisine_ Chinese,0.034254
8,Price range,0.024257
15,Cuisine_ Fast Food,0.022529
