In [28]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


df = pd.read_csv("/content/Dataset .csv")

#Rename columns
df = df.rename(columns={
    "Country Code": "Country_Code",
    "Price range" : "Price_range",
    "Average Cost for two" : "Average_Cost_for_two",
    "Has Online delivery" : "Has_Online_delivery",
    "Has Table booking" : "Has_Table_booking",
    "Is delivering now" : "Is_delivering_now",
})

df["Cuisines"] = df["Cuisines"].fillna("Unknown")
#print(df.isnull().sum())

#Take only first Cuisine value to evaluate
df["Cuisines"] = df["Cuisines"].apply(lambda x: x.split(",")[0].strip())

top_cuisines = df["Cuisines"].value_counts().nlargest(10).index
df = df[df["Cuisines"].isin(top_cuisines)]

#Encoding Categorical Columns

# 1.Lable Encoding
le = LabelEncoder()
df["Has_Online_delivery"] = le.fit_transform(df["Has_Online_delivery"])
df["Has_Table_booking"] = le.fit_transform(df["Has_Table_booking"])
df["Is_delivering_now"] = le.fit_transform(df["Is_delivering_now"])
df["Cuisines"] = le.fit_transform(df["Cuisines"])

#Spliting Data into Model
df.columns
feature_columns = [
    "Country_Code",
    "Price_range",
    "Average_Cost_for_two",
    "Votes",
    "Has_Online_delivery",
    "Has_Table_booking",
    "Is_delivering_now",
]

X = df[feature_columns]
y = df["Cuisines"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size = 0.2, random_state=42
)

#Random Forest Model
model = RandomForestClassifier(
    n_estimators=300,
    class_weight="balanced",
    random_state=42
)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
print(predictions)


#Accuracy
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

#Classification Report
print("Classification Report:\n",classification_report(y_test, predictions))

#Confusion Matrix
print("Confusion Matrix:\n", confusion_matrix(y_test, predictions))


[0 6 7 ... 7 7 7]
Accuracy: 0.30791161796151106
Classification Report:
               precision    recall  f1-score   support

           0       0.51      0.57      0.54       227
           1       0.16      0.18      0.17       469
           2       0.20      0.20      0.20       490
           3       0.19      0.19      0.19       693
           4       0.13      0.11      0.12       192
           5       0.18      0.24      0.21       539
           6       0.15      0.22      0.18       198
           7       0.53      0.43      0.47      2395
           8       0.07      0.09      0.08       210
           9       0.19      0.27      0.22       199

    accuracy                           0.31      5612
   macro avg       0.23      0.25      0.24      5612
weighted avg       0.34      0.31      0.32      5612

Confusion Matrix:
 [[ 129    5   16    4    4   15    0   49    5    0]
 [   7   84   34   68    2   94   39   84   31   26]
 [  32   51   97   47   11   31    0  203   