In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

# Load the CSV file
df = pd.read_csv('cuisines.csv')

In [8]:
# Feature and label extraction
# Assumes 'cuisine' is the label column
X = df.drop('cuisine', axis=1)
y = df['cuisine']

In [10]:
# Encode the target labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)


In [12]:
# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.3, random_state=42)

In [14]:
# Define classifiers
models = {
    "KNN": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=2000),  # Increased iterations
    "SVM": SVC(probability=True),
    "Random Forest": RandomForestClassifier(),
    "AdaBoost": AdaBoostClassifier(algorithm='SAMME')  # Future-proofing
}

In [16]:
# Train and evaluate each model
for name, model in models.items():
    print(f"\n====================== {name} ======================")
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    acc = accuracy_score(y_test, preds)
    prec = precision_score(y_test, preds, average='weighted', zero_division=0)

    print(f"Accuracy: {acc:.4f}")
    print(f"Precision (weighted): {prec:.4f}")
    print("Classification Report:")
    print(classification_report(y_test, preds, target_names=le.classes_, zero_division=0))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, preds))


Accuracy: 0.7361
Precision (weighted): 0.7501
Classification Report:
              precision    recall  f1-score   support

     chinese       0.61      0.63      0.62       145
      indian       0.82      0.90      0.86       177
    japanese       0.70      0.50      0.58        88
      korean       0.72      0.88      0.79       229
        thai       0.96      0.46      0.62        96

    accuracy                           0.74       735
   macro avg       0.76      0.67      0.69       735
weighted avg       0.75      0.74      0.73       735

Confusion Matrix:
[[ 92  11   5  36   1]
 [  8 159   0   9   1]
 [ 14   5  44  25   0]
 [ 16   2   9 202   0]
 [ 21  17   5   9  44]]

Accuracy: 0.8054
Precision (weighted): 0.8042
Classification Report:
              precision    recall  f1-score   support

     chinese       0.71      0.76      0.73       145
      indian       0.90      0.92      0.91       177
    japanese       0.67      0.59      0.63        88
      korean       0