In [48]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, confusion_matrix, classification_report, precision_recall_curve
from sklearn.multiclass import OneVsRestClassifier
import numpy as np

# Load the cuisines dataset; the relative path is resolved against the current
# working directory. The frame is expected to contain a 'cuisine' label column
# and an 'Unnamed: 0' column, both of which are referenced when the features
# and labels are prepared.
cuisines_df = pd.read_csv("cuisines.csv")

In [50]:
# Target: the cuisine label of every row.
y = cuisines_df['cuisine']
# Predictors: every remaining column except 'Unnamed: 0'
# (presumably a leftover row index from the CSV export - confirm against the file).
X = cuisines_df.drop(columns=['Unnamed: 0', 'cuisine'])

In [52]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified, and the per-class supports in the report
# at the end of the notebook range from 58 to 162 - consider stratify=y on the next re-run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [54]:
# Make the multiclass strategy explicit: OneVsRestClassifier fits one liblinear
# logistic regression per class instead of relying on LogisticRegression's own
# multiclass handling, which avoids future warnings.
lr = LogisticRegression(solver="liblinear")
model = OneVsRestClassifier(estimator=lr)
model.fit(X_train, y_train)

In [56]:
# Test-set accuracy: share of held-out samples whose predicted label equals the true one.
test_predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, test_predictions)
print(f"Accuracy is {accuracy:.4f}")


Accuracy is 0.8286


In [58]:
# Positional (0-based) index of the test-set row to inspect. It is used with
# .iloc on both X_test and y_test, so the features and the label shown in the
# following cells belong to the same sample.
sample_index = 50

In [60]:
# List the ingredients of the sample, i.e. the feature columns whose value is non-zero.
sample_row = X_test.iloc[sample_index]
present_mask = sample_row != 0
ingredients = sample_row[present_mask].index.tolist()
print(f"Ingredients: {ingredients}")


Ingredients: ['chive', 'egg', 'fish', 'lemon_juice', 'mushroom', 'salmon', 'soy_sauce', 'vegetable_oil', 'vinegar', 'wine']


In [62]:
# Show the true cuisine label of the same sample, for comparison with the prediction.
actual_cuisine = y_test.iloc[sample_index]
print(f"Cuisine: {actual_cuisine}")

Cuisine: japanese


In [64]:

# Select the sample as a one-row DataFrame: indexing rows with a list keeps the
# result 2-D and keeps the column names, which avoids warnings at prediction time.
test_sample = X_test.iloc[[sample_index], :]

In [66]:
# Predict the class-probability distribution for the single sample. The result
# is consumed below as a table with one row (the sample) and one column per
# entry of model.classes_.
proba = model.predict_proba(test_sample)

In [68]:
# Class labels known to the fitted model; used below as the column names of
# the probability table.
classes = model.classes_

In [70]:
# Tabulate the probabilities: a single row for the sample, one column per class label.
result_df = pd.DataFrame(proba, columns=classes)

In [72]:
# Transpose so that every class becomes a row, rank the rows by the sample's
# probability (column 0) from highest to lowest, and print the five most likely classes.
top_predictions = result_df.transpose().sort_values(by=0, ascending=False)
print(top_predictions.head(5))

                 0
japanese  0.837174
thai      0.120900
chinese   0.032310
korean    0.008548
indian    0.001067


In [74]:
# Evaluate on the whole test set: per-class precision, recall, F1 and support.
y_pred = model.predict(X_test)
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

     chinese       0.78      0.72      0.75       100
      indian       0.89      0.95      0.92       112
    japanese       0.66      0.67      0.67        58
      korean       0.85      0.86      0.86       162
        thai       0.88      0.86      0.87        58

    accuracy                           0.83       490
   macro avg       0.81      0.81      0.81       490
weighted avg       0.83      0.83      0.83       490

