# Build Classification Models

In [1]:
import pandas as pd

# Read the cuisine dataset from the CSV file under ../data into a DataFrame.
cuisine_df = pd.read_csv(filepath_or_buffer='../data/cleaned_cuisines.csv')

# Preview the first five rows to check the column layout.
cuisine_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,cuisine,almond,angelica,anise,anise_seed,apple,apple_brandy,apricot,armagnac,...,whiskey,white_bread,white_wine,whole_grain_wheat_flour,wine,wood,yam,yeast,yogurt,zucchini
0,0,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,indian,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [2]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, classification_report, precision_score,
    precision_recall_curve, confusion_matrix
)
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

In [3]:
# Target labels: the 'cuisine' column of the loaded frame.
# NOTE: despite the `_df` suffix this is a pandas Series, not a DataFrame.
cuisine_label_df = cuisine_df.loc[:, 'cuisine']

# Preview the first five labels.
cuisine_label_df.head(n=5)

0    indian
1    indian
2    indian
3    indian
4    indian
Name: cuisine, dtype: object

In [4]:
# Feature matrix: every column except the label ('cuisine') and
# 'Unnamed: 0' (presumably a leftover row index saved into the CSV — confirm).
non_feature_columns = ['Unnamed: 0', 'cuisine']
cuisine_feature_df = cuisine_df.drop(columns=non_feature_columns)

# Preview the first five feature rows.
cuisine_feature_df.head(n=5)

Unnamed: 0,almond,angelica,anise,anise_seed,apple,apple_brandy,apricot,armagnac,artemisia,artichoke,...,whiskey,white_bread,white_wine,whole_grain_wheat_flour,wine,wood,yam,yeast,yogurt,zucchini
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [5]:
# Hold out 30% of the rows for evaluation and train on the remaining 70%.
# random_state pins the shuffle so the split is identical on every run;
# without it the reported metrics and any positional lookup into X_test
# (e.g. X_test.iloc[50]) change each time the notebook is re-executed.
X_train, X_test, y_train, y_test = train_test_split(
    cuisine_feature_df,
    cuisine_label_df,
    test_size=0.3,
    random_state=42,
)

In [6]:
# One-vs-rest logistic regression: one binary classifier is fitted per cuisine.
# The `multi_class='ovr'` argument of LogisticRegression is deprecated in
# recent scikit-learn releases and scheduled for removal; wrapping the
# estimator in OneVsRestClassifier is the documented way to keep the same
# one-vs-rest scheme without relying on the deprecated parameter.
log_reg = OneVsRestClassifier(LogisticRegression(solver='lbfgs'))

# fit() returns the fitted estimator itself, so `model` and `log_reg` refer to
# the same object. np.ravel flattens the label Series into a 1-D array.
model = log_reg.fit(X_train, np.ravel(y_train))

# score() reports mean accuracy on the held-out test set.
accuracy = model.score(X_test, y_test)

print(f'Accuracy: is {accuracy}')

Accuracy: is 0.7748123436196831


In [7]:
# Inspect one held-out recipe (positional row 50 of the test split):
# list the feature columns with a non-zero value, then its true label.
sample_row = X_test.iloc[50]
present_ingredients = sample_row[sample_row != 0].keys()
print(f'ingredients: {present_ingredients}')

sample_label = y_test.iloc[50]
print(f'cuisine: {sample_label}')

ingredients: Index(['carrot', 'cayenne', 'celery', 'chive', 'coriander', 'cumin', 'lentil',
       'mustard', 'onion', 'tomato', 'turmeric', 'vegetable', 'vegetable_oil'],
      dtype='object')
cuisine: indian


In [8]:
# Predict class probabilities for a single held-out recipe (positional row 50).
# Indexing with a list (`iloc[[50]]`) returns a one-row DataFrame, so the
# column names are kept. The previous `.values.reshape(-1, 1).T` produced a
# bare NumPy array, which makes scikit-learn warn that the input has no
# feature names although the model was fitted on a DataFrame.
test = X_test.iloc[[50]]
proba = model.predict_proba(test)

# Columns of `proba` follow the order of model.classes_.
classes = model.classes_
result_df = pd.DataFrame(data=proba, columns=classes)

# Transpose so each class is a row (the single sample becomes column 0),
# then rank classes from most to least probable.
top_prediction = result_df.T.sort_values(by=[0], ascending=False)
top_prediction.head()



Unnamed: 0,0
indian,0.989186
korean,0.006153
japanese,0.002106
thai,0.001952
chinese,0.000602


In [9]:
# Predict a cuisine for every row of the test split, then print the
# per-class precision / recall / f1 report against the true labels.
y_pred = model.predict(X_test)
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)

              precision    recall  f1-score   support

     chinese       0.72      0.66      0.69       240
      indian       0.92      0.90      0.91       221
    japanese       0.62      0.80      0.70       240
      korean       0.81      0.79      0.80       245
        thai       0.85      0.74      0.79       253

    accuracy                           0.77      1199
   macro avg       0.79      0.78      0.78      1199
weighted avg       0.79      0.77      0.78      1199

