# Build Classification Model

In [19]:
import pandas as pd
cuisines_df = pd.read_csv("../data/cleaned_cuisines.csv")
cuisines_df.head()

Unnamed: 0.1,Unnamed: 0,cuisine,almond,angelica,anise,anise_seed,apple,apple_brandy,apricot,armagnac,...,whiskey,white_bread,white_wine,whole_grain_wheat_flour,wine,wood,yam,yeast,yogurt,zucchini
0,0,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,indian,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [20]:
cuisines_label_df = cuisines_df['cuisine']
cuisines_label_df.head()

0    indian
1    indian
2    indian
3    indian
4    indian
Name: cuisine, dtype: object

In [21]:
cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)
cuisines_feature_df.head()

Unnamed: 0,almond,angelica,anise,anise_seed,apple,apple_brandy,apricot,armagnac,artemisia,artichoke,...,whiskey,white_bread,white_wine,whole_grain_wheat_flour,wine,wood,yam,yeast,yogurt,zucchini
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [22]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve
import numpy as np

In [23]:
X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)

In [30]:
C = 10
# Create different classifiers.
classifiers = {
    'KNN classifier': KNeighborsClassifier(C),
    'Linear SVC': SVC(kernel='linear', C=C, probability=True,random_state=0),
    'SVC': SVC(),
    'RFST': RandomForestClassifier(n_estimators=100),
    'ADA': AdaBoostClassifier(n_estimators=100)
}

In [31]:
n_classifiers = len(classifiers)

for index, (name, classifier) in enumerate(classifiers.items()):
    classifier.fit(X_train, np.ravel(y_train))

    y_pred = classifier.predict(X_test.values)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy (train) for %s: %0.1f%% " % (name, accuracy * 100))
    print(classification_report(y_test,y_pred))

  break


Accuracy (train) for KNN classifier: 72.6% 
              precision    recall  f1-score   support

     chinese       0.71      0.65      0.68       225
      indian       0.81      0.80      0.81       237
    japanese       0.60      0.83      0.69       236
      korean       0.91      0.58      0.71       242
        thai       0.72      0.76      0.74       259

    accuracy                           0.73      1199
   macro avg       0.75      0.73      0.73      1199
weighted avg       0.75      0.73      0.73      1199



  break


Accuracy (train) for Linear SVC: 78.1% 
              precision    recall  f1-score   support

     chinese       0.67      0.72      0.69       225
      indian       0.86      0.90      0.88       237
    japanese       0.76      0.69      0.73       236
      korean       0.81      0.74      0.78       242
        thai       0.79      0.85      0.82       259

    accuracy                           0.78      1199
   macro avg       0.78      0.78      0.78      1199
weighted avg       0.78      0.78      0.78      1199



  break


Accuracy (train) for SVC: 82.3% 
              precision    recall  f1-score   support

     chinese       0.74      0.73      0.73       225
      indian       0.88      0.95      0.91       237
    japanese       0.81      0.77      0.79       236
      korean       0.87      0.80      0.83       242
        thai       0.81      0.86      0.83       259

    accuracy                           0.82      1199
   macro avg       0.82      0.82      0.82      1199
weighted avg       0.82      0.82      0.82      1199



  break


Accuracy (train) for RFST: 84.2% 
              precision    recall  f1-score   support

     chinese       0.80      0.78      0.79       225
      indian       0.88      0.95      0.91       237
    japanese       0.80      0.80      0.80       236
      korean       0.89      0.83      0.86       242
        thai       0.85      0.86      0.85       259

    accuracy                           0.84      1199
   macro avg       0.84      0.84      0.84      1199
weighted avg       0.84      0.84      0.84      1199



  break


Accuracy (train) for ADA: 68.1% 
              precision    recall  f1-score   support

     chinese       0.59      0.35      0.44       225
      indian       0.81      0.81      0.81       237
    japanese       0.62      0.66      0.64       236
      korean       0.65      0.80      0.72       242
        thai       0.70      0.76      0.73       259

    accuracy                           0.68      1199
   macro avg       0.67      0.68      0.67      1199
weighted avg       0.68      0.68      0.67      1199

