# Build Classification Model

In [2]:
import pandas as pd
# Read the cleaned cuisines table from disk and preview its first rows.
cuisines_df = pd.read_csv(filepath_or_buffer="../data/cleaned_cuisines.csv")
cuisines_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,cuisine,soy_sauce,cayenne,scallion,vegetable_oil,onion,sesame_oil,black_pepper,vinegar,...,kumquat,raw_beef,red_algae,chervil,sauerkraut,chayote,champagne_wine,catfish,brussels_sprout,liver
0,0,indian,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,indian,0,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,indian,0,1,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Target labels: the 'cuisine' column, one cuisine name per row.
cuisines_label_df = cuisines_df.loc[:, 'cuisine']
cuisines_label_df.head(n=5)

0    indian
1    indian
2    indian
3    indian
4    indian
Name: cuisine, dtype: object

In [4]:
# Build the feature matrix: every column except the label and any leftover
# index columns.
# The preview of cuisines_df shows two "Unnamed: N" columns ('Unnamed: 0.1' and
# 'Unnamed: 0') whose values look like saved row counters, not ingredients.
# Dropping only 'Unnamed: 0' leaves the other one in the features, where it
# acts as a spurious (and potentially label-leaking) input. Drop every such
# column by prefix so the result is correct however many of them the CSV has.
index_artifact_columns = [
    column for column in cuisines_df.columns if str(column).startswith('Unnamed:')
]
cuisines_feature_df = cuisines_df.drop(columns=index_artifact_columns + ['cuisine'])
cuisines_feature_df.head()

Unnamed: 0,soy_sauce,cayenne,scallion,vegetable_oil,onion,sesame_oil,black_pepper,vinegar,cumin,fish,...,kumquat,raw_beef,red_algae,chervil,sauerkraut,chayote,champagne_wine,catfish,brussels_sprout,liver
0,0,1,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,1,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [38]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, confusion_matrix, classification_report, precision_recall_curve
import numpy as np

In [8]:
# Hold out 30% of the rows for evaluation. A fixed random_state makes the
# shuffle, and therefore every score computed from this split, repeatable
# across kernel restarts (0 matches the seed already used for the SVC model).
x_train, x_test, y_train, y_test = train_test_split(
    cuisines_feature_df, cuisines_label_df, test_size=0.3, random_state=0
)

In [44]:
# Regularization strength for the two kernel SVC models. The same value is
# also reused as the neighbour count for KNN; that choice is kept as-is but is
# now passed by keyword so the reuse is visible at the call site.
C = 10

# Single seed for every estimator with a stochastic fit, so that re-running the
# notebook reproduces the same models and scores.
RANDOM_STATE = 0

# Candidate classifiers, keyed by the display name printed during evaluation.
classifiers = {
    'Native Linear SVC': LinearSVC(max_iter=6000, random_state=RANDOM_STATE),
    'Linear SVC': SVC(kernel='linear', C=C, probability=True, random_state=RANDOM_STATE),
    'SVC': SVC(C=C),
    'KNN classifier': KNeighborsClassifier(n_neighbors=C),
    'RFST': RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE),
    'ADA': AdaBoostClassifier(n_estimators=100, random_state=RANDOM_STATE),
    'Gradient Booster classifier': GradientBoostingClassifier(random_state=RANDOM_STATE),
    'Extra Tree Classifier': ExtraTreesClassifier(random_state=RANDOM_STATE)
}

In [45]:
# Fit each candidate on the training split, predict the held-out rows, and
# print the model name, its accuracy, and the per-class report (one per line).
for name, classifier in classifiers.items():
    fitted = classifier.fit(x_train, y_train)
    pred = fitted.predict(x_test)

    for line in (name, accuracy_score(y_test, pred)):
        print(line)
    print(classification_report(y_test, pred))



Native Linear SVC
0.79232693911593
              precision    recall  f1-score   support

     chinese       0.73      0.69      0.71       246
      indian       0.89      0.89      0.89       247
    japanese       0.80      0.71      0.75       241
      korean       0.81      0.81      0.81       216
        thai       0.74      0.86      0.80       249

    accuracy                           0.79      1199
   macro avg       0.79      0.79      0.79      1199
weighted avg       0.79      0.79      0.79      1199

Linear SVC
0.7748123436196831
              precision    recall  f1-score   support

     chinese       0.67      0.71      0.69       246
      indian       0.88      0.82      0.85       247
    japanese       0.79      0.71      0.75       241
      korean       0.84      0.77      0.80       216
        thai       0.73      0.86      0.79       249

    accuracy                           0.77      1199
   macro avg       0.78      0.77      0.78      1199
weighted avg