In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report

from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

In [21]:
df = pd.read_csv('cleaned_cuisines.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,almond,angelica,anise,anise_seed,apple,apple_brandy,apricot,armagnac,artemisia,...,white_bread,white_wine,whole_grain_wheat_flour,wine,wood,yam,yeast,yogurt,zucchini,cuisine
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,indian
1,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,indian
2,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,indian
3,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,indian
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,indian


In [6]:
X = df.drop(['Unnamed: 0', 'cuisine'], axis=1)
y = df['cuisine']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((2796, 380), (1199, 380), (2796,), (1199,))

In [22]:
model = SVC(kernel='linear', C=10, probability=True,random_state=0)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print('Accuracy: ', accuracy_score(y_test, y_pred))
print('Precision: ', precision_score(y_test, y_pred, average='weighted'))
print('Classification Report: \n', classification_report(y_test, y_pred))

Accuracy:  0.7881567973311092
Precision:  0.7901489365762009
Classification Report: 
               precision    recall  f1-score   support

     chinese       0.68      0.69      0.68       236
      indian       0.90      0.88      0.89       245
    japanese       0.76      0.76      0.76       231
      korean       0.84      0.76      0.79       242
        thai       0.76      0.85      0.80       245

    accuracy                           0.79      1199
   macro avg       0.79      0.79      0.79      1199
weighted avg       0.79      0.79      0.79      1199



<h3>Convert Model to Onnx </h3>

Make sure to do conversion with the proper tensor number. This dataset as 380 ingredients listed, so we need to notate that number in `FloatTensorType`

Convert using a tensor number of 380.

In [23]:
initial_type = [('float_input', FloatTensorType([None, 380]))]
options = {id(model): {'nocl' : True, 'zipmap': False}}

Create the onx and store as a file model.onnx

In [25]:
onx = convert_sklearn(model, initial_types=initial_type, options=options)
with open("./model.onnx", "wb") as f:
    f.write(onx.SerializeToString())