# Build a cuisine recommender

In [8]:
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Read the cleaned cuisines table from disk and preview its first five rows.
data = pd.read_csv(filepath_or_buffer='../data/cleaned_cuisines.csv')
data.head(5)

Unnamed: 0.1,Unnamed: 0,cuisine,soy_sauce,cayenne,scallion,vegetable_oil,onion,sesame_oil,black_pepper,vinegar,...,kumquat,raw_beef,red_algae,chervil,sauerkraut,chayote,champagne_wine,catfish,brussels_sprout,liver
0,0,indian,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,indian,0,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,indian,0,1,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [40]:
# Target: the cuisine name of each row.
cuisines_label_df = data['cuisine']
# Features: every remaining column once the leftover 'Unnamed: 0' column and the label are removed.
cuisines_feature_df = data.drop(['Unnamed: 0', 'cuisine'], axis=1)

# Ingredient columns whose positional index in the feature matrix we want to look up.
pick_ingredients = { 'apple', 'pear', 'cherry', 'fenugreek', 'sake', 'soy_sauce', 'cumin', 'sesame_oil' }

print(cuisines_feature_df.columns)
# (position, column name) pairs for the selected ingredients.
print([(position, name) for position, name in enumerate(cuisines_feature_df.columns) if name in pick_ingredients])

# Hold out 30% of the rows for evaluation. The seed is fixed so that the split,
# and therefore the reported metrics and the exported model, are reproducible
# across Restart & Run All.
x_train, x_test, y_train, y_test = train_test_split(
    cuisines_feature_df,
    cuisines_label_df,
    test_size=0.3,
    random_state=0,
)

Index(['soy_sauce', 'cayenne', 'scallion', 'vegetable_oil', 'onion',
       'sesame_oil', 'black_pepper', 'vinegar', 'cumin', 'fish',
       ...
       'kumquat', 'raw_beef', 'red_algae', 'chervil', 'sauerkraut', 'chayote',
       'champagne_wine', 'catfish', 'brussels_sprout', 'liver'],
      dtype='object', length=281)
[(0, 'soy_sauce'), (5, 'sesame_oil'), (8, 'cumin'), (24, 'sake'), (27, 'fenugreek'), (78, 'pear'), (95, 'apple'), (214, 'cherry')]


In [41]:
# Fit a linear-kernel support vector classifier on the training split.
# NOTE(review): per the scikit-learn docs, random_state only matters for SVC when
# probability=True, so it is presumably inert here; confirm whether probability
# estimates were intended.
model = SVC(C=10, kernel='linear', random_state=0)
train_labels = y_train.values.ravel()
model.fit(x_train, train_labels)

# Score the held-out split: overall accuracy first, then the per-class report.
y_pred = model.predict(x_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(test_accuracy)
per_class_report = classification_report(y_test, y_pred)
print(per_class_report)

0.7973311092577148
              precision    recall  f1-score   support

     chinese       0.69      0.76      0.72       238
      indian       0.87      0.92      0.89       233
    japanese       0.76      0.76      0.76       224
      korean       0.84      0.73      0.78       230
        thai       0.83      0.81      0.82       274

    accuracy                           0.80      1199
   macro avg       0.80      0.80      0.80      1199
weighted avg       0.80      0.80      0.80      1199



In [42]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# ONNX input signature: one float tensor named 'float_input' with a free batch
# dimension and one column per training feature.
n_features = x_train.shape[1]
float_input_spec = FloatTensorType([None, n_features])
initial_type = [('float_input', float_input_spec)]

# Converter options are keyed by the identity of the model object they apply to.
options = {id(model): dict(nocl=True, zipmap=False)}

In [43]:
# Convert the fitted classifier to ONNX using the input signature and converter
# options prepared in the previous cell.
onx = convert_sklearn(model, initial_types=initial_type, options=options)

# Create the destination folder first: opening the file for writing raises
# FileNotFoundError when ./app does not exist yet (e.g. on a fresh checkout).
onnx_path = Path("./app/model.onnx")
onnx_path.parent.mkdir(parents=True, exist_ok=True)
with onnx_path.open("wb") as f:
    f.write(onx.SerializeToString())