In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from keras.models import model_from_json
from tensorflow.keras.models import load_model

Using TensorFlow backend.


In [4]:
# Path to the training recipes, relative to the notebook's working directory.
file="data/train.json"
# Parse the JSON file into a DataFrame; the preview below shows the columns
# `cuisine`, `id` and `ingredients` (a list of ingredient names per recipe).
data = pd.read_json(file, orient='columns')
data.head()

Unnamed: 0,cuisine,id,ingredients
0,greek,10259,"[romaine lettuce, black olives, grape tomatoes..."
1,southern_us,25693,"[plain flour, ground pepper, salt, tomatoes, g..."
2,filipino,20130,"[eggs, pepper, salt, mayonaise, cooking oil, g..."
3,indian,22213,"[water, vegetable oil, wheat, salt]"
4,indian,13162,"[black pepper, shallots, cornflour, cayenne pe..."


In [5]:
# Copy the cuisine label of every recipe into a plain Python list, then
# collapse it to the distinct cuisine names (set order is arbitrary).
cuisine_list = data['cuisine']
cuisine_compilation = list(cuisine_list)

cuis_unique = list(set(cuisine_compilation))


In [6]:
# Fit the encoder on the distinct cuisine names (fit() returns the encoder
# itself), then translate every recipe's cuisine into its integer class id.
label_encoder_cuis = LabelEncoder().fit(cuis_unique)
encoded_cuis = label_encoder_cuis.transform(data['cuisine'])

In [7]:
# Expand the integer cuisine ids into one-hot rows.
one_hot_cuis = to_categorical(encoded_cuis)
# Preview the first row: a float32 vector with a single 1 at the class index.
one_hot_cuis[0]

array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.], dtype=float32)

In [8]:
# list() splits the 2-D one-hot array into one row-array per recipe so that
# each DataFrame cell holds that recipe's label vector.
# NOTE: this adds a column to `data` in place.
data["one_hot_cuisine"] = list(one_hot_cuis)

In [21]:
# data.head()

In [10]:
# Keep only the columns used from here on (this drops `id`) and rebind `data`
# to the narrower frame, then preview it.
data = data[["cuisine", "one_hot_cuisine", "ingredients"]]
data.head()

Unnamed: 0,cuisine,one_hot_cuisine,ingredients
0,greek,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...","[romaine lettuce, black olives, grape tomatoes..."
1,southern_us,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[plain flour, ground pepper, salt, tomatoes, g..."
2,filipino,"[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...","[eggs, pepper, salt, mayonaise, cooking oil, g..."
3,indian,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[water, vegetable oil, wheat, salt]"
4,indian,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[black pepper, shallots, cornflour, cayenne pe..."


In [11]:
ingredients = data.loc[:,'ingredients']

# Pass 1 - vocabulary: every distinct ingredient gets the next free column
# index, in order of first appearance. `i_map` is name -> index and `i_list`
# is the inverse lookup (index -> name).
i_map = {}
i_list = []
for recipe in ingredients:
    for ingredient in recipe:
        if ingredient in i_map:
            continue
        i_map[ingredient] = len(i_list)
        i_list.append(ingredient)
counter = len(i_list)  # total number of distinct ingredients seen

# Pass 2 - encoding: one multi-hot row per recipe, as wide as the vocabulary,
# with a 1 in the column of every ingredient the recipe contains.
vocab_size = len(i_map)
ingredients_encodings = []
for recipe in ingredients:
    row = [0] * vocab_size
    for ingredient in recipe:
        row[i_map[ingredient]] = 1
    ingredients_encodings.append(row)


In [12]:
# Attach the multi-hot ingredient vectors as a new column, one Python list per
# recipe. NOTE: this adds a column to `data` in place.
data["one_hot_ingredients"] = ingredients_encodings

In [13]:
# Preview the frame, which now carries both encoded columns
# (`one_hot_cuisine` and `one_hot_ingredients`).
data.head()

Unnamed: 0,cuisine,one_hot_cuisine,ingredients,one_hot_ingredients
0,greek,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...","[romaine lettuce, black olives, grape tomatoes...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, ..."
1,southern_us,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[plain flour, ground pepper, salt, tomatoes, g...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, ..."
2,filipino,"[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...","[eggs, pepper, salt, mayonaise, cooking oil, g...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ..."
3,indian,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[water, vegetable oil, wheat, salt]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ..."
4,indian,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[black pepper, shallots, cornflour, cayenne pe...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ..."


In [14]:
# Pull out the target (cuisine label vectors) and the features (ingredient
# vectors) as Series so they can be split together.
one_hot_cuisine = data['one_hot_cuisine']
one_hot_ingredients = data['one_hot_ingredients']

In [15]:

# Split features and labels into train/test partitions with the same row
# assignment for both; random_state=1 pins the shuffle so the split is
# repeatable across runs. No test_size is given, so the library default applies.
ing_train, ing_test,cuis_train, cuis_test = train_test_split(one_hot_ingredients, one_hot_cuisine, random_state=1)

In [16]:
def _column_to_matrix(column):
    """Convert a Series whose cells each hold one vector into a single NumPy array.

    The cells are expected to be equal-length vectors, so the result is a
    (rows, vector_length) matrix that Keras can consume directly.
    """
    return np.array(column.tolist())


cuis_train, cuis_test = _column_to_matrix(cuis_train), _column_to_matrix(cuis_test)
ing_train, ing_test = _column_to_matrix(ing_train), _column_to_matrix(ing_test)

In [17]:
# Fully-connected classifier: multi-hot ingredient vector in, cuisine
# probabilities (softmax) out, with three small ReLU hidden layers in between.
#
# The input and output widths are read from the training arrays rather than
# hardcoded (previously input_dim=6714 and a 20-unit softmax), so the model
# keeps matching the data if the ingredient vocabulary or the set of cuisines
# changes. For the current dataset the resulting architecture is identical.
n_features = ing_train.shape[1]  # width of one multi-hot ingredient vector
n_classes = cuis_train.shape[1]  # width of one one-hot cuisine vector

deep_model = Sequential()
deep_model.add(Dense(units=20, activation='relu', input_dim=n_features))
deep_model.add(Dense(units=15, activation='relu'))
deep_model.add(Dense(units=10, activation='relu'))
deep_model.add(Dense(units=n_classes, activation='softmax'))

Instructions for updating:
Colocations handled automatically by placer.


In [18]:
# Print each layer's output shape and parameter count as a sanity check on the
# architecture before training.
deep_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 20)                134300    
_________________________________________________________________
dense_1 (Dense)              (None, 15)                315       
_________________________________________________________________
dense_2 (Dense)              (None, 10)                160       
_________________________________________________________________
dense_3 (Dense)              (None, 20)                220       
Total params: 134,995
Trainable params: 134,995
Non-trainable params: 0
_________________________________________________________________


In [19]:
# One-hot targets with a softmax output -> categorical cross-entropy loss;
# accuracy is tracked as the reported metric.
deep_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train on the training partition only. verbose=2 logs one summary line per
# epoch. fit() is the last expression, so its History object is the cell output.
deep_model.fit(x=ing_train, y=cuis_train, epochs=20, shuffle=True, verbose=2)

Instructions for updating:
Use tf.cast instead.
Epoch 1/20
 - 4s - loss: 1.6149 - acc: 0.5290
Epoch 2/20
 - 4s - loss: 0.9320 - acc: 0.7282
Epoch 3/20
 - 4s - loss: 0.7511 - acc: 0.7785
Epoch 4/20
 - 5s - loss: 0.6501 - acc: 0.8118
Epoch 5/20
 - 5s - loss: 0.5773 - acc: 0.8326
Epoch 6/20
 - 5s - loss: 0.5196 - acc: 0.8508
Epoch 7/20
 - 6s - loss: 0.4720 - acc: 0.8644
Epoch 8/20
 - 5s - loss: 0.4316 - acc: 0.8755
Epoch 9/20
 - 4s - loss: 0.3958 - acc: 0.8863
Epoch 10/20
 - 5s - loss: 0.3669 - acc: 0.8932
Epoch 11/20
 - 5s - loss: 0.3387 - acc: 0.9016
Epoch 12/20
 - 5s - loss: 0.3138 - acc: 0.9086
Epoch 13/20
 - 5s - loss: 0.2922 - acc: 0.9162
Epoch 14/20
 - 5s - loss: 0.2707 - acc: 0.9231
Epoch 15/20
 - 4s - loss: 0.2528 - acc: 0.9280
Epoch 16/20
 - 4s - loss: 0.2360 - acc: 0.9329
Epoch 17/20
 - 4s - loss: 0.2200 - acc: 0.9374
Epoch 18/20
 - 4s - loss: 0.2070 - acc: 0.9404
Epoch 19/20
 - 4s - loss: 0.1928 - acc: 0.9451
Epoch 20/20
 - 4s - loss: 0.1818 - acc: 0.9489


<tensorflow.python.keras.callbacks.History at 0x1ee002a6320>

In [20]:
# Score the trained network on the held-out partition; evaluate() returns the
# loss followed by the compiled metrics (here: accuracy).
test_metrics = deep_model.evaluate(x=ing_test, y=cuis_test, verbose=2)
deep_model_loss, deep_model_accuracy = test_metrics
print(
    f"Deep Neural Network - Loss: {deep_model_loss}, Accuracy: {deep_model_accuracy}")

 - 1s - loss: 1.7483 - acc: 0.7080
Deep Neural Network - Loss: 1.7483013000396213, Accuracy: 0.7079645991325378


In [None]:
## Save the model
# deep_model.save("cuisine_deep_model_trained.h5")

In [None]:
from xgboost import XGBClassifier
from sklearn.feature_extraction import DictVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score

# Inputs for the XGBoost pipeline. DictVectorizer consumes one
# {feature_name: value} dict per sample, so each recipe becomes
# {ingredient: 1, ...}; the targets are the integer cuisine ids.
#
# NOTE(review): all_xs / all_ys / train_xs / train_ys were used below but not
# defined in any earlier cell, so this cell raised NameError on a fresh kernel.
# The definitions here are a reconstruction (same random_state as the neural
# network split) -- confirm they match the intended experiment.
all_xs = [dict.fromkeys(recipe, 1) for recipe in data['ingredients']]
all_ys = encoded_cuis
train_xs, test_xs, train_ys, test_ys = train_test_split(
    all_xs, all_ys, random_state=1)

clf = XGBClassifier()
vec = DictVectorizer()
pipeline = make_pipeline(vec, clf)

def evaluate(_clf):
    """Cross-validate an estimator, print its accuracy, then fit it.

    Runs 10-fold cross-validation of `_clf` on the module-level `all_xs` /
    `all_ys`, prints the mean accuracy with a +/- 2 standard deviation band,
    and finally fits `_clf` on the module-level `train_xs` / `train_ys`.

    Args:
        _clf: estimator or pipeline accepted by `cross_val_score`; it is
            fitted in place as a side effect.

    Returns:
        None. The score is printed, not returned.
    """
    scores = cross_val_score(_clf, all_xs, all_ys, scoring='accuracy', cv=10)
    print('Accuracy: {:.3f} ± {:.3f}'.format(np.mean(scores), 2 * np.std(scores)))
    _clf.fit(train_xs, train_ys)  # so that parts of the original pipeline are fitted

evaluate(pipeline)