In [None]:
import json
import numpy as np
import tensorflow as tf

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [None]:
# Load the training recipes. The encoding is passed explicitly so the read
# does not depend on the platform's locale default (JSON text is UTF-8).
with open("../input/whats-cooking-kernels-only/train.json", "r", encoding="utf-8") as f:
    datastore = json.load(f)
# Peek at the first record to see its structure.
print(datastore[0])

In [None]:
# Split every record into two parallel lists: the model inputs (the value
# under 'ingredients') and the targets (the value under 'cuisine').
ingredients = [recipe['ingredients'] for recipe in datastore]
cuisine = [recipe['cuisine'] for recipe in datastore]

# Sanity check: both lists should have one entry per record.
print(len(ingredients))
print(len(cuisine))

# Spot-check one arbitrary record against the extracted values.
sample_idx = 101
print(ingredients[sample_idx])
print(cuisine[sample_idx])
print(datastore[sample_idx])

In [None]:
# Tokens not seen while fitting are mapped to this placeholder.
oov_token = "<OOV>"
tokenizer = Tokenizer(oov_token=oov_token)
tokenizer.fit_on_texts(ingredients)

# Vocabulary size for the embedding layer: one slot per indexed word plus
# one extra for index 0, which is the value pad_sequences fills with.
total_words = 1 + len(tokenizer.word_index)

# Convert every recipe to integer ids, then left-pad all of them to the
# length of the longest recipe.
sequences = tokenizer.texts_to_sequences(ingredients)
max_seq_len = max(len(seq) for seq in sequences)
padded = pad_sequences(sequences, maxlen=max_seq_len, padding='pre')

print(max_seq_len)
print(padded[:2])

In [None]:
from sklearn.preprocessing import LabelEncoder

# Learn the cuisine-name -> integer-id mapping, then encode the targets.
# The fitted encoder is kept so predictions can be decoded back to names.
label_encoder = LabelEncoder()
label_encoder.fit(cuisine)
labels = label_encoder.transform(cuisine)
labels

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the padded sequences (and their labels) for validation.
# The fixed random_state makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    padded,
    labels,
    test_size=0.2,
    random_state=42,
)

# Report the size of each split, in the order x_train, x_test, y_train, y_test.
for split in (x_train, x_test, y_train, y_test):
    print(len(split))

In [None]:
# Drop any Keras state left over from a previous run of this cell so that
# re-running it builds a fresh model.
tf.keras.backend.clear_session()

# One softmax unit per class known to the fitted label encoder (a plain int,
# equivalent to labels.max() + 1).
num_classes = len(label_encoder.classes_)

# Embedding -> bidirectional LSTM -> small dense head -> class probabilities.
model = Sequential([
    Embedding(total_words, 64, input_length=max_seq_len),
    Bidirectional(LSTM(20)),
    Dense(30, activation='relu'),
    Dense(num_classes, activation='softmax')
])

# Sparse cross-entropy because the targets are integer class ids, not one-hot.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None; wrapping it in print()
# would add a stray "None" line to the output.
model.summary()

In [None]:
# Stop once the monitored validation metric has not improved for 5 epochs.
# restore_best_weights=True rolls the in-memory model back to its best epoch;
# without it `model` keeps the last-epoch weights (up to `patience` epochs
# past the best one), and that in-memory model is what later cells use.
es = EarlyStopping(patience=5, restore_best_weights=True)

# Also persist the best model seen so far to disk.
cp = ModelCheckpoint("cooking_model.h5", save_best_only=True)

# Train for at most 30 epochs, validating on the held-out split each epoch.
history = model.fit(
    x_train,
    y_train,
    epochs=30,
    validation_data=(x_test, y_test),
    callbacks=[es, cp],
)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch accuracy curves recorded by model.fit().
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

# Draw training and validation accuracy on the same axes for comparison.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(acc, label='acc')
ax.plot(val_acc, label='val_acc')
ax.legend()
plt.show()

In [None]:
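# Optional: uncomment to reload the model saved to "cooking_model.h5"
# instead of using the in-memory `model`.
# from tensorflow.keras import models
# model = models.load_model("./cooking_model.h5")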

In [None]:
import pandas as pd

# Read the competition test set into a DataFrame and display it.
df = pd.read_json(path_or_buf="../input/whats-cooking-kernels-only/test.json")
df

In [None]:
# Push the test recipes through the same tokenizer and padding settings used
# for the training data, then score them with the model.
ingre = df['ingredients'].tolist()
pre_seq = tokenizer.texts_to_sequences(ingre)
pre_pad = pad_sequences(
    pre_seq,
    maxlen=max_seq_len,
    padding='pre',
)
prediction = model.predict(pre_pad)

In [None]:
# Take the highest-scoring class per row and map the integer ids back to
# cuisine names with the encoder fitted on the training labels.
predicted_ids = prediction.argmax(axis=1)
df['cuisine'] = label_encoder.inverse_transform(predicted_ids)
df

In [None]:
# Write only the id and predicted cuisine columns, without the index column.
submission_columns = ['id', 'cuisine']
df[submission_columns].to_csv('submission.csv', index=False)