In [None]:
# Turn off the Jedi-backed completer for this IPython session.
%config Completer.use_jedi = False

# Load IMDB dataset

In [None]:
import warnings
import numpy as np
from tensorflow.keras.datasets import imdb

# Install a blanket 'ignore' filter: no category or module is specified, so
# it applies to all warnings raised from here on.
warnings.filterwarnings('ignore')

In [None]:
# Load the IMDB train/test splits. `skip_top` and `num_words` restrict which
# word indices are kept (see the Keras `imdb.load_data` documentation).
(x_train_all, y_train_all), (x_test, y_test) = imdb.load_data(
    num_words=100,
    skip_top=20,
)

# Remove the reserved token ids 0, 1 and 2 (padding, start-of-sequence, out-of-vocabulary)

In [None]:
# Rewrite every training review in place, keeping only token ids above 2.
for idx, review in enumerate(x_train_all):
    x_train_all[idx] = [token for token in review if token > 2]

# Build word/index lookup tables

In [None]:
# Vocabulary lookup provided by Keras (word -> index) and its inverse
# (index -> word), built by swapping each key/value pair.
# NOTE(review): `imdb.load_data` shifts word indices by `index_from`
# (default 3) — confirm the offset before decoding reviews with `index2word`.
word2index = imdb.get_word_index()
index2word = {index: word for word, index in word2index.items()}

# Shuffle and split dataset

In [None]:
# Fix the NumPy seed so the shuffle, and therefore the split, is reproducible.
np.random.seed(42)

# Derive the sizes from the data instead of hard-coding 25000 / 20000, so the
# split stays valid if the number of loaded samples ever differs.
assert len(x_train_all) == len(y_train_all), "features and labels must align"
n_samples = len(x_train_all)
n_train = n_samples * 4 // 5  # 80% train / 20% validation (20000 of 25000)
random_index = np.random.permutation(n_samples)

train_index = random_index[:n_train]
val_index = random_index[n_train:]

# Index features and labels with the same shuffled indices to keep them paired.
x_train = x_train_all[train_index]
y_train = y_train_all[train_index]
x_val = x_train_all[val_index]
y_val = y_train_all[val_index]

# Pad sequences and one-hot encode

In [None]:
from tensorflow.keras.preprocessing import sequence

In [None]:
# Bring both splits to sequences of length `maxlen` via `pad_sequences`.
maxlen = 100
x_train, x_val = (
    sequence.pad_sequences(split, maxlen=maxlen) for split in (x_train, x_val)
)

In [None]:
from tensorflow.keras.utils import to_categorical

In [None]:
# One-hot encode the padded token ids. `num_classes` is pinned to 100 (the
# `num_words` used when loading) so both splits always get a last dimension of
# 100, matching the model's input_shape=(100, 100), instead of having it
# inferred from the largest id that happens to occur in each split.
x_train_onehot = to_categorical(x_train, num_classes=100)
# The name `x_val_onebot` (sic) is kept because later cells reference it.
x_val_onebot = to_categorical(x_val, num_classes=100)

# Build model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN

In [None]:
# Create an empty Sequential model and bind it to `model`.
model = Sequential()

In [None]:
# Build the whole network in this cell so that re-running it starts from a
# fresh model instead of stacking duplicate layers onto an existing `model`.
model = Sequential([
    # Recurrent layer with 32 units over inputs declared as shape (100, 100).
    SimpleRNN(32, input_shape=(100, 100)),
    # Single sigmoid output unit for the binary sentiment label.
    Dense(1, activation='sigmoid'),
])
model.summary()

# Train the model

In [None]:
# Configure training: SGD optimizer, binary cross-entropy loss, accuracy metric.
model.compile(
    optimizer='sgd',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# Fit on the one-hot training data for 20 epochs with batches of 32, passing
# the one-hot validation split as `validation_data`.
history = model.fit(
    x_train_onehot,
    y_train,
    batch_size=32,
    epochs=20,
    validation_data=(x_val_onebot, y_val),
)

# Visualize the results

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Training loss vs. validation loss, one point per epoch.
for metric_name in ('loss', 'val_loss'):
    plt.plot(history.history[metric_name])
plt.title('Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['train', 'val'])
plt.grid()
plt.show()

In [None]:
# Training accuracy vs. validation accuracy, one point per epoch.
for metric_name in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric_name])
plt.title('Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['train', 'val'])
plt.grid()
plt.show()

# Evaluate the model

In [None]:
# Score the trained model on the one-hot validation split and unpack the
# returned pair into loss and accuracy.
loss, accuracy = model.evaluate(x=x_val_onebot, y=y_val, verbose=2)

# Build a new model with an embedding layer

In [None]:
from tensorflow.keras.layers import Embedding, BatchNormalization, Dropout

In [None]:
# Reload the IMDB splits, this time with `num_words=1000` (still skipping the
# top 20 indices), replacing the arrays loaded earlier.
(x_train_all, y_train_all), (x_test, y_test) = imdb.load_data(
    skip_top=20,
    num_words=1000,
)

In [None]:
# Rewrite every reloaded training review in place, keeping only ids above 2.
for position, review in enumerate(x_train_all):
    x_train_all[position] = [word_id for word_id in review if word_id > 2]

In [None]:
# Re-seed here so this cell yields the same split no matter how many random
# draws happened earlier in the session (e.g. when the cell is re-run alone).
np.random.seed(42)

# Derive the sizes from the data instead of hard-coding 25000 / 20000, so the
# split stays valid if the number of loaded samples ever differs.
assert len(x_train_all) == len(y_train_all), "features and labels must align"
n_samples = len(x_train_all)
n_train = n_samples * 4 // 5  # 80% train / 20% validation (20000 of 25000)
random_index = np.random.permutation(n_samples)

train_index = random_index[:n_train]
val_index = random_index[n_train:]

# Index features and labels with the same shuffled indices to keep them paired.
x_train = x_train_all[train_index]
y_train = y_train_all[train_index]
x_val = x_train_all[val_index]
y_val = y_train_all[val_index]

In [None]:
# Bring both splits to sequences of length `maxlen` via `pad_sequences`,
# storing the results under new `*_seq` names.
maxlen = 100
x_train_seq, x_val_seq = (
    sequence.pad_sequences(split, maxlen=maxlen) for split in (x_train, x_val)
)

In [None]:
# Rebind `model` to a new, empty Sequential model (replacing the earlier one).
model = Sequential()

In [None]:
# Build the whole network in this cell so that re-running it starts from a
# fresh model instead of stacking duplicate layers onto an existing `model`.
model = Sequential([
    # Embedding sized for the 1000-word vocabulary loaded above, 32 dims per token.
    Embedding(1000, 32),
    # Recurrent layer with 8 units.
    SimpleRNN(8),
    BatchNormalization(),
    # Dropout with rate 0.5 ahead of the output layer.
    Dropout(0.5),
    # Single sigmoid output unit for the binary sentiment label.
    Dense(1, activation='sigmoid'),
])
model.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# Fit on the padded integer sequences for 20 epochs with batches of 32,
# passing the padded validation split as `validation_data`.
history = model.fit(
    x_train_seq,
    y_train,
    epochs=20,
    batch_size=32,
    validation_data=(x_val_seq, y_val),
    verbose=2,
)

# Visualize the results

In [None]:
# Training loss vs. validation loss per epoch for the embedding model.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.xlabel('Epoch')
plt.ylabel('Loss')
# Title added so the figure stands on its own, as the first loss plot does.
plt.title('Loss')
plt.legend(['train', 'val'])
plt.grid()
plt.show()

In [None]:
# Training accuracy vs. validation accuracy per epoch for the embedding model.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
# Title added so the figure stands on its own, as the first accuracy plot does.
plt.title('Accuracy')
plt.legend(['train', 'val'])
plt.grid()
plt.show()

# Evaluate the model

In [None]:
# Score the trained embedding model on the padded validation split and unpack
# the returned pair into loss and accuracy.
loss, accuracy = model.evaluate(x=x_val_seq, y=y_val, verbose=2)