In [0]:
# Initialisation

In [0]:
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, CuDNNGRU
from keras.callbacks import EarlyStopping
from keras.datasets import imdb

In [0]:
# Constants and helpers

In [0]:
num_words = 3000

In [0]:
# Load and massage data

In [34]:
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words, skip_top=5)  # downloads ~20mb of data

print(len(x_train), x_train[0], y_train[0])

25000 [2, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 2, 66, 2, 2, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 2, 172, 112, 167, 2, 336, 385, 39, 2, 172, 2, 1111, 17, 546, 38, 13, 447, 2, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 2, 1920, 2, 469, 2, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 2, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 2, 2223, 2, 16, 480, 66, 2, 33, 2, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 2, 107, 117, 2, 15, 256, 2, 2, 7, 2, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 2, 2, 1029, 13, 104, 88, 2, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 2, 18, 2, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 2, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 2, 113, 103, 32, 15, 16, 2, 19, 178, 32] 1


In [35]:
# ensure all reviews are padded to the longest review
max_review_length = max(len(review) for review in x_train + x_test)
x_test = pad_sequences(x_test, maxlen=max_review_length)
x_train = pad_sequences(x_train, maxlen=max_review_length)

print(max_review_length)

2697


In [0]:
# Model definition and training

In [0]:
model = Sequential()

model.add(Embedding(num_words, 32))  # Layer, coverts each token to vector in a 32-dim space
model.add(CuDNNGRU(32))  # GRU layer with specific CUDA optimization, does not support dropout
model.add(Dense(1, activation='sigmoid'))  # Fully connected positive/negative layer

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [36]:
early_stop_callback = EarlyStopping(
    monitor='acc',
    patience=3,
    mode='max'
)

train_history = model.fit(
    x_train,
    y_train,
    batch_size=256,  # aggressive batch size to speed up this test + better estimate gradient
    epochs=10,
    callbacks=[early_stop_callback,]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f16fb2fbf60>

In [0]:
# Scoring and evaluation

In [39]:
score, accuracy = model.evaluate(
    x_test,
    y_test,
    batch_size=256
)
print('test score:', score, ' test accuracy:', accuracy)



In [0]:
epoch_list = list(range(1, len(train_history.history['acc']) + 1))  # values
plt.figure(figsize=(12, 8))
plt.plot(epoch_list, train_history.history['acc'], epoch_list, train_history.history['val_acc'])
plt.legend(('Training Accuracy', 'Validation Accuracy'))

plt.show()