In [47]:
import numpy as np

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import LSTM
from keras.datasets import imdb
from tensorflow.keras.utils import to_categorical
import keras.backend as K

In [41]:
# Vocabulary size passed to `imdb.load_data` as `num_words`.
max_features = 20000
# Intended cap on review length, in words.
# NOTE(review): `maxlen` is reassigned to 256 in the padding cell before it is
# first used, so the value 80 has no effect on a top-to-bottom run.
maxlen = 80

print('Loading data...')
train_split, test_split = imdb.load_data(num_words=max_features, seed=1)
x_train, y_train = train_split
x_test, y_test = test_split
for split, label in ((x_train, 'train sequences'), (x_test, 'test sequences')):
    print(len(split), label)

Loading data...
25000 train sequences
25000 test sequences


In [44]:
# Length of every training review; on a top-to-bottom run this is measured
# before the padding cell rewrites `x_train`.
lens = np.array(list(map(len, x_train)))

mean_review_len = lens.mean()
print("Average number of words in each review:", mean_review_len)

Average number of words in each review: 238.71364


In [52]:
# Turn the integer class labels into one-hot rows with 2 columns, the target
# format expected by the 2-unit softmax / `categorical_crossentropy` model.
print("One-hot encoding of labels")
# This cell overwrites `y_train` / `y_test` in place. Only encode while the
# labels are still a 1-D vector, so that re-running the cell is a no-op:
# feeding already one-hot labels back into `to_categorical` would silently add
# another axis (shape (N, 2, 2)) and break training and evaluation.
if np.ndim(y_train) == 1:
    y_train = to_categorical(y_train, 2)
if np.ndim(y_test) == 1:
    y_test = to_categorical(y_test, 2)
y_train.shape

One-hot encoding of labels


(25000, 2)

In [56]:
# Fixed sequence length requested from `pad_sequences` for both splits.
maxlen = 256

print('Pad sequences (samples x time)')
# Both splits are padded before either name is rebound.
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=maxlen) for split in (x_train, x_test)
)
print('x_train shape:', np.shape(x_train))
print('x_test shape:', np.shape(x_test))

Pad sequences (samples x time)
x_train shape: (25000, 256)
x_test shape: (25000, 256)


In [57]:
# Reset the Keras backend's global state so that the model built in the next
# cell starts from a clean session.
K.clear_session()

In [58]:
print('Build model...')
# Architecture: token embedding -> single LSTM layer -> 2-way softmax.
model = Sequential()
# `input_length` has to equal the padded sequence width, so reuse `maxlen`
# (set in the padding cell) rather than repeating the literal 256; otherwise
# changing the padding length would leave the model expecting the old width.
model.add(Embedding(max_features, 128, input_length=maxlen))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
# Two output units to match the one-hot encoded labels.
model.add(Dense(2, activation='softmax'))

# try using different optimizers and different optimizer configs
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

Build model...
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 256, 128)          2560000   
_________________________________________________________________
lstm_1 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 258       
Total params: 2,691,842
Trainable params: 2,691,842
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Number of samples per gradient update during fitting.
batch_size = 32

print('Train...')
# NOTE(review): the test split is also used as validation data, so the
# per-epoch validation metrics and the final test metrics come from the same
# samples.
fit_options = dict(
    batch_size=batch_size,
    epochs=2,
    validation_data=(x_test, y_test),
)
model.fit(x_train, y_train, **fit_options)

# With a single 'accuracy' metric compiled in, evaluate() yields a
# two-element result: the loss followed by the accuracy.
test_metrics = model.evaluate(x_test, y_test)
score, acc = test_metrics
print('Test score:', score)
print('Test accuracy:', acc)


Train...
Train on 25000 samples, validate on 25000 samples
Epoch 1/2

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [7]:
# Softmax outputs of the model for the first ten test reviews.
first_ten_reviews = x_test[:10]
y_pred = model.predict(first_ten_reviews)
y_pred


array([[0.88624954, 0.11375047],
       [0.03575445, 0.9642455 ],
       [0.49988407, 0.50011593],
       [0.3513052 , 0.64869475],
       [0.00361466, 0.99638534],
       [0.29427862, 0.7057214 ],
       [0.44014853, 0.5598515 ],
       [0.98499334, 0.01500667],
       [0.80127096, 0.1987291 ],
       [0.16342324, 0.83657676]], dtype=float32)

In [9]:
# One-hot ground-truth labels of the first ten test reviews.
y_test[:10]

array([[1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.]], dtype=float32)

In [10]:
# Loss and accuracy restricted to the first ten test samples.
first_ten = slice(0, 10)
model.evaluate(x_test[first_ten], y_test[first_ten])



[0.2168009728193283, 0.8999999761581421]