# RNN with Keras

In [32]:
# Suppress TensorFlow's C++ log output. The variable has to be in the
# environment *before* keras is imported: importing keras loads its backend,
# and setting the variable afterwards is too late to silence start-up messages.
# NOTE(review): only relevant when keras runs on the TensorFlow backend.
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import numpy
from keras.datasets import imdb
# Embedding is imported from the public `keras.layers` namespace rather than
# the private `keras.layers.embeddings` module path.
from keras.layers import Dense, Embedding, LSTM
from keras.models import Sequential
from keras.preprocessing import sequence

# fix random seed for reproducibility
numpy.random.seed(7)

In [33]:
# load the IMDB dataset, passing `top_words` as the `num_words` limit
top_words = 5000
train_split, test_split = imdb.load_data(num_words=top_words)
X_train, y_train = train_split
X_test, y_test = test_split

In [34]:
# Inspect the raw training split: overall shape plus the length of two samples.
print('X_train.shape =', X_train.shape)
for idx in (0, 10):
    print('length of X_train[%d] = %d' % (idx, len(X_train[idx])))

# Labels: shape and the first five values.
print('y_train.shape =', y_train.shape)
print('y_train[0] ~ y_train[4] =', y_train[:5])

X_train.shape = (25000,)
length of X_train[0] = 218
length of X_train[10] = 450
y_train.shape = (25000,)
y_train[0] ~ y_train[4] = [1 0 0 1 0]


In [35]:
# truncate and pad both splits to a common sequence length
max_review_length = 500
X_train, X_test = (
    sequence.pad_sequences(reviews, maxlen=max_review_length)
    for reviews in (X_train, X_test)
)

In [36]:
# Re-check the training split after padding: shape and the same two sample lengths.
print('X_train.shape =', X_train.shape)
for idx in (0, 10):
    print('length of X_train[%d] = %d' % (idx, len(X_train[idx])))

X_train.shape = (25000, 500)
length of X_train[0] = 500
length of X_train[10] = 500


In [37]:
# create the model: Embedding -> LSTM(100) -> one sigmoid output unit
# (the variable name below keeps its original spelling, since it is a
# notebook-global name that other cells may reference)
embedding_vecor_length = 32
model = Sequential([
    Embedding(top_words, embedding_vecor_length, input_length=max_review_length),
    LSTM(100),
    Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [38]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
lstm_2 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 101       
Total params: 213,301
Trainable params: 213,301
Non-trainable params: 0
_________________________________________________________________
None


In [39]:
# train for 3 epochs in batches of 64; the test split is passed as the
# validation data for this run
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=3,
    validation_data=(X_test, y_test),
)

Train on 25000 samples, validate on 25000 samples
Epoch 1/3
Epoch 2/3
 5824/25000 [=====>........................] - ETA: 2:45 - loss: 0.2814 - acc: 0.8910

KeyboardInterrupt: 