In [4]:
#!/usr/bin/env python
# 2020/01/22
# Sentiment analysis based on
# https://towardsdatascience.com/a-beginners-guide-on-sentiment-analysis-with-rnn-9e100627c02e
# Stanislaw Grams <sjg@fmdx.pl>
# 08-text_mining/02-sentiment_analysis.ipynb
from keras.datasets      import imdb
from keras.preprocessing import sequence
from keras               import Sequential
from keras.layers        import Embedding, LSTM, Dense, Dropout

## Load the IMDB reviews dataset, capping the vocabulary at VOCABULARY_SIZE words
VOCABULARY_SIZE = 5000
(X_train, Y_train), (X_test, Y_test) = imdb.load_data (num_words=VOCABULARY_SIZE)

## Report how many reviews ended up in each split
n_train_samples = len (X_train)
n_test_samples  = len (X_test)
print ('Loaded dataset with {} training samples, {} test samples'.format (n_train_samples, n_test_samples))

## All reviews ought to be the same length: bring every sequence in both
## splits to MAX_WORDS entries with the same pad_sequences call
MAX_WORDS = 500
X_train, X_test = (
    sequence.pad_sequences (split, maxlen=MAX_WORDS)
    for split in (X_train, X_test)
)

## Sequential NN model for sentiment analysis:
##   Embedding (VOCABULARY_SIZE -> EMBEDDING_SIZE) -> LSTM -> Dense (1, sigmoid)
EMBEDDING_SIZE = 32
LSTM_UNITS     = 100
model = Sequential ([
    Embedding (VOCABULARY_SIZE, EMBEDDING_SIZE, input_length=MAX_WORDS),
    LSTM (LSTM_UNITS),
    Dense (1, activation='sigmoid'),
])

## Print the layer-by-layer overview (output shapes and parameter counts)
model.summary ()

## Training hyper-parameters
BATCH_SIZE = 64
NUM_EPOCHS = 3
## Number of leading training reviews held out for validation. This used to be
## sliced with BATCH_SIZE, so tuning the batch size silently resized the
## validation and training splits. It is kept at 64 to reproduce the original
## split.
## NOTE: 64 samples is a very small validation set, so the per-epoch validation
## metrics are likely noisy; raise VALIDATION_SIZE for a more reliable signal.
VALIDATION_SIZE = 64

## Compile model: binary cross-entropy loss, Adam optimizer, accuracy metric
model.compile (loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

## Hold out the first VALIDATION_SIZE training reviews; train on the remainder
X_valid, Y_valid   = X_train[:VALIDATION_SIZE], Y_train[:VALIDATION_SIZE]
X_train2, Y_train2 = X_train[VALIDATION_SIZE:], Y_train[VALIDATION_SIZE:]

## Fit on the reduced training set, monitoring the held-out slice after each epoch
model.fit (X_train2, Y_train2, validation_data=(X_valid, Y_valid), batch_size=BATCH_SIZE, epochs=NUM_EPOCHS)

## Evaluate on the test split; scores[1] is the 'accuracy' metric requested in
## compile () (scores[0] is the loss)
scores = model.evaluate (X_test, Y_test, verbose=1)
print ('Test accuracy:', scores[1])

Loaded dataset with 25000 training samples, 25000 test samples
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
lstm_3 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 101       
Total params: 213,301
Trainable params: 213,301
Non-trainable params: 0
_________________________________________________________________




Train on 24936 samples, validate on 64 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Test accuracy: 0.8508800268173218
