In [1]:
# https://github.com/jskDr/keraspp
# nb_ex5_1_lstm_imdb_cl.ipynb

# IMDB dataset : http://ai.stanford.edu/~amaas/data/sentiment/

"""imdb is a dataset in which the words in the review sentence 
are indexed by overall frequency rank (most frequent first);
x is each review encoded as a sequence of those integer word indices,
and y is the sentiment label of the review,
recorded as 1 (positive) or 0 (negative).
Our goal is to predict y from x.
"""
#-*- coding: utf-8 -*-
 
import numpy as np
import keras
from keras import layers, models, datasets
from keras import backend as K
from keras.utils import np_utils
from keras.preprocessing import sequence
import matplotlib.pyplot as plt
from sklearn import model_selection, metrics
from sklearn.preprocessing import MinMaxScaler
import os

"""We call the imdb dataset with datasets.imdb.load_data(). 
When num_words=max_features is passed, only the max_features most
frequent words are kept (words are ranked by how often they occur);
every rarer word is replaced with the oov_char value.

preprocessing.sequence.pad_sequences pads or truncates every sequence to the same length (maxlen).
"""
class Data:
    """Container for the IMDB train/test splits used by the model.

    Parameters
    ----------
    max_features : int
        Forwarded to ``datasets.imdb.load_data`` as ``num_words``
        (vocabulary size limit).
    maxlen : int
        Forwarded to ``sequence.pad_sequences`` so that every review in
        ``x_train`` / ``x_test`` ends up with the same length.

    Attributes
    ----------
    x_train, x_test
        Review sequences after ``pad_sequences``.
    y_train, y_test
        Labels exactly as returned by ``load_data``.
    """

    def __init__(self, max_features=20000, maxlen=80):
        train_split, test_split = datasets.imdb.load_data(
            num_words=max_features)

        # Only the inputs need length normalisation; labels pass through.
        self.x_train = sequence.pad_sequences(train_split[0], maxlen=maxlen)
        self.y_train = train_split[1]
        self.x_test = sequence.pad_sequences(test_split[0], maxlen=maxlen)
        self.y_test = test_split[1]
 
class RNN_LSTM(models.Model):
    """Embedding -> LSTM -> sigmoid Dense classifier, compiled on creation.

    The layer graph is wired up with the functional API inside ``__init__``
    and then passed to ``models.Model.__init__`` as (inputs, outputs).

    Parameters
    ----------
    max_features : int
        Vocabulary size given to the Embedding layer (word indices
        0 .. max_features - 1).
    maxlen : int
        Length of each input sequence.
    """

    def __init__(self, max_features, maxlen):
        # One input sample is a vector of `maxlen` word indices.
        tokens = layers.Input(shape=(maxlen,))
        # Each word index is mapped to a 128-dimensional embedding vector.
        embedded = layers.Embedding(
            input_dim=max_features, output_dim=128)(tokens)
        encoded = layers.LSTM(
            128, dropout=0.2, recurrent_dropout=0.2)(embedded)
        # Single sigmoid unit: one output value per review.
        prediction = layers.Dense(1, activation='sigmoid')(encoded)

        # The graph must exist before the base Model can be initialised.
        super().__init__(tokens, prediction)

        self.compile(
            optimizer='adam',
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )
 
class Machine:
    """Pairs a ``Data`` instance with an ``RNN_LSTM`` model and runs them.

    Parameters
    ----------
    max_features : int
        Passed to both ``Data`` and ``RNN_LSTM``.
    maxlen : int
        Passed to both ``Data`` and ``RNN_LSTM``.
    """

    def __init__(self, max_features=20000, maxlen=80):
        self.data = Data(max_features, maxlen)
        self.model = RNN_LSTM(max_features, maxlen)

    def run(self, epochs=3, batch_size=32):
        """Fit the model on the training split, then report test metrics.

        The test split is passed to ``fit`` as ``validation_data`` and is
        also the split used for the final ``evaluate`` call. The resulting
        accuracy and loss are printed; nothing is returned.

        Parameters
        ----------
        epochs : int
            Number of training epochs handed to ``fit``.
        batch_size : int
            Batch size used for both ``fit`` and ``evaluate``.
        """
        test_pair = (self.data.x_test, self.data.y_test)

        print('Training stage')
        self.model.fit(
            self.data.x_train, self.data.y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=test_pair,
            verbose=1,
        )

        # evaluate() yields loss first, then the compiled metric(s).
        loss, acc = self.model.evaluate(
            *test_pair, batch_size=batch_size, verbose=2)

        print('Test performance: accuracy={0}, loss={1}'.format(acc, loss))
 
def main():
    """Create a ``Machine`` with its default settings and run it."""
    Machine().run()
 
# Start the experiment only when executed directly, not when imported.
if __name__ == "__main__":
    main()

Using TensorFlow backend.


Training stage
Train on 25000 samples, validate on 25000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Test performance: accuracy=0.82152, loss=0.4127060862159729
