In [1]:
import numpy as np
from keras.datasets import imdb
from keras.models import Sequential, Model
from keras.layers import Activation, TimeDistributed, Dense, RepeatVector, Embedding
from keras.layers.recurrent import LSTM
from keras.optimizers import Adam, RMSprop
from keras.preprocessing import sequence

Using TensorFlow backend.


In [2]:
# Vocabulary cap handed to the IMDB loader as `num_words`.
top_words = 5000

# The loader returns a (features, labels) pair for each split; unpack them in
# two steps instead of one nested tuple pattern.
train_split, test_split = imdb.load_data(num_words=top_words)
X_train, y_train = train_split
X_test, y_test = test_split

In [3]:
# Shape check before padding: one entry per review (the recorded output is 1-D).
X_train.shape

(25000,)

In [4]:
# Fixed number of timesteps per review; used for padding, for the Embedding
# input length and for the RepeatVector width of the autoencoder.
max_len = 500

In [5]:
# Bring every review in both splits to exactly `max_len` token ids so the data
# becomes a regular 2-D array (pad_sequences is left on its default
# padding/truncating settings).
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=max_len)
    for split in (X_train, X_test)
)

In [6]:
# Shape check after padding: expect (number of reviews, max_len).
X_train.shape

(25000, 500)

In [14]:
# Sequence autoencoder: the encoder compresses a padded review into a single
# vector and the decoder is trained to emit the review back from that vector.
model = Sequential()

# Encoder network: token ids -> 100-d embeddings (mask_zero=True) ->
# one 1000-unit LSTM state, repeated max_len times so the decoder sees it at
# every timestep.
encoder_layers = (
    Embedding(top_words, output_dim=100, input_length=max_len, mask_zero=True),
    LSTM(units=1000),
    RepeatVector(max_len),
)
for encoder_layer in encoder_layers:
    model.add(encoder_layer)

# Decoder network: a stack of sequence-returning LSTMs followed by a
# per-timestep projection onto the vocabulary.
num_hidden_layers = 3
decoder_layers = [
    LSTM(units=1000, return_sequences=True) for _ in range(num_hidden_layers)
]
for decoder_layer in decoder_layers:
    model.add(decoder_layer)
model.add(TimeDistributed(Dense(top_words)))
model.add(Activation('softmax'))

# Targets are one-hot over the vocabulary, hence categorical cross-entropy.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [8]:
def process_data(word_sentences, max_len, word_to_ix):
    """One-hot encode sequences of integer word ids into a dense 3-D array.

    Args:
        word_sentences: Sized collection of sequences of integer word ids
            (for example a 2-D array of padded reviews).
        max_len: Number of timesteps in the output. Every sequence must have
            at most ``max_len`` entries.
        word_to_ix: Either the vocabulary size as an integer, or a sized
            vocabulary (e.g. a word -> index dict) whose length is the
            vocabulary size. This sets the width of the one-hot axis; the
            function no longer depends on a module-level ``top_words``.

    Returns:
        ``numpy.ndarray`` of shape ``(len(word_sentences), max_len,
        vocab_size)`` in numpy's default float dtype, with
        ``out[i, j, word_sentences[i][j]] == 1`` and zeros elsewhere.
        Timesteps beyond the end of a short sequence stay all-zero.

    Raises:
        IndexError: If a sequence is longer than ``max_len`` or contains an
            id outside the vocabulary.
    """
    if isinstance(word_to_ix, (int, np.integer)):
        vocab_size = int(word_to_ix)
    else:
        vocab_size = len(word_to_ix)

    sequences = np.zeros((len(word_sentences), max_len, vocab_size))
    for i, sentence in enumerate(word_sentences):
        # dtype=int keeps an empty sequence usable as an index array.
        word_ids = np.asarray(sentence, dtype=int)
        # Set one cell per timestep in a single vectorised assignment
        # instead of looping over the words in Python.
        sequences[i, np.arange(len(word_ids)), word_ids] = 1.
    return sequences

In [None]:
# Autoencoder training: the padded reviews are both the input and (one-hot
# encoded) the target. Targets are built one chunk at a time; a chunk's
# one-hot array has chunk_size * max_len * top_words entries, so encoding the
# whole training set up front is presumably not feasible in memory.
n_epochs = 5
chunk_size = 100  # sequences encoded and fitted per step

# `range` instead of `xrange`: the latter does not exist on Python 3.
for k in range(n_epochs):
    # Reshuffle the training data every epoch so the chunks differ between passes
    indices = np.arange(len(X_train))
    np.random.shuffle(indices)
    X = X_train[indices]

    # Training `chunk_size` sequences at a time
    for i in range(0, len(X), chunk_size):
        i_end = min(i + chunk_size, len(X))  # last chunk may be shorter
        y_sequences = process_data(X[i:i_end], max_len, top_words)
        print('[INFO] Training model: epoch {}th {}/{} samples'.format(k, i, len(X)))
        model.fit(X[i:i_end], y_sequences, batch_size=chunk_size, epochs=1, verbose=2)
    # One checkpoint per completed epoch.
    model.save_weights('checkpoint_epoch_{}.hdf5'.format(k))

In [15]:
# Sentiment classifier built on top of the autoencoder's encoder.
# model.layers[0] is the Embedding and model.layers[1] the encoder LSTM, so the
# head below consumes the encoder LSTM's output and shares the Embedding/LSTM
# layers (and their weights) with `model`.
fc1 = Dense(500, activation='relu')(model.layers[1].output)
fc2 = Dense(200, activation='relu')(fc1)
# Single sigmoid unit: one probability per review for the binary label.
output = Dense(1, activation='sigmoid')(fc2)
im_model = Model(inputs=model.input, outputs=output)

# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line.
im_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2_input (InputLaye (None, 500)               0         
_________________________________________________________________
embedding_2 (Embedding)      (None, 500, 100)          500000    
_________________________________________________________________
lstm_5 (LSTM)                (None, 1000)              4404000   
_________________________________________________________________
dense_11 (Dense)             (None, 500)               500500    
_________________________________________________________________
dense_12 (Dense)             (None, 200)               100200    
_________________________________________________________________
dense_13 (Dense)             (None, 1)                 201       
Total params: 5,504,901
Trainable params: 5,504,901
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Binary cross-entropy pairs with the single sigmoid output unit of im_model.
im_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Classifier training on the padded reviews and their binary labels.
n_epochs = 5
chunk_size = 1000  # sequences handed to each fit() call
batch_size = 100   # mini-batch size inside each fit() call

# `range` instead of `xrange`: the latter does not exist on Python 3.
for k in range(n_epochs):
    # Reshuffle every epoch; the same permutation is applied to inputs and
    # labels so that they stay aligned.
    indices = np.arange(len(X_train))
    np.random.shuffle(indices)
    X = X_train[indices]
    y = y_train[indices]

    # Training `chunk_size` sequences per fit() call
    for i in range(0, len(X), chunk_size):
        i_end = min(i + chunk_size, len(X))  # last chunk may be shorter
        print('[INFO] Training model: epoch {}th {}/{} samples'.format(k, i, len(X)))
        # The label slice was missing its closing bracket (a SyntaxError).
        im_model.fit(X[i:i_end], y[i:i_end], batch_size=batch_size, epochs=1, verbose=2)
    # One checkpoint per completed epoch.
    im_model.save_weights('im_checkpoint_epoch_{}.hdf5'.format(k))