In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from scipy.spatial.distance import cdist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Embedding
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [6]:
import imdb

In [7]:
# Fetch and unpack the IMDB review dataset through the local `imdb` helper
# module. NOTE(review): the name suggests the download is skipped when the
# files are already present — confirm in imdb.py.
imdb.maybe_download_and_extract()

- Download progress: 100.0%
Download finished. Extracting files.
Done.


In [22]:
# Load inputs and targets for the training and test splits.
# NOTE(review): presumably X_* are lists of raw review strings and y_* are
# 0/1 sentiment labels — confirm against imdb.load_data.
X_train, y_train = imdb.load_data(train=True)
X_test, y_test = imdb.load_data(train=False)

In [56]:
# Convert both label collections to NumPy arrays in one step so they can be
# passed straight to model.fit / model.evaluate.
y_train, y_test = np.asarray(y_train), np.asarray(y_test)

In [23]:
# Combined corpus (training texts followed by test texts); the tokenizer
# vocabulary is fitted on this.
# NOTE(review): this lets test-set text influence the vocabulary — consider
# fitting on X_train only if strict train/test separation matters.
data_text = X_train + X_test

In [24]:
# Vocabulary size: passed to the Tokenizer and used as the Embedding input_dim.
num_words = 1_000

In [25]:
# Build the word -> integer-id vocabulary from the combined corpus.
# `num_words` caps how many of the most frequent words are kept when texts
# are later converted to sequences (see the Keras Tokenizer docs).
tokenizer = Tokenizer(num_words=num_words)
tokenizer.fit_on_texts(data_text)

In [None]:
# Preview only the first 20 entries of the word -> index mapping instead of
# dumping the whole vocabulary into the cell output (Keras assigns the lowest
# indices to the most frequent words).
dict(list(tokenizer.word_index.items())[:20])

In [30]:
# Convert each text into a list of integer token ids using the fitted
# tokenizer, separately for the training and test sets.
sequence_train = tokenizer.texts_to_sequences(X_train)
sequence_test = tokenizer.texts_to_sequences(X_test)

In [31]:
# Token count of every sequence, training sequences first, then test ones.
train_lengths = [len(seq) for seq in sequence_train]
test_lengths = [len(seq) for seq in sequence_test]
num_tokens = np.array(train_lengths + test_lengths)

In [32]:
# Quick look at the per-sequence token counts (NumPy abbreviates long arrays).
print(num_tokens)

[ 175  123 1058 ...  162  101  358]


In [36]:
# Cap the sequence length at mean + 2 standard deviations of the token
# counts, truncated to an integer.
length_cutoff = num_tokens.mean() + 2 * num_tokens.std()
max_tokens = int(length_cutoff)

# Fraction of sequences that are strictly shorter than the cap.
shorter_than_cap = num_tokens < max_tokens
print(shorter_than_cap.sum() / num_tokens.size)

0.94542


In [37]:
# Bring every sequence to exactly `max_tokens` ids. Both padding and
# truncation happen at the front ('pre'), so the end of each sequence is kept.
pad_options = dict(maxlen=max_tokens, padding='pre', truncating='pre')
pad_sequence_train = pad_sequences(sequence_train, **pad_options)
pad_sequence_test = pad_sequences(sequence_test, **pad_options)

In [None]:
# Inspect the first padded training sequence as a sanity check.
pad_sequence_train[0]

In [50]:
# Dimensionality of the learned word vectors.
embedding_size = 32

# Empty sequential container; layers are appended in the following cells.
model = Sequential()

In [51]:
# First layer: map each of the `num_words` token ids to a dense vector of
# size `embedding_size`, for inputs of fixed length `max_tokens`.
embedding_layer = Embedding(
    input_dim=num_words,
    output_dim=embedding_size,
    input_length=max_tokens,
    name='layer_embedding',
)
model.add(embedding_layer)

In [52]:
# Three stacked GRU layers. The first two return the full sequence so the
# next recurrent layer receives one vector per time step; the last returns
# only its final state.
model.add(GRU(units=64, return_sequences=True))
model.add(GRU(units=32, return_sequences=True))
model.add(GRU(units=16))

# Single sigmoid unit: binary classification output in [0, 1].
model.add(Dense(1, activation='sigmoid'))

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by newer TensorFlow/Keras releases.
optimizer = Adam(learning_rate=1e-3)

# The metric is registered under the name 'acc', so the validation metric is
# reported as 'val_acc' (the key monitored by EarlyStopping).
model.compile(loss='binary_crossentropy',
              optimizer=optimizer,
              metrics=['acc'])

In [53]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
layer_embedding (Embedding)  (None, 435, 32)           32000     
_________________________________________________________________
gru_9 (GRU)                  (None, 435, 64)           18816     
_________________________________________________________________
gru_10 (GRU)                 (None, 435, 32)           9408      
_________________________________________________________________
gru_11 (GRU)                 (None, 16)                2400      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 17        
Total params: 62,641
Trainable params: 62,641
Non-trainable params: 0
_________________________________________________________________


In [54]:
# Take the callbacks from the same Keras implementation the model was built
# with (tensorflow.keras). Mixing in the standalone `keras` package is
# fragile across versions and requires an extra installed dependency.
from tensorflow import keras

callbacks = [
    # Write training logs (plus weight histograms and embeddings every epoch)
    # to 'my_log_dir' for inspection in TensorBoard.
    keras.callbacks.TensorBoard(
        log_dir='my_log_dir',
        histogram_freq=1,
        embeddings_freq=1,
    ),
    # Stop training once validation accuracy has not improved for 2 epochs.
    # 'val_acc' matches the 'acc' metric name passed to model.compile.
    keras.callbacks.EarlyStopping(
        monitor='val_acc',
        patience=2,
    ),
]

In [57]:
# Train for up to 10 epochs in batches of 32, holding out 5% of the training
# data for validation; the callbacks may end training early.
model.fit(
    x=pad_sequence_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.05,
    callbacks=callbacks,
)

Epoch 1/10
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 2/10
Epoch 3/10


<tensorflow.python.keras.callbacks.History at 0x7f92f00eb6d8>

In [58]:
# Score the trained model on the held-out test set. `result` holds the loss
# followed by the compiled metrics.
result = model.evaluate(x=pad_sequence_test, y=y_test)



In [67]:
# result[1] is the accuracy metric (index 0 is the loss); report it as a percentage.
test_accuracy_pct = result[1] * 100
print('Accuracy: %.2f' % test_accuracy_pct)

Accuracy: 87.20
