In [None]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sklearn
import sys
import tensorflow as tf
import time

from tensorflow import keras

# Record the interpreter and library versions this notebook was run with.
print(tf.__version__)
print(sys.version_info)
for lib in (mpl, np, pd, sklearn, tf, keras):
    print(lib.__name__, lib.__version__)

In [None]:
import tensorflow_datasets as tfds

# Load the IMDB reviews dataset (subwords8k config) as (text, label) pairs,
# together with its metadata object.
dataset, info = tfds.load(
    'imdb_reviews/subwords8k',
    with_info=True,
    as_supervised=True,
)

# Pull the two splits used by the rest of the notebook out of the dict.
train_dataset = dataset['train']
test_dataset = dataset['test']

In [None]:
# Show the dataset metadata returned by tfds.load (requested via with_info=True).
print(info)

In [None]:
# The 'text' feature of the dataset metadata carries the encoder that was used
# to turn the reviews into integer ids; keep it around as the tokenizer.
text_feature = info.features['text']
tokenizer = text_feature.encoder
print('vocabulary size: {}'.format(tokenizer.vocab_size))

In [None]:
sample_string = "Tensorflow is cool."

# Round-trip a sample sentence through the tokenizer: text -> ids -> text.
tokenized_string = tokenizer.encode(sample_string)
original_string = tokenizer.decode(tokenized_string)

print('tokenized string is {}'.format(tokenized_string))
print('original string is {}'.format(original_string))

In [None]:
# Decode each id on its own to show which piece of text it stands for.
for token_id in tokenized_string:
    piece = tokenizer.decode([token_id])
    print('{} --> {}'.format(token_id, piece))

In [None]:
buffer_size = 10000
batch_size = 64

# Build the batched pipelines from the raw splits in `dataset` instead of
# rebinding `train_dataset`/`test_dataset` from themselves: re-running this
# cell then yields the same pipelines rather than batching batched data.
#
# `Dataset.output_shapes` is not available on TF 2.x datasets;
# `tf.compat.v1.data.get_output_shapes` is the supported replacement and
# returns the same nested shape structure that `padded_batch` expects.
train_dataset = (
    dataset['train']
    .shuffle(buffer_size)
    .padded_batch(
        batch_size,
        tf.compat.v1.data.get_output_shapes(dataset['train']))
)

# The test split is only used for validation/evaluation, so it is not
# shuffled: a fixed order keeps the per-batch padding, and therefore the
# reported metrics, reproducible between runs.
test_dataset = dataset['test'].padded_batch(
    batch_size,
    tf.compat.v1.data.get_output_shapes(dataset['test']))

print(tf.compat.v1.data.get_output_shapes(train_dataset))
print(tf.compat.v1.data.get_output_shapes(test_dataset))

In [None]:
embedding_dim = 16
vocab_size = tokenizer.vocab_size

# Binary sentiment classifier: embedding -> bidirectional LSTM -> dense head.
bi_rnn_model = keras.models.Sequential([
    # The embedding layer owns a [vocab_size, embedding_dim] lookup matrix.
    # A batch of id sequences [batch_size, max_length] becomes a tensor of
    # shape [batch_size, max_length, embedding_dim].
    #
    # mask_zero=True: the batches were built with padded_batch, which pads
    # short reviews with 0. Masking id 0 stops the LSTM (in particular the
    # backward direction, which would otherwise start on the padding) from
    # treating padding as real input.
    # NOTE(review): assumes the tokenizer reserves id 0 for padding, as the
    # tfds text encoders document -- confirm if the tokenizer is swapped.
    keras.layers.Embedding(vocab_size, embedding_dim, mask_zero=True),
    # return_sequences=False: only the final state of each direction is kept.
    keras.layers.Bidirectional(
        keras.layers.LSTM(units=32, return_sequences=False)),
    keras.layers.Dense(32, activation='relu'),
    # Single sigmoid unit, matching the binary_crossentropy loss below.
    keras.layers.Dense(1, activation='sigmoid'),
])
bi_rnn_model.summary()
bi_rnn_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'])

In [None]:
def plot_learning_curves(history, label, epochs, min_value, max_value):
    """Plot the training and validation curves of a single metric.

    Args:
        history: object whose `history` attribute maps metric names to
            sequences of values (here: the object returned by `fit`).
        label: metric name; both `label` and `'val_' + label` are looked up
            in `history.history` and plotted.
        epochs: upper bound of the x axis (the lower bound is 0).
        min_value: lower bound of the y axis.
        max_value: upper bound of the y axis.
    """
    curves = {name: history.history[name] for name in (label, 'val_' + label)}
    ax = pd.DataFrame(curves).plot(figsize=(8, 5))
    ax.grid(True)
    ax.axis([0, epochs, min_value, max_value])
    plt.show()

In [None]:
epochs = 10

# Train on the batched training split, validating on the test split after
# every epoch.
bi_rnn_history = bi_rnn_model.fit(
    train_dataset,
    epochs=epochs,
    validation_data=test_dataset,
)

# One figure per tracked metric, each with the y axis fixed to [0, 1].
for metric_name in ('accuracy', 'loss'):
    plot_learning_curves(bi_rnn_history, metric_name, epochs, 0, 1)

# Final loss/accuracy on the test split.
bi_rnn_model.evaluate(test_dataset)