In [4]:
import numpy as np

import tensorflow_datasets as tfds
import tensorflow as tf

import matplotlib.pyplot as plt


def plot_graphs(history, metric):
    """Plot a training metric, and its validation counterpart, per epoch.

    Draws onto the current matplotlib figure through the pyplot interface.

    Args:
        history: Object exposing a ``history`` mapping from metric name to a
            sequence of per-epoch values (e.g. the value returned by
            ``model.fit``).
        metric: Key of the training metric in ``history.history``. The
            validation series is looked up under ``'val_' + metric``.

    Raises:
        KeyError: If ``metric`` itself is missing from ``history.history``.
    """
    curves = history.history
    val_metric = 'val_' + metric

    plt.plot(curves[metric])
    labels = [metric]
    # A history with no 'val_' entry (e.g. training without validation
    # data) gets only the training curve instead of failing with a KeyError.
    if val_metric in curves:
        plt.plot(curves[val_metric], '')
        labels.append(val_metric)
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend(labels)

In [35]:
# Load the IMDB reviews dataset as (text, label) pairs plus its metadata.
dataset, info = tfds.load(
    'imdb_reviews',
    with_info=True,
    as_supervised=True,
)
train_dataset = dataset['train']
test_dataset = dataset['test']

# Display the per-element structure of the training split.
train_dataset.element_spec

(TensorSpec(shape=(), dtype=tf.string, name=None),
 TensorSpec(shape=(), dtype=tf.int64, name=None))

In [36]:
# Peek at one (text, label) element taken from the training split.
for example, label in train_dataset.take(1):
    print('text: ', example.numpy())
    print('label: ', label.numpy())

text:  b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it."
label:  0


In [37]:
# Size passed to Dataset.shuffle() when building the training pipeline.
BUFFER_SIZE = 10_000
# Number of elements grouped together by Dataset.batch().
BATCH_SIZE = 64

In [38]:
# NOTE: this cell rebinds train_dataset/test_dataset to their batched
# versions, so re-running it without reloading the data batches them again.

# Training pipeline: shuffle, group into batches, then prefetch.
train_dataset = (
    train_dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
# Evaluation pipeline: batch and prefetch only, no shuffling.
test_dataset = (
    test_dataset
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [39]:
# Show the first three texts and labels from one training batch.
# `example` and `label` stay bound after the loop and are reused below.
for example, label in train_dataset.take(1):
    print('texts: ', example.numpy()[:3])
    print()
    print('labels: ', label.numpy()[:3])

texts:  [b"This picture for me scores very highly as it is a hugely enjoyable and amusing spoof of Alien Invaders taking over a town and many of its' men folk.<br /><br />The town and the players are all decked out in sort of 1950's style and the whole movie has a deliberate tacky and kitschy feel to it. Some of the scenes are hilarious like with the birth of an alien creature.<br /><br />All the actors give full blooded and serious performances which makes the film even funnier and the special effects and Aliens are at least it seems to me intentionally 3rd rate to add to the amusement.<br /><br />These type of films often deserve a cult following:<br /><br />8/10."
 b'OK me and a friend rented this a few days ago because we like to keep track of b-movies since we do them ourselves. Anyway, the cover contained blood and weird looking naked girls with fangs and stuff... and Tom Savini! There is just no way this movie can fail! Right? wrong!! It just seems like such a waste! There was r

In [25]:
# Upper bound on the vocabulary size given to the text vectorizer.
VOCAB_SIZE = 1000
encoder = tf.keras.layers.TextVectorization(max_tokens=VOCAB_SIZE)

# Fit the vectorizer on the review text only; the labels are dropped.
encoder.adapt(
    train_dataset.map(lambda review_text, _label: review_text)
)

In [40]:
# Keep the adapted vocabulary as a NumPy array so it can later be indexed
# with whole arrays of token ids; display its first 20 entries.
vocab = np.asarray(encoder.get_vocabulary())
vocab[:20]

array(['', '[UNK]', 'the', 'and', 'a', 'of', 'to', 'is', 'in', 'it', 'i',
       'this', 'that', 'br', 'was', 'as', 'for', 'with', 'movie', 'but'],
      dtype='<U14')

In [43]:
# Vectorize the batch left in `example` by the batch-preview loop, then keep
# the first three rows as a NumPy array of token ids.
encoded_example = encoder(example)[0:3].numpy()
encoded_example

array([[ 11, 433,  16, ...,   0,   0,   0],
       [  1,   1, 101, ...,   0,   0,   0],
       [597,  70,   3, ...,   0,   0,   0]], dtype=int64)

In [44]:
# Compare each of the three raw reviews with the text rebuilt by mapping its
# token ids back through the vocabulary array.
for n in range(3):
    print("Original: ", example[n].numpy())
    print("Round-trip: ", " ".join(vocab[encoded_example[n]]))
    print()

Original:  b"This picture for me scores very highly as it is a hugely enjoyable and amusing spoof of Alien Invaders taking over a town and many of its' men folk.<br /><br />The town and the players are all decked out in sort of 1950's style and the whole movie has a deliberate tacky and kitschy feel to it. Some of the scenes are hilarious like with the birth of an alien creature.<br /><br />All the actors give full blooded and serious performances which makes the film even funnier and the special effects and Aliens are at least it seems to me intentionally 3rd rate to add to the amusement.<br /><br />These type of films often deserve a cult following:<br /><br />8/10."
Round-trip:  this picture for me [UNK] very highly as it is a [UNK] enjoyable and [UNK] [UNK] of [UNK] [UNK] taking over a town and many of its men [UNK] br the town and the [UNK] are all [UNK] out in sort of [UNK] style and the whole movie has a [UNK] [UNK] and [UNK] feel to it some of the scenes are hilarious like with

In [50]:
# Assemble the classifier one layer at a time.
model = tf.keras.Sequential()

# Raw strings -> integer token ids.
model.add(encoder)

# Token ids -> 64-dimensional vectors. Masking on id 0 handles the
# variable sequence lengths.
model.add(
    tf.keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,
        mask_zero=True,
    )
)

# Bidirectional wrapper around a 64-unit LSTM.
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))

# Dense head; the final layer has one unit and no activation.
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1))

In [51]:
# List, in layer order, whether each layer of the model supports masking.
print([lyr.supports_masking for lyr in model.layers])

[False, True, True, True, True]


In [52]:
# The loss is configured with from_logits=True, i.e. it is given the model's
# raw outputs rather than probabilities.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(1e-4),
    metrics=['accuracy'],
)

In [53]:
# Train for 10 epochs; validation draws from test_dataset and is limited by
# validation_steps=30. The result is kept in `history` for later plotting.
history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=test_dataset,
    validation_steps=30,
)

Epoch 1/10
[1m 91/391[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m14:56[0m 3s/step - accuracy: 0.5047 - loss: 0.6929

KeyboardInterrupt: 