In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds

import numpy as np

# Turn off tensorflow_datasets progress bars so they do not clutter cell output.
tfds.disable_progress_bar()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the 'imdb_reviews' dataset together with its metadata (`info`).
# as_supervised=True makes every element a 2-tuple, which the later cells
# unpack as (text, label).
dataset, info = tfds.load(
    'imdb_reviews',
    with_info=True,
    as_supervised=True,
)

# Pull the two named splits out of the returned mapping.
train_dataset = dataset['train']
test_dataset = dataset['test']

In [3]:
# Show the structure (shape and dtype) of a single element of the training split.
train_dataset.element_spec

(TensorSpec(shape=(), dtype=tf.string, name=None),
 TensorSpec(shape=(), dtype=tf.int64, name=None))

In [4]:
# Look at one (text, label) pair from the training split.
for example, label in train_dataset.take(1):
    review_bytes = example.numpy()
    review_label = label.numpy()
    print('text: ', review_bytes)
    print('label: ', review_label)

text:  b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it."
label:  0


In [5]:
BUFFER_SIZE = 10000  # size of the shuffle buffer for the training split
BATCH_SIZE = 64      # number of reviews per batch

# Build the pipelines from the raw splits held in `dataset` rather than
# reassigning `train_dataset` / `test_dataset` from themselves. The cell is
# then idempotent: running it a second time produces the same pipeline instead
# of shuffling and batching data that has already been batched.
train_dataset = (
    dataset['train']
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# The test split is only batched and prefetched; it is not shuffled.
test_dataset = (
    dataset['test']
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [6]:
# Take one batch and show its first three texts and labels.
# `example` and `label` stay bound to this batch after the loop finishes.
for example, label in train_dataset.take(1):
    leading_texts = example.numpy()[:3]
    leading_labels = label.numpy()[:3]
    print('text: ', leading_texts)
    print('\n', 'labels: ', leading_labels)

text:  [b'It is a rare occasion when I want to see a movie again. "The Amati Girls" is such a movie. In old time movie theaters I would have stayed put for more showings. Was this story autobiographical for the writer/director? It has the aura of reality.<br /><br />The all star cast present their characters believably and with tenderness. Who would not want Mercedes Ruehl as an older sister? I have loved her work since "For Roseanna".<br /><br />With most movies, one suspends belief because we know that it is the work of actors, producers, directors, sound technicians, etc. It was hard to suspend such belief in "The Amati Girls". One feels such a part of this family! How I wanted to come to the defense of Dolores when her family is stifling her emotional life. And wanted to cheer Lee Grant as she levels criticism at Cloris Leachman\'s hair color. The humor throughout is not belly laugh humor, but instead has a feel-good quality that satisfies far more than pratfalls and such.<br /><br

In [7]:
VOCAB_SIZE = 1000  # upper bound on the vocabulary size (passed as max_tokens)

# Only the review text is needed to build the vocabulary, so drop the labels.
text_only_dataset = train_dataset.map(lambda text, _label: text)

# Create the vectorization layer and fit its vocabulary on the training text.
encoder = tf.keras.layers.TextVectorization(max_tokens=VOCAB_SIZE)
encoder.adapt(text_only_dataset)

In [8]:
# Keep the vocabulary as a NumPy array so it can be indexed with arrays of
# token ids, then show its first 20 entries.
vocabulary_terms = encoder.get_vocabulary()
vocab = np.array(vocabulary_terms)
print(vocab[:20])

['' '[UNK]' 'the' 'and' 'a' 'of' 'to' 'is' 'in' 'it' 'i' 'this' 'that'
 'br' 'was' 'as' 'for' 'with' 'movie' 'but']


In [9]:
# Run the `example` batch through the encoder and keep the first three rows
# as a NumPy array of token ids.
encoded_batch = encoder(example)
encoded_example = encoded_batch[:3].numpy()
encoded_example

array([[  9,   7,   4, ...,   0,   0,   0],
       [ 10, 100,  26, ...,   0,   0,   0],
       [ 10,   1,  11, ...,   0,   0,   0]], dtype=int64)

In [10]:
# Compare each of the three original texts with the text rebuilt from its
# token ids by looking every id up in `vocab`.
for row in range(3):
    original_text = example[row].numpy()
    roundtrip_text = ' '.join(vocab[encoded_example[row]])
    print('Original: ', original_text)
    print('Roundtrip: ', roundtrip_text)
    print()

Original:  b'It is a rare occasion when I want to see a movie again. "The Amati Girls" is such a movie. In old time movie theaters I would have stayed put for more showings. Was this story autobiographical for the writer/director? It has the aura of reality.<br /><br />The all star cast present their characters believably and with tenderness. Who would not want Mercedes Ruehl as an older sister? I have loved her work since "For Roseanna".<br /><br />With most movies, one suspends belief because we know that it is the work of actors, producers, directors, sound technicians, etc. It was hard to suspend such belief in "The Amati Girls". One feels such a part of this family! How I wanted to come to the defense of Dolores when her family is stifling her emotional life. And wanted to cheer Lee Grant as she levels criticism at Cloris Leachman\'s hair color. The humor throughout is not belly laugh humor, but instead has a feel-good quality that satisfies far more than pratfalls and such.<br />

In [11]:
# Assemble the classifier layer by layer, in the same order as before:
# raw text -> token ids -> embeddings -> LSTM -> dense head.
model = tf.keras.Sequential()

# Turns raw strings into sequences of integer token ids.
model.add(encoder)

# One 64-dimensional vector per vocabulary entry. mask_zero=True marks id 0
# (the '' padding entry at vocabulary index 0) as masked.
model.add(
    tf.keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,
        mask_zero=True,
    )
)

# Collapses the embedded sequence into a single 64-unit vector.
model.add(tf.keras.layers.LSTM(64))

# Classification head. The last layer has no activation, so the model
# outputs a single raw score per input.
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1))

In [12]:
# Run a hand-written review through the model.
sample_text = (
    'The movie was cool. The animation and the graphics '
    'were out of this world. I would recommend this movie.'
)

# predict() is given a batch, so wrap the single string in a one-element array.
sample_batch = np.array([sample_text])
predictions = model.predict(sample_batch)
print(predictions[0])

[-0.00029764]


In [13]:
# Predict again with the sample batched next to a much longer text, then print
# the sample's score so it can be compared with the single-input run.
padding = 'the ' * 2000
padded_batch = np.array([sample_text, padding])
predictions = model.predict(padded_batch)
print(predictions[0])

[-0.00029764]


In [14]:
# The final Dense(1) layer has no activation, so the model emits logits.
# The loss is told so via from_logits=True, and the accuracy metric has to
# agree: the string 'accuracy' would threshold the raw logits at 0.5, whereas
# the correct decision boundary for logits is 0 (sigmoid(0) == 0.5).
# name='accuracy' keeps the metric keys ('accuracy' / 'val_accuracy') unchanged.
# NOTE(review): if the installed Keras only accepts thresholds inside (0, 1),
# add a sigmoid to the output layer and drop from_logits instead — confirm.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(1e-4),
    metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')],
)

In [None]:
# Train for 10 epochs. validation_steps=30 limits each validation pass to
# 30 batches of `test_dataset` instead of the full split.
history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=test_dataset,
    validation_steps=30,
)