In [1]:
import tensorflow_datasets as tfds

In [2]:
import tensorflow as tf

In [3]:
# Load the IMDB reviews dataset in its 'subwords8k' configuration.
# `with_info=True` makes the call return a (dataset, info) pair; `info` is
# used later to obtain the text encoder.
# NOTE(review): `as_supervised=True` presumably yields (text, label) pairs,
# matching the two-component `padded_shapes` used below — confirm.
dataset, info = tfds.load(
    'imdb_reviews/subwords8k',
    with_info=True,
    as_supervised=True,
)



In [4]:
# Pull the two splits out of the loaded dataset mapping.
train_dataset = dataset['train']
test_dataset = dataset['test']

In [5]:
# The text feature of this dataset configuration carries the encoder that is
# used further down to turn raw sentences into token ids (`encoder.encode`)
# and to size the embedding table (`encoder.vocab_size`).
text_feature = info.features['text']
encoder = text_feature.encoder

In [6]:
# Size of the shuffle buffer used when building the training pipeline.
BUFFER_SIZE = 10_000
# Number of examples per (padded) batch for both training and test pipelines.
BATCH_SIZE = 64

In [7]:
# Per-component shapes passed to `padded_batch` below.
# NOTE(review): presumably the first component is the variable-length token-id
# sequence and the second the scalar label — confirm against the dataset.
padded_shapes = (
    [None],  # one axis of unknown length: pad to the longest item in the batch
    (),      # scalar component: nothing to pad
)

In [8]:
# Build the training pipeline from the raw split rather than from
# `train_dataset` itself: the previous self-referential form
# (`train_dataset = train_dataset.shuffle(...).padded_batch(...)`) re-batched an
# already batched dataset whenever this cell was executed a second time.
# Starting from `dataset['train']` makes the cell safe to re-run.
train_dataset = (
    dataset['train']
    .shuffle(BUFFER_SIZE)
    .padded_batch(BATCH_SIZE, padded_shapes=padded_shapes)
)

In [9]:
# Build the evaluation pipeline from the raw split rather than from
# `test_dataset` itself, so that re-running this cell does not batch an
# already batched dataset. No shuffling is applied to the test split.
test_dataset = dataset['test'].padded_batch(
    BATCH_SIZE,
    padded_shapes=padded_shapes,
)

In [10]:
# Binary classifier over token-id sequences.
# NOTE(review): the Embedding layer is created without `mask_zero`, so the zero
# padding added by `padded_batch` is presumably fed through the LSTM like any
# other token — confirm this is intended.
model = tf.keras.Sequential(
    [
        # Token id -> 64-dimensional vector; table size is the encoder's vocabulary size.
        tf.keras.layers.Embedding(encoder.vocab_size, 64),
        # 64-unit LSTM wrapped so the sequence is read in both directions.
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
        # Hidden fully connected layer.
        tf.keras.layers.Dense(64, activation='relu'),
        # Single sigmoid unit: one value in [0, 1] per input sequence.
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ]
)

In [11]:
# Configure training: Adam with a learning rate of 1e-4, binary cross-entropy
# loss (the model ends in a single sigmoid unit), and accuracy as the metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [12]:
# Train for 5 epochs. Validation runs on the test pipeline but is limited to
# 30 batches per epoch via `validation_steps`, not the full test split.
history = model.fit(
    train_dataset,
    epochs=5,
    validation_data=test_dataset,
    validation_steps=30,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [13]:
def pad_to_size(vec, size):
    """Return a copy of ``vec`` right-padded with zeros up to length ``size``.

    The input sequence is left untouched; a new list is returned. (The earlier
    implementation extended ``vec`` in place, silently modifying the caller's
    list.)

    Args:
        vec: Sequence of values to pad (any iterable with a length, e.g. a
            list or tuple).
        size: Target length. If ``vec`` is already at least this long, no
            padding is added and the values are returned unchanged (they are
            never truncated).

    Returns:
        A new list containing the items of ``vec`` followed by as many zeros
        as are needed to reach ``size`` elements.
    """
    padded = list(vec)
    # A non-positive difference yields an empty list, so nothing is appended.
    padded.extend([0] * (size - len(padded)))
    return padded

In [14]:
def sample_predict(sentence, pad, pad_size=64):
    """Run the trained model on a single sentence.

    Args:
        sentence: Raw text to classify; it is tokenized with the notebook's
            ``encoder``.
        pad: If truthy, the encoded sentence is right-padded with zeros to
            ``pad_size`` tokens before prediction.
        pad_size: Target length used when ``pad`` is truthy. Defaults to 64,
            the value that was previously hard-coded.

    Returns:
        Whatever ``model.predict`` returns for a batch containing this one
        encoded sentence.
    """
    token_ids = encoder.encode(sentence)
    if pad:
        token_ids = pad_to_size(token_ids, pad_size)
    # Token ids are vocabulary indices for the Embedding layer, so keep them
    # integral instead of casting them to float32.
    token_ids = tf.cast(token_ids, tf.int64)
    # Add a leading batch axis: the model is fed a batch of one sequence.
    predictions = model.predict(tf.expand_dims(token_ids, 0))
    return predictions

In [15]:
# Example review used to try out the trained model.
sample_text = "This movie was awesome. The acting was incredible. Highly recommend!"

In [16]:
# Scale the model output by 100 so it reads as a percentage when printed.
predictions = 100 * sample_predict(sample_text, pad=True)

In [18]:
# `predictions` is expected to hold a single value (one sample, one output
# unit). Extract it as a Python scalar explicitly rather than relying on the
# implicit size-1 array -> float conversion performed by the %-format, which
# NumPy has deprecated for arrays with ndim > 0.
print("probability this is a positive review: %.2f" % predictions.item())

probability this is a positive review: 79.93
