In [1]:
import numpy as np

import tensorflow_datasets as tfds
import tensorflow as tf

# Disable tensorflow_datasets' progress bars.
tfds.disable_progress_bar()

In [None]:
# Fetch the IMDB reviews dataset. `as_supervised=True` makes each example a
# (text, label) pair; `with_info=True` also returns the metadata as `info`.
dataset, info = tfds.load(
    'imdb_reviews',
    with_info=True,
    as_supervised=True,
)
train_dataset = dataset['train']
test_dataset = dataset['test']

In [5]:
# Input-pipeline and vocabulary hyperparameters used by the cells below.
VOCAB_SIZE = 900       # max_tokens for the text-vectorization layer
BATCH_SIZE = 64        # examples per batch for both splits
BUFFER_SIZE = 10_000   # shuffle buffer size for the training split

In [4]:
# Build the input pipelines from the raw splits held in `dataset` instead of
# from `train_dataset` / `test_dataset` themselves. This keeps the cell
# idempotent: re-running it does not stack a second shuffle/batch on top of
# data that has already been batched.
train_dataset = (
    dataset['train']
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
# The test split is only batched and prefetched; it is not shuffled.
test_dataset = (
    dataset['test']
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [6]:
# Resolve TextVectorization from its stable location first, and fall back to
# the older experimental path on TensorFlow releases that predate the move.
# The experimental alias is deprecated and not available in newer releases.
try:
    _TextVectorization = tf.keras.layers.TextVectorization
except AttributeError:
    _TextVectorization = tf.keras.layers.experimental.preprocessing.TextVectorization

# Cap the vocabulary at VOCAB_SIZE tokens.
encoder = _TextVectorization(max_tokens=VOCAB_SIZE)
# Learn the vocabulary from the training text only; labels are dropped.
encoder.adapt(train_dataset.map(lambda text, label: text))


In [7]:
# Text classifier: raw strings -> token ids -> embeddings -> BiLSTM -> dense head.
model = tf.keras.Sequential()
model.add(encoder)
# One embedding row per vocabulary entry; mask_zero=True masks token id 0.
model.add(
    tf.keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,
        mask_zero=True,
    )
)
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
# Single output unit with no activation, so the model emits a raw logit.
model.add(tf.keras.layers.Dense(1))

# from_logits=True matches the un-activated output of the final Dense layer.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(1e-4),
    metrics=['accuracy'],
)


In [8]:
# Train for 10 epochs on the training pipeline, validating against the test
# pipeline; the object returned by fit() is kept in `history`.
history = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=10,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [20]:
# Same domain as the training data: movie-review style sentences.
movie_sents = [
    "The Martian was the most scientifically accurate movie of all time.",
    "Interstellar was a thrill ride but the ending was fluffy and chesey.",
    (
        "Although it was boring at times, Lord of the Rings is without doubt "
        "one of the greatest trilogies in cinematic history."
    ),
]
# The model's final layer has no activation, so the values shown are raw
# logits rather than probabilities.
model.predict(movie_sents)


array([[ 0.11185782],
       [-0.2494679 ],
       [ 0.49514487]], dtype=float32)

In [21]:
# Different domain from the training data: stock-market commentary.
stock_sents = [
    (
        "The S&P500 closed at all-time highs today, once again providing "
        "millions of 401ks an extra boost."
    ),
    (
        "CNBC is always pumping the companies that their parent company "
        "has vested interest in."
    ),
    "The entire U.S stock market is a house of cards waiting to crumble.",
]

# The model's final layer has no activation, so the values shown are raw
# logits rather than probabilities.
model.predict(stock_sents)

array([[ 1.3884223 ],
       [-0.302824  ],
       [-0.31304908]], dtype=float32)