In [2]:
import tensorflow as tf
import tensorflow_datasets as tfds

import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Fetch the IMDB reviews dataset as (text, label) pairs and pull out the two
# labelled splits used in the rest of the notebook.
dataset = tfds.load('imdb_reviews', as_supervised=True)
train_dataset = dataset['train']
test_dataset = dataset['test']

Downloading and preparing dataset 80.23 MiB (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteO7BMAU/imdb_reviews-train.tfrecord…

Generating test examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteO7BMAU/imdb_reviews-test.tfrecord*…

Generating unsupervised examples...:   0%|          | 0/50000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteO7BMAU/imdb_reviews-unsupervised.t…

Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.


In [4]:
batch_size = 32

# Build both pipelines from the raw splits kept in `dataset` instead of
# reassigning `train_dataset`/`test_dataset` from themselves: the cell is then
# idempotent, so running it a second time does not batch already-batched data.
train_dataset = (
    dataset['train']
    .shuffle(10000)               # shuffle buffer of 10000 examples
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)   # overlap input preparation with training
)
test_dataset = (
    dataset['test']
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

In [6]:
# Pull a single batch from the training pipeline and show its first review
# together with the matching label.
batch_iterator = iter(train_dataset)
example, label = next(batch_iterator)

first_review = example.numpy()[0]
first_label = label.numpy()[0]
print('Text:\n', first_review)
print('\n Label: ', first_label)

Text:
 b"That hilarious line is typical of what these naughty sisters say. (It's funny on its own terms and pretty funny unintentionally , too.) Only two of the sisters are really bad. Boy, are they bad, too! One is given to pinup poses and salacious comments where e'er she goes. The other is got up to look like Marilyn Monroe. She has those sensual, slightly parted lips. And, not to give anything away, she is even more bad than the other.<br /><br />All three sisters are played by starlets. The man who stumbles into their lives is played by John Bromfield. He had something of a career.<br /><br />This looks today like possibly the first mainstream soft-core porn ever marketed. Well, of course not the first but the raciest at that time.<br /><br />The girls wear as little as possible and let's not forget about the female audience members: Bromfield is shown shaving with an electric razor -- whose fetish was this? -- bare-chested. He also is shown sopping wet in a swimsuit.<br /><br />T

In [7]:
# Text-to-integer layer limited to 10000 vocabulary entries; the vocabulary is
# learned from the review texts only (labels are dropped by the map).
encoder = tf.keras.layers.TextVectorization(max_tokens=10000)
text_only_dataset = train_dataset.map(lambda text, _: text)
encoder.adapt(text_only_dataset)

# Array form of the vocabulary so it can be indexed with the encoded ids.
vocabulary = np.array(encoder.get_vocabulary())

# Round-trip the first review of the sampled batch: bytes -> ids -> tokens.
original_text = example.numpy()[0]
encoded_text = encoder(original_text).numpy()
decoded_tokens = vocabulary[encoded_text]
decoded_text = ' '.join(decoded_tokens)

for caption, value in (
    ('original: ', original_text),
    ('encoded: ', encoded_text),
    ('decoded: ', decoded_text),
):
    print(caption, value)

original:  b"That hilarious line is typical of what these naughty sisters say. (It's funny on its own terms and pretty funny unintentionally , too.) Only two of the sisters are really bad. Boy, are they bad, too! One is given to pinup poses and salacious comments where e'er she goes. The other is got up to look like Marilyn Monroe. She has those sensual, slightly parted lips. And, not to give anything away, she is even more bad than the other.<br /><br />All three sisters are played by starlets. The man who stumbles into their lives is played by John Bromfield. He had something of a career.<br /><br />This looks today like possibly the first mainstream soft-core porn ever marketed. Well, of course not the first but the raciest at that time.<br /><br />The girls wear as little as possible and let's not forget about the female audience members: Bromfield is shown shaving with an electric razor -- whose fetish was this? -- bare-chested. He also is shown sopping wet in a swimsuit.<br /><br

In [9]:
# BiLSTM

# Pipeline: raw strings -> token ids -> 64-d embeddings -> two stacked
# bidirectional LSTMs -> dense head that emits one raw logit per review.
model = tf.keras.Sequential([
    encoder,
    tf.keras.layers.Embedding(
        len(encoder.get_vocabulary()), 64, mask_zero=True),
    # return_sequences=True so the second recurrent layer receives the full
    # sequence rather than only the final state.
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(64,  return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)  # no activation: the output is a logit
])

model.summary()

model.compile(
    loss = tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer = tf.keras.optimizers.Adam(),
    # The model outputs logits, so the decision boundary is 0 (logit > 0 is
    # the same as probability > 0.5). The plain 'accuracy' string would
    # threshold the raw logits at 0.5 and misreport accuracy. The metric keeps
    # the name 'accuracy' so the history keys 'accuracy' / 'val_accuracy' are
    # unchanged.
    metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')]
)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVe  (None, None)              0         
 ctorization)                                                    
                                                                 
 embedding (Embedding)       (None, None, 64)          640000    
                                                                 
 bidirectional (Bidirection  (None, None, 128)         66048     
 al)                                                             
                                                                 
 bidirectional_1 (Bidirecti  (None, 64)                41216     
 onal)                                                           
                                                                 
 dense (Dense)               (None, 64)                4160      
                                                        

In [None]:
# Train for five epochs, evaluating on the test batches after each epoch; the
# returned object is kept so its per-epoch metrics can be inspected afterwards.
fit_options = dict(epochs=5, validation_data=test_dataset)
history = model.fit(train_dataset, **fit_options)

Epoch 1/5
Epoch 2/5

In [None]:
history_dict = history.history

# Per-epoch curves recorded by fit for the training and validation data.
acc, val_acc = history_dict['accuracy'], history_dict['val_accuracy']
loss, val_loss = history_dict['loss'], history_dict['val_loss']

# One entry per panel:
# (training curve, validation curve, title, y-label, legend entries).
panels = [
    (acc, val_acc, 'Training and Validation Accuracy', 'Accuracy',
     ['Accuracy', 'Validation Accuracy']),
    (loss, val_loss, 'Training and Validation Loss', 'Loss',
     ['Loss', 'Validation Loss']),
]

plt.figure(figsize=(8, 4))
for slot, (train_curve, val_curve, title, y_label, legend) in enumerate(panels, start=1):
    # Side-by-side panels: accuracy on the left, loss on the right.
    plt.subplot(1, 2, slot)
    plt.plot(train_curve)
    plt.plot(val_curve)
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend(legend)

plt.show()


In [None]:
# Hand-written review used to try out the trained model.
sample_text = '''The movie by NM & Edunet foundation was so good and the animation are so dope.
    I would recommend my friends to watch it.'''

# predict works on batches, so the single string is wrapped in a 1-element array.
predictions = model.predict(np.array([sample_text]))
first_prediction = predictions[0]
print(*first_prediction)

# The final Dense(1) layer has no activation, so the score is a raw logit:
# above zero is read as a positive review, anything else as negative.
verdict = 'The review is positive' if first_prediction > 0 else 'The review is negative'
print(verdict)