<a href="https://colab.research.google.com/github/onuralpArsln/MlAiTutorialProjects/blob/main/11-TextClass/rnnTextClass.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

import tensorflow_datasets as tfds
import tensorflow as tf

# Switch off tensorflow_datasets' progress bars (as the function name says) so
# the dataset-loading cell below produces less output.
tfds.disable_progress_bar()

In [2]:
import matplotlib.pyplot as plt


def plot_graphs(history, metric):
  """Draw the training and validation curves of one metric on the current axes.

  Args:
    history: object whose ``history`` attribute maps metric names to
      per-epoch value sequences (e.g. the return value of ``model.fit``).
    metric: name of the training metric; the validation curve is read from
      the key ``'val_' + metric``.

  Raises:
    KeyError: if either the metric or its ``val_`` counterpart is missing.
  """
  val_metric = 'val_' + metric
  curves = history.history

  # Training curve first, validation curve second; the legend below relies on
  # that order.
  plt.plot(curves[metric])
  plt.plot(curves[val_metric], '')

  plt.xlabel("Epochs")
  plt.ylabel(metric)
  plt.legend([metric, val_metric])

In [3]:
# Load the IMDB reviews dataset together with its metadata; as_supervised=True
# makes every element a (text, label) pair.
dataset, info = tfds.load(
    'imdb_reviews',
    with_info=True,
    as_supervised=True,
)
train_dataset = dataset['train']
test_dataset = dataset['test']

# Cell result: the tensor specs of one (text, label) element.
train_dataset.element_spec

Downloading and preparing dataset 80.23 MiB (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...
Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.


(TensorSpec(shape=(), dtype=tf.string, name=None),
 TensorSpec(shape=(), dtype=tf.int64, name=None))

In [4]:
# Look at one raw (review, label) pair from the still-unbatched training split.
for example, label in train_dataset.take(1):
  text_bytes, label_value = example.numpy(), label.numpy()
  print('text: ', text_bytes)
  print('label: ', label_value)

text:  b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it."
label:  0


In [5]:
# Shuffle-buffer size handed to Dataset.shuffle() when the training pipeline is built.
BUFFER_SIZE = 10000
# Number of reviews per batch, used by Dataset.batch() for both splits.
BATCH_SIZE = 64


In [6]:
# NOTE: this cell rebinds train_dataset/test_dataset, so running it twice
# batches already-batched data. Re-run the loading cell before re-running it.

# Training split: shuffle, batch, then prefetch.
train_dataset = (
    train_dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Test split is not shuffled: batch and prefetch only.
test_dataset = (
    test_dataset
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [7]:
# Show the first three reviews and labels of one training batch. The loop
# variable `example` stays bound afterwards and is reused by later cells.
for example, label in train_dataset.take(1):
  first_texts = example.numpy()[:3]
  first_labels = label.numpy()[:3]
  print('texts: ', first_texts)
  print()
  print('labels: ', first_labels)

texts:  [b'Before I start, I should point out that I know the editor of this film. We\'ve never met, but we belong to the same fanzine(those things which came before message boards), and we have talked on the phone, so I do have a bias here. Anyway...<br /><br />Somehow, it\'s ironic how while the "Rat Pack" culture of the late 50\'s and early to mid-60\'s made a comeback in the mid-90\'s, this movie, from the son of one of the original Rat Pack, and which was made in a similar fashion, was a flop. Not only that, it was a critical flop; I believe Peter Travers of Rolling Stone was the only one who did not savage this(he gave it a mixed review, as I recall). And while I don\'t think this is the greatest film in the world, and I am not a fan of the Rat Pack, or "cocktail," culture, I do think this is worth seeing.<br /><br />For one thing, this looks stylish, and moves right along. For another, the core performances are all good. Richard Dreyfus is surprisingly restrained here as the hea

Create the text encoder
The raw review text has to be turned into integer token ids before the model can use it. The cell below builds a `TextVectorization` layer limited to `VOCAB_SIZE` tokens and calls `adapt` on the training text (labels dropped) so the layer learns its vocabulary.

In [8]:
# Upper bound on the number of vocabulary entries the encoder keeps.
VOCAB_SIZE = 1000

# Layer that maps raw strings to sequences of integer token ids.
encoder = tf.keras.layers.TextVectorization(max_tokens=VOCAB_SIZE)

# Learn the vocabulary from the review text alone; the labels are discarded.
text_only_dataset = train_dataset.map(lambda text, label: text)
encoder.adapt(text_only_dataset)

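Inspect the vocabulary
After `adapt`, the encoder's vocabulary can be read back with `get_vocabulary()`. The cell below shows its first 20 entries: index 0 is the padding token `''`, index 1 is the unknown-word token `[UNK]`, and the remaining entries are words from the reviews.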

In [9]:
# Keep the vocabulary as a NumPy array so token ids can index straight into it.
vocabulary_tokens = encoder.get_vocabulary()
vocab = np.array(vocabulary_tokens)

# Cell result: the first 20 vocabulary entries.
vocab[:20]

array(['', '[UNK]', 'the', 'and', 'a', 'of', 'to', 'is', 'in', 'it', 'i',
       'this', 'that', 'br', 'was', 'as', 'for', 'with', 'movie', 'but'],
      dtype='<U14')

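Model
Before building the model, the next two cells encode a few reviews with the encoder and map the token ids back to words, to show what the model will receive as input.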

In [10]:
# Encode the whole batch left over from the batch-preview cell, then keep only
# the first three rows; zeros in the result are padding.
encoded_batch = encoder(example)
encoded_example = encoded_batch[:3].numpy()

encoded_example

array([[156,  10, 369, ...,   0,   0,   0],
       [ 11,   7,  29, ...,   0,   0,   0],
       [  1,   2, 119, ...,   0,   0,   0]])

In [11]:
# Compare each raw review with the text rebuilt from its token ids, by looking
# every id up in `vocab`.
for n in range(3):
  original_text = example[n].numpy()
  decoded_tokens = vocab[encoded_example[n]]
  print("Original: ", original_text)
  print("Round-trip: ", " ".join(decoded_tokens))
  print()

Original:  b'Before I start, I should point out that I know the editor of this film. We\'ve never met, but we belong to the same fanzine(those things which came before message boards), and we have talked on the phone, so I do have a bias here. Anyway...<br /><br />Somehow, it\'s ironic how while the "Rat Pack" culture of the late 50\'s and early to mid-60\'s made a comeback in the mid-90\'s, this movie, from the son of one of the original Rat Pack, and which was made in a similar fashion, was a flop. Not only that, it was a critical flop; I believe Peter Travers of Rolling Stone was the only one who did not savage this(he gave it a mixed review, as I recall). And while I don\'t think this is the greatest film in the world, and I am not a fan of the Rat Pack, or "cocktail," culture, I do think this is worth seeing.<br /><br />For one thing, this looks stylish, and moves right along. For another, the core performances are all good. Richard Dreyfus is surprisingly restrained here as the h

In [12]:
# Token ids -> 64-dimensional vectors. mask_zero=True marks id 0 (padding) as
# masked, which is how the variable sequence lengths are handled.
embedding = tf.keras.layers.Embedding(
    input_dim=len(encoder.get_vocabulary()),
    output_dim=64,
    mask_zero=True,
)

# A 64-unit LSTM wrapped in Bidirectional.
recurrent = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64))

# Dense head; the final layer has no activation and emits a single value.
hidden = tf.keras.layers.Dense(64, activation='relu')
output = tf.keras.layers.Dense(1)

# Raw strings go in first: the encoder is part of the model itself.
model = tf.keras.Sequential([encoder, embedding, recurrent, hidden, output])

In [13]:
# Collect, layer by layer, whether each layer of the model reports mask support.
masking_flags = []
for layer in model.layers:
  masking_flags.append(layer.supports_masking)
print(masking_flags)

[False, True, True, True, True]


In [1]:
# predict on a sample text without padding.
# This cell needs `tf` and `model` from the cells above: run them first in the
# current kernel, otherwise it fails with a NameError.

sample_text = (
    'The movie was cool. The animation and the graphics '
    'were out of this world. I would recommend this movie.'
)

# A batch holding just this one review.
sample_batch = [sample_text]
input_text = tf.convert_to_tensor(sample_batch)
predictions = model.predict(input_text)
print(predictions[0])

NameError: name 'tf' is not defined

In [None]:
# A negative-sounding review, to contrast with the positive one scored before.
negative_review_parts = (
    'The movie was bad. The animation and the graphics are poor ',
    ' I would not recommend this movie.',
)
sample_text = ''.join(negative_review_parts)

# Convert to tensor first
sample_batch = [sample_text]
input_text = tf.convert_to_tensor(sample_batch)
predictions = model.predict(input_text)
print(predictions)

In [None]:
# predict on a sample text with padding

# A 2000-word filler review is much longer than sample_text, so within this
# batch sample_text is padded out to the filler's length by the encoder.
padding = "the " * 2000

# Pass a string tensor rather than a NumPy array, like the other prediction
# cells ("Convert to tensor first"): NumPy unicode arrays are not accepted as
# model input by every Keras version.
predictions = model.predict(tf.convert_to_tensor([sample_text, padding]))

# Row 0 is the prediction for sample_text; row 1 belongs to the filler.
print(predictions[0])

In [None]:
# The model's last Dense(1) layer has no activation, so the loss is told to
# expect raw logits.
binary_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
adam = tf.keras.optimizers.Adam(1e-4)

model.compile(loss=binary_loss, optimizer=adam, metrics=['accuracy'])

In [None]:
# Train for 10 epochs. validation_steps=30 limits each validation pass to
# 30 batches of the test split.
fit_options = dict(
    epochs=10,
    validation_data=test_dataset,
    validation_steps=30,
)
history = model.fit(train_dataset, **fit_options)

In [None]:
# Evaluate on the full test split; the result unpacks into the loss and the
# single 'accuracy' metric configured in compile().
evaluation = model.evaluate(test_dataset)
test_loss, test_acc = evaluation

print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)

In [None]:
# One wide figure with two side-by-side panels.
plt.figure(figsize=(16, 8))

# Left panel: accuracy curves, y-axis capped at 1.
plt.subplot(1, 2, 1)
plot_graphs(history, 'accuracy')
plt.ylim(top=1)

# Right panel: loss curves, y-axis starting at 0.
plt.subplot(1, 2, 2)
plot_graphs(history, 'loss')
plt.ylim(bottom=0)

In [None]:
# Score a positive-sounding review with the trained model.
sample_text = ('The movie was cool. The animation and the graphics '
               'were out of this world. I would recommend this movie.')

# Pass a string tensor rather than a NumPy array, like the earlier prediction
# cells ("Convert to tensor first"): NumPy unicode arrays are not accepted as
# model input by every Keras version.
# The model ends in Dense(1) with no activation and is trained with
# from_logits=True, so each prediction is a raw logit, not a probability.
predictions = model.predict(tf.convert_to_tensor([sample_text]))