In [None]:
import numpy as np

import tensorflow_datasets as tfds
import tensorflow as tf

import matplotlib.pyplot as plt

In [None]:
def plot_graphs(history, metric):
  """Plot a training metric, and its validation counterpart when present, per epoch.

  Args:
    history: Object exposing a ``history`` mapping from metric name to a
      per-epoch sequence of values (the object returned by ``model.fit``).
    metric: Key of the training metric to plot, e.g. ``'accuracy'`` or
      ``'loss'``. The validation curve is looked up under ``'val_' + metric``.

  Raises:
    KeyError: If ``metric`` itself is missing from ``history.history``.
  """
  val_metric = 'val_' + metric
  legend_entries = [metric]

  plt.plot(history.history[metric])
  # A model fitted without validation data has no 'val_*' entries; skip the
  # second curve instead of failing with a KeyError.
  if val_metric in history.history:
    plt.plot(history.history[val_metric])
    legend_entries.append(val_metric)

  plt.xlabel("Epochs")
  plt.ylabel(metric)
  plt.legend(legend_entries)

## Load Dataset

Source: the `imdb_reviews` dataset from the TensorFlow Datasets catalog — https://www.tensorflow.org/datasets/catalog/imdb_reviews


In [None]:
# Load the IMDB reviews dataset through TensorFlow Datasets.
# With as_supervised=True each element is a (text, label) pair.
dataset = tfds.load(
    name='imdb_reviews',
    as_supervised=True,
)

## Train & Test Dataset

In [None]:
# Pull the two splits used by this notebook out of the loaded dataset.
train_dataset = dataset['train']
test_dataset = dataset['test']

# Kept as the cell's last expression so the notebook displays the element spec.
train_dataset.element_spec

## Example of Data

Labels: 0 = negative review, 1 = positive review.

In [None]:
# Peek at one raw (review text, label) pair from the training split.
for review, sentiment in train_dataset.take(1):
  print('text: ', review.numpy())
  print('label: ', sentiment.numpy())

## Data Preparation

In [None]:
# Input-pipeline settings:
#   buffer_size - size of the shuffle buffer used for the training split
#   batch_size  - number of reviews per batch for both splits
buffer_size, batch_size = 10_000, 64

In [None]:
# NOTE: this cell rebinds train_dataset/test_dataset to their batched versions,
# so running it twice without re-running the split cell batches the batches.

# Training pipeline: shuffle, group into batches, then prefetch.
train_dataset = (
    train_dataset
    .shuffle(buffer_size)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

# Test pipeline: same batching and prefetching, but no shuffling.
test_dataset = (
    test_dataset
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Show the entry at index 3 of the first training batch together with its label.
for texts, labels in train_dataset.take(1):
  print('texts: ', texts.numpy()[3])
  print()
  print('labels: ', labels.numpy()[3])

In [None]:
# Upper bound on the vocabulary size learned by the text encoder.
VOCAB_SIZE = 1000

# The `experimental.preprocessing` namespace is a deprecated alias and is
# (to my knowledge) missing from recent TensorFlow/Keras releases. Prefer the
# stable location and only fall back to the old one on older installations.
try:
  TextVectorization = tf.keras.layers.TextVectorization
except AttributeError:
  TextVectorization = tf.keras.layers.experimental.preprocessing.TextVectorization

encoder = TextVectorization(max_tokens=VOCAB_SIZE)

# The vocabulary is learned from the review text only, so drop the labels
# before handing the dataset to adapt().
encoder.adapt(train_dataset.map(lambda text, label: text))

## Create Model

In [None]:
# Build the classifier layer by layer; the order of add() calls is the order
# in which data flows through the network.
model = tf.keras.Sequential()

# Raw strings -> integer token ids.
model.add(encoder)

# Token ids -> 64-dimensional vectors; mask_zero=True enables masking of index 0.
model.add(
    tf.keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,
        mask_zero=True,
    )
)

# Read each sequence in both directions with a 64-unit LSTM.
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))

# Classification head: hidden layer, dropout, then two outputs with no
# activation (one raw logit per class).
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(2))

In [None]:
# The model's last Dense layer has no activation, so the loss is told to
# expect raw logits; labels are integer class ids (sparse), not one-hot.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(1e-4)

model.compile(
    loss=loss_fn,
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train for 15 epochs. Validation uses the test split, limited to 30 batches
# per epoch via validation_steps.
history = model.fit(
    x=train_dataset,
    epochs=15,
    validation_data=test_dataset,
    validation_steps=30,
)

## Test Model

In [None]:
# Evaluate on the full test split; with the compiled metrics this returns
# the loss followed by the accuracy.
test_loss, test_acc = model.evaluate(test_dataset)

for caption, value in (('Test Loss:', test_loss), ('Test Accuracy:', test_acc)):
  print(caption, value)

## Visualization

In [None]:
# Side-by-side training curves: accuracy on the left, loss on the right.
plt.figure(figsize=(16, 8))

# (metric, (lower y-limit, upper y-limit)); None leaves that bound automatic.
panels = (
    ('accuracy', (None, 1)),
    ('loss', (0, None)),
)
for position, (metric, (y_min, y_max)) in enumerate(panels, start=1):
  plt.subplot(1, 2, position)
  plot_graphs(history, metric)
  plt.ylim(y_min, y_max)
  plt.title('Training vs. validation ' + metric)

# Render explicitly so the cell does not echo the return value of the last
# pyplot call as text output.
plt.show()

## Predict Model

Classify new input text with the trained model.

In [None]:
# Human-readable class names; the list index is the class id
# (0 -> negative, 1 -> positive).
label_class = ["negative", "positive"]

In [None]:
sample_text = 'the movie is good scenario'

# One row of outputs per input text; each row holds the (negative, positive)
# scores produced by the model's two-unit output layer.
predictions = model.predict(np.array([sample_text]))
neg_score, pos_score = predictions[0]

# Map the winning class id to its name via label_class (defined in the
# previous cell) instead of repeating the label strings here. A tie goes to
# the positive class.
result = label_class[0 if neg_score > pos_score else 1]

print(sample_text)
print("This comment is:")
print(result)

In [None]:
sample_text = 'bad movie'

# One row of outputs per input text; each row holds the (negative, positive)
# scores produced by the model's two-unit output layer.
predictions = model.predict(np.array([sample_text]))
neg_score, pos_score = predictions[0]

# Map the winning class id to its name via label_class (defined earlier in
# this section) instead of repeating the label strings here. A tie goes to
# the positive class.
result = label_class[0 if neg_score > pos_score else 1]

print(sample_text)
print("This comment is:")
print(result)