# **Recurrent Neural Network for Text Classification**
*   Implementation of a recurrent neural network classifier supporting **multi-precision** training.
    *   The implementation currently supports training in double, single, or half precision.
    *   In each case, both the computations and the parameter storage use the specified precision.
*   Implementation of a recurrent neural network classifier supporting **mixed-precision** training.
    *   Implementation currently supports half precision computations with single precision parameter storage.
*   Both implementations are based on TensorFlow's official tutorial: [TensorFlow RNN Tutorial](https://www.tensorflow.org/text/tutorials/text_classification_rnn)

In [1]:
import time
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers, models

In [3]:
# Turn off the tensorflow_datasets progress bars to keep the cell output short
tfds.disable_progress_bar()
# Set TensorFlow's global random seed
tf.random.set_seed(12)

In [12]:
# Number of training runs per precision; the reported training time is the
# total over these runs divided by this value
NUM_TRAINING_RUNS = 3
# Values are specific to the imdb_reviews dataset
# Maximum vocabulary size (max_tokens) of the text-vectorization encoder
VOCAB_SIZE = 1000
# Size of the shuffle buffer applied to the training set
BUFFER_SIZE = 10000
# Number of examples per batch for both the training and the test set
BATCH_SIZE = 64

In [7]:
def build_and_train(train_ds, encoder, precision, epochs=5):
    """Build the bidirectional-LSTM classifier in one precision and time its training.

    Every layer after the encoder is created with the dtype selected by
    ``precision``, so both the computations and the stored parameters of
    those layers use that dtype.

    Args:
        train_ds: Training dataset, passed unchanged to ``model.fit``.
        encoder: Text-vectorization layer used as the first layer of the
            model; the size of its vocabulary sets the embedding ``input_dim``.
        precision: One of ``'double'``, ``'single'`` or ``'half'``.
        epochs: Number of training epochs. Defaults to 5.

    Returns:
        A ``(model, training_time)`` tuple where ``training_time`` is the
        duration of the ``model.fit`` call in seconds.

    Raises:
        ValueError: If ``precision`` is not one of the supported names.
    """
    # Validate first so that a typo cannot silently fall back to half precision.
    supported = ('double', 'single', 'half')
    if precision not in supported:
        raise ValueError(
            f"Unsupported precision {precision!r}; expected one of {supported}")

    dtype = {
        'double': tf.float64,
        'single': tf.float32,
        'half': tf.float16,
    }[precision]

    model = models.Sequential([
      encoder,
      layers.Embedding(
          input_dim=len(encoder.get_vocabulary()),
          output_dim=64,
          mask_zero=True,
          dtype=dtype),
      layers.Bidirectional(layers.LSTM(64), dtype=dtype),
      layers.Dense(64, activation='relu', dtype=dtype),
      # Single unit producing a logit; the loss below is built with from_logits=True
      layers.Dense(1, dtype=dtype)
    ])
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])

    # Only the fit call is timed; perf_counter is monotonic, so the interval
    # is not affected by system clock adjustments.
    start_time = time.perf_counter()
    model.fit(train_ds, epochs=epochs)
    training_time = time.perf_counter() - start_time

    return model, training_time

In [17]:
def build_and_train_mixed(train_ds, encoder, epochs=5):
    """Build the classifier under the ``mixed_float16`` policy and time its training.

    The global Keras mixed-precision policy is switched to ``'mixed_float16'``
    while the model is built, compiled and trained, and the policy that was
    active on entry is restored afterwards, even if an error is raised.

    Args:
        train_ds: Training dataset, passed unchanged to ``model.fit``.
        encoder: Text-vectorization layer used as the first layer of the
            model; the size of its vocabulary sets the embedding ``input_dim``.
        epochs: Number of training epochs. Defaults to 5.

    Returns:
        A ``(model, training_time)`` tuple where ``training_time`` is the
        duration of the ``model.fit`` call in seconds.
    """
    # Remember the caller's policy so it can be restored exactly, rather than
    # assuming it was 'float32'.
    previous_policy = tf.keras.mixed_precision.global_policy()
    tf.keras.mixed_precision.set_global_policy('mixed_float16')
    try:
        # NOTE(review): the TensorFlow mixed-precision guide recommends giving
        # the output layer dtype='float32'; left unchanged here so the
        # benchmark stays comparable with earlier results.
        model = models.Sequential([
          encoder,
          layers.Embedding(
              input_dim=len(encoder.get_vocabulary()),
              output_dim=64,
              mask_zero=True),
          layers.Bidirectional(layers.LSTM(64)),
          layers.Dense(64, activation='relu'),
          layers.Dense(1)
        ])
        model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  optimizer=tf.keras.optimizers.Adam(1e-4),
                  metrics=['accuracy'])

        # Only the fit call is timed; perf_counter is monotonic.
        start_time = time.perf_counter()
        model.fit(train_ds, epochs=epochs)
        training_time = time.perf_counter() - start_time
    finally:
        # Always undo the global change so later models are not built under
        # mixed precision by accident.
        tf.keras.mixed_precision.set_global_policy(previous_policy)

    return model, training_time

In [4]:
# Download/load the IMDB reviews as (text, label) pairs together with the dataset metadata
dataset, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True)
train_dataset = dataset['train']
test_dataset = dataset['test']

# Training pipeline: shuffle, then batch, then prefetch
train_dataset = train_dataset.shuffle(BUFFER_SIZE)
train_dataset = train_dataset.batch(BATCH_SIZE)
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)

# Test pipeline: batch and prefetch only (no shuffling)
test_dataset = test_dataset.batch(BATCH_SIZE)
test_dataset = test_dataset.prefetch(tf.data.AUTOTUNE)

In [5]:
# Build the text-vectorization layer, keeping at most VOCAB_SIZE tokens
encoder = tf.keras.layers.TextVectorization(max_tokens=VOCAB_SIZE)
# Fit the vocabulary on the review texts only (labels are dropped)
train_text_only = train_dataset.map(lambda review, _label: review)
encoder.adapt(train_text_only)

In [8]:
# Warm-up: run each training routine once to check that everything works
# before the measured runs; the results are discarded
_ = build_and_train(train_ds=train_dataset, encoder=encoder, precision='single')
_ = build_and_train_mixed(train_ds=train_dataset, encoder=encoder)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [13]:
# Double precision: train NUM_TRAINING_RUNS times and accumulate the total training time
run_times_double = []
for _run in range(NUM_TRAINING_RUNS):
    model_double, elapsed = build_and_train(train_dataset, encoder, 'double')
    run_times_double.append(elapsed)
time_double = sum(run_times_double, 0.0)

# Accuracy is taken from the model of the last run (index 1 = accuracy metric)
eval_double = model_double.evaluate(test_dataset, verbose=2)
accuracy_double = eval_double[1]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
391/391 - 13s - loss: 0.3222 - accuracy: 0.8587 - 13s/epoch - 34ms/step


In [14]:
# Single precision: train NUM_TRAINING_RUNS times and accumulate the total training time
run_times_single = []
for _run in range(NUM_TRAINING_RUNS):
    model_single, elapsed = build_and_train(train_dataset, encoder, 'single')
    run_times_single.append(elapsed)
time_single = sum(run_times_single, 0.0)

# Accuracy is taken from the model of the last run (index 1 = accuracy metric)
eval_single = model_single.evaluate(test_dataset, verbose=2)
accuracy_single = eval_single[1]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
391/391 - 12s - loss: 0.3168 - accuracy: 0.8617 - 12s/epoch - 31ms/step


In [15]:
# Half precision: train NUM_TRAINING_RUNS times and accumulate the total training time
run_times_half = []
for _run in range(NUM_TRAINING_RUNS):
    model_half, elapsed = build_and_train(train_dataset, encoder, 'half')
    run_times_half.append(elapsed)
time_half = sum(run_times_half, 0.0)

# Accuracy is taken from the model of the last run (index 1 = accuracy metric)
eval_half = model_half.evaluate(test_dataset, verbose=2)
accuracy_half = eval_half[1]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
391/391 - 12s - loss: 0.4990 - accuracy: 0.7753 - 12s/epoch - 32ms/step


In [18]:
# Mixed half precision: train NUM_TRAINING_RUNS times and accumulate the total training time
run_times_mixed = []
for _run in range(NUM_TRAINING_RUNS):
    model_mixed, elapsed = build_and_train_mixed(train_dataset, encoder)
    run_times_mixed.append(elapsed)
time_mixed = sum(run_times_mixed, 0.0)

# Accuracy is taken from the model of the last run (index 1 = accuracy metric)
eval_mixed = model_mixed.evaluate(test_dataset, verbose=2)
accuracy_mixed = eval_mixed[1]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
391/391 - 13s - loss: 0.3197 - accuracy: 0.8634 - 13s/epoch - 32ms/step


In [19]:
# Summary: average training time per run, then test accuracy, for each precision
print("---RESULTS---")
timing_rows = [
    ("Average training time in double precision:", time_double),
    ("Average training time in single precision:", time_single),
    ("Average training time in half precision:", time_half),
    ("Average training time in mixed half precision:", time_mixed),
]
for label, total_time in timing_rows:
    # Each total covers NUM_TRAINING_RUNS runs, so divide to get the mean
    print(label, total_time / NUM_TRAINING_RUNS, "seconds")
print("-------------")
accuracy_rows = [
    ("Accuracy with double precision:", accuracy_double),
    ("Accuracy with single precision:", accuracy_single),
    ("Accuracy with half precision:", accuracy_half),
    ("Accuracy with mixed half precision:", accuracy_mixed),
]
for label, accuracy in accuracy_rows:
    print(label, accuracy)

---RESULTS---
Average training time in double precision: 203.3181173801422 seconds
Average training time in single precision: 159.53325843811035 seconds
Average training time in half precision: 180.95898509025574 seconds
Average training time in mixed half precision: 161.46620202064514 seconds
-------------
Accuracy with double precision: 0.858680009841919
Accuracy with single precision: 0.8617200255393982
Accuracy with half precision: 0.7752799987792969
Accuracy with mixed half precision: 0.8634399771690369
