# **Neural Network for Text Classification**
*   Implementation of a classic neural network for text classification supporting **multi-precision** training.
    *   Implementation currently supports training in either double, single, or half precision.
    *   This implies that both the computations and parameter storage are done in the specified precision.
*   Implementation of a classic neural network for text classification supporting **mixed-precision** training.
    *   Implementation currently supports half precision computations with single precision parameter storage.
*   Both implementations are based on TensorFlow's official example: [TensorFlow Text Classification Tutorial](https://www.tensorflow.org/tutorials/keras/text_classification)

In [1]:
import time
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers, models

In [2]:
# Set a global random seed (named so the value is easy to find and change)
SEED = 12
tf.random.set_seed(SEED)
# Turn off the tensorflow_datasets progress bars
tfds.disable_progress_bar()

In [3]:
# Values below are specific to the sentiment analysis dataset
VOCAB_SIZE = 1_000       # max_tokens given to the text encoder / Embedding input size
EMBEDDING_DIM = 64       # Embedding output size
BUFFER_SIZE = 10_000     # shuffle buffer size for the training split
BATCH_SIZE = 64          # batch size for both splits
# Number of training runs the average training time is computed over
NUM_TRAINING_RUNS = 3

In [12]:
def build_and_train(train_ds, encoder, precision, epochs=5):
    """Build the text classifier in one uniform precision and time its training.

    Every layer after ``encoder`` is created with the dtype selected by
    ``precision``.

    Args:
        train_ds: Dataset passed unchanged to ``model.fit``.
        encoder: Layer placed first in the model (the text encoder built by
            the caller).
        precision: One of ``'double'``, ``'single'`` or ``'half'``.
        epochs: Number of epochs to train for. Defaults to 5.

    Returns:
        A ``(model, training_time)`` tuple, where ``training_time`` is the
        wall-clock duration of the ``model.fit`` call in seconds.

    Raises:
        ValueError: If ``precision`` is not one of the supported names.
    """
    # Validate first: an unknown or misspelled precision must not silently
    # fall back to half precision.
    dtype_names = {'double': 'float64', 'single': 'float32', 'half': 'float16'}
    if precision not in dtype_names:
        raise ValueError(
            "Unsupported precision {!r}; expected one of {}".format(
                precision, sorted(dtype_names)))
    dtype = tf.dtypes.as_dtype(dtype_names[precision])

    model = models.Sequential([
        encoder,
        layers.Embedding(VOCAB_SIZE, EMBEDDING_DIM, dtype=dtype),
        # Dropout and pooling get the explicit dtype too, so no layer after
        # the encoder is left on the default policy.
        layers.Dropout(0.2, dtype=dtype),
        layers.GlobalAveragePooling1D(dtype=dtype),
        layers.Dropout(0.2, dtype=dtype),
        layers.Dense(1, activation='sigmoid', dtype=dtype)
    ])
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.BinaryCrossentropy(),
                  metrics=['accuracy'])

    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments.
    start_time = time.perf_counter()
    model.fit(train_ds, epochs=epochs)
    training_time = time.perf_counter() - start_time

    return model, training_time

In [10]:
def build_and_train_mixed(train_ds, encoder, epochs=5):
    """Build the text classifier under the ``mixed_float16`` policy and time its training.

    The global Keras mixed-precision policy is switched to ``'mixed_float16'``
    while the model is built and trained. The policy that was active before
    the call is restored afterwards, even if building or training fails.

    Args:
        train_ds: Dataset passed unchanged to ``model.fit``.
        encoder: Layer placed first in the model (the text encoder built by
            the caller).
        epochs: Number of epochs to train for. Defaults to 5.

    Returns:
        A ``(model, training_time)`` tuple, where ``training_time`` is the
        wall-clock duration of the ``model.fit`` call in seconds.
    """
    # Remember the caller's policy so it can be restored exactly, rather than
    # assuming it was 'float32'.
    previous_policy = tf.keras.mixed_precision.global_policy()
    tf.keras.mixed_precision.set_global_policy('mixed_float16')
    try:
        model = models.Sequential([
            encoder,
            layers.Embedding(VOCAB_SIZE, EMBEDDING_DIM),
            layers.Dropout(0.2),
            layers.GlobalAveragePooling1D(),
            layers.Dropout(0.2),
            layers.Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam',
                      loss=tf.keras.losses.BinaryCrossentropy(),
                      metrics=['accuracy'])

        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments.
        start_time = time.perf_counter()
        model.fit(train_ds, epochs=epochs)
        training_time = time.perf_counter() - start_time
    finally:
        # Always undo the global change so later cells do not silently
        # inherit the mixed policy after an error or interrupt.
        tf.keras.mixed_precision.set_global_policy(previous_policy)

    return model, training_time

In [7]:
# Load the IMDB reviews dataset and build the batched train / test pipelines
dataset, info = tfds.load('imdb_reviews', as_supervised=True, with_info=True)
# Only the training split is shuffled
train_dataset = (
    dataset['train']
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
test_dataset = (
    dataset['test']
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

Downloading and preparing dataset 80.23 MiB (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...
Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.


In [8]:
# Create the text encoder and adapt it on the training texts (labels dropped)
train_texts = train_dataset.map(lambda text, label: text)
encoder = tf.keras.layers.TextVectorization(max_tokens=VOCAB_SIZE)
encoder.adapt(train_texts)

In [13]:
# Warm-up: run each training path once before taking any measurements,
# to make sure everything works; the results are deliberately discarded.
_ = build_and_train(
    train_dataset,
    encoder,
    precision='single',
)
_ = build_and_train_mixed(
    train_dataset,
    encoder,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [14]:
# Double precision: accumulate the total training time over all runs,
# then evaluate the model from the last run on the test set.
time_double = 0.0
for _ in range(NUM_TRAINING_RUNS):
    model_double, training_time = build_and_train(
        train_dataset, encoder, precision='double')
    time_double = time_double + training_time
metrics_double = model_double.evaluate(test_dataset, verbose=2)
accuracy_double = metrics_double[1]  # index 1 is the 'accuracy' metric

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
391/391 - 4s - loss: 0.4323 - accuracy: 0.8342 - 4s/epoch - 10ms/step


In [15]:
# Single precision: accumulate the total training time over all runs,
# then evaluate the model from the last run on the test set.
time_single = 0.0
for _ in range(NUM_TRAINING_RUNS):
    model_single, training_time = build_and_train(
        train_dataset, encoder, precision='single')
    time_single = time_single + training_time
metrics_single = model_single.evaluate(test_dataset, verbose=2)
accuracy_single = metrics_single[1]  # index 1 is the 'accuracy' metric

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
391/391 - 2s - loss: 0.4345 - accuracy: 0.8342 - 2s/epoch - 6ms/step


In [16]:
# Half precision: accumulate the total training time over all runs,
# then evaluate the model from the last run on the test set.
time_half = 0.0
for _ in range(NUM_TRAINING_RUNS):
    model_half, training_time = build_and_train(
        train_dataset, encoder, precision='half')
    time_half = time_half + training_time
metrics_half = model_half.evaluate(test_dataset, verbose=2)
accuracy_half = metrics_half[1]  # index 1 is the 'accuracy' metric

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
391/391 - 2s - loss: 0.4848 - accuracy: 0.7747 - 2s/epoch - 6ms/step


In [17]:
# Mixed half precision: accumulate the total training time over all runs,
# then evaluate the model from the last run on the test set.
time_mixed = 0.0
for _ in range(NUM_TRAINING_RUNS):
    model_mixed, training_time = build_and_train_mixed(
        train_dataset, encoder)
    time_mixed = time_mixed + training_time
metrics_mixed = model_mixed.evaluate(test_dataset, verbose=2)
accuracy_mixed = metrics_mixed[1]  # index 1 is the 'accuracy' metric

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
391/391 - 2s - loss: 0.4391 - accuracy: 0.8330 - 2s/epoch - 6ms/step


In [18]:
# Report the average training time per configuration, then the test accuracy
# of the last model trained in each configuration.
print("---RESULTS---")
for label, total_time in (
    ("Average training time in double precision:", time_double),
    ("Average training time in single precision:", time_single),
    ("Average training time in half precision:", time_half),
    ("Average training time in mixed half precision:", time_mixed),
):
    print(label, total_time / NUM_TRAINING_RUNS, "seconds")
print("-------------")
for label, accuracy in (
    ("Accuracy with double precision:", accuracy_double),
    ("Accuracy with single precision:", accuracy_single),
    ("Accuracy with half precision:", accuracy_half),
    ("Accuracy with mixed half precision:", accuracy_mixed),
):
    print(label, accuracy)

---RESULTS---
Average training time in double precision: 63.01487056414286 seconds
Average training time in single precision: 31.267759720484417 seconds
Average training time in half precision: 270.6589232285817 seconds
Average training time in mixed half precision: 42.05624405543009 seconds
-------------
Accuracy with double precision: 0.8341599702835083
Accuracy with single precision: 0.8341599702835083
Accuracy with half precision: 0.7747200131416321
Accuracy with mixed half precision: 0.8330399990081787
