# **Neural Network for Text Classification**
*   Implementation of a classic neural network for text classification supporting **multi-precision** training.
    *   Implementation currently supports training in either double, single, or half precision.
    *   This implies that both the computations and parameter storage are done in the specified precision.
*   Implementation of a classic neural network for text classification supporting **mixed-precision** training.
    *   Implementation currently supports half precision computations with single precision parameter storage.
*   Implementations are based off of TensorFlow's very own example: [TensorFlow Text Classification Tutorial](https://www.tensorflow.org/tutorials/keras/text_classification)

In [1]:
import time, os, re, shutil, string
import tensorflow as tf
from tensorflow.keras import layers, models

In [2]:
# Set a global random seed
tf.random.set_seed(12)

In [3]:
# Define number of training runs to compute the average training time over
NUM_TRAINING_RUNS = 2
# Values are specific to the sentiment analysis dataset
MAX_FEATURES = 10000
EMBEDDING_DIM = 16
SEQ_LENGTH = 250

In [4]:
def build_and_train(train_ds, precision):
    if precision == 'double':
        dtype = tf.float64
    elif precision == 'single':
        dtype = tf.float32
    else: # half
        dtype = tf.float16

    model = models.Sequential([
        layers.Embedding(MAX_FEATURES, EMBEDDING_DIM, dtype=dtype),
        layers.Dropout(0.2),
        layers.GlobalAveragePooling1D(dtype=dtype),
        layers.Dropout(0.2),
        layers.Dense(1, activation='sigmoid', dtype=dtype)
    ])
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.BinaryCrossentropy(),
                  metrics=['accuracy'])

    start_time = time.time()
    model.fit(train_ds, epochs=5)
    end_time = time.time()
    training_time = end_time - start_time

    return model, training_time

In [5]:
def build_and_train_mixed(train_ds):
    model = models.Sequential([
        layers.Embedding(MAX_FEATURES, EMBEDDING_DIM),
        layers.Dropout(0.2),
        layers.GlobalAveragePooling1D(),
        layers.Dropout(0.2),
        layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.BinaryCrossentropy(),
                  metrics=['accuracy'])

    start_time = time.time()
    model.fit(train_ds, epochs=5)
    end_time = time.time()
    training_time = end_time - start_time

    tf.keras.mixed_precision.set_global_policy('float32')
    return model, training_time

In [6]:
# Load the sentiment analysis dataset
url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"
dataset = tf.keras.utils.get_file("aclImdb_v1", url, untar=True, cache_dir='.', cache_subdir='')
dataset_dir = os.path.join(os.path.dirname(dataset), 'aclImdb')
train_dir = os.path.join(dataset_dir, 'train')
remove_dir = os.path.join(train_dir, 'unsup')
shutil.rmtree(remove_dir)

Downloading data from https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz


In [7]:
# Separate the training and test sets
raw_train_ds = tf.keras.utils.text_dataset_from_directory(
    'aclImdb/train',
    batch_size=32,
    validation_split=0.2,
    subset='training',
    seed=12)
raw_test_ds = tf.keras.utils.text_dataset_from_directory(
    'aclImdb/train',
    batch_size=32,
    validation_split=0.2,
    subset='validation',
    seed=12)

Found 25000 files belonging to 2 classes.
Using 20000 files for training.
Found 25000 files belonging to 2 classes.
Using 5000 files for validation.


In [8]:
# Prepare the dataset for training
def custom_standardization(input_data):
  lowercase = tf.strings.lower(input_data)
  stripped_html = tf.strings.regex_replace(lowercase, '<br />', ' ')
  return tf.strings.regex_replace(stripped_html,'[%s]' % re.escape(string.punctuation), '')

vectorize_layer = layers.TextVectorization(
    standardize=custom_standardization,
    max_tokens=MAX_FEATURES,
    output_mode='int',
    output_sequence_length=SEQ_LENGTH)

# Make a text-only dataset (without labels), then call adapt
train_text = raw_train_ds.map(lambda x, y: x)
vectorize_layer.adapt(train_text)

def vectorize_text(text, label):
  text = tf.expand_dims(text, -1)
  return vectorize_layer(text), label

train_ds = raw_train_ds.map(vectorize_text)
test_ds = raw_test_ds.map(vectorize_text)

# Configure the dataset for performance
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [9]:
# Test run to make sure that everything is working properly before starting actual measurements
_ = build_and_train(train_ds, precision='single')
_ = build_and_train_mixed(train_ds)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [10]:
# Train with double precision
time_double = 0.0
for _ in range(NUM_TRAINING_RUNS):
    model_double, training_time = build_and_train(train_ds, 'double')
    time_double += training_time
accuracy_double = model_double.evaluate(test_ds, verbose=2)[1]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
157/157 - 1s - loss: 0.3492 - accuracy: 0.8630 - 675ms/epoch - 4ms/step


In [11]:
# Train with single precision
time_single = 0.0
for _ in range(NUM_TRAINING_RUNS):
    model_single, training_time = build_and_train(train_ds, 'single')
    time_single += training_time
accuracy_single = model_single.evaluate(test_ds, verbose=2)[1]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
157/157 - 0s - loss: 0.3487 - accuracy: 0.8630 - 358ms/epoch - 2ms/step


In [12]:
# Train with half precision
time_half = 0.0
for _ in range(NUM_TRAINING_RUNS):
    model_half, training_time = build_and_train(train_ds, 'half')
    time_half += training_time
accuracy_half = model_half.evaluate(test_ds, verbose=2)[1]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
157/157 - 0s - loss: 0.3777 - accuracy: 0.8336 - 361ms/epoch - 2ms/step


In [15]:
# Train with mixed half precision
time_mixed = 0.0
for _ in range(NUM_TRAINING_RUNS):
    model_mixed, training_time = build_and_train_mixed(train_ds)
    time_mixed += training_time
accuracy_mixed = model_mixed.evaluate(test_ds, verbose=2)[1]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
157/157 - 0s - loss: 0.3478 - accuracy: 0.8624 - 338ms/epoch - 2ms/step


In [16]:
print("---RESULTS---")
print("Average training time in double precision:", time_double / NUM_TRAINING_RUNS, "seconds")
print("Average training time in single precision:", time_single/ NUM_TRAINING_RUNS, "seconds")
print("Average training time in half precision:", time_half/ NUM_TRAINING_RUNS, "seconds")
print("Average training time in mixed half precision:", time_mixed/ NUM_TRAINING_RUNS, "seconds")
print("-------------")
print("Accuracy with double precision:", accuracy_double)
print("Accuracy with single precision:", accuracy_single)
print("Accuracy with half precision:", accuracy_half)
print("Accuracy with mixed half precision:", accuracy_mixed)

---RESULTS---
Average training time in double precision: 68.55340373516083 seconds
Average training time in single precision: 39.94835674762726 seconds
Average training time in half precision: 61.70956468582153 seconds
Average training time in mixed half precision: 42.24100053310394 seconds
-------------
Accuracy with double precision: 0.8629999756813049
Accuracy with single precision: 0.8629999756813049
Accuracy with half precision: 0.8335999846458435
Accuracy with mixed half precision: 0.8623999953269958
