In [25]:
# Train a model without any quantization, then run inference with copies of it whose
# activation functions are quantized to different numbers of levels, and compare their accuracy.
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist, fashion_mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import os

In [26]:
# Dataset: MNIST. The activations of a model trained on it are quantized further below.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale the pixel values into [0, 1] first, then store the result as float32.
x_train = (x_train / 255.0).astype(np.float32)
x_test = (x_test / 255.0).astype(np.float32)

def create_model():
    """Build and compile the un-quantized reference classifier.

    The network flattens a 28x28 input, passes it through two ReLU dense
    layers (300 and 100 units) and ends in a 10-unit softmax layer. It is
    compiled with Adam, sparse categorical cross-entropy and an accuracy
    metric.

    Returns:
        The compiled Sequential model.
    """
    layer_stack = [Flatten(input_shape=(28, 28))]
    layer_stack += [Dense(units, activation='relu') for units in (300, 100)]
    layer_stack.append(Dense(10, activation='softmax'))

    network = Sequential(layer_stack)
    network.compile(
        optimizer=Adam(),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Train the model without quantization.
# Seed TensorFlow's global random generator before the model is created so that
# weight initialisation and the shuffling done by fit() are repeatable when the
# notebook is re-run (best effort: some GPU kernels may still be nondeterministic).
tf.random.set_seed(42)
model = create_model()
# 10% of the training set is held out for validation after each epoch.
model.fit(x_train, y_train, epochs=5, validation_split=0.1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fdbe4435f40>

In [27]:
# Baseline: test-set accuracy of the trained model with its ordinary (un-quantized) activations.
# evaluate() returns the loss followed by the compiled metrics, so index 1 is the accuracy.
eval_metrics = model.evaluate(x_test, y_test, verbose=0)
original_accuracy = eval_metrics[1]
print(f"Original Model Accuracy: {original_accuracy:.4f}")


def quantized_relu(x, levels):
    """Apply ReLU and snap the result onto `levels` evenly spaced values.

    The rectified tensor is normalised by its maximum, rounded onto a uniform
    grid with `levels` points in [0, 1] and scaled back to [0, max].
    The maximum is taken over the whole tensor (no axis is given to
    `tf.reduce_max`), so the grid depends on every value passed in one call.

    Args:
        x: Tensor of pre-activation values.
        levels: Number of quantization levels; must be at least 2.

    Returns:
        A tensor shaped like `x` holding the quantized ReLU output.

    Raises:
        ValueError: If `levels` is smaller than 2 (the grid spacing
            1 / (levels - 1) would be undefined).
    """
    if levels < 2:
        raise ValueError(f"levels must be at least 2, got {levels}")

    x = tf.nn.relu(x)
    max_val = tf.reduce_max(x)
    # When no activation is positive max_val is 0 and x / max_val would be
    # 0 / 0 = NaN; divide by 1 instead so the output is all zeros.
    safe_max = tf.where(max_val > 0, max_val, tf.ones_like(max_val))
    # Normalize the rectified output to [0, 1] for quantization
    x_normalized = x / safe_max
    # Quantize the normalized output
    x_quantized = tf.round(x_normalized * (levels - 1)) / (levels - 1)
    # Scale back to [0, max_val]
    x_scaled_back = x_quantized * max_val
    return x_scaled_back
    
def quantized_softmax(x, levels):
    """Apply softmax and round each probability onto `levels` evenly spaced values.

    Args:
        x: Tensor of logits.
        levels: Number of quantization levels; must be at least 2.

    Returns:
        A tensor shaped like `x` whose entries are multiples of
        1 / (levels - 1). After rounding, the entries are no longer
        guaranteed to sum to 1.

    Raises:
        ValueError: If `levels` is smaller than 2 (the grid spacing
            1 / (levels - 1) would be undefined).
    """
    if levels < 2:
        raise ValueError(f"levels must be at least 2, got {levels}")

    x_softmax = tf.nn.softmax(x)
    # Since softmax outputs are already in [0, 1], we can quantize them directly
    x_quantized = tf.round(x_softmax * (levels - 1)) / (levels - 1)
    return x_quantized

Original Model Accuracy: 0.9742


In [28]:
# NOTE(review): this variable is set after `import tensorflow` has already run,
# and the recorded output of this cell still shows native I/W log lines. Move the
# assignment above the TensorFlow import if those messages should be silenced.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
quantization_levels = [4, 8, 16, 32]
accuracy_list = []

for levels in quantization_levels:
    # Same architecture as the trained model, but with quantized activations.
    # The lambdas read `levels` from the loop variable; that is safe here because
    # the model is converted and evaluated inside the same iteration.
    new_model = Sequential([
        Flatten(input_shape=(28, 28)),
        Dense(300, activation=lambda x: quantized_relu(x, levels)),
        Dense(100, activation=lambda x: quantized_relu(x, levels)),
        Dense(10, activation=lambda x: quantized_softmax(x, levels))
    ])
    # Copy the trained weights layer by layer from the un-quantized model.
    for layer, new_layer in zip(model.layers, new_model.layers):
        new_layer.set_weights(layer.get_weights())

    new_model.compile(optimizer=Adam(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # Representative dataset used by the converter to calibrate value ranges:
    # 100 randomly drawn training images, each yielded as a batch of one.
    def representative_dataset_gen():
        for _ in range(100):
            yield [x_train[np.random.randint(x_train.shape[0], size=1)]]

    # Convert the model to a TensorFlow Lite model with quantization
    converter = tf.lite.TFLiteConverter.from_keras_model(new_model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = representative_dataset_gen
    # Restrict the converter to the integer-only builtin op set: conversion
    # fails if an op cannot be quantized (there is no float fallback).
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    # Make the model's input and output tensors 8-bit unsigned integers too.
    converter.inference_input_type = tf.uint8
    converter.inference_output_type = tf.uint8

    tflite_model = converter.convert()

    # Save the quantized model in the current working directory
    current_directory = os.getcwd()
    model_path = os.path.join(current_directory, f'quantized_mnist_model_{levels}levels.tflite')
    with open(model_path, 'wb') as f:
        f.write(tflite_model)

    print(f"Quantized model saved to: {model_path}")

    # Load the TFLite model from the exact path it was just written to.
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    # Get input and output details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Quantize the test images with the model's own input scale / zero point,
    # rounding to the nearest step. A plain `(x * 255).astype(np.uint8)`
    # truncates, so float error (e.g. 254.99998) can land one step too low.
    input_scale, input_zero_point = input_details[0]['quantization']
    input_dtype = input_details[0]['dtype']
    if input_scale > 0:
        dtype_limits = np.iinfo(input_dtype)
        x_test_uint8 = np.clip(
            np.round(x_test / input_scale + input_zero_point),
            dtype_limits.min,
            dtype_limits.max,
        ).astype(input_dtype)
    else:
        # No quantization parameters reported: fall back to the plain 0..255 mapping.
        x_test_uint8 = np.round(x_test * 255).astype(np.uint8)

    # Function to run inference on a single image (already shaped as a batch of one)
    def run_inference(image):
        interpreter.set_tensor(input_details[0]['index'], image)
        interpreter.invoke()
        output = interpreter.get_tensor(output_details[0]['index'])
        # argmax is unaffected by the output scale, so no dequantization is needed.
        return np.argmax(output)

    # Compute accuracy on the test dataset, one image at a time
    total_predictions = len(x_test_uint8)
    correct_predictions = sum(
        int(run_inference(image[np.newaxis, ...]) == label)
        for image, label in zip(x_test_uint8, y_test)
    )

    accuracy = correct_predictions / total_predictions
    accuracy_list.append(accuracy)
    print(f"Quantized Model Accuracy with Quantized relu({levels} levels): {accuracy * 100:.2f}%")

INFO:tensorflow:Assets written to: /tmp/tmpr7niz7vy/assets


INFO:tensorflow:Assets written to: /tmp/tmpr7niz7vy/assets
2024-06-04 14:32:19.313307: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:357] Ignored output_format.
2024-06-04 14:32:19.313332: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:360] Ignored drop_control_dependency.
2024-06-04 14:32:19.313514: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /tmp/tmpr7niz7vy
2024-06-04 14:32:19.314510: I tensorflow/cc/saved_model/reader.cc:78] Reading meta graph with tags { serve }
2024-06-04 14:32:19.314524: I tensorflow/cc/saved_model/reader.cc:119] Reading SavedModel debug info (if present) from: /tmp/tmpr7niz7vy
2024-06-04 14:32:19.317358: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2024-06-04 14:32:19.332843: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: /tmp/tmpr7niz7vy
2024-06-04 14:32:19.340685: I tensorflow/cc/saved_model/loader.cc:301] SavedModel

Quantized model saved to: /nas/ei/home/ge36cig/Desktop/ma_zhang_masking_activation_functions_for_nns/jupyternotebook/quantized_mnist_model_4levels.tflite
Quantized Model Accuracy with Quantized relu(4 levels): 96.82%
INFO:tensorflow:Assets written to: /tmp/tmpplwjz9jk/assets


INFO:tensorflow:Assets written to: /tmp/tmpplwjz9jk/assets
2024-06-04 14:32:21.745904: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:357] Ignored output_format.
2024-06-04 14:32:21.745929: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:360] Ignored drop_control_dependency.
2024-06-04 14:32:21.746107: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /tmp/tmpplwjz9jk
2024-06-04 14:32:21.747141: I tensorflow/cc/saved_model/reader.cc:78] Reading meta graph with tags { serve }
2024-06-04 14:32:21.747155: I tensorflow/cc/saved_model/reader.cc:119] Reading SavedModel debug info (if present) from: /tmp/tmpplwjz9jk
2024-06-04 14:32:21.750047: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2024-06-04 14:32:21.765295: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: /tmp/tmpplwjz9jk
2024-06-04 14:32:21.773393: I tensorflow/cc/saved_model/loader.cc:301] SavedModel

Quantized model saved to: /nas/ei/home/ge36cig/Desktop/ma_zhang_masking_activation_functions_for_nns/jupyternotebook/quantized_mnist_model_8levels.tflite
Quantized Model Accuracy with Quantized relu(8 levels): 97.33%
INFO:tensorflow:Assets written to: /tmp/tmpm7xdlif8/assets


INFO:tensorflow:Assets written to: /tmp/tmpm7xdlif8/assets
2024-06-04 14:32:24.004381: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:357] Ignored output_format.
2024-06-04 14:32:24.004405: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:360] Ignored drop_control_dependency.
2024-06-04 14:32:24.004584: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /tmp/tmpm7xdlif8
2024-06-04 14:32:24.005549: I tensorflow/cc/saved_model/reader.cc:78] Reading meta graph with tags { serve }
2024-06-04 14:32:24.005563: I tensorflow/cc/saved_model/reader.cc:119] Reading SavedModel debug info (if present) from: /tmp/tmpm7xdlif8
2024-06-04 14:32:24.008298: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2024-06-04 14:32:24.023112: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: /tmp/tmpm7xdlif8
2024-06-04 14:32:24.030871: I tensorflow/cc/saved_model/loader.cc:301] SavedModel

Quantized model saved to: /nas/ei/home/ge36cig/Desktop/ma_zhang_masking_activation_functions_for_nns/jupyternotebook/quantized_mnist_model_16levels.tflite
Quantized Model Accuracy with Quantized relu(16 levels): 97.38%
INFO:tensorflow:Assets written to: /tmp/tmpfdtj936a/assets


INFO:tensorflow:Assets written to: /tmp/tmpfdtj936a/assets
2024-06-04 14:32:26.213952: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:357] Ignored output_format.
2024-06-04 14:32:26.213974: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:360] Ignored drop_control_dependency.
2024-06-04 14:32:26.214169: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /tmp/tmpfdtj936a
2024-06-04 14:32:26.215146: I tensorflow/cc/saved_model/reader.cc:78] Reading meta graph with tags { serve }
2024-06-04 14:32:26.215160: I tensorflow/cc/saved_model/reader.cc:119] Reading SavedModel debug info (if present) from: /tmp/tmpfdtj936a
2024-06-04 14:32:26.217784: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2024-06-04 14:32:26.232817: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: /tmp/tmpfdtj936a
2024-06-04 14:32:26.240437: I tensorflow/cc/saved_model/loader.cc:301] SavedModel

Quantized model saved to: /nas/ei/home/ge36cig/Desktop/ma_zhang_masking_activation_functions_for_nns/jupyternotebook/quantized_mnist_model_32levels.tflite
Quantized Model Accuracy with Quantized relu(32 levels): 97.39%


In [29]:
# Recap: accuracy measured for each quantization level, in sweep order.
for position, levels in enumerate(quantization_levels):
    print(f"Quantized Model Accuracy with Quantized relu({levels} levels): {accuracy_list[position] * 100:.2f}%")

Quantized Model Accuracy with Quantized relu(4 levels): 96.82%
Quantized Model Accuracy with Quantized relu(8 levels): 97.33%
Quantized Model Accuracy with Quantized relu(16 levels): 97.38%
Quantized Model Accuracy with Quantized relu(32 levels): 97.39%
