In [1]:
# Train a model without any quantization, then run inference with the same weights but with quantized
# activation functions at several quantization levels, and compare the resulting accuracies.
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization

In [2]:
# Load CIFAR-10 through the Keras dataset helper.
cifar10 = tf.keras.datasets.cifar10
(train_x, train_y), (test_x, test_y) = cifar10.load_data()
print(f'\n train_x:{train_x.shape}, train_y:{train_y.shape}, test_x:{test_x.shape}, test_y:{test_y.shape}')

# Normalize pixels to [0, 1]. Cast to float32 first: dividing the raw integer
# arrays by a Python float would produce float64 arrays, doubling memory use
# for no benefit since Keras computes in float32 by default.
x_train = train_x.astype('float32') / 255.0
x_test = test_x.astype('float32') / 255.0

# Labels are integer class ids, as expected by sparse_categorical_crossentropy.
y_train, y_test = tf.cast(train_y, tf.int16), tf.cast(test_y, tf.int16)


 train_x:(50000, 32, 32, 3), train_y:(50000, 1), test_x:(10000, 32, 32, 3), test_y:(10000, 1)


In [3]:
def create_model(activation='relu', output_activation='softmax', input_shape=None):
    """Build and compile the CNN classifier used throughout this notebook.

    Architecture: Conv(32) -> BatchNorm -> Conv(64) -> MaxPool -> Dropout
    -> Conv(128) -> Dropout -> Flatten -> Dense(128) -> Dropout -> Dense(10).

    Args:
        activation: Activation for the three convolutional layers and the
            hidden dense layer. Any value Keras accepts (name or callable).
            Defaults to 'relu', the original hard-coded choice.
        output_activation: Activation for the 10-unit output layer.
            Defaults to 'softmax'.
        input_shape: Shape of one input sample. Defaults to
            ``x_train.shape[1:]`` (read from the notebook's global training
            array at call time).

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, sparse categorical
        cross-entropy loss, sparse categorical accuracy metric).
    """
    if input_shape is None:
        input_shape = x_train.shape[1:]

    model = Sequential([
        # Convolutional layer 1
        Conv2D(32, kernel_size=(3, 3), activation=activation, input_shape=input_shape),
        BatchNormalization(),

        # Convolutional layer 2
        Conv2D(64, (3, 3), activation=activation),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        # Convolutional layer 3
        Conv2D(128, (3, 3), activation=activation),
        Dropout(0.4),

        Flatten(),

        # Fully connected layer 1
        Dense(128, activation=activation),
        Dropout(0.3),

        # Output layer
        Dense(10, activation=output_activation),
    ])

    # Compile the model
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])

    return model

# Fit the un-quantized baseline network; validation_split=0.1 reserves a
# tenth of the training data for validation.
model = create_model()
model.fit(
    x=x_train,
    y=y_train,
    epochs=10,
    validation_split=0.1,
)

# # Visualize the model
# model.summary()
# print("\nDetailed Summary with Activation Functions:")
# for layer in model.layers:
#     if hasattr(layer, 'activation'):
#         activation = layer.activation.__name__ if layer.activation else 'None'
#         print(f"Layer: {layer.name}, Activation Function: {activation}")
#     else:
#         print(f"Layer: {layer.name}, Activation Function: None")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1b75c38e908>

In [4]:
# Reference point: test-set accuracy of the trained model with its ordinary
# (un-quantized) activations. evaluate() returns [loss, accuracy] because the
# model was compiled with a single metric.
baseline_metrics = model.evaluate(x_test, y_test, verbose=0)
original_accuracy = baseline_metrics[1]
print(f"Original Model Accuracy: {original_accuracy:.4f}")

# layer_outputs_model = tf.keras.Model(inputs=model.input, outputs=model.layers[2].output)
# test_data = x_test[0:1]
# layer_activations = layer_outputs_model.predict(test_data)
# print("shape of layer_activations: ", layer_activations.shape)
# print("layer_activations: ", layer_activations)

def quantized_relu(x, levels):
    """ReLU whose output is snapped to `levels` evenly spaced values.

    The ReLU output is scaled to [0, 1] by its maximum, rounded onto a grid
    of `levels` points, and scaled back to [0, max].

    Note that the maximum is taken over the entire tensor passed in
    (``tf.reduce_max`` with no axis), so the quantization grid depends on
    everything in that tensor, not on each sample individually.

    Args:
        x: Pre-activation tensor.
        levels: Number of quantization levels. Must be at least 2, since
            ``levels - 1`` is used as a divisor.

    Returns:
        Tensor of the same shape as `x` holding the quantized activations.
    """
    x = tf.nn.relu(x)
    max_val = tf.reduce_max(x)
    # divide_no_nan yields 0 where the denominator is 0, so a tensor that is
    # all zeros after ReLU stays all zeros instead of turning into NaN (0 / 0).
    x_normalized = tf.math.divide_no_nan(x, max_val)
    # Quantize the normalized output
    x_quantized = tf.round(x_normalized * (levels - 1)) / (levels - 1)
    # Scale back to [0, max_val]
    x_scaled_back = x_quantized * max_val
    return x_scaled_back

def quantized_softmax(x, levels):
    """Softmax followed by rounding onto `levels` evenly spaced values in [0, 1].

    Args:
        x: Logits tensor.
        levels: Number of quantization levels (``levels - 1`` is the divisor).

    Returns:
        The softmax of `x`, rounded to the nearest multiple of 1 / (levels - 1).
    """
    steps = levels - 1
    probabilities = tf.nn.softmax(x)
    # Softmax output already lies in [0, 1], so it is quantized without rescaling.
    return tf.round(probabilities * steps) / steps

quantization_levels = [4, 8, 16, 32, 64]


def _build_quantized_relu_model(levels):
    """Return an uncompiled copy of the CNN with activations quantized to `levels` levels."""
    # `levels` is a parameter here, so each model's activations are bound to
    # their own value. Lambdas written directly in the loop body would all
    # share the loop variable and see whatever value it holds when called.
    def relu_q(x):
        return quantized_relu(x, levels)

    def softmax_q(x):
        return quantized_softmax(x, levels)

    return Sequential([
        # Convolutional layer 1
        Conv2D(32, kernel_size=(3, 3), activation=relu_q, input_shape=x_train.shape[1:]),
        BatchNormalization(),

        # Convolutional layer 2
        Conv2D(64, (3, 3), activation=relu_q),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        # Convolutional layer 3
        Conv2D(128, (3, 3), activation=relu_q),
        Dropout(0.4),

        Flatten(),

        # Fully connected layer 1
        Dense(128, activation=relu_q),
        Dropout(0.3),

        # Output layer
        Dense(10, activation=softmax_q),
    ])


# For each quantization level: rebuild the network with quantized activations,
# load the trained weights, and measure test accuracy against the baseline.
for levels in quantization_levels:
    new_model = _build_quantized_relu_model(levels)

    # get trained weights (layers are paired by position, so both models must
    # have the same layer sequence)
    for layer, new_layer in zip(model.layers, new_model.layers):
        new_layer.set_weights(layer.get_weights())

    # Compile the model
    new_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])

    # visualize the quantized model
#     new_model.summary()
#     print("\nDetailed Summary with Activation Functions:")
#     for layer in new_model.layers:
#         if hasattr(layer, 'activation'):
#             activation = layer.activation.__name__ if layer.activation else 'None'
#             print(f"Layer: {layer.name}, Activation Function: {activation}")
#         else:
#             print(f"Layer: {layer.name}, Activation Function: None")

    # Evaluate the model with quantized activation function during inference
    quantized_accuracy = new_model.evaluate(x_test, y_test, verbose=0)[1]  # Get accuracy
    # Relative accuracy drop in percent; reported as 0 when the quantized model is better than the baseline.
    accuracy_loss = 0 if quantized_accuracy > original_accuracy else ((original_accuracy - quantized_accuracy) / original_accuracy) * 100
    print(f"Quantized Model Accuracy with Quantized relu({levels} levels): {quantized_accuracy:.4f}, Loss of Accuracy: {accuracy_loss:.2f}%")
    
#     # Verify the layer outputs are quantized
#     layer_outputs_model = tf.keras.Model(inputs=new_model.input, outputs=new_model.layers[2].output)
#     test_data = x_test[0:1]
#     layer_activations = layer_outputs_model.predict(test_data)
#     print("shape of layer_activations: ", layer_activations.shape)
#     print("layer_activations: ", layer_activations)

Original Model Accuracy: 0.7282
Quantized Model Accuracy with Quantized relu(4 levels): 0.1202, Loss of Accuracy: 83.49%
Quantized Model Accuracy with Quantized relu(8 levels): 0.4052, Loss of Accuracy: 44.36%
Quantized Model Accuracy with Quantized relu(16 levels): 0.6404, Loss of Accuracy: 12.06%
Quantized Model Accuracy with Quantized relu(32 levels): 0.7100, Loss of Accuracy: 2.50%
Quantized Model Accuracy with Quantized relu(64 levels): 0.7259, Loss of Accuracy: 0.32%


In [None]:
# quantization of tanh
def create_model():
    """Build and compile the CNN classifier with tanh hidden activations.

    Same layer stack as the rest of the notebook: three convolutions (with
    batch normalization, pooling and dropout), one hidden dense layer, and a
    10-way softmax output. The input shape is read from the global `x_train`.

    Note: this reuses the name `create_model`, so running this cell replaces
    any earlier definition of that name in the kernel.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, sparse categorical
        cross-entropy loss, sparse categorical accuracy metric).
    """
    hidden_activation = 'tanh'

    model = Sequential()

    # Convolutional layer 1
    model.add(Conv2D(32, kernel_size=(3, 3), activation=hidden_activation, input_shape=x_train.shape[1:]))
    model.add(BatchNormalization())

    # Convolutional layer 2
    model.add(Conv2D(64, (3, 3), activation=hidden_activation))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Convolutional layer 3
    model.add(Conv2D(128, (3, 3), activation=hidden_activation))
    model.add(Dropout(0.4))

    model.add(Flatten())

    # Fully connected layer 1
    model.add(Dense(128, activation=hidden_activation))
    model.add(Dropout(0.3))

    # Output layer
    model.add(Dense(10, activation='softmax'))

    # Compile the model
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['sparse_categorical_accuracy'],
    )

    return model

# Fit the un-quantized tanh network; validation_split=0.1 reserves a tenth of
# the training data for validation.
model = create_model()
model.fit(
    x=x_train,
    y=y_train,
    epochs=10,
    validation_split=0.1,
)

# # Visualize the model
# model.summary()
# print("\nDetailed Summary with Activation Functions:")
# for layer in model.layers:
#     if hasattr(layer, 'activation'):
#         activation = layer.activation.__name__ if layer.activation else 'None'
#         print(f"Layer: {layer.name}, Activation Function: {activation}")
#     else:
#         print(f"Layer: {layer.name}, Activation Function: None")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

In [None]:
# Reference point: test-set accuracy of the trained tanh model with its
# ordinary (un-quantized) activations. evaluate() returns [loss, accuracy]
# because the model was compiled with a single metric.
baseline_metrics = model.evaluate(x_test, y_test, verbose=0)
original_accuracy = baseline_metrics[1]
print(f"Original Model Accuracy: {original_accuracy:.4f}")

# Quantization of tanh
def quantized_tanh(x, levels):
    """tanh whose output is snapped to `levels` evenly spaced values in [-1, 1].

    The tanh output is shifted to [0, 1], rounded onto a grid of `levels`
    points, and shifted back to [-1, 1].

    Args:
        x: Pre-activation tensor.
        levels: Number of quantization levels. Must be at least 2, since
            ``levels - 1`` is used as a divisor.

    Returns:
        Tensor of the same shape as `x` holding the quantized activations.
    """
    # The previous hand-written form, 1 - 2 / (exp(2x) + 1), is algebraically
    # tanh(x). The built-in op avoids the cancellation error of that formula
    # near x = 0 and keeps the only difference from the 'tanh' activation used
    # for training down to the quantization step itself.
    x = tf.math.tanh(x)
    # Normalize the activation output to [0, 1] to prepare for quantization
    x_normalized = (x + 1) / 2
    # Quantize the normalized output
    x_quantized = tf.round(x_normalized * (levels - 1)) / (levels - 1)
    # Scale back to [-1, 1]
    x_scaled_back = x_quantized * 2 - 1
    return x_scaled_back

def quantized_softmax(x, levels):
    """Apply softmax to `x`, then round the result onto `levels` values in [0, 1].

    Softmax output is already confined to [0, 1], so it is multiplied by
    ``levels - 1``, rounded, and divided back without any further rescaling.

    Args:
        x: Logits tensor.
        levels: Number of quantization levels.

    Returns:
        The quantized softmax probabilities.
    """
    scale = levels - 1
    return tf.round(tf.nn.softmax(x) * scale) / scale

quantization_levels = [4, 8, 16, 32, 64]


def _build_quantized_tanh_model(levels):
    """Return an uncompiled copy of the CNN with activations quantized to `levels` levels."""
    # `levels` is a parameter here, so each model's activations are bound to
    # their own value. Lambdas written directly in the loop body would all
    # share the loop variable and see whatever value it holds when called.
    def tanh_q(x):
        return quantized_tanh(x, levels)

    def softmax_q(x):
        return quantized_softmax(x, levels)

    return Sequential([
        # Convolutional layer 1
        Conv2D(32, kernel_size=(3, 3), activation=tanh_q, input_shape=x_train.shape[1:]),
        BatchNormalization(),

        # Convolutional layer 2
        Conv2D(64, (3, 3), activation=tanh_q),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        # Convolutional layer 3
        Conv2D(128, (3, 3), activation=tanh_q),
        Dropout(0.4),

        Flatten(),

        # Fully connected layer 1
        Dense(128, activation=tanh_q),
        Dropout(0.3),

        # Output layer
        Dense(10, activation=softmax_q),
    ])


# For each quantization level: rebuild the network with quantized activations,
# load the trained weights, and measure test accuracy against the baseline.
for levels in quantization_levels:
    new_model = _build_quantized_tanh_model(levels)

    # get trained weights (layers are paired by position, so both models must
    # have the same layer sequence)
    for layer, new_layer in zip(model.layers, new_model.layers):
        new_layer.set_weights(layer.get_weights())

    # Compile the model
    new_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])

#     # visualize the quantized model
#     new_model.summary()
#     print("\nDetailed Summary with Activation Functions:")
#     for layer in new_model.layers:
#         if hasattr(layer, 'activation'):
#             activation = layer.activation.__name__ if layer.activation else 'None'
#             print(f"Layer: {layer.name}, Activation Function: {activation}")
#         else:
#             print(f"Layer: {layer.name}, Activation Function: None")

    # Evaluate the model with quantized activation function during inference
    quantized_accuracy = new_model.evaluate(x_test, y_test, verbose=0)[1]  # Get accuracy
    # Relative accuracy drop in percent; reported as 0 when the quantized model is better than the baseline.
    accuracy_loss = 0 if quantized_accuracy > original_accuracy else ((original_accuracy - quantized_accuracy) / original_accuracy) * 100
    print(f"Quantized Model Accuracy with Quantized tanh({levels} levels): {quantized_accuracy:.4f}, Loss of Accuracy: {accuracy_loss:.2f}%")
    
#     # Verify the layer outputs are quantized
#     layer_outputs_model = tf.keras.Model(inputs=new_model.input, outputs=new_model.layers[2].output)
#     test_data = x_test[0:1]
#     layer_activations = layer_outputs_model.predict(test_data)
#     print("shape of layer_activations: ", layer_activations.shape)
#     print("layer_activations: ", layer_activations)