In [2]:
import tensorflow as tf
import numpy as np
from tensorflow.keras import datasets, layers, models, losses

2024-03-24 00:21:25.962693: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
# Fetch the MNIST train/test splits bundled with Keras.
mnist_train, mnist_test = datasets.mnist.load_data()
train_images, train_labels = mnist_train
test_images, test_labels = mnist_test
train_images.shape

(60000, 28, 28)

In [8]:
# Zero-pad every 28x28 image by 2 pixels on each spatial side, giving 32x32.
# The leading (sample) axis is not padded. Pixel values are left untouched:
# no rescaling is applied in this cell.
paddings = [[0, 0], [2, 2], [2, 2]]
train_images = tf.pad(train_images, paddings)
test_images = tf.pad(test_images, paddings)
train_images.shape

TensorShape([60000, 32, 32])

In [9]:
# Append a trailing channel axis: (N, 32, 32) -> (N, 32, 32, 1).
train_images = tf.expand_dims(train_images, axis=-1)
test_images = tf.expand_dims(test_images, axis=-1)
train_images.shape

TensorShape([60000, 32, 32, 1])

In [10]:
# Hold out the last samples of the training split for validation.
# NOTE: this cell overwrites train_images/train_labels, so re-running it
# shrinks the training set again.
num_val_samples = 2000
val_images, val_labels = train_images[-num_val_samples:], train_labels[-num_val_samples:]
train_images, train_labels = train_images[:-num_val_samples], train_labels[:-num_val_samples]

In [11]:
# Three 5x5 convolutions (6, 16 and 120 filters) with 2x2 max pooling after
# the first two, then a flatten and two dense layers (84 units, 10-way softmax).
# Every hidden layer uses the 'relu6' activation.
lenet_layers = [
    layers.Conv2D(6, 5, activation='relu6', input_shape=train_images.shape[1:]),
    layers.MaxPooling2D(2),
    layers.Conv2D(16, 5, activation='relu6'),
    layers.MaxPooling2D(2),
    layers.Conv2D(120, 5, activation='relu6'),
    layers.Flatten(),
    layers.Dense(84, activation='relu6'),
    layers.Dense(10, activation='softmax'),
]
model = models.Sequential(lenet_layers)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 6)         156       
                                                                 
 max_pooling2d (MaxPooling2  (None, 14, 14, 6)         0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 10, 10, 16)        2416      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 5, 5, 16)          0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 1, 1, 120)         48120     
                                                                 
 flatten (Flatten)           (None, 120)              

In [8]:
# Integer class labels -> sparse categorical cross-entropy; track accuracy.
model.compile(
    optimizer='adam',
    loss=losses.sparse_categorical_crossentropy,
    metrics=['accuracy'],
)
# Train for 10 epochs in mini-batches of 64, validating on the held-out samples.
validation_set = (val_images, val_labels)
history = model.fit(
    train_images,
    train_labels,
    batch_size=64,
    epochs=10,
    validation_data=validation_set,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [11]:
def representative_data_gen():
    """Yield calibration samples for the TFLite converter.

    Walks the first 100 entries of the module-level ``train_images`` in
    batches of one, casts each batch to float32 and yields it wrapped in a
    single-element list (the model has exactly one input).
    """
    calibration_batches = (
        tf.data.Dataset.from_tensor_slices(train_images).batch(1).take(100)
    )
    for sample in calibration_batches:
        yield [tf.cast(sample, tf.float32)]

In [12]:
# Convert the trained Keras model to a quantized TFLite flatbuffer.
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Calibration data comes from representative_data_gen; the target op set is
# limited to TFLITE_BUILTINS_INT8.
converter.representative_dataset = representative_data_gen
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

# Model input and output tensors are int8 as well (tf.uint8 is the alternative).
converter.inference_input_type = converter.inference_output_type = tf.int8

tflite_quant_model = converter.convert()

INFO:tensorflow:Assets written to: /var/folders/xs/nn2f1m4d4vg3mp72k2gv6c8h0000gn/T/tmpb94q_6u2/assets


INFO:tensorflow:Assets written to: /var/folders/xs/nn2f1m4d4vg3mp72k2gv6c8h0000gn/T/tmpb94q_6u2/assets
2024-02-29 12:40:07.033486: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-02-29 12:40:07.033503: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-02-29 12:40:07.033769: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/xs/nn2f1m4d4vg3mp72k2gv6c8h0000gn/T/tmpb94q_6u2
2024-02-29 12:40:07.036329: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-02-29 12:40:07.036382: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/xs/nn2f1m4d4vg3mp72k2gv6c8h0000gn/T/tmpb94q_6u2
2024-02-29 12:40:07.043364: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-02-29 12:40:07.127177: I tensorflow/cc/saved_model/loader.cc:217] Running initialization

In [13]:
# Confirm the dtypes of the converted model's first input and output tensor.
interpreter = tf.lite.Interpreter(model_content=tflite_quant_model)
input_type = interpreter.get_input_details()[0]['dtype']
output_type = interpreter.get_output_details()[0]['dtype']
print('input: ', input_type)
print('output: ', output_type)

input:  <class 'numpy.int8'>
output:  <class 'numpy.int8'>


In [12]:
import pathlib

# Output directory for exported models; created (with parents) if missing.
tflite_models_dir = pathlib.Path("../../saved_models")
tflite_models_dir.mkdir(parents=True, exist_ok=True)

# Unquantized Keras model.
tf_model_file = tflite_models_dir.joinpath("lenet5.keras")
model.save(tf_model_file)

# Quantized TFLite flatbuffer produced by the converter cell; this cell must
# run after the conversion so that `tflite_quant_model` exists.
tflite_model_quant_file = tflite_models_dir.joinpath("lenet5_int8.tflite")
tflite_model_quant_file.write_bytes(tflite_quant_model)

NameError: name 'tflite_quant_model' is not defined

In [13]:
# import tflite_runtime.interpreter as tflite

# Helper function to run inference on a TFLite model
def run_tflite_model(tflite_file, test_image_indices):
  """Classify selected test images with a TFLite model file.

  Images are read from the module-level ``test_images``. When the model's
  input tensor has an integer dtype (int8, uint8, ...) with a non-zero
  quantization scale, each image is quantized as
  ``round(pixel / scale) + zero_point`` and clipped to the value range of that
  dtype before being fed to the interpreter. Otherwise the image is only cast
  to the input dtype.

  Args:
    tflite_file: Path of the ``.tflite`` file (converted with ``str()``).
    test_image_indices: Sized iterable of indices into ``test_images``.

  Returns:
    1-D ``np.int32`` array with the argmax of the model output for each
    requested image, in the order the indices were given.
  """
  # Initialize the interpreter
  interpreter = tf.lite.Interpreter(model_path=str(tflite_file))
  interpreter.allocate_tensors()

  input_details = interpreter.get_input_details()[0]
  output_details = interpreter.get_output_details()[0]

  input_dtype = np.dtype(input_details["dtype"])
  input_scale, input_zero_point = input_details["quantization"]
  # A scale of 0 means the tensor carries no quantization parameters.
  quantized_input = np.issubdtype(input_dtype, np.integer) and input_scale != 0
  if quantized_input:
    dtype_limits = np.iinfo(input_dtype)

  # Convert once to NumPy instead of slicing the TF tensor for every image.
  images = np.asarray(test_images)

  predictions = np.zeros((len(test_image_indices),), dtype=np.int32)
  for i, test_image_index in enumerate(test_image_indices):
    test_image = images[test_image_index]

    # Rescale the test data into the quantized input domain of the model
    if quantized_input:
      test_image = np.clip(
          np.round(test_image.astype(np.float32) / input_scale) + input_zero_point,
          dtype_limits.min, dtype_limits.max)

    # Add the batch axis and match the dtype the interpreter expects
    test_image = np.expand_dims(test_image, axis=0).astype(input_dtype)
    interpreter.set_tensor(input_details["index"], test_image)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details["index"])[0]

    predictions[i] = output.argmax()

  return predictions

In [14]:
# Helper function to evaluate a TFLite model on all images
def evaluate_model(tflite_file, model_type):
  """Print the accuracy of a TFLite model over the whole test set.

  Runs ``run_tflite_model`` on every index of the module-level ``test_images``
  and compares the predictions with the module-level ``test_labels``.

  Args:
    tflite_file: Path of the ``.tflite`` file to evaluate.
    model_type: Label used as the prefix of the printed report line.
  """
  num_samples = len(test_images)
  every_index = range(test_images.shape[0])
  predictions = run_tflite_model(tflite_file, every_index)

  num_correct = np.sum(test_labels == predictions)
  accuracy = (num_correct * 100) / num_samples

  report = '%s model accuracy is %.4f%% (Number of test samples=%d)' % (
      model_type, accuracy, num_samples)
  print(report)

In [15]:
# Report test-set accuracy of the saved int8 model.
evaluate_model(tflite_model_quant_file, "Int8")

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


Int8 model accuracy is 98.4100% (Number of test samples=10000)


In [17]:
import pickle

# Walk every tensor of the saved int8 model and collect weights, biases and
# quantization parameters into `obj` (a list of dicts), then pickle that list.
tflite_interpreter = tf.lite.Interpreter(model_path=str(tflite_model_quant_file))
tflite_interpreter.allocate_tensors()

tensor_details = tflite_interpreter.get_tensor_details()

# Layer counters used to build the key prefixes ('fc2', 'conv3', ...).
# NOTE(review): this relies on the tensor order of this particular model:
# weight tensors are expected last-layer-first (counters count down to 0) and
# the fused activation tensors (names containing ';') first-layer-first
# afterwards (counters count back up). Confirm if the architecture changes.
num_fc_layers = 2
num_conv2d_layers = 3
num_reshape_layers = 1

obj = []


def _quant_params(detail):
    """Return the (scales, zero_points) arrays of one tensor-details entry."""
    params = detail['quantization_parameters']
    return params['scales'], params['zero_points']


for detail in tensor_details:
    print(detail)
    tensor_index = detail['index']
    name = detail['name']

    if ';' in name:
        # Names joined with ';' are treated as layer outputs (activations).
        output_scales, output_zero_points = _quant_params(detail)

        if 'Conv2D' in name:
            num_conv2d_layers += 1
            prefix = 'conv' + str(num_conv2d_layers)
            obj.append({prefix + '.output.scales': output_scales,
                        prefix + '.output.zero_points': output_zero_points})

        if 'MatMul' in name:
            num_fc_layers += 1
            prefix = 'fc' + str(num_fc_layers)
            obj.append({prefix + '.output.scales': output_scales,
                        prefix + '.output.zero_points': output_zero_points})
    else:
        if 'input' in name:
            input_scales, input_zero_points = _quant_params(detail)
            obj.append({'input.scales': input_scales,
                        'input.zero_points': input_zero_points})

        if 'BiasAdd' in name:
            # Remembered for the weight tensor of the same layer, which is
            # expected to follow this bias tensor in `tensor_details`.
            # get_tensor() returns a copy, so nothing keeps pointing into
            # the interpreter's internal buffers.
            bias = tflite_interpreter.get_tensor(tensor_index)
            bias_scales, bias_zero_points = _quant_params(detail)

        if 'MatMul' in name:
            weights = tflite_interpreter.get_tensor(tensor_index)
            reshaped_weights = np.transpose(weights)
            weight_scales, weight_zero_points = _quant_params(detail)
            prefix = 'fc' + str(num_fc_layers)
            obj.append({prefix + '.weights': reshaped_weights,
                        prefix + '.bias': bias,
                        prefix + '.weights.scales': weight_scales,
                        prefix + '.weights.zero_points': weight_zero_points,
                        prefix + '.bias.scales': bias_scales,
                        prefix + '.bias.zero_points': bias_zero_points})
            num_fc_layers -= 1

        if 'Conv2D' in name:
            weights = tflite_interpreter.get_tensor(tensor_index)
            # Move the last axis to position 1: (d0, d1, d2, d3) -> (d0, d3, d1, d2).
            # Presumably (out, height, width, in) -> (out, in, height, width);
            # TODO confirm the kernel layout. Unlike a manually allocated
            # buffer, this is also correct for non-square kernels.
            reshaped_weights = np.ascontiguousarray(
                np.transpose(weights, (0, 3, 1, 2)), dtype=np.int8)
            weight_scales, weight_zero_points = _quant_params(detail)
            prefix = 'conv' + str(num_conv2d_layers)
            obj.append({prefix + '.weights': reshaped_weights,
                        prefix + '.bias': bias,
                        prefix + '.weights.scales': weight_scales,
                        prefix + '.weights.zero_points': weight_zero_points,
                        prefix + '.bias.scales': bias_scales,
                        prefix + '.bias.zero_points': bias_zero_points})
            num_conv2d_layers -= 1

        if 'Reshape' in name:
            newshape = detail['shape_signature']
            obj.append({'reshape' + str(num_reshape_layers) + '.newshape': newshape})
            num_reshape_layers -= 1

        if 'StatefulPartitionedCall' in name:
            output_scales, output_zero_points = _quant_params(detail)
            obj.append({'softmax.output.scales': output_scales,
                        'softmax.output.zero_points': output_zero_points})

with open('./params.pkl', 'wb') as handle:
    pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

{'name': 'serving_default_conv2d_input:0', 'index': 0, 'shape': array([ 1, 32, 32,  1], dtype=int32), 'shape_signature': array([-1, 32, 32,  1], dtype=int32), 'dtype': <class 'numpy.int8'>, 'quantization': (1.0, -128), 'quantization_parameters': {'scales': array([1.], dtype=float32), 'zero_points': array([-128], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}
{'name': 'sequential/flatten/Const', 'index': 1, 'shape': array([2], dtype=int32), 'shape_signature': array([2], dtype=int32), 'dtype': <class 'numpy.int32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}
{'name': 'sequential/dense_1/BiasAdd/ReadVariableOp', 'index': 2, 'shape': array([10], dtype=int32), 'shape_signature': array([10], dtype=int32), 'dtype': <class 'numpy.int32'>, 'quantization': (0.00011738256580429152, 0), 'quantization_parameters': {'scales': array([0.000117

In [19]:
# Reload the exported parameters and print the shape of each stored array,
# one printed line per layer. The list positions are hard-coded to the order
# in which the export cell appended its entries.
# NOTE: only unpickle files you produced yourself; pickle can execute code.
with open('./params.pkl', 'rb') as handle:
    b = pickle.load(handle)

# Each row is printed on one line; every entry is (position in b, key).
shape_report = [
    [(5, 'conv1.weights'), (5, 'conv1.bias'),
     (0, 'input.scales'), (0, 'input.zero_points'),
     (5, 'conv1.weights.scales'), (5, 'conv1.weights.zero_points'),
     (5, 'conv1.bias.scales'), (5, 'conv1.bias.zero_points'),
     (6, 'conv1.output.scales'), (6, 'conv1.output.zero_points')],
    [(4, 'conv2.weights'), (4, 'conv2.bias'),
     (4, 'conv2.weights.scales'), (4, 'conv2.weights.zero_points'),
     (4, 'conv2.bias.scales'), (4, 'conv2.bias.zero_points'),
     (7, 'conv2.output.scales'), (7, 'conv2.output.zero_points')],
    [(3, 'conv3.weights'), (3, 'conv3.bias'),
     (3, 'conv3.weights.scales'), (3, 'conv3.weights.zero_points'),
     (3, 'conv3.bias.scales'), (3, 'conv3.bias.zero_points'),
     (8, 'conv3.output.scales'), (8, 'conv3.output.zero_points')],
    [(9, 'reshape1.newshape')],
    [(2, 'fc1.weights'), (2, 'fc1.bias'),
     (2, 'fc1.weights.scales'), (2, 'fc1.weights.zero_points'),
     (2, 'fc1.bias.scales'), (2, 'fc1.bias.zero_points'),
     (10, 'fc1.output.scales'), (10, 'fc1.output.zero_points')],
    [(1, 'fc2.weights'), (1, 'fc2.bias'),
     (1, 'fc2.weights.scales'), (1, 'fc2.weights.zero_points'),
     (1, 'fc2.bias.scales'), (1, 'fc2.bias.zero_points'),
     (11, 'fc2.output.scales'), (11, 'fc2.output.zero_points')],
    [(12, 'softmax.output.scales'), (12, 'softmax.output.zero_points')],
]

for row in shape_report:
    print(*[b[position][key].shape for position, key in row])

(6, 1, 5, 5) (6,) (1,) (1,) (6,) (6,) (6,) (6,) (1,) (1,)
(16, 6, 5, 5) (16,) (16,) (16,) (16,) (16,) (1,) (1,)
(120, 16, 5, 5) (120,) (120,) (120,) (120,) (120,) (1,) (1,)


KeyError: 'reshape.newshape'