In [68]:
import pickle
import numpy as np
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Conv2D
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam, SGD
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint
import timeit

device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  print(
      '\n\nThis error most likely means that this notebook is not '
      'configured to use a GPU.  Change this in Notebook Settings via the '
      'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
  raise SystemError('GPU device not found')

def gpu():
  with tf.device('/device:GPU:0'):
    random_image_gpu = tf.random.normal((100, 100, 100, 3))
    net_gpu = tf.keras.layers.Conv2D(32, 7)(random_image_gpu)
    return tf.math.reduce_sum(net_gpu)
  
# We run each op once to warm up; see: https://stackoverflow.com/a/45067900
gpu()

# Run the op several times.
print('Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
      '(batch x height x width x channel). Sum of ten runs.')
print('GPU (s):')
gpu_time = timeit.timeit('gpu()', number=10, setup="from __main__ import gpu")
print(gpu_time)

Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images (batch x height x width x channel). Sum of ten runs.
GPU (s):
0.04505607499595499


2023-05-09 14:16:50.870982: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-09 14:16:50.871263: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-09 14:16:50.871460: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-09 14:16:50.871717: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-09 14:16:50.871921: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from S

In [69]:
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

In [70]:
# Load the CIFAR-10 dataset
data_path = 'cifar-10-batches-py/'
train_data = []
train_labels = []
for i in range(1, 6):
    batch = unpickle(data_path + 'data_batch_' + str(i))
    train_data.append(batch[b'data'])
    train_labels += batch[b'labels']

In [71]:
train_data = np.concatenate(train_data)
train_data = train_data.reshape((50000, 3, 32, 32)).transpose(0, 2, 3, 1)
test_data = unpickle(data_path + 'test_batch')[b'data']
test_data = test_data.reshape((10000, 3, 32, 32)).transpose(0, 2, 3, 1)
train_labels = np.array(train_labels)
test_labels = np.array(unpickle(data_path + 'test_batch')[b'labels'])

# Load the CIFAR-100 dataset
data_path = 'cifar-100-python/'
train_data_100 = unpickle(data_path + 'train')[b'data']
train_data_100 = train_data_100.reshape((50000, 3, 32, 32)).transpose(0, 2, 3, 1)
test_data_100 = unpickle(data_path + 'test')[b'data']
test_data_100 = test_data_100.reshape((10000, 3, 32, 32)).transpose(0, 2, 3, 1)
train_labels_100 = np.array(unpickle(data_path + 'train')[b'fine_labels'])
test_labels_100 = np.array(unpickle(data_path + 'test')[b'fine_labels'])

In [72]:
# Print the number of features, classes, and training samples for CIFAR-10
print("CIFAR-10")
print("Number of features:", train_data.shape[1:])
print("Number of classes:", len(np.unique(train_labels)))
print("Number of training samples:", train_data.shape[0])

# Print the number of features, classes, and training samples for CIFAR-100
print("\nCIFAR-100")
print("Number of features:", train_data_100.shape[1:])
print("Number of classes:", len(np.unique(train_labels_100)))
print("Number of training samples:", train_data_100.shape[0])

CIFAR-10
Number of features: (32, 32, 3)
Number of classes: 10
Number of training samples: 50000

CIFAR-100
Number of features: (32, 32, 3)
Number of classes: 100
Number of training samples: 50000


In [73]:
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Shuffle your dataset
train_data, train_labels = shuffle(train_data, train_labels, random_state=42)
test_data, test_labels = shuffle(test_data, test_labels, random_state=42)

# Normalize pixel values
x_train = train_data.astype('float32') / 255.0
x_test = test_data.astype('float32') / 255.0

# Convert labels to categorical format
y_train = to_categorical(train_labels, num_classes=10)
y_test = to_categorical(test_labels, num_classes=10)

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# create data generator
datagen = ImageDataGenerator(zoom_range=[0.5,1.5],
                             brightness_range=[0.25,1.0],
                             shear_range=0.5,
                             width_shift_range=0.5, 
                             height_shift_range=0.5,
                             vertical_flip=True,
                             horizontal_flip=True)
# prepare iterator
it_train = datagen.flow(x_train, y_train, batch_size=64)
# fit model
steps = int(x_train.shape[0] / 64)

In [84]:
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, BatchNormalization
import tensorflow_model_optimization as tfmot
from tensorflow.keras.callbacks import ReduceLROnPlateau

model_10 = Sequential()
model_10.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=(32, 32, 3)))
model_10.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
model_10.add(MaxPooling2D((2, 2)))
model_10.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
model_10.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
model_10.add(MaxPooling2D((2, 2)))
model_10.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
model_10.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
model_10.add(MaxPooling2D((2, 2)))
model_10.add(Flatten())
model_10.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
model_10.add(Dense(10, activation='softmax'))

# compile model
opt = SGD(lr=0.01, momentum=0.9)

# define the ModelCheckpoint callback to save the model after each epoch
checkpoint_callback = ModelCheckpoint(filepath='model_10_QAT.h5', save_freq='epoch')

lr_scheduler = ReduceLROnPlateau(monitor="val_accuracy",
                                factor=0.1,
                                patience=10,
                                verbose=0,
                                mode="max",
                                min_delta=0.0001,
                                cooldown=0,
                                min_lr=0.001)

# Convert the model to a quantization aware model
quant_aware_model = tfmot.quantization.keras.quantize_model(model_10)

quant_aware_model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

quant_aware_model.summary()

# Train and evaluate the quantization aware model
quant_aware_model.fit(x_train,y_train,batch_size=64,epochs=50,validation_data=(x_test, y_test),callbacks=[checkpoint_callback,lr_scheduler])

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 quantize_layer_9 (QuantizeL  (None, 32, 32, 3)        3         
 ayer)                                                           
                                                                 
 quant_conv2d_58 (QuantizeWr  (None, 32, 32, 32)       963       
 apperV2)                                                        
                                                                 
 quant_conv2d_59 (QuantizeWr  (None, 32, 32, 32)       9315      
 apperV2)                                                        
                                                                 
 quant_max_pooling2d_24 (Qua  (None, 16, 16, 32)       1         
 ntizeWrapperV2)                                                 
                                                                 
 quant_conv2d_60 (QuantizeWr  (None, 16, 16, 64)      

Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7efb42256c10>

In [85]:
# evaluate the model on the test set
quant_loss, quant_acc = quant_aware_model.evaluate(x_test, y_test, verbose=0)
print('Quantization aware training loss: ', quant_loss)
print('Quantization aware training accuracy: ', quant_acc)

Quantization aware training loss:  1.980594277381897
Quantization aware training accuracy:  0.7885000109672546


In [86]:
# convert the QAT model to a fully quantized model using TFLite
def representative_data_gen():
    for input_value in tf.data.Dataset.from_tensor_slices(x_train).batch(1).take(100):
        yield [input_value]

converter = tf.lite.TFLiteConverter.from_keras_model(quant_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
# Ensure that if any ops can't be quantized, the converter throws an error
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# Set the input and output tensors to uint8 (APIs added in r2.3)
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

tflite_model_quant_int8_qat = converter.convert()



INFO:tensorflow:Assets written to: /tmp/tmpayrvpps3/assets


INFO:tensorflow:Assets written to: /tmp/tmpayrvpps3/assets
2023-05-09 14:33:42.220128: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2023-05-09 14:33:42.220155: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2023-05-09 14:33:42.220322: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpayrvpps3
2023-05-09 14:33:42.225188: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2023-05-09 14:33:42.225207: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /tmp/tmpayrvpps3
2023-05-09 14:33:42.244851: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2023-05-09 14:33:42.331436: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: /tmp/tmpayrvpps3
2023-05-09 14:33:42.353369: I tensorflow/cc/saved_model/loader.cc:305] SavedModel

In [87]:
interpreter = tf.lite.Interpreter(model_content=tflite_model_quant_int8_qat)
input_type = interpreter.get_input_details()[0]['dtype']
print('input: ', input_type)
output_type = interpreter.get_output_details()[0]['dtype']
print('output: ', output_type)
# Save the quantized model to disk
open("cifar10_qat_int8.tflite", "wb").write(tflite_model_quant_int8_qat)

input:  <class 'numpy.uint8'>
output:  <class 'numpy.uint8'>


571288

In [88]:
tflite_interpreter = tf.lite.Interpreter('cifar10_qat_int8.tflite')
tflite_interpreter.allocate_tensors()
input_details = tflite_interpreter.get_input_details()
output_details = tflite_interpreter.get_output_details()

print("== Input details ==")
print("name:", input_details[0]['name'])
print("shape:", input_details[0]['shape'])
print("type:", input_details[0]['dtype'])

print("\n== Output details ==")
print("name:", output_details[0]['name'])
print("shape:", output_details[0]['shape'])
print("type:", output_details[0]['dtype'])

== Input details ==
name: serving_default_conv2d_58_input:0
shape: [ 1 32 32  3]
type: <class 'numpy.uint8'>

== Output details ==
name: StatefulPartitionedCall:0
shape: [ 1 10]
type: <class 'numpy.uint8'>


In [89]:
predictions = np.zeros((len(x_test),), dtype=int)
input_scale, input_zero_point = input_details[0]["quantization"]
for i in range(len(x_test)):
    val_batch = x_test[i]
    val_batch = val_batch / input_scale + input_zero_point
    val_batch = np.expand_dims(val_batch, axis=0).astype(input_details[0]["dtype"])
    tflite_interpreter.set_tensor(input_details[0]['index'], val_batch)
    tflite_interpreter.allocate_tensors()
    tflite_interpreter.invoke()

    tflite_model_predictions = tflite_interpreter.get_tensor(output_details[0]['index'])
    #print("Prediction results shape:", tflite_model_predictions.shape)
    output = tflite_interpreter.get_tensor(output_details[0]['index'])
    predictions[i] = output.argmax()

In [90]:
sum = 0
for i in range(len(predictions)):
    if (predictions[i] == test_labels[i]):
        sum = sum + 1
accuracy_score = sum / 10000
print("Accuracy of quantized to int8 model is {}%".format(accuracy_score*100))

Accuracy of quantized to int8 model is 78.86999999999999%
