# **Workspace setup**

In [38]:
# Mount Google Drive at /content/drive/ (Colab only). The base model is later
# loaded from a 'drive/MyDrive/...' path, which presumably resolves into this
# mount when the working directory is /content.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [39]:
# Import libraries
%pylab inline
!pip install -q tensorflow-model-optimization

import tensorflow as tf
import tensorflow_model_optimization as tfmot
from tensorflow.keras.optimizers import SGD
import numpy as np
import tempfile
import os

Populating the interactive namespace from numpy and matplotlib


In [40]:
# Display python and library versions
!python --versions
print('Numpy ' + np.__version__)
print('TensorFlow ' + tf.__version__)
print('Keras ' + tf.keras.__version__)

unknown option --versions
usage: python3 [option] ... [-c cmd | -m mod | file | -] [arg] ...
Try `python -h' for more information.
Numpy 1.21.6
TensorFlow 2.8.2
Keras 2.8.0


# **Dataset Management**

In [41]:
# Loads MNIST and keeps a fixed subset of it: the first N_TRAIN training
# samples and the first N_TEST test samples (this is NOT a percentage split).
N_TRAIN = 6000
N_TEST = 1000
NUM_CLASSES = 10

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Truncate first so the reshape / cast / one-hot steps only touch the samples
# that are actually used.
x_train, y_train = x_train[:N_TRAIN], y_train[:N_TRAIN]
x_test, y_test = x_test[:N_TEST], y_test[:N_TEST]

# Integer class labels of the retained test samples, kept for the accuracy
# check on the TFLite model (same length as x_test).
y_test_old = y_test

# Add the channel axis, convert to float32 and scale pixel values to [0, 1]
x_train = x_train.reshape((x_train.shape[0], 28, 28, 1)).astype('float32') / 255.0
x_test = x_test.reshape((x_test.shape[0], 28, 28, 1)).astype('float32') / 255.0

# One-hot encode the labels; num_classes is pinned so the encoding width does
# not depend on which labels happen to appear in the subset.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=NUM_CLASSES)

# **Quantization aware training**

In [42]:
# Load the saved base CNN, train it for 10 more epochs on the training subset
# (validating against the test subset), then print its architecture. The
# training log gives the float baseline to compare the quantized model against.
base_model = tf.keras.models.load_model('drive/MyDrive/GE_practicum/CNN_base')
base_model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_data=(x_test, y_test),
)
base_model.summary()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_13 (Conv2D)          (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d_13 (MaxPoolin  (None, 8, 8, 32)         0         
 g2D)                                                            
                                                                 
 flatten_13 (Flatten)        (None, 2048)              0         
                                                                 
 dense_26 (Dense)            (None, 30)                61470     
                                                                 
 dense_27 (Dense)            (None, 10)                310       
                                                                 
Total pa

In [43]:
# Build the quantization-aware version of the trained base model
quantize_model = tfmot.quantization.keras.quantize_model
model = quantize_model(base_model)

# Compile the wrapped model with SGD + momentum before training it
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 quantize_layer_3 (QuantizeL  (None, 28, 28, 1)        3         
 ayer)                                                           
                                                                 
 quant_conv2d_13 (QuantizeWr  (None, 26, 26, 32)       387       
 apperV2)                                                        
                                                                 
 quant_max_pooling2d_13 (Qua  (None, 8, 8, 32)         1         
 ntizeWrapperV2)                                                 
                                                                 
 quant_flatten_13 (QuantizeW  (None, 2048)             1         
 rapperV2)                                                       
                                                                 
 quant_dense_26 (QuantizeWra  (None, 30)             

In [44]:
# Quantization aware training: 10 epochs on the training subset, validated
# against the test subset
model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_data=(x_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f852610a410>

# **Quantization and conversion to tflite**

In [45]:
# Conversion to tflite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,
]
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Write the serialized model to disk; the context manager guarantees the file
# is flushed and closed even if the write fails.
with open('CNN_quant.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)

# Size of the converted model in bytes
len(tflite_model)



INFO:tensorflow:Assets written to: /tmp/tmpsl815n4_/assets


INFO:tensorflow:Assets written to: /tmp/tmpsl815n4_/assets


66768

In [46]:
# Check tflite model characteristics: prints the analyzer report (subgraphs,
# ops and tensors) for the converted model held in memory as `tflite_model`
tf.lite.experimental.Analyzer.analyze(model_content=tflite_model)

=== TFLite ModelAnalyzer ===

Your TFLite model has '1' subgraph(s). In the subgraph description below,
T# represents the Tensor numbers. For example, in Subgraph#0, the QUANTIZE op takes
tensor #0 as input and produces tensor #5 as output.

Subgraph#0 main(T#0) -> [T#15]
  Op#0 QUANTIZE(T#0) -> [T#5]
  Op#1 CONV_2D(T#5, T#6, T#2) -> [T#7]
  Op#2 MAX_POOL_2D(T#7) -> [T#8]
  Op#3 RESHAPE(T#8, T#1) -> [T#9]
  Op#4 FULLY_CONNECTED(T#9, T#10, T#3) -> [T#11]
  Op#5 FULLY_CONNECTED(T#11, T#12, T#4) -> [T#13]
  Op#6 SOFTMAX(T#13) -> [T#14]
  Op#7 DEQUANTIZE(T#14) -> [T#15]

Tensors of Subgraph#0
  T#0(serving_default_conv2d_13_input:0) shape_signature:[-1, 28, 28, 1], type:FLOAT32
  T#1(sequential_13/quant_flatten_13/Const) shape:[2], type:INT32 RO 8 bytes
  T#2(conv2d_13/bias) shape:[32], type:INT32 RO 128 bytes
  T#3(dense_26/bias) shape:[30], type:INT32 RO 120 bytes
  T#4(dense_27/bias) shape:[10], type:INT32 RO 40 bytes
  T#5(sequential_13/quantize_layer_3/AllValuesQuantize/FakeQuantWithM

In [47]:
# Run the converted model through the TFLite interpreter, one test sample at a
# time, and measure its accuracy against the integer test labels.
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
input_index = input_details[0]["index"]
output_index = output_details[0]["index"]

# Cast once up front instead of re-casting the whole array on every iteration
x_eval = x_test.astype('float32')

#Predictions from TFLite model
tfl_pred = []
tfl_pred_class = []
for i in range(len(x_eval)):
    # Slice with i:i+1 to keep the leading batch dimension of size 1
    interpreter.set_tensor(input_index, x_eval[i:i+1])
    interpreter.invoke()
    result = interpreter.get_tensor(output_index)
    tfl_pred.append(result)
    # np.argmax is used explicitly rather than relying on the bare `argmax`
    # that only exists through the %pylab namespace injection
    tfl_pred_class.append(np.argmax(result))

# Compare against the integer labels for exactly the samples that were predicted
right_pred = [y_test_old[i] == tfl_pred_class[i] for i in range(len(tfl_pred_class))]
acc = np.mean(right_pred)
print(acc)

0.964


# **Conversion to C array**

In [48]:
# Function: render a sequence of byte values as the text of a C header
def hex_to_c_array(hex_data, var_name):
  """Build C header text that defines `hex_data` as an unsigned char array.

  The returned text contains, in order: an include guard derived from the
  upper-cased `var_name`, an `unsigned int <var_name>_len` holding
  `len(hex_data)`, the `unsigned char <var_name>[]` initializer, and the
  closing `#endif`.

  Args:
    hex_data: sized iterable of integers (e.g. a `bytes` object); each value
      is formatted with the `#04x` format spec.
    var_name: string used for the C identifiers and the include guard.

  Returns:
    The header contents as a single string.
  """
  guard = var_name.upper() + '_H'
  total = len(hex_data)

  # One token per value: a comma after every value except the last, and a line
  # break after every 12th value so each line stays within 80 characters
  tokens = [
      format(value, '#04x')
      + (',' if position < total else '')
      + ('\n ' if position % 12 == 0 else '')
      for position, value in enumerate(hex_data, start=1)
  ]

  sections = [
      # Open the header guard
      '#ifndef ' + guard + '\n',
      '#define ' + guard + '\n\n',
      # Array length comes before the array itself
      '\nunsigned int ' + var_name + '_len = ' + str(total) + ';\n',
      # Array declaration, its values and the closing brace
      'unsigned char ' + var_name + '[] = {',
      '\n ' + ' '.join(tokens) + '\n};\n\n',
      # Close the header guard
      '#endif //' + guard,
  ]
  return ''.join(sections)

In [49]:
# Save the TFLite model as a C header: the model bytes are rendered by
# hex_to_c_array and written out as text
with open("CNN_quant" + '.h', 'w') as header_file:
  header_text = hex_to_c_array(tflite_model, "CNN_quant")
  header_file.write(header_text)