<a href="https://colab.research.google.com/github/stefanziog/Quantized-Deep-Neural-Networks/blob/main/aware/mnist_aware.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive so files under /content/gdrive are reachable from this
# Colab runtime (the saved Keras model is loaded from that mount point in a
# later cell).
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Replace the TensorFlow build that is already installed in the runtime with
# the nightly build, then add the TensorFlow Model Optimization toolkit that
# is imported later as `tfmot`.
# NOTE(review): none of these packages is version-pinned, so a re-run may pull
# different builds than the ones that produced the recorded outputs.
! pip uninstall -y tensorflow
! pip install -q tf-nightly
! pip install -q tensorflow-model-optimization

Found existing installation: tensorflow 2.6.0
Uninstalling tensorflow-2.6.0:
  Successfully uninstalled tensorflow-2.6.0
[K     |████████████████████████████████| 483.4 MB 31 kB/s 
[K     |████████████████████████████████| 1.3 MB 42.4 MB/s 
[K     |████████████████████████████████| 463 kB 49.1 MB/s 
[K     |████████████████████████████████| 12.8 MB 144 kB/s 
[K     |████████████████████████████████| 5.8 MB 22.6 MB/s 
[K     |████████████████████████████████| 2.3 MB 49.3 MB/s 
[K     |████████████████████████████████| 211 kB 8.5 MB/s 
[?25h

In [None]:
import tempfile
import os

import tensorflow as tf

from tensorflow import keras

In [None]:
# Fetch the MNIST train/test split through Keras.
def load_dataset():
  """Load MNIST and hand it back as a flat 4-tuple.

  Returns:
    (train_images, train_labels, test_images, test_labels), in that order,
    exactly as produced by ``keras.datasets.mnist.load_data()``.
  """
  train_split, test_split = keras.datasets.mnist.load_data()
  images_train, labels_train = train_split
  images_test, labels_test = test_split
  return images_train, labels_train, images_test, labels_test

# Rescale raw pixel intensities for both splits.
def prep_pixels(train, test):
  """Return float32 copies of both image arrays divided by 255.0.

  For 8-bit pixel data (values 0-255) this maps intensities into 0-1.
  The inputs themselves are not modified.

  Args:
    train: training images; must expose ``astype`` (e.g. a numpy array).
    test: test images; same requirement as ``train``.

  Returns:
    Tuple ``(train_norm, test_norm)`` of the rescaled arrays.
  """
  def _as_unit_floats(pixels):
    # Cast first so the division is carried out in float32.
    return pixels.astype('float32') / 255.0

  return _as_unit_floats(train), _as_unit_floats(test)

In [None]:
# Load the MNIST arrays via load_dataset() (train/test images and labels).
train_images, train_labels, test_images, test_labels = load_dataset()

# Convert both image sets to float32 and divide by 255 via prep_pixels().
# The labels are left untouched.
train_images, test_images = prep_pixels(train_images, test_images)

# Load the previously trained baseline Keras model from the mounted Drive.
# NOTE(review): this is a hard-coded, user-specific absolute path; the cell
# only works when Drive is mounted at /content/gdrive and contains this file.
model = tf.keras.models.load_model('/content/gdrive/MyDrive/Διπλωματική Κώδικες τελικοί/saved_models/mnist.h5')

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Define the quantization-aware model by wrapping the loaded baseline model.
import tensorflow_model_optimization as tfmot

quantize_model = tfmot.quantization.keras.quantize_model

# q_aware stands for quantization aware.
q_aware_model = quantize_model(model)

# `quantize_model` requires a recompile.
# NOTE(review): from_logits=True assumes the baseline model's final layer
# emits raw logits (no softmax) — confirm against the saved model.
q_aware_model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Print the layer table of the wrapped model.
q_aware_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
quantize_layer (QuantizeLay  (None, 28, 28)            3         
er)                                                              
_________________________________________________________________
quant_reshape (QuantizeWrap  (None, 28, 28, 1)         1         
per)                                                             
_________________________________________________________________
quant_conv2d (QuantizeWrapp  (None, 14, 14, 32)        1635      
er)                                                              
_________________________________________________________________
quant_max_pooling2d (Quanti  (None, 5, 7, 32)          1         
zeWrapper)                                                       
_________________________________________________________________
quant_conv2d_1 (QuantizeWra  (None, 3, 4, 64)          1

In [None]:
# Fine-tune the quantization-aware model on a small slice of the training
# set. This cell only trains; the comparison against the baseline model is
# done in a later cell.
train_images_subset = train_images[0:1000]  # out of 60000
train_labels_subset = train_labels[0:1000]

# 5 epochs, batches of 500, with 10% of the subset held out for validation.
q_aware_model.fit(train_images_subset, train_labels_subset,
                  batch_size=500, epochs=5, validation_split=0.1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fe502bdf590>

In [None]:
# Convert the fine-tuned quantization-aware Keras model to a TFLite model.
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
# Enable the converter's default optimization set.
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# The converted model is kept in memory here; a later cell writes it to a
# file to measure its size.
quantized_tflite_model = converter.convert()



INFO:tensorflow:Assets written to: /tmp/tmphhinnz8f/assets


INFO:tensorflow:Assets written to: /tmp/tmphhinnz8f/assets


In [None]:
#See persistence of accuracy from tf to tflite
import numpy as np

def evaluate_model(interpreter, images=None, labels=None):
  """Measure classification accuracy of a TFLite-style interpreter.

  Images are fed to the interpreter one at a time; the predicted class is the
  argmax of the first output tensor.

  Args:
    interpreter: object exposing ``get_input_details()``,
      ``get_output_details()``, ``set_tensor()``, ``invoke()`` and
      ``tensor()`` (e.g. a ``tf.lite.Interpreter``). This function does not
      call ``allocate_tensors()``; presumably the caller must have done so
      beforehand — confirm at the call site.
    images: sequence of single images without a batch dimension. Defaults to
      the notebook-level ``test_images``.
    labels: ground-truth class indices, one per image. Defaults to the
      notebook-level ``test_labels``.

  Returns:
    Fraction of images whose predicted class equals the corresponding label.

  Raises:
    ValueError: if ``images`` and ``labels`` differ in length.
  """
  # Fall back to the notebook globals so existing one-argument calls behave
  # exactly as before.
  if images is None:
    images = test_images
  if labels is None:
    labels = test_labels
  labels = np.asarray(labels)

  # Fail before running inference rather than silently comparing arrays of
  # different lengths at the end.
  if len(images) != len(labels):
    raise ValueError(
        'images and labels must have the same length, got {a} and {b}'.format(
            a=len(images), b=len(labels)))

  input_index = interpreter.get_input_details()[0]["index"]
  output_index = interpreter.get_output_details()[0]["index"]

  # Run predictions on every image in the dataset.
  prediction_digits = []
  for i, image in enumerate(images):
    if i % 1000 == 0:
      print('Evaluated on {n} results so far.'.format(n=i))
    # Pre-processing: add batch dimension and convert to float32 to match with
    # the model's input data format.
    batch = np.expand_dims(image, axis=0).astype(np.float32)
    interpreter.set_tensor(input_index, batch)

    # Run inference.
    interpreter.invoke()

    # Post-processing: remove batch dimension and find the digit with highest
    # score.
    output = interpreter.tensor(output_index)
    digit = np.argmax(output()[0])
    prediction_digits.append(digit)

  print('\n')
  # Compare prediction results with ground truth labels to calculate accuracy.
  prediction_digits = np.array(prediction_digits)
  accuracy = (prediction_digits == labels).mean()
  return accuracy

In [None]:
# Evaluate the baseline and the quantization-aware Keras models on the test
# set and print both accuracies side by side.
# NOTE(review): unpacking into (_, accuracy) assumes the saved baseline model
# was compiled with exactly one metric, so evaluate() returns two values —
# confirm against the saved model.
_, baseline_model_accuracy = model.evaluate(
    test_images, test_labels, verbose=0)

# q_aware_model was compiled with metrics=['accuracy'], so evaluate() returns
# two values here; the first is discarded.
_, q_aware_model_accuracy = q_aware_model.evaluate(
   test_images, test_labels, verbose=0)

print('Baseline test accuracy:', baseline_model_accuracy)
print('Quant test accuracy:', q_aware_model_accuracy)

Baseline test accuracy: 0.9865999817848206
Quant test accuracy: 0.9879999756813049


In [None]:
# Compare the on-disk size of the float and the quantized TFLite models.
# Create float TFLite model from the baseline Keras model (no optimizations).
float_converter = tf.lite.TFLiteConverter.from_keras_model(model)
float_tflite_model = float_converter.convert()

# Measure sizes of models.
# mkstemp() returns an already-open OS-level file descriptor together with the
# path. Writing through os.fdopen() ensures that descriptor is closed when the
# `with` block exits instead of being leaked.
float_fd, float_file = tempfile.mkstemp('.tflite')
quant_fd, quant_file = tempfile.mkstemp('.tflite')

with os.fdopen(quant_fd, 'wb') as f:
  f.write(quantized_tflite_model)

with os.fdopen(float_fd, 'wb') as f:
  f.write(float_tflite_model)

# Sizes are reported in units of 2**20 bytes. The temporary files are left on
# disk so their paths stay usable after this cell.
print("Float model in Mb:", os.path.getsize(float_file) / float(2**20))
print("Quantized model in Mb:", os.path.getsize(quant_file) / float(2**20))

INFO:tensorflow:Assets written to: /tmp/tmpv6i_rnzf/assets


INFO:tensorflow:Assets written to: /tmp/tmpv6i_rnzf/assets


Float model in Mb: 0.5430374145507812
Quantized model in Mb: 0.14563751220703125
