<a href="https://colab.research.google.com/github/poojamahajan0712/AI_ML_concepts/blob/main/Quantization_NB2_tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
#Reference - https://medium.com/game-of-bits/optimizing-tensorflow-models-using-quantization-fb4d09b46fac
# https://ai.google.dev/edge/litert/models/post_training_quantization

* Key idea behind quantization: these techniques aim to produce smaller and faster models while keeping model performance nearly the same.
* Post-training quantization: the deep learning model is trained with FP32 tensors and later converted to INT8 (or float16) to get a smaller and faster model for deployment. It is a bit more stable than quantization-aware training and easier to use.
* In post-training quantization, we train the deep learning model normally and save the weights. These weights are later converted into TFLite format and quantized.

In [2]:
import pandas as pd
import numpy as np
import tensorflow
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense, Conv2D, Flatten
from tensorflow.keras.models import Model
import tensorflow as tf

%matplotlib inline

In [36]:
# Load the scikit-learn digits dataset and pull out the image and label arrays.
digits = load_digits()
images, labels = digits['images'], digits['target']
print(images.shape, labels.shape)

# Hold out 25% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.25, random_state=42
)
# Append a trailing channel axis so the arrays match the Conv2D input layout.
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

#Encoding Labels
def get_encoded_labels(target, num_classes=10):
    """One-hot encode a sequence of integer class labels.

    Args:
        target: 1-D sequence (list or array) of integer class indices.
        num_classes: Width of the encoding, i.e. the number of columns in the
            result. Defaults to 10, the value that was previously hard-coded.

    Returns:
        A float array of shape ``(len(target), num_classes)`` that is all
        zeros except for a 1 at column ``target[i]`` of row ``i``.

    Raises:
        IndexError: If a label falls outside the valid column range.
    """
    # Casting to an integer dtype keeps the fancy indexing below valid even
    # for an empty input (an empty list would otherwise become float64).
    labels = np.asarray(target, dtype=np.intp)
    encoded = np.zeros((len(labels), num_classes))
    # Set one column per row in a single vectorised assignment instead of a
    # Python-level loop.
    encoded[np.arange(len(labels)), labels] = 1
    return encoded

# One-hot encode both label splits so they match the 10-way softmax output.
Y_train, Y_test = (get_encoded_labels(split) for split in (y_train, y_test))
print(Y_train.shape, Y_test.shape)


(1797, 8, 8) (1797,)
(1347, 8, 8, 1) (450, 8, 8, 1) (1347,) (450,)
(1347, 10) (450, 10)


In [24]:


# Small CNN classifier for the 8x8 single-channel digit images.
input_layer = Input(shape=(8, 8, 1))

# Three stacked 3x3 ReLU convolutions with 64, 32 and 32 filters.
layer = input_layer
for n_filters in (64, 32, 32):
    layer = Conv2D(n_filters, (3, 3), activation='relu')(layer)

# Flatten the feature maps, then a 32-unit hidden layer and a 10-way softmax.
layer = Flatten()(layer)
features = Dense(32, activation='relu')(layer)
output = Dense(10, activation='softmax')(features)

# Assemble and compile the model; the loss expects one-hot encoded targets.
model = Model(inputs=input_layer, outputs=output)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()



In [25]:
# Train for 10 epochs in mini-batches of 32; the held-out split is passed as
# validation data so per-epoch validation metrics are reported.
model.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_test, Y_test),
    epochs=10,
    batch_size=32,
)

Epoch 1/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - accuracy: 0.4496 - loss: 1.7803 - val_accuracy: 0.8689 - val_loss: 0.4341
Epoch 2/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.8768 - loss: 0.4132 - val_accuracy: 0.9289 - val_loss: 0.2390
Epoch 3/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9469 - loss: 0.2051 - val_accuracy: 0.9556 - val_loss: 0.1546
Epoch 4/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.9606 - loss: 0.1560 - val_accuracy: 0.9644 - val_loss: 0.1234
Epoch 5/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9707 - loss: 0.1092 - val_accuracy: 0.9667 - val_loss: 0.0935
Epoch 6/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.9860 - loss: 0.0606 - val_accuracy: 0.9778 - val_loss: 0.0814
Epoch 7/10
[1m43/43[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x7a600b01d630>

In [31]:
def get_test_accuracy(predictions, target):
    """Return top-1 classification accuracy as a percentage.

    Args:
        predictions: 2-D array-like of per-class scores, one row per sample.
        target: 1-D array-like of integer class labels, one per sample.

    Returns:
        ``correct * 100 / len(target)`` as a float, where a sample is correct
        when the index of its highest score equals its label.

    Raises:
        ValueError: If ``predictions`` and ``target`` differ in length.
        ZeroDivisionError: If ``target`` is empty.
    """
    scores = np.asarray(predictions)
    labels = np.asarray(target).reshape(-1)
    n_samples = len(labels)
    if len(scores) != n_samples:
        raise ValueError(
            "predictions and target must have the same length, got "
            f"{len(scores)} and {n_samples}"
        )
    # argmax picks the first maximum, so a tie no longer counts as a hit for
    # whichever label happens to be the true one. Comparing the true-class
    # score against max() would rate a constant-output model 100% accurate.
    if n_samples:
        correct = int(np.count_nonzero(scores.argmax(axis=1) == labels))
    else:
        correct = 0
    return correct * 100 / n_samples
# Score the trained Keras model on the held-out split.
predictions = model.predict(X_test)
# Bare last expression so the notebook displays the accuracy percentage.
get_test_accuracy(predictions=predictions, target=y_test)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


98.44444444444444

Post-training quantization

In [34]:
# Build a TFLite converter from the trained Keras model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Request the converter's default optimization set.
# NOTE(review): no representative dataset is supplied, so this is presumably
# dynamic-range (weight-only) quantization — confirm against the LiteRT
# post-training quantization guide.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Run the conversion and keep the result for later use.
# NOTE(review): presumably the serialized model bytes — confirm.
tflite_quant_model = converter.convert()

Saved artifact at '/tmp/tmpqiiugldb'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 8, 8, 1), dtype=tf.float32, name='keras_tensor_7')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  134555602800832: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134555316328176: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134555317462368: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134552922856384: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134552922861664: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134552919467008: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134553005288688: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134552919468592: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134552923027616: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134552923025504: TensorSpec(shape=(), dtype=tf.resource, name=None)
