In [1]:
import tensorflow as tf
import numpy as np
import time
import os
from sklearn.metrics import accuracy_score

# On-disk locations of the source Keras model and the TFLite file written below.
keras_model_path = 'mnist_cnn.h5'
tflite_model_path = 'mnist_cnn_quantized.tflite'

model = tf.keras.models.load_model(keras_model_path)

# Convert the loaded Keras model to a TFLite flatbuffer with the converter's
# DEFAULT optimization setting enabled.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quantized_model = converter.convert()

# Persist the converted model so its file size can be measured and so the
# interpreter can load it from disk later.
with open(tflite_model_path, 'wb') as tflite_file:
    tflite_file.write(tflite_quantized_model)

# File sizes in bytes of the original and the converted model.
original_model_size, quantized_model_size = (
    os.path.getsize(path) for path in (keras_model_path, tflite_model_path)
)

# Only the test split returned by load_data() is used; the first (training)
# tuple is discarded.
_, (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Cast to float32, divide by 255.0, and append a trailing axis of size 1
# (the exported model signature expects inputs shaped (None, 28, 28, 1)).
x_test = (x_test.astype('float32') / 255.0)[..., np.newaxis]

# Warm-up: the first predict() call on a freshly loaded model typically pays
# one-time setup costs (building the prediction function). Run one default-size
# batch untimed so that cost is not charged to the measurement below.
model.predict(x_test[:32], verbose=0)

# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() is wall-clock time, can be coarse on some platforms,
# and may jump if the system clock is adjusted.
start_time = time.perf_counter()
y_pred = model.predict(x_test, verbose=0)

# Average seconds per sample over one batched predict() call.
# NOTE(review): this is batched throughput, whereas the TFLite measurement is
# taken one sample at a time -- the two numbers are not strictly comparable.
original_inference_time = (time.perf_counter() - start_time) / len(x_test)

# The predicted class is the index of the largest output score per sample.
original_accuracy = accuracy_score(y_test, np.argmax(y_pred, axis=1))

# Load the converted model into a TFLite interpreter and allocate its tensors.
interpreter = tf.lite.Interpreter(model_path="mnist_cnn_quantized.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# The tensor indices do not change between invocations; look them up once.
input_index = input_details[0]['index']
output_index = output_details[0]['index']

predictions = []
inference_times = []

# Run the interpreter one sample at a time, timing only invoke().
for sample in x_test:
    # Add a leading batch axis of size 1. copy=False avoids a redundant copy
    # when the sample is already float32.
    input_data = sample[np.newaxis, ...].astype(np.float32, copy=False)
    interpreter.set_tensor(input_index, input_data)

    # perf_counter() has the resolution needed for sub-millisecond intervals;
    # time.time() can be too coarse to time a single invoke() reliably.
    start = time.perf_counter()
    interpreter.invoke()
    inference_times.append(time.perf_counter() - start)

    output = interpreter.get_tensor(output_index)
    predictions.append(np.argmax(output))

quantized_accuracy = accuracy_score(y_test, predictions)
# Mean seconds per single-sample invoke().
quantized_inference_time = np.mean(inference_times)

def _format_table_row(cells, widths):
    """Return one pipe-delimited row with each cell left-aligned to its column width."""
    padded = (f"{cell:<{width}}" for cell, width in zip(cells, widths))
    return "| " + " | ".join(padded) + " |"


# A single set of column widths drives the header, the separator and every
# data row, so the printed table always lines up (the previous hand-padded
# format strings produced ragged columns).
column_widths = (20, 20, 20)
table_header = ("Metric", "Original Model", "Quantized Model")
table_rows = [
    ("Size (bytes)", str(original_model_size), str(quantized_model_size)),
    ("Accuracy", f"{original_accuracy:.4f}", f"{quantized_accuracy:.4f}"),
    (
        "Avg Inference Time",
        f"{original_inference_time:.6f} s",
        f"{quantized_inference_time:.6f} s",
    ),
]

print("\nModel Comparison:")
print(_format_table_row(table_header, column_widths))
# Each separator segment spans the cell width plus its two padding spaces.
print("|" + "|".join("-" * (width + 2) for width in column_widths) + "|")
for table_row in table_rows:
    print(_format_table_row(table_row, column_widths))




INFO:tensorflow:Assets written to: C:\Users\PRAVEE~1\AppData\Local\Temp\tmp7j6fr8bn\assets


INFO:tensorflow:Assets written to: C:\Users\PRAVEE~1\AppData\Local\Temp\tmp7j6fr8bn\assets


Saved artifact at 'C:\Users\PRAVEE~1\AppData\Local\Temp\tmp7j6fr8bn'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  2374483124752: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2374483125520: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2374483126672: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2374483125136: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2374483128976: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2374483129744: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2374483129168: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2374483128208: TensorSpec(shape=(), dtype=tf.resource, name=None)


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    



Model Comparison:
| Metric               | Original Model       | Quantized Model       |
|----------------------|----------------------|------------------------|
| Size (bytes)         | 1502296              | 130112                 |
| Accuracy             | 0.9906               | 0.9906               |
| Avg Inference Time   | 0.000075 s        | 0.000026 s        |
