In [None]:

from importlib import import_module
from pathlib import Path
import sys
sys.path.insert(0, "../")
import tensorflow as tf
from tensorflow.keras.models import load_model
import numpy as np

# reload modules
import importlib
import models.fc
import models.cnn
importlib.reload(models.fc)
importlib.reload(models.cnn)

import data
importlib.reload(data)

from data import read_data, read_labels, normalize_img
from models.fc import build_fc_model
from models.cnn import build_cnn_model

In [None]:
# Fetch the MNIST train/test splits through the Keras dataset helper
mnist_train_split, mnist_test_split = tf.keras.datasets.mnist.load_data()
train_data, train_labels = mnist_train_split
test_data, test_labels = mnist_test_split

In [None]:
# Preprocessing (normalization)
raw_min, raw_max = train_data.min(), train_data.max()
print('Raw data pixel value range:', raw_min, 'to', raw_max)

# normalize_img takes and returns an (images, labels) pair; apply it to both splits
train_data, train_labels = normalize_img(train_data, train_labels)
test_data, test_labels = normalize_img(test_data, test_labels)

print('Normalized datatye: ', type(train_data))
# .numpy() is called on the result, so the normalized images are presumably a tf.Tensor
train_pixels = train_data.numpy()
print('Normalized data pixel value range:', train_pixels.min(), 'to', train_pixels.max())

In [None]:
# One-hot encode the labels of both splits (10 classes)
train_labels, test_labels = (
    tf.keras.utils.to_categorical(split_labels, num_classes=10)
    for split_labels in (train_labels, test_labels)
)

In [None]:
# Model architecture to load and quantize
model_type = 'cnn'  # Set to 'cnn' or 'fc'; later cells branch on these two values

In [None]:
# Directory holding the previously saved Keras model for the selected architecture
OUTPUT_PATH = Path(f'./../../../saved_model/mnist_{model_type}')

# Restore the model from its HDF5 file and print its layer summary
keras_model_file = OUTPUT_PATH / 'model.h5'
model = load_model(keras_model_file)
model.summary()

In [None]:
# Score the loaded model on the test split; test_acc is reused in the final comparison
evaluation_results = model.evaluate(test_data, test_labels, verbose=2)
test_loss, test_acc = evaluation_results
print(f"Test accuracy: {test_acc:.4f}")

In [None]:
def representative_data_gen():
    """Yield 100 randomly drawn training samples for the TFLite converter.

    Reads the notebook-level ``train_data`` and ``model_type``. Each sample is a
    one-element slice of ``train_data`` converted to a float32 NumPy array,
    shaped according to ``model_type``, and yielded wrapped in a one-item list.
    """
    for _ in range(100):
        pick = np.random.randint(len(train_data))
        batch = train_data[pick:pick + 1]
        # Tensors are converted to NumPy before the dtype cast
        if isinstance(batch, tf.Tensor):
            batch = batch.numpy()
        batch = batch.astype(np.float32)

        if model_type == 'cnn':
            # CNN expects (1, H, W, 1): append a channel axis to a 2-D array
            # (e.g. (28, 28)) or to an array that is already (1, H, W)
            needs_channel = batch.ndim == 2 or (batch.ndim == 3 and batch.shape[0] == 1)
            if needs_channel:
                batch = np.expand_dims(batch, axis=-1)
        elif model_type == 'fc' and batch.ndim > 2:
            # FC branch: collapse everything into a single row of shape (1, N)
            batch = batch.reshape(1, -1)

        yield [batch]

In [None]:
# Convert the Keras model to TensorFlow Lite with full INT8 quantization
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Default optimizations plus the sample generator defined earlier in the notebook
converter.representative_dataset = representative_data_gen
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Restrict to the INT8 builtin op set and make the model's input and output int8 too
converter.inference_input_type = converter.inference_output_type = tf.int8
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

# Run the conversion; the result is written to disk by a later cell
tflite_model = converter.convert()

In [None]:
# Directory for the post-training-quantized INT8 model
OUTPUT_PATH = Path(f'./../../../saved_model/mnist_{model_type}_int8_pqt')

# Create the directory first: on a fresh checkout it does not exist yet and
# open() below would raise FileNotFoundError
OUTPUT_PATH.mkdir(parents=True, exist_ok=True)

# Save the quantized model
with open(OUTPUT_PATH / "model_int8.tflite", "wb") as f:
    f.write(tflite_model)

In [None]:
def tflite_predict(x):
    """Run one inference through the TFLite interpreter and return its output.

    Relies on the notebook-level ``interpreter``, ``input_details`` and
    ``output_details``; all three must be defined before this is called.

    Args:
        x: NumPy array holding one float input batch. If its shape differs
            from the interpreter's declared input shape but the element count
            matches, it is reshaped to that shape (this covers both adding a
            trailing channel axis and flattening). Otherwise a 3-D array gets
            a trailing channel axis appended, as before.

    Returns:
        The first output tensor. When the output dtype is int8 it is
        dequantized to float32 with the output's (scale, zero_point);
        otherwise it is returned as produced by the interpreter.
    """
    input_shape = tuple(int(dim) for dim in input_details[0]['shape'])
    input_dtype = input_details[0]['dtype']

    if x.shape != input_shape:
        if x.size == int(np.prod(input_shape)):
            # Same number of elements: adopt the layout the model declares
            x = x.reshape(input_shape)
        elif x.ndim == 3:
            x = np.expand_dims(x, axis=-1)

    if input_dtype == np.int8:
        scale, zero_point = input_details[0]['quantization']
        # Round to nearest before casting: astype(int8) alone truncates toward
        # zero, which shifts many quantized values by one step
        x = np.round(x / scale + zero_point)
        x = np.clip(x, -128, 127).astype(np.int8)

    interpreter.set_tensor(input_details[0]['index'], x)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])

    if output_details[0]['dtype'] == np.int8:
        scale, zero_point = output_details[0]['quantization']
        output_data = (output_data.astype(np.float32) - zero_point) * scale

    return output_data


# Build the interpreter that tflite_predict reads from the notebook namespace.
# Without this the notebook fails on a fresh kernel, because interpreter,
# input_details and output_details are not defined anywhere else.
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Convert test tensors to NumPy if needed
if isinstance(test_data, tf.Tensor):
    test_data = test_data.numpy()
if isinstance(test_labels, tf.Tensor):
    test_labels = test_labels.numpy()

# Evaluate accuracy one sample at a time
correct = 0
total = len(test_data)
for i in range(total):
    x = test_data[i:i+1].astype(np.float32)
    # Labels are one-hot encoded, so argmax recovers the class index
    y_true = np.argmax(test_labels[i])

    y_pred = np.argmax(tflite_predict(x))
    correct += int(y_true == y_pred)

accuracy = correct / total
print(f"Quantized model accuracy: {accuracy:.4f}")
print(f"FP32 pretrained model accuracy: {test_acc:.4f}")