In [17]:
#라이브러리
import tempfile
import os
import zipfile
import tensorflow as tf
import tensorflow_model_optimization as tfmot
import numpy as np
from tensorflow import keras

In [18]:
# 함수 
def get_gzipped_model_size(file):
  """Return the size, in bytes, of `file` after DEFLATE compression.

  The file is written into a temporary single-entry zip archive, the archive's
  on-disk size is measured, and the archive is removed again.

  Args:
    file: Path of the file to compress.

  Returns:
    Size in bytes of the zip archive that contains `file`.
  """
  # mkstemp returns an already-open OS-level descriptor together with the path.
  # Only the path is needed (ZipFile opens it itself), so close the descriptor
  # right away instead of leaking it.
  fd, zipped_file = tempfile.mkstemp('.zip')
  os.close(fd)

  try:
    with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
      f.write(file)
    return os.path.getsize(zipped_file)
  finally:
    # The archive only exists to be measured; do not leave it behind.
    os.remove(zipped_file)

def evaluate_model(interpreter, images=None, labels=None):
  """Measure the classification accuracy of a TFLite interpreter.

  Every image is fed to the interpreter on its own (a batch dimension of size
  one is added), the index of the largest output value is taken as the
  prediction, and the predictions are compared with the labels.

  Args:
    interpreter: Object providing `get_input_details`, `get_output_details`,
      `set_tensor`, `invoke` and `tensor`; its tensors must already be
      allocated by the caller.
    images: Iterable of per-sample input arrays. Defaults to the notebook
      global `test_images`.
    labels: Ground-truth class indices, one per image. Defaults to the
      notebook global `test_labels`.

  Returns:
    Fraction of samples whose predicted class equals its label.

  Raises:
    ValueError: If the number of predictions does not match the shape of
      `labels`.
  """
  # Fall back to the notebook-level test split so existing
  # `evaluate_model(interpreter)` calls keep working unchanged.
  if images is None:
    images = test_images
  if labels is None:
    labels = test_labels
  labels = np.asarray(labels)

  input_index = interpreter.get_input_details()[0]["index"]
  output_index = interpreter.get_output_details()[0]["index"]

  # Run predictions on every image in the dataset.
  prediction_digits = []
  for i, test_image in enumerate(images):
    if i % 1000 == 0:
      print('Evaluated on {n} results so far.'.format(n=i))
    # Pre-processing: add batch dimension and convert to float32 to match with
    # the model's input data format.
    test_image = np.expand_dims(test_image, axis=0).astype(np.float32)
    interpreter.set_tensor(input_index, test_image)

    # Run inference.
    interpreter.invoke()

    # Post-processing: remove batch dimension and find the digit with highest
    # probability.
    output = interpreter.tensor(output_index)
    digit = np.argmax(output()[0])
    prediction_digits.append(digit)

  print('\n')
  # Compare prediction results with ground truth labels to calculate accuracy.
  prediction_digits = np.array(prediction_digits)
  # A shape mismatch would otherwise broadcast (or compare as a scalar) and
  # silently yield a meaningless accuracy.
  if prediction_digits.shape != labels.shape:
    raise ValueError(
        'Got {p} predictions but labels have shape {l}.'.format(
            p=prediction_digits.shape, l=labels.shape))
  accuracy = (prediction_digits == labels).mean()
  return accuracy

In [36]:
# Seed the Python, NumPy and TensorFlow random generators before any weights
# are created, so that initialisation and batch shuffling are repeatable on
# "Restart & Run All" (as far as the execution backend is deterministic).
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Load MNIST dataset
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize the input image so that each pixel value is between 0 and 1.
train_images = train_images / 255.0
test_images = test_images / 255.0

# Define the model architecture: reshape to a single channel, one 3x3
# convolution with 12 filters, 2x2 max pooling, then a 10-unit dense layer
# that emits raw logits (no softmax).
model = keras.Sequential([
  keras.layers.InputLayer(input_shape=(28, 28)),
  keras.layers.Reshape(target_shape=(28, 28, 1)),
  keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'),
  keras.layers.MaxPooling2D(pool_size=(2, 2)),
  keras.layers.Flatten(),
  keras.layers.Dense(10)
])

# Train the digit classification model.
# from_logits=True matches the final Dense layer, which has no activation.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# One epoch, with 10% of the training data held out for validation.
model.fit(
  train_images,
  train_labels,
  epochs=1,
  validation_split=0.1,
)



<keras.src.callbacks.History at 0x192079a7dd0>

In [37]:
# Evaluate the trained float model on the test split; the first returned value
# (the loss) is discarded and the accuracy metric is kept.
_, baseline_model_accuracy = model.evaluate(
    test_images, test_labels, verbose=0)
print('Baseline test accuracy:', baseline_model_accuracy)

# mkstemp returns an open OS-level descriptor plus the path. Keras opens the
# path itself, so close the descriptor immediately rather than leaking it.
keras_fd, keras_file = tempfile.mkstemp('.h5')
os.close(keras_fd)
tf.keras.models.save_model(model, keras_file, include_optimizer=False)
print('Saved baseline model to:', keras_file)

Baseline test accuracy: 0.9589999914169312
Saved baseline model to: C:\Users\Jay\AppData\Local\Temp\tmppjsfuksa.h5


  tf.keras.models.save_model(model, keras_file, include_optimizer=False)


In [38]:
# Shorthand for tfmot's `quantize_model` function.
quantize_model = tfmot.quantization.keras.quantize_model

# q_aware stands for quantization aware: this is the model that is
# fine-tuned, evaluated and converted to TFLite in the following cells.
q_aware_model = quantize_model(model)

# `quantize_model` requires a recompile. The same optimizer, loss and metric
# as the baseline model are used.
q_aware_model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Print the layer table of the quantization-aware model.
q_aware_model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 quantize_layer_2 (Quantize  (None, 28, 28)            3         
 Layer)                                                          
                                                                 
 quant_reshape_3 (QuantizeW  (None, 28, 28, 1)         1         
 rapperV2)                                                       
                                                                 
 quant_conv2d_3 (QuantizeWr  (None, 26, 26, 12)        147       
 apperV2)                                                        
                                                                 
 quant_max_pooling2d_3 (Qua  (None, 13, 13, 12)        1         
 ntizeWrapperV2)                                                 
                                                                 
 quant_flatten_3 (QuantizeW  (None, 2028)             

In [49]:
# Fine-tune the quantization-aware model for one epoch on the training data
# (batches of 500 samples, 10% held out for validation).
q_aware_model.fit(train_images, train_labels,
                  batch_size=500, epochs=1, validation_split=0.1)



<keras.src.callbacks.History at 0x192232810d0>

In [50]:
# Evaluate the quantization-aware Keras model on the test split; the first
# returned value (the loss) is discarded and the accuracy metric is kept.
_, q_aware_model_accuracy = q_aware_model.evaluate(
   test_images, test_labels, verbose=0)

In [51]:
# Convert the quantization-aware model to a TF Lite model.
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
# Request the converter's default optimization set.
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Serialized TFLite model; it is loaded into an interpreter and written to
# disk in later cells.
quantized_tflite_model = converter.convert()

INFO:tensorflow:Assets written to: C:\Users\Jay\AppData\Local\Temp\tmplhfsibod\assets


INFO:tensorflow:Assets written to: C:\Users\Jay\AppData\Local\Temp\tmplhfsibod\assets


In [52]:
# Load the converted model directly from memory and allocate its tensors
# before running inference.
interpreter = tf.lite.Interpreter(model_content=quantized_tflite_model)
interpreter.allocate_tensors()

# Accuracy of the TFLite model, computed one test image at a time.
test_accuracy = evaluate_model(interpreter)

# Print the TFLite accuracy next to the accuracy of the quantization-aware
# Keras model it was converted from.
print('Quant TFLite test_accuracy:', test_accuracy)
print('Quant TF test accuracy:', q_aware_model_accuracy)

Evaluated on 0 results so far.
Evaluated on 1000 results so far.
Evaluated on 2000 results so far.
Evaluated on 3000 results so far.
Evaluated on 4000 results so far.
Evaluated on 5000 results so far.
Evaluated on 6000 results so far.
Evaluated on 7000 results so far.
Evaluated on 8000 results so far.
Evaluated on 9000 results so far.


Quant TFLite test_accuracy: 0.9728
Quant TF test accuracy: 0.9728000164031982


In [53]:
# Create float TFLite model.
float_converter = tf.lite.TFLiteConverter.from_keras_model(model)
float_tflite_model = float_converter.convert()

# Write both serialized models to temporary .tflite files so that their sizes
# can be measured.
# mkstemp returns an already-open OS-level descriptor together with the path.
# Wrapping that descriptor with os.fdopen lets the `with` block write the bytes
# and close the handle, instead of leaking it and opening the path a second
# time.
float_fd, float_file = tempfile.mkstemp('.tflite')
with os.fdopen(float_fd, 'wb') as f:
  f.write(float_tflite_model)

quant_fd, quant_file = tempfile.mkstemp('.tflite')
with os.fdopen(quant_fd, 'wb') as f:
  f.write(quantized_tflite_model)

INFO:tensorflow:Assets written to: C:\Users\Jay\AppData\Local\Temp\tmp0yz4u9mk\assets


INFO:tensorflow:Assets written to: C:\Users\Jay\AppData\Local\Temp\tmp0yz4u9mk\assets


In [54]:
# Convert the compressed file sizes to integers and print them.
# NOTE(review): this compares the baseline saved as a Keras .h5 file with the
# quantized model saved as a .tflite file, so the difference reflects the file
# format as well as quantization. The float .tflite file (`float_file`) written
# in the previous cell is not measured here - confirm this is intended.
keras_file_size = int(get_gzipped_model_size(keras_file))
quant_file_size = int(get_gzipped_model_size(quant_file))

print("Size of gzipped baseline Keras model: %d bytes" % keras_file_size)
print("Size of gzipped Quantized model: %d bytes" % quant_file_size)


Size of gzipped baseline Keras model: 78147 bytes
Size of gzipped Quantized model: 18715 bytes


In [55]:
# Compression rate = (data size before compression) / (data size after compression).
# Here: compressed size of the baseline Keras file divided by the compressed
# size of the quantized TFLite file, so larger values mean a smaller quantized file.
quant_file_compression_rate = (keras_file_size / quant_file_size)
print("compression rate of Quantized model: ", quant_file_compression_rate)

compression rate of Quantized model:  4.175634517766498


In [56]:
# Show the test accuracy of the float baseline and of the quantization-aware
# Keras model side by side.
print('Baseline test accuracy:', baseline_model_accuracy) 
print('Quant TF test accuracy:', q_aware_model_accuracy)

Baseline test accuracy: 0.9589999914169312
Quant TF test accuracy: 0.9728000164031982


In [57]:
# Percentage Accuracy Difference (PAD) = [(full accuracy - accuracy difference) / full accuracy] * 100%
# "Full accuracy" is the baseline accuracy and "accuracy difference" is
# (baseline - quantized). Algebraically, baseline - (baseline - quantized) is
# the quantized accuracy, so this equals quantized / baseline * 100: the
# quantized accuracy expressed as a percentage of the baseline. A value above
# 100 means the quantized model scored higher than the baseline.
PAD = (baseline_model_accuracy - (baseline_model_accuracy - q_aware_model_accuracy)) / baseline_model_accuracy * 100

In [58]:
# Display the PAD percentage as the cell's output.
PAD

101.4390015755764