In [10]:
# 라이브러리 및 모듈 설치
import tempfile
import os
import zipfile
import tensorflow as tf
import tensorflow_model_optimization as tfmot
import numpy as np
from tensorflow import keras

In [11]:
# 모델 크기 계산 함수
def get_gzipped_model_size(file):
  # Returns size of gzipped model, in bytes.

  _, zipped_file = tempfile.mkstemp('.zip')
  with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
    f.write(file)

  return os.path.getsize(zipped_file)

def evaluate_model(interpreter):
  input_index = interpreter.get_input_details()[0]["index"]
  output_index = interpreter.get_output_details()[0]["index"]

  # Run predictions on every image in the "test" dataset.
  prediction_digits = []
  for i, test_image in enumerate(test_images):
    if i % 1000 == 0:
      print('Evaluated on {n} results so far.'.format(n=i))
    # Pre-processing: add batch dimension and convert to float32 to match with
    # the model's input data format.
    test_image = np.expand_dims(test_image, axis=0).astype(np.float32)
    interpreter.set_tensor(input_index, test_image)

    # Run inference.
    interpreter.invoke()

    # Post-processing: remove batch dimension and find the digit with highest
    # probability.
    output = interpreter.tensor(output_index)
    digit = np.argmax(output()[0])
    prediction_digits.append(digit)

  print('\n')
  # Compare prediction results with ground truth labels to calculate accuracy.
  prediction_digits = np.array(prediction_digits)
  accuracy = (prediction_digits == test_labels).mean()
  return accuracy

In [12]:
# Load MNIST dataset
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize the input image so that each pixel value is between 0 and 1.
train_images = train_images / 255.0
test_images = test_images / 255.0

#모델 로드
model = tf.keras.models.load_model("my_model.h5")

In [13]:
# 모델을 사용하여 테스트 데이터에 대한 예측 수행
predictions = model.predict(test_images)

# 예측 결과를 실제 레이블과 비교하여 정확도 계산
accuracy = (predictions.argmax(axis=1) == test_labels).mean()
print("로드된 모델의 정확도:", accuracy)

로드된 모델의 정확도: 0.9566


In [14]:
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

# Compute end step to finish pruning after 2 epochs.
batch_size = 128
epochs = 2
validation_split = 0.1 # 10% of training set will be used for validation set. 

num_images = train_images.shape[0] * (1 - validation_split)
end_step = np.ceil(num_images / batch_size).astype(np.int32) * epochs

# Define model for pruning.
pruning_params = {
      'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.50,
                                                               final_sparsity=0.80,
                                                               begin_step=0,
                                                               end_step=end_step)
}

model_for_pruning = prune_low_magnitude(model, **pruning_params)

# `prune_low_magnitude` requires a recompile.
model_for_pruning.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
model_for_pruning.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_reshap  (None, 28, 28, 1)         1         
 e (PruneLowMagnitude)                                           
                                                                 
 prune_low_magnitude_conv2d  (None, 26, 26, 12)        230       
  (PruneLowMagnitude)                                            
                                                                 
 prune_low_magnitude_max_po  (None, 13, 13, 12)        1         
 oling2d (PruneLowMagnitude                                      
 )                                                               
                                                                 
 prune_low_magnitude_flatte  (None, 2028)              1         
 n (PruneLowMagnitude)                                           
                                                        

Trainable params: 20410 (79.73 KB)
Non-trainable params: 20395 (79.69 KB)
_________________________________________________________________


In [15]:
logdir = tempfile.mkdtemp()

callbacks = [
  tfmot.sparsity.keras.UpdatePruningStep(),
  tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]
model_for_pruning.fit(train_images, train_labels,
                  batch_size=batch_size, epochs=epochs, validation_split=validation_split,
                  callbacks=callbacks)

Epoch 1/2
Epoch 2/2


<keras.src.callbacks.History at 0x2715310a710>

In [16]:
_, model_for_pruning_accuracy = model_for_pruning.evaluate(
   test_images, test_labels, verbose=0)
print('Baseline test accuracy:', accuracy)
print('Pruned test accuracy:', model_for_pruning_accuracy)

Baseline test accuracy: 0.9566
Pruned test accuracy: 0.9672999978065491


In [17]:
model_for_pruning = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

# 프루닝 모델 저장
model_for_pruning.save("purned_model.h5")



In [21]:
# 파일 크기를 정수로 변환하여 출력
keras_file_size = int(get_gzipped_model_size("my_model.h5"))
pruned_keras_file_size = int(get_gzipped_model_size("purned_model.h5"))
print("Size of gzipped baseline Keras model: %d bytes" % keras_file_size)
print("Size of gzipped pruned Keras model: %d bytes" % pruned_keras_file_size)

Size of gzipped baseline Keras model: 233550 bytes
Size of gzipped pruned Keras model: 25596 bytes


In [None]:
quantize_model = tfmot.quantization.keras.quantize_model

# q_aware stands for for quantization aware.
q_aware_model = quantize_model(model)

# `quantize_model` requires a recompile.
q_aware_model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

q_aware_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 quantize_layer_1 (Quantize  (None, 28, 28)            3         
 Layer)                                                          
                                                                 
 quant_reshape (QuantizeWra  (None, 28, 28, 1)         1         
 pperV2)                                                         
                                                                 
 quant_conv2d (QuantizeWrap  (None, 26, 26, 12)        147       
 perV2)                                                          
                                                                 
 quant_max_pooling2d (Quant  (None, 13, 13, 12)        1         
 izeWrapperV2)                                                   
                                                                 
 quant_flatten (QuantizeWra  (None, 2028)              1

In [None]:
q_aware_model.fit(train_images, train_labels,
                  batch_size=500, epochs=1, validation_split=0.1)



<keras.src.callbacks.History at 0x29a5b46ed50>

In [None]:
_, q_aware_model_accuracy = q_aware_model.evaluate(
   test_images, test_labels, verbose=0)
print('q test accuracy:', q_aware_model_accuracy)

q test accuracy: 0.9677000045776367


In [None]:
#양자화 후 tf 라이트파일로 변환
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

quantized_tflite_model = converter.convert()

INFO:tensorflow:Assets written to: C:\Users\Jay\AppData\Local\Temp\tmp9r08_2gc\assets


INFO:tensorflow:Assets written to: C:\Users\Jay\AppData\Local\Temp\tmp9r08_2gc\assets


In [None]:
interpreter = tf.lite.Interpreter(model_content=quantized_tflite_model)
interpreter.allocate_tensors()

test_accuracy = evaluate_model(interpreter)

print('Quant TFLite test_accuracy:', test_accuracy)
print('Quant TF test accuracy:', q_aware_model_accuracy)

Evaluated on 0 results so far.
Evaluated on 1000 results so far.
Evaluated on 2000 results so far.
Evaluated on 3000 results so far.
Evaluated on 4000 results so far.
Evaluated on 5000 results so far.
Evaluated on 6000 results so far.
Evaluated on 7000 results so far.
Evaluated on 8000 results so far.
Evaluated on 9000 results so far.


Quant TFLite test_accuracy: 0.9652
Quant TF test accuracy: 0.9652000069618225


In [None]:
# 양자화 모델 저장
_, quantized_keras_file = tempfile.mkstemp('.h5')
tf.keras.models.save_model(quantized_tflite_model, quantized_keras_file, include_optimizer=False)
print('Saved quantized Keras model to:', quantized_keras_file)

NameError: name 'quantized_tflite_model' is not defined

In [None]:
# 프루닝 모델 저장
q_aware_model.save("quantized_model.h5")

In [None]:
# Create float TFLite model.
float_converter = tf.lite.TFLiteConverter.from_keras_model(model)
float_tflite_model = float_converter.convert()

# Measure sizes of models.
_, float_file = tempfile.mkstemp('.tflite')
_, quant_file = tempfile.mkstemp('.tflite')

with open(quant_file, 'wb') as f:
  f.write(quantized_tflite_model)

with open(float_file, 'wb') as f:
  f.write(float_tflite_model)

INFO:tensorflow:Assets written to: C:\Users\Jay\AppData\Local\Temp\tmp9eywa01t\assets


INFO:tensorflow:Assets written to: C:\Users\Jay\AppData\Local\Temp\tmp9eywa01t\assets


In [None]:
# 파일 크기를 정수로 변환하여 출력
keras_file_size = int(get_gzipped_model_size("my_model.h5"))
pruned_keras_file_size = int(get_gzipped_model_size("purned_model.h5"))
quant_file_size = int(get_gzipped_model_size("quantized_model.h5"))
print("Size of gzipped baseline Keras model: %d bytes" % keras_file_size)
print("Size of gzipped pruned Keras model: %d bytes" % pruned_keras_file_size)
print("Size of gzipped Quantized model: %d bytes" % quant_file_size)

Size of gzipped baseline Keras model: 233012 bytes
Size of gzipped pruned Keras model: 25595 bytes
Size of gzipped Quantized model: 183235 bytes


In [None]:
# 파일 크기를 정수로 변환하여 출력
keras_file_size = int(get_gzipped_model_size("my_model.h5"))
pruned_keras_file_size = int(get_gzipped_model_size("purned_model.h5"))
quant_file_size = int(get_gzipped_model_size("quantized_mdel.h5"))
#압축률 = (압축 전 데이터 크기) / (압축 후 데이터 크기)
pruned_keras_compression_rate = ((keras_file_size / pruned_keras_file_size))
quant_file_compression_rate = (keras_file_size / quant_file_size)
#원본 모델 비교
PADP = (baseline_model_accuracy - (baseline_model_accuracy - model_for_pruning_accuracy)) / baseline_model_accuracy * 100
PADQ = (baseline_model_accuracy - (baseline_model_accuracy - q_aware_model_accuracy)) / baseline_model_accuracy * 100

FileNotFoundError: [WinError 2] 지정된 파일을 찾을 수 없습니다: 'quantized_mdel.h5'

In [None]:
print('-------------------------------------------------------------')
print('모델 사이즈')
print('-------------------------------------------------------------')
print("Size of gzipped baseline Keras model: %d bytes" % keras_file_size)
print("Size of gzipped pruned Keras model: %d bytes" % pruned_keras_file_size)
print("Size of gzipped Quantized model: %d bytes" % quant_file_size)
print('-------------------------------------------------------------')
print('모델 압축률')
print('-------------------------------------------------------------')
print("compression rate of pruned Keras model: %d" % pruned_keras_compression_rate)
print("compression rate of Quantized model: ", quant_file_compression_rate)
print('-------------------------------------------------------------')
print('모델 최적화 성능')
print('-------------------------------------------------------------')
print('Baseline test accuracy:', baseline_model_accuracy)
print('Pruned test accuracy:', model_for_pruning_accuracy)
print('Quant TF test accuracy:', q_aware_model_accuracy)
print('-------------------------------------------------------------')
print('모델 최적화 정확도 비교리포트')
print('-------------------------------------------------------------')
print('가지치기 비교 정확도 :', PADP)
print('양자화 비교 정확도 :', PADQ)
print('-------------------------------------------------------------')

-------------------------------------------------------------
모델 사이즈
-------------------------------------------------------------


NameError: name 'keras_file_size' is not defined