In [16]:
# 라이브러리 및 모듈 설치
import tempfile
import os
import zipfile
import tensorflow as tf
import tensorflow_model_optimization as tfmot
import numpy as np
from tensorflow import keras

In [17]:
# 모델 크기 계산 함수
def get_gzipped_model_size(file):
  # Returns size of gzipped model, in bytes.

  _, zipped_file = tempfile.mkstemp('.zip')
  with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
    f.write(file)

  return os.path.getsize(zipped_file)

def evaluate_model(interpreter):
  input_index = interpreter.get_input_details()[0]["index"]
  output_index = interpreter.get_output_details()[0]["index"]

  # Run predictions on every image in the "test" dataset.
  prediction_digits = []
  for i, test_image in enumerate(test_images):
    if i % 1000 == 0:
      print('Evaluated on {n} results so far.'.format(n=i))
    # Pre-processing: add batch dimension and convert to float32 to match with
    # the model's input data format.
    test_image = np.expand_dims(test_image, axis=0).astype(np.float32)
    interpreter.set_tensor(input_index, test_image)

    # Run inference.
    interpreter.invoke()

    # Post-processing: remove batch dimension and find the digit with highest
    # probability.
    output = interpreter.tensor(output_index)
    digit = np.argmax(output()[0])
    prediction_digits.append(digit)

  print('\n')
  # Compare prediction results with ground truth labels to calculate accuracy.
  prediction_digits = np.array(prediction_digits)
  accuracy = (prediction_digits == test_labels).mean()
  return accuracy

In [18]:
# Load MNIST dataset
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize the input image so that each pixel value is between 0 and 1.
train_images = train_images / 255.0
test_images = test_images / 255.0

#모델 로드
model = tf.keras.models.load_model("my_model.h5")

In [19]:
# 모델을 사용하여 테스트 데이터에 대한 예측 수행
predictions = model.predict(test_images)

# 예측 결과를 실제 레이블과 비교하여 정확도 계산
accuracy = (predictions.argmax(axis=1) == test_labels).mean()
print("로드된 모델의 정확도:", accuracy)

  1/313 [..............................] - ETA: 22s

로드된 모델의 정확도: 0.9769


In [20]:
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

# Compute end step to finish pruning after 2 epochs.
batch_size = 128
epochs = 4
validation_split = 0.1 # 10% of training set will be used for validation set. 

num_images = train_images.shape[0] * (1 - validation_split)
end_step = np.ceil(num_images / batch_size).astype(np.int32) * epochs

# Define model for pruning.
pruning_params = {
      'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.50,
                                                               final_sparsity=0.80,
                                                               begin_step=0,
                                                               end_step=end_step)
}

model_for_pruning = prune_low_magnitude(model, **pruning_params)

# `prune_low_magnitude` requires a recompile.
model_for_pruning.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
logdir = tempfile.mkdtemp()

callbacks = [
  tfmot.sparsity.keras.UpdatePruningStep(),
  tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]
model_for_pruning.fit(train_images, train_labels,
                  batch_size=batch_size, epochs=epochs, validation_split=validation_split,
                  callbacks=callbacks)
_, model_for_pruning_accuracy = model_for_pruning.evaluate(
   test_images, test_labels, verbose=0)
model_for_pruning.summary()

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_reshap  (None, 28, 28, 1)         1         
 e_1 (PruneLowMagnitude)                                         
                                                                 
 prune_low_magnitude_conv2d  (None, 26, 26, 12)        230       
 _1 (PruneLowMagnitude)                                          
                                                                 
 prune_low_magnitude_max_po  (None, 13, 13, 12)        1         
 oling2d_1 (PruneLowMagnitu                                      
 de)                                                             
                                                                 
 prune_low_magnitude_flatte  (None, 2028)              1         
 n_1 (PruneLowMagnitude)                                         
              

In [21]:
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

# 프루닝 모델 저장
model_for_pruning.save("purned_model.h5")

In [22]:
quantize_model = tfmot.quantization.keras.quantize_model

# q_aware stands for for quantization aware.
q_aware_model = quantize_model(model)

# `quantize_model` requires a recompile.
q_aware_model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
q_aware_model.fit(train_images, train_labels,
                  batch_size=500, epochs=4, validation_split=0.1)
_, q_aware_model_accuracy = q_aware_model.evaluate(
   test_images, test_labels, verbose=0)
print('q test accuracy:', q_aware_model_accuracy)
q_aware_model.summary()

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
q test accuracy: 0.9793000221252441
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 quantize_layer_2 (Quantize  (None, 28, 28)            3         
 Layer)                                                          
                                                                 
 quant_reshape_1 (QuantizeW  (None, 28, 28, 1)         1         
 rapperV2)                                                       
                                                                 
 quant_conv2d_1 (QuantizeWr  (None, 26, 26, 12)        147       
 apperV2)                                                        
                                                                 
 quant_max_pooling2d_1 (Qua  (None, 13, 13, 12)        1         
 ntizeWrapperV2)                                                 
                                            

In [23]:
q_aware_model.save("quantized_model.h5")

In [24]:
# 파일 크기를 정수로 변환하여 출력
keras_file_size = int(get_gzipped_model_size("my_model.h5"))
pruned_keras_file_size = int(get_gzipped_model_size("purned_model.h5"))
quant_file_size = int(get_gzipped_model_size("quantized_model.h5"))
#압축률 = (압축 전 데이터 크기) / (압축 후 데이터 크기)
pruned_keras_compression_rate = ((keras_file_size / pruned_keras_file_size))
quant_file_compression_rate = (keras_file_size / quant_file_size)
#원본 모델 비교
PADP = (accuracy - (accuracy - model_for_pruning_accuracy)) / accuracy * 100
PADQ = (accuracy - (accuracy - q_aware_model_accuracy)) / accuracy * 100

In [25]:
print('-------------------------------------------------------------')
print('모델 사이즈')
print('-------------------------------------------------------------')
print("Size of gzipped baseline Keras model: %d bytes" % keras_file_size)
print("Size of gzipped pruned Keras model: %d bytes" % pruned_keras_file_size)
print("Size of gzipped Quantized model: %d bytes" % quant_file_size)
print('-------------------------------------------------------------')
print('모델 압축률')
print('-------------------------------------------------------------')
print("compression rate of pruned Keras model: ", pruned_keras_compression_rate)
print("compression rate of Quantized model: ", quant_file_compression_rate)
print('-------------------------------------------------------------')
print('모델 최적화 성능')
print('-------------------------------------------------------------')
print('Baseline test accuracy:', accuracy)
print('Pruned test accuracy:', model_for_pruning_accuracy)
print('Quant TF test accuracy:', q_aware_model_accuracy)
print('-------------------------------------------------------------')
print('모델 최적화 정확도 비교리포트')
print('-------------------------------------------------------------')
print('가지치기 비교 정확도 :', PADP)
print('양자화 비교 정확도 :', PADQ)
print('-------------------------------------------------------------')

-------------------------------------------------------------
모델 사이즈
-------------------------------------------------------------
Size of gzipped baseline Keras model: 234324 bytes
Size of gzipped pruned Keras model: 186048 bytes
Size of gzipped Quantized model: 151059 bytes
-------------------------------------------------------------
모델 압축률
-------------------------------------------------------------
compression rate of pruned Keras model:  1.2594814241486068
compression rate of Quantized model:  1.5512084682144063
-------------------------------------------------------------
모델 최적화 성능
-------------------------------------------------------------
Baseline test accuracy: 0.9769
Pruned test accuracy: 0.9764999747276306
Quant TF test accuracy: 0.9793000221252441
-------------------------------------------------------------
모델 최적화 정확도 비교리포트
-------------------------------------------------------------
가지치기 비교 정확도 : 99.9590515638889
양자화 비교 정확도 : 100.24567735952954
----------------------