In [1]:
# 라이브러리 및 모듈 설치
import tempfile
import os
import zipfile
import tensorflow as tf
import tensorflow_model_optimization as tfmot
import numpy as np
from tensorflow import keras
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical

In [2]:
# 모델 크기 계산 함수 
def get_gzipped_model_size(file):
  _, zipped_file = tempfile.mkstemp('.zip')
  with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
    f.write(file)
  return os.path.getsize(zipped_file)

In [3]:
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
train_images, test_images = train_images / 255.0, test_images / 255.0
model = tf.keras.models.load_model("my_model2.h5")
_, accuracy = model.evaluate(
   test_images, test_labels, verbose=0)

In [4]:
# 모델 가충치 가지치기 기능 실행
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude
batch_size = 128
epochs = 4
validation_split = 0.1
num_images = train_images.shape[0] * (1 - validation_split)
end_step = np.ceil(num_images / batch_size).astype(np.int32) * epochs
pruning_params = {
      'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.50,
                                                               final_sparsity=0.80,
                                                               begin_step=0,
                                                               end_step=end_step)
}
model_for_pruning = prune_low_magnitude(model, **pruning_params)
model_for_pruning.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
logdir = tempfile.mkdtemp()
callbacks = [
  tfmot.sparsity.keras.UpdatePruningStep(),
  tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]
model_for_pruning.fit(train_images, train_labels,
                  batch_size=batch_size, epochs=epochs, validation_split=validation_split,
                  callbacks=callbacks)
_, model_for_pruning_accuracy = model_for_pruning.evaluate(
   test_images, test_labels, verbose=0)
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)
model_for_pruning.summary()

Epoch 1/4


  output, from_logits = _get_logits(


Epoch 2/4
Epoch 3/4
Epoch 4/4
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_conv2d  (None, 30, 30, 32)        1762      
 _6 (PruneLowMagnitude)                                          
                                                                 
 prune_low_magnitude_max_po  (None, 15, 15, 32)        1         
 oling2d_4 (PruneLowMagnitu                                      
 de)                                                             
                                                                 
 prune_low_magnitude_conv2d  (None, 13, 13, 64)        36930     
 _7 (PruneLowMagnitude)                                          
                                                                 
 prune_low_magnitude_max_po  (None, 6, 6, 64)          1         
 oling2d_5 (PruneLowMagnitu                                      
 de)                    

In [5]:
# 가중치 가지치기 기능 적용 모델 저장
model_for_pruning.save("purned_model2.h5")

  saving_api.save_model(


In [6]:
# 모델 양자화 기능 실행
quantize_model = tfmot.quantization.keras.quantize_model
q_aware_model = quantize_model(model)
q_aware_model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
q_aware_model.fit(train_images, train_labels,
                  batch_size=500, epochs=4, validation_split=0.1)
_, q_aware_model_accuracy = q_aware_model.evaluate(
   test_images, test_labels, verbose=0)
q_aware_model.summary()

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 quantize_layer (QuantizeLa  (None, 32, 32, 3)         3         
 yer)                                                            
                                                                 
 quant_conv2d_6 (QuantizeWr  (None, 30, 30, 32)        963       
 apperV2)                                                        
                                                                 
 quant_max_pooling2d_4 (Qua  (None, 15, 15, 32)        1         
 ntizeWrapperV2)                                                 
                                                                 
 quant_conv2d_7 (QuantizeWr  (None, 13, 13, 64)        18627     
 apperV2)                                                        
                                                                 
 quant_max_poo

In [7]:
# 양자화 기능 적용 모델 저장
q_aware_model.save("quantized_model2.h5")

In [8]:
# 기능 매트릭 정리 
# 파일 크기를 정수로 변환하여 출력
keras_file_size = int(get_gzipped_model_size("my_model2.h5"))
pruned_keras_file_size = int(get_gzipped_model_size("purned_model2.h5"))
quant_file_size = int(get_gzipped_model_size("quantized_model2.h5"))
# 압축률 = (압축 전 데이터 크기) / (압축 후 데이터 크기)
pruned_keras_compression_rate = ((keras_file_size / pruned_keras_file_size))
quant_file_compression_rate = (keras_file_size / quant_file_size)
pruned_keras_compression_rate = "{:.2f}".format(pruned_keras_compression_rate)
quant_file_compression_rate = "{:.2f}".format(quant_file_compression_rate)
# 원본 모델 비교
PADP = (accuracy - (accuracy - model_for_pruning_accuracy)) / accuracy * 100
if PADP >= 100:
    PADP = 100
P = "{:.2f}".format(PADP)
PADQ = (accuracy - (accuracy - q_aware_model_accuracy)) / accuracy * 100
if PADQ >= 100:
    PADQ = 100
Q = "{:.2f}".format(PADQ)

In [9]:
# 각 모델간(원본, 가중치 가지치기, 양자화) 성능비교 매트릭 출력
print('모델 사이즈---------------------------------------------------')
print("Size of gzipped baseline Keras model: %d bytes" % keras_file_size)
print("Size of gzipped pruned Keras model: %d bytes" % pruned_keras_file_size)
print("Size of gzipped Quantized model: %d bytes" % quant_file_size)
print('모델 압축률---------------------------------------------------')
print("compression rate of pruned Keras model: ", pruned_keras_compression_rate)
print("compression rate of Quantized model: ", quant_file_compression_rate)
print('모델 최적화 성능----------------------------------------------')
print('Baseline test accuracy:', accuracy)
print('Pruned test accuracy:', model_for_pruning_accuracy)
print('Quant test accuracy:', q_aware_model_accuracy)
print('모델 최적화 비교 정확도----------------------------------------')
print('가지치기 비교 정확도 :', P,"%")
print('양자화 비교 정확도 :', Q,"%")

모델 사이즈---------------------------------------------------
Size of gzipped baseline Keras model: 1319277 bytes
Size of gzipped pruned Keras model: 721579 bytes
Size of gzipped Quantized model: 836555 bytes
모델 압축률---------------------------------------------------
compression rate of pruned Keras model:  1.83
compression rate of Quantized model:  1.58
모델 최적화 성능----------------------------------------------
Baseline test accuracy: 0.6740000247955322
Pruned test accuracy: 0.6980000138282776
Quant test accuracy: 0.7024999856948853
모델 최적화 비교 정확도----------------------------------------
가지치기 비교 정확도 : 100.00 %
양자화 비교 정확도 : 100.00 %
