In [1]:
import tempfile
import os
import zipfile
import tensorflow as tf
import tensorflow_model_optimization as tfmot
import numpy as np
from tensorflow import keras
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# 모델 크기 계산 함수 
def get_gzipped_model_size(file):
  _, zipped_file = tempfile.mkstemp('.zip')
  with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
    f.write(file)
  return os.path.getsize(zipped_file)

In [3]:
# IMDb 데이터 다운로드 및 전처리
max_words = 10000  # 가장 자주 등장하는 상위 10,000개 단어만 사용
max_len = 200  # 리뷰의 최대 길이

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_words)
x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)

print("x_test shape:", x_test.shape)
print("y_test shape:", y_test.shape)



x_test shape: (25000, 200)
y_test shape: (25000,)


In [4]:
model = tf.keras.models.load_model("my_model_RNN.h5")
_, accuracy = model.evaluate(
   x_test, y_test, verbose=0)

In [None]:
# 모델 가중치 가지치기 기능 실행
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude
batch_size = 128
epochs = 4
validation_split = 0.1
num_images = x_train.shape[0] * (1 - validation_split)
end_step = np.ceil(num_images / batch_size).astype(np.int32) * epochs
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.50,
                                                             final_sparsity=0.80,
                                                             begin_step=0,
                                                             end_step=end_step)
}
model_for_pruning = prune_low_magnitude(model, **pruning_params)
model_for_pruning.compile(optimizer='adam',
                          loss='binary_crossentropy',
                          metrics=['accuracy'])
logdir = tempfile.mkdtemp()
callbacks = [
    tfmot.sparsity.keras.UpdatePruningStep(),
    tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]
model_for_pruning.fit(x_train, y_train,
                      batch_size=batch_size, epochs=epochs, validation_split=validation_split,
                      callbacks=callbacks)
_, model_for_pruning_accuracy = model_for_pruning.evaluate(x_test, y_test, verbose=0)
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)
model_for_pruning.summary()

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_embedd  (None, 200, 128)          2560002   
 ing (PruneLowMagnitude)                                         
                                                                 
 prune_low_magnitude_lstm (  (None, 64)                98563     
 PruneLowMagnitude)                                              
                                                                 
 prune_low_magnitude_dense_  (None, 1)                 131       
 4 (PruneLowMagnitude)                                           
                                                                 
Total params: 2658696 (10.14 MB)
Trainable params: 1329473 (5.07 MB)
Non-trainable params: 1329223 (5.07 MB)
_________________________________________________________________


In [None]:
# 가중치 가지치기 기능 적용 모델 저장
model_for_pruning.save("purned_model_RNN.h5")

  saving_api.save_model(


ValueError: Unable to create dataset (name already exists)

ValueError: Unable to create dataset (name already exists)

In [None]:
# 모델 양자화 기능 실행
quantize_model = tfmot.quantization.keras.quantize_model
q_aware_model = quantize_model(model_for_export)
q_aware_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
q_aware_model.fit(x_train, y_train, batch_size=128, epochs=4, validation_split=0.1)  # 배치 크기를 원래 크기로 설정
_, q_aware_model_accuracy = q_aware_model.evaluate(x_test, y_test, verbose=0)
q_aware_model.summary()

Epoch 1/4


  output, from_logits = _get_logits(


Epoch 2/4
Epoch 3/4
Epoch 4/4
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 quantize_layer_10 (Quantiz  (None, 13)                3         
 eLayer)                                                         
                                                                 
 quant_dense (QuantizeWrapp  (None, 128)               1797      
 erV2)                                                           
                                                                 
 quant_dense_1 (QuantizeWra  (None, 64)                8261      
 pperV2)                                                         
                                                                 
 quant_dense_2 (QuantizeWra  (None, 32)                2085      
 pperV2)                                                         
                                                                 
 quant_dense_3 (QuantizeWr

In [None]:
# 양자화 기능 적용 모델 저장
q_aware_model.save("quantized_model_RNN.h5")

  saving_api.save_model(


In [None]:
# 기능 매트릭 정리 
# 파일 크기를 정수로 변환하여 출력
keras_file_size = int(get_gzipped_model_size("my_model_RNN.h5"))
pruned_keras_file_size = int(get_gzipped_model_size("purned_model_RNN.h5"))
quant_file_size = int(get_gzipped_model_size("quantized_model_RNN.h5"))
# 압축률 = (압축 전 데이터 크기) / (압축 후 데이터 크기)
pruned_keras_compression_rate = ((keras_file_size / pruned_keras_file_size))
quant_file_compression_rate = (keras_file_size / quant_file_size)
pruned_keras_compression_rate = "{:.2f}".format(pruned_keras_compression_rate)
quant_file_compression_rate = "{:.2f}".format(quant_file_compression_rate)
# 원본 모델 비교
PADP = (accuracy - (accuracy - model_for_pruning_accuracy)) / accuracy * 100
if PADP >= 100:
    PADP = 100
P = "{:.2f}".format(PADP)
PADQ = (accuracy - (accuracy - q_aware_model_accuracy)) / accuracy * 100
if PADQ >= 100:
    PADQ = 100
Q = "{:.2f}".format(PADQ)

In [None]:
# 각 모델간(원본, 가중치 가지치기, 양자화) 성능비교 매트릭 출력
print('모델 사이즈---------------------------------------------------')
print("Size of gzipped baseline Keras model: %d bytes" % keras_file_size)
print("Size of gzipped pruned Keras model: %d bytes" % pruned_keras_file_size)
print("Size of gzipped Quantized model: %d bytes" % quant_file_size)
print('모델 압축률---------------------------------------------------')
print("compression rate of pruned Keras model: ", pruned_keras_compression_rate)
print("compression rate of Quantized model: ", quant_file_compression_rate)
print('모델 최적화 성능----------------------------------------------')
print('Baseline test accuracy:', accuracy)
print('Pruned test accuracy:', model_for_pruning_accuracy)
print('Quant test accuracy:', q_aware_model_accuracy)
print('모델 최적화 비교 정확도----------------------------------------')
print('가지치기 비교 정확도 :', P,"%")
print('양자화 비교 정확도 :', Q,"%")

모델 사이즈---------------------------------------------------
Size of gzipped baseline Keras model: 212572 bytes
Size of gzipped pruned Keras model: 212042 bytes
Size of gzipped Quantized model: 209090 bytes
모델 압축률---------------------------------------------------
compression rate of pruned Keras model:  1.00
compression rate of Quantized model:  1.02
모델 최적화 성능----------------------------------------------
Baseline test accuracy: 0.9649122953414917
Pruned test accuracy: 0.9649122953414917
Quant test accuracy: 0.9736841917037964
모델 최적화 비교 정확도----------------------------------------
가지치기 비교 정확도 : 100.00 %
양자화 비교 정확도 : 100.00 %
