In [1]:

import tensorflow as tf

# Load MNIST; each split is an (images, labels) pair.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Per split: cast to float32, flatten each image into a 784-element row,
# then divide every pixel value by 255.
x_train = x_train.astype('float32').reshape([-1, 784]) / 255.
x_test = x_test.astype('float32').reshape([-1, 784]) / 255.

# --- Optimizer learning rates ---
learning_rate_RMSProp = 0.02          # used by the RMSprop (pre-training) optimizer
learning_rate_GradientDescent = 0.5   # used by the SGD (fine-tuning) optimizer

# --- Training schedule ---
num_epochs = 100      # number of training iterations (epochs)
batch_size = 256      # examples per mini-batch
display_step = 1      # how often (in epochs) a log line is printed

# --- Network dimensions ---
input_size = 28 * 28  # MNIST input size (28*28 image = 784 values)
hidden1_size = 128    # number of nodes in the first hidden layer
hidden2_size = 64     # number of nodes in the second hidden layer

# Training input pipeline over (image, label) pairs.  The shuffle buffer is
# sized from the data itself rather than a hard-coded 60000, so the shuffle
# keeps covering the whole training set if the number of examples changes.
train_data = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(len(x_train))
    .batch(batch_size)
)

class AutoEncoder:
    """Sigmoid autoencoder: input -> hidden1 -> hidden2 -> hidden1 -> input.

    Every weight and bias is a ``tf.Variable`` initialised with
    ``tf.random.normal``.  The training code enumerates the parameters via
    ``vars(model)``, so instances must hold nothing except these variables.
    """

    def __init__(self):
        # Encoder: input_size -> hidden1_size -> hidden2_size
        self.Wh_1 = tf.Variable(tf.random.normal([input_size, hidden1_size]))
        self.bh_1 = tf.Variable(tf.random.normal([hidden1_size]))
        self.Wh_2 = tf.Variable(tf.random.normal([hidden1_size, hidden2_size]))
        self.bh_2 = tf.Variable(tf.random.normal([hidden2_size]))
        # Decoder: hidden2_size -> hidden1_size -> input_size
        self.Wh_3 = tf.Variable(tf.random.normal([hidden2_size, hidden1_size]))
        self.bh_3 = tf.Variable(tf.random.normal([hidden1_size]))
        self.Wo = tf.Variable(tf.random.normal([hidden1_size, input_size]))
        self.bo = tf.Variable(tf.random.normal([input_size]))

    def __call__(self, x):
        """Run the encode/decode pass on ``x``.

        Returns:
            A tuple ``(reconstruction, code)``: the output of the final layer
            and the output of the second hidden layer (the compressed
            features).
        """
        encoder_params = ((self.Wh_1, self.bh_1), (self.Wh_2, self.bh_2))
        decoder_params = ((self.Wh_3, self.bh_3), (self.Wo, self.bo))

        code = x
        for weight, bias in encoder_params:
            code = tf.nn.sigmoid(tf.matmul(code, weight) + bias)

        reconstruction = code
        for weight, bias in decoder_params:
            reconstruction = tf.nn.sigmoid(tf.matmul(reconstruction, weight) + bias)

        return reconstruction, code

class SoftmaxClassifier:
    """Single softmax layer mapping ``hidden2_size`` features to 10 outputs."""

    def __init__(self):
        # The input is the autoencoder's compressed feature vector (64 values)
        # instead of the original MNIST image (784 values).
        self.W_softmax = tf.Variable(tf.zeros([hidden2_size, 10]))
        self.b_softmax = tf.Variable(tf.zeros([10]))

    def __call__(self, x):
        """Return ``softmax(matmul(x, W_softmax) + b_softmax)``."""
        logits = tf.matmul(x, self.W_softmax) + self.b_softmax
        return tf.nn.softmax(logits)

@tf.function
def pretraining_mse_loss(y_pred, y_true):
    """Return the mean squared error (MSE) between ``y_true`` and ``y_pred``.

    The mean is taken over every element, so the result is a scalar.
    """
    residual = y_true - y_pred
    squared_error = tf.pow(residual, 2)
    return tf.reduce_mean(squared_error)

@tf.function
def finetuning_cross_entropy_loss(y_pred_softmax, y):
    """Return the mean cross-entropy between ``y`` and ``y_pred_softmax``.

    Args:
        y_pred_softmax: Predicted class probabilities, one row per example.
        y: Target distribution with the same layout (the callers in this file
            pass one-hot labels).

    Returns:
        Scalar tensor: ``-sum(y * log(p))`` along axis 1, averaged over rows.
    """
    # A probability that underflows to exactly 0 makes log() return -inf, and
    # 0 * -inf is NaN, which would poison the loss and every gradient.  Clip
    # to a tiny positive floor before taking the log.
    safe_probabilities = tf.clip_by_value(y_pred_softmax, 1e-10, 1.0)
    per_example_loss = -tf.reduce_sum(y * tf.math.log(safe_probabilities), axis=[1])
    return tf.reduce_mean(per_example_loss)

# RMSprop optimizer used for the reconstruction (pre-training) phase.
pretraining_optimizer = tf.optimizers.RMSprop(learning_rate_RMSProp, epsilon=1e-10)


@tf.function
def pretraining_train_step(autoencoder_model, x):
    """Apply one optimizer update that trains the autoencoder to reproduce ``x``.

    The input doubles as the target.  All attributes of ``autoencoder_model``
    (obtained through ``vars``) are treated as trainable variables.
    Returns nothing.
    """
    trainable = list(vars(autoencoder_model).values())
    with tf.GradientTape() as tape:
        reconstruction, _ = autoencoder_model(x)
        loss = pretraining_mse_loss(reconstruction, x)
    grads = tape.gradient(loss, trainable)
    pretraining_optimizer.apply_gradients(zip(grads, trainable))

# Plain SGD optimizer used for the classification (fine-tuning) phase.
finetuning_optimizer = tf.optimizers.SGD(learning_rate_GradientDescent)


@tf.function
def finetuning_train_step(autoencoder_model, softmax_classifier_model, x, y):
    """Apply one optimizer update to the encoder half and the softmax classifier.

    Only the first two autoencoder layers (``Wh_1``/``bh_1``/``Wh_2``/``bh_2``)
    and every attribute of ``softmax_classifier_model`` are updated; the
    decoder variables are left untouched.  Returns nothing.
    """
    encoder_variables = [
        autoencoder_model.Wh_1,
        autoencoder_model.bh_1,
        autoencoder_model.Wh_2,
        autoencoder_model.bh_2,
    ]
    trainable = encoder_variables + list(vars(softmax_classifier_model).values())

    with tf.GradientTape() as tape:
        # The reconstruction is not needed here, only the compressed features.
        _, features = autoencoder_model(x)
        probabilities = softmax_classifier_model(features)
        loss = finetuning_cross_entropy_loss(probabilities, y)

    grads = tape.gradient(loss, trainable)
    finetuning_optimizer.apply_gradients(zip(grads, trainable))

@tf.function
def compute_accuracy(y_pred_softmax, y):
    """Return the fraction of rows where both inputs have the same arg-max.

    The arg-max is taken along axis 1 of ``y_pred_softmax`` and of ``y``; the
    matches are cast to float32 and averaged into a scalar.
    """
    predicted_class = tf.argmax(y_pred_softmax, axis=1)
    expected_class = tf.argmax(y, axis=1)
    hits = tf.cast(tf.equal(predicted_class, expected_class), tf.float32)
    return tf.reduce_mean(hits)

# Create both models once at module level; the pre-training loop, the
# fine-tuning loop and the final accuracy check all use these same instances.
AutoEncoder_model = AutoEncoder()
SoftmaxClassifier_model = SoftmaxClassifier()

# Step 1 (pre-training): optimise the autoencoder to reconstruct its input.
# Labels are ignored in this phase.
for epoch in range(num_epochs):
    for batch_x, _ in train_data:
        pretraining_train_step(AutoEncoder_model, batch_x)
    if epoch % display_step == 0:
        # Evaluate the loss once per logged epoch, on the last batch after its
        # update.  This is the same value that was printed before, without
        # running an extra forward pass after every single batch.
        pretraining_loss_print = pretraining_mse_loss(AutoEncoder_model(batch_x)[0], batch_x)
        print("반복(Epoch): %d, Pre-Training 손실 함수(pretraining_loss): %f" % ((epoch + 1), pretraining_loss_print))
print("Step 1 : MNIST 데이터 재구축을 위한 오토인코더 최적화 완료(Pre-Training)")

# Step 2 (fine-tuning): optimise the encoder half plus the softmax classifier
# for classification.  This phase runs for num_epochs + 100 epochs.
for epoch in range(num_epochs + 100):
    for batch_x, batch_y in train_data:
        batch_y = tf.one_hot(batch_y, depth=10)  # integer labels -> one-hot rows
        finetuning_train_step(AutoEncoder_model, SoftmaxClassifier_model, batch_x, batch_y)
    if epoch % display_step == 0:
        # Evaluate the loss once per logged epoch, on the last batch after its
        # update.  This is the same value that was printed before, without
        # running an extra forward pass after every single batch.
        finetuning_loss_print = finetuning_cross_entropy_loss(SoftmaxClassifier_model(AutoEncoder_model(batch_x)[1]), batch_y)
        print("반복(Epoch): %d, Fine-tuning 손실 함수(finetuning_loss): %f" % ((epoch + 1), finetuning_loss_print))
print("Step 2 : MNIST 데이터 분류를 위한 오토인코더+Softmax 분류기 최적화 완료(Fine-Tuning)")

# Final evaluation on the test split: encode the test images, classify the
# compressed features, and compare against one-hot test labels.
test_features = AutoEncoder_model(x_test)[1]
test_probabilities = SoftmaxClassifier_model(test_features)
test_labels_one_hot = tf.one_hot(y_test, depth=10)
print("정확도(오토인코더+Softmax 분류기): %f" % compute_accuracy(test_probabilities, test_labels_one_hot))  # original author's note: accuracy is about 96%

반복(Epoch): 1, Pre-Training 손실 함수(pretraining_loss): 0.049743
반복(Epoch): 2, Pre-Training 손실 함수(pretraining_loss): 0.042911
반복(Epoch): 3, Pre-Training 손실 함수(pretraining_loss): 0.037840
반복(Epoch): 4, Pre-Training 손실 함수(pretraining_loss): 0.036213
반복(Epoch): 5, Pre-Training 손실 함수(pretraining_loss): 0.034457
반복(Epoch): 6, Pre-Training 손실 함수(pretraining_loss): 0.030195
반복(Epoch): 7, Pre-Training 손실 함수(pretraining_loss): 0.031549
반복(Epoch): 8, Pre-Training 손실 함수(pretraining_loss): 0.030624
반복(Epoch): 9, Pre-Training 손실 함수(pretraining_loss): 0.030509
반복(Epoch): 10, Pre-Training 손실 함수(pretraining_loss): 0.032453
반복(Epoch): 11, Pre-Training 손실 함수(pretraining_loss): 0.027803
반복(Epoch): 12, Pre-Training 손실 함수(pretraining_loss): 0.027496
반복(Epoch): 13, Pre-Training 손실 함수(pretraining_loss): 0.030047
반복(Epoch): 14, Pre-Training 손실 함수(pretraining_loss): 0.025582
반복(Epoch): 15, Pre-Training 손실 함수(pretraining_loss): 0.026658
반복(Epoch): 16, Pre-Training 손실 함수(pretraining_loss): 0.027495
반복(Epoch): 17, Pr

반복(Epoch): 34, Fine-tuning 손실 함수(finetuning_loss): 0.033730
반복(Epoch): 35, Fine-tuning 손실 함수(finetuning_loss): 0.074498
반복(Epoch): 36, Fine-tuning 손실 함수(finetuning_loss): 0.094151
반복(Epoch): 37, Fine-tuning 손실 함수(finetuning_loss): 0.042830
반복(Epoch): 38, Fine-tuning 손실 함수(finetuning_loss): 0.091360
반복(Epoch): 39, Fine-tuning 손실 함수(finetuning_loss): 0.048259
반복(Epoch): 40, Fine-tuning 손실 함수(finetuning_loss): 0.111204
반복(Epoch): 41, Fine-tuning 손실 함수(finetuning_loss): 0.093989
반복(Epoch): 42, Fine-tuning 손실 함수(finetuning_loss): 0.052422
반복(Epoch): 43, Fine-tuning 손실 함수(finetuning_loss): 0.133251
반복(Epoch): 44, Fine-tuning 손실 함수(finetuning_loss): 0.048688
반복(Epoch): 45, Fine-tuning 손실 함수(finetuning_loss): 0.053841
반복(Epoch): 46, Fine-tuning 손실 함수(finetuning_loss): 0.065476
반복(Epoch): 47, Fine-tuning 손실 함수(finetuning_loss): 0.124366
반복(Epoch): 48, Fine-tuning 손실 함수(finetuning_loss): 0.104252
반복(Epoch): 49, Fine-tuning 손실 함수(finetuning_loss): 0.036500
반복(Epoch): 50, Fine-tuning 손실 함수(finetun

반복(Epoch): 170, Fine-tuning 손실 함수(finetuning_loss): 0.005810
반복(Epoch): 171, Fine-tuning 손실 함수(finetuning_loss): 0.015859
반복(Epoch): 172, Fine-tuning 손실 함수(finetuning_loss): 0.025053
반복(Epoch): 173, Fine-tuning 손실 함수(finetuning_loss): 0.010521
반복(Epoch): 174, Fine-tuning 손실 함수(finetuning_loss): 0.021920
반복(Epoch): 175, Fine-tuning 손실 함수(finetuning_loss): 0.015217
반복(Epoch): 176, Fine-tuning 손실 함수(finetuning_loss): 0.015348
반복(Epoch): 177, Fine-tuning 손실 함수(finetuning_loss): 0.039522
반복(Epoch): 178, Fine-tuning 손실 함수(finetuning_loss): 0.021278
반복(Epoch): 179, Fine-tuning 손실 함수(finetuning_loss): 0.014924
반복(Epoch): 180, Fine-tuning 손실 함수(finetuning_loss): 0.014926
반복(Epoch): 181, Fine-tuning 손실 함수(finetuning_loss): 0.021195
반복(Epoch): 182, Fine-tuning 손실 함수(finetuning_loss): 0.030546
반복(Epoch): 183, Fine-tuning 손실 함수(finetuning_loss): 0.009385
반복(Epoch): 184, Fine-tuning 손실 함수(finetuning_loss): 0.009882
반복(Epoch): 185, Fine-tuning 손실 함수(finetuning_loss): 0.006874
반복(Epoch): 186, Fine-tun