In [1]:
import tensorflow as tf

In [2]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()


In [3]:
x_train, x_test = x_train.astype('float32'), x_test.astype('float32')
x_train, x_test = x_train.reshape([-1, 784]), x_test.reshape([-1, 784])
x_train, x_test = x_train / 255., x_test / 255.

In [4]:
learning_rate_RMSProp = 0.02
learning_rate_GradientRescent = 0.5
num_epochs = 100
batch_size = 256
display_step = 1
input_size = 784
hidden1_size = 128
hidden2_size = 64

In [5]:
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.shuffle(60000).batch(batch_size)

In [6]:
def random_normal_initializer_stddev_1():
  return tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0, seed=None)

In [7]:
class AutoEncoder(tf.keras.Model):
  def __init__(self):
    super(AutoEncoder, self).__init__()
    # relu는 미분시 0~1 값을 만들어 버리는데 autoencoder 확률을 확인하는것이 아니므로 값을 버리면 안된다. 그래서 sigmoid를 사용
    self.hidden_layer_1 = tf.keras.layers.Dense(hidden1_size,
                                                activation='sigmoid',
                                                kernel_initializer=random_normal_initializer_stddev_1(),
                                                bias_initializer=random_normal_initializer_stddev_1())
    self.hidden_layer_2 = tf.keras.layers.Dense(hidden2_size,
                                                activation='sigmoid',
                                                kernel_initializer=random_normal_initializer_stddev_1(),
                                                bias_initializer=random_normal_initializer_stddev_1())
    self.hidden_layer_3 = tf.keras.layers.Dense(hidden1_size,
                                                activation='sigmoid',
                                                kernel_initializer=random_normal_initializer_stddev_1(),
                                                bias_initializer=random_normal_initializer_stddev_1())
    self.output_layer = tf.keras.layers.Dense(input_size,
                                                activation='sigmoid',
                                                kernel_initializer=random_normal_initializer_stddev_1(),
                                                bias_initializer=random_normal_initializer_stddev_1())

  def call(self, x):
    H1_output = self.hidden_layer_1(x)
    H2_output = self.hidden_layer_2(H1_output)
    H3_output = self.hidden_layer_3(H2_output)
    X_reconstructed = self.output_layer(H3_output)

    return X_reconstructed, H2_output

In [8]:
class SoftmaxClassifier(tf.keras.Model):
  def __init__(self):
    super(SoftmaxClassifier, self).__init__()
    self.softmax_layer = tf.keras.layers.Dense(10,activation='softmax', kernel_initializer='zeros', bias_initializer='zeros')

  def call(self, x):
    y_pred = self.softmax_layer(x)

    return y_pred

In [9]:
@tf.function
def pretraining_mse_loss(y_pred, y):
  return tf.reduce_mean(tf.pow(y_pred - y, 2))

@tf.function
def finetuning_cross_entropy_loss(y_pred_softmax, y):
  return tf.reduce_mean(-tf.reduce_sum(y * tf.math.log(y_pred_softmax), axis=[1]))


In [10]:
pretraining_optimizer = tf.keras.optimizers.RMSprop(learning_rate_RMSProp, epsilon=1e-10)

In [11]:
@tf.function
def pretraning_train_step(autoencoder_model, x):
  y_true = x

  with tf.GradientTape() as tape:
    y_pred, _ = autoencoder_model(x)
    pretraning_loss = pretraining_mse_loss(y_pred, y_true)
  
  gradients = tf.gradients(pretraning_loss, autoencoder_model.trainable_variables)
  pretraining_optimizer.apply_gradients(zip(gradients, autoencoder_model.trainable_variables))

In [12]:
finetuning_optimizer = tf.keras.optimizers.SGD(learning_rate_GradientRescent)

In [13]:
@tf.function
def finetuning_train_step(autoencoder_model, softmax_classifier_model, x, y):
  with tf.GradientTape() as tape:
    y_pred, extracted_feature = autoencoder_model(x)
    y_pred_softmax = softmax_classifier_model(extracted_feature)
    finetuning_loss = finetuning_cross_entropy_loss(y_pred_softmax, y)
  
  # 왜 그냥 더하지? 
  autoencoder_encoding_variables = autoencoder_model.hidden_layer_1.trainable_variables + autoencoder_model.hidden_layer_2.trainable_variables / 2
  gradients = tape.gradient(finetuning_loss, autoencoder_encoding_variables + softmax_classifier_model.trainable_variables)
  finetuning_optimizer.apply_gradients(zip(gradients, autoencoder_encoding_variables + softmax_classifier_model.trainable_variables))
  print(autoencoder_model.hidden_layer_1.trainable_variables)


In [14]:
@tf.function
def compute_accuracy(y_pred_softmax, y):
  correct_prediction = tf.equal( tf.argmax(y_pred_softmax, 1), tf.argmax(y, 1))
  accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

  return accuracy

In [15]:
Autoencoder_model = AutoEncoder()

In [16]:
SoftmaxClassifier_model = SoftmaxClassifier()

In [17]:
for epoch in range(num_epochs):
  for batch_x, _ in train_data:
    _, pretraining_loss_print = pretraning_train_step(Autoencoder_model, batch_x), pretraining_mse_loss(Autoencoder_model(batch_x)[0], batch_x)
    if epoch % display_step == 0:
      print("반복: %d, Pre-Training 손실: %f" % ((epoch+1), pretraining_loss_print))
print("Step1. 오토인코더 최적화 완료")

반복: 1, Pre-Training 손실: 0.261052
반복: 1, Pre-Training 손실: 0.183234
반복: 1, Pre-Training 손실: 0.140350
반복: 1, Pre-Training 손실: 0.119943
반복: 1, Pre-Training 손실: 0.111134
반복: 1, Pre-Training 손실: 0.104258
반복: 1, Pre-Training 손실: 0.103472
반복: 1, Pre-Training 손실: 0.102303
반복: 1, Pre-Training 손실: 0.094640
반복: 1, Pre-Training 손실: 0.098590
반복: 1, Pre-Training 손실: 0.091217
반복: 1, Pre-Training 손실: 0.094023
반복: 1, Pre-Training 손실: 0.094193
반복: 1, Pre-Training 손실: 0.087959
반복: 1, Pre-Training 손실: 0.088790
반복: 1, Pre-Training 손실: 0.088079
반복: 1, Pre-Training 손실: 0.087870
반복: 1, Pre-Training 손실: 0.086290
반복: 1, Pre-Training 손실: 0.087266
반복: 1, Pre-Training 손실: 0.084710
반복: 1, Pre-Training 손실: 0.086246
반복: 1, Pre-Training 손실: 0.087105
반복: 1, Pre-Training 손실: 0.087168
반복: 1, Pre-Training 손실: 0.084960
반복: 1, Pre-Training 손실: 0.086344
반복: 1, Pre-Training 손실: 0.085582
반복: 1, Pre-Training 손실: 0.084422
반복: 1, Pre-Training 손실: 0.084490
반복: 1, Pre-Training 손실: 0.083020
반복: 1, Pre-Training 손실: 0.081251
반복: 1, Pre

In [18]:
# for epoch in range(num_epochs):
  # for batch_x, batch_y in train_data:
  #   batch_y = tf.one_hot(batch_y, depth=10)
  #   _, finetuning_loss_print = finetuning_train_step(Autoencoder_model, SoftmaxClassifier_model, batch_x, batch_y), finetuning_cross_entropy_loss(SoftmaxClassifier_model(Autoencoder_model(batch_x)[1]), batch_y)

  #   if epoch % display_step == 0:
  #     print("반복: %d, Fine-tuning 손실 : %f" % ((epoch+1), finetuning_loss_print))
for batch_x, batch_y in train_data:
  batch_y = tf.one_hot(batch_y, depth=10)
  _, finetuning_loss_print = finetuning_train_step(Autoencoder_model, SoftmaxClassifier_model, batch_x, batch_y), finetuning_cross_entropy_loss(SoftmaxClassifier_model(Autoencoder_model(batch_x)[1]), batch_y)

  if epoch % display_step == 0:
    print("반복: %d, Fine-tuning 손실 : %f" % ((epoch+1), finetuning_loss_print))  

print("Step2. Softmax 분류기 최적화 완료")

TypeError: in user code:

    C:\Users\trans\AppData\Local\Temp/ipykernel_32528/2704568101.py:9 finetuning_train_step  *
        autoencoder_encoding_variables = autoencoder_model.hidden_layer_1.trainable_variables + autoencoder_model.hidden_layer_2.trainable_variables / 2

    TypeError: unsupported operand type(s) for /: 'list' and 'int'


In [None]:
print("정확도: %f" % ( compute_accuracy(SoftmaxClassifier_model(Autoencoder_model(x_test)[1]), tf.one_hot(y_test, depth=10) )))

정확도: 0.113500
