In [67]:
import tensorflow as tf

In [68]:
# Load the MNIST train/test splits via tf.keras.datasets: inputs in x_*, labels in y_*.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [69]:
# Inspect the shape of the training inputs before any preprocessing is applied.
x_train.shape

(60000, 28, 28)

In [70]:
# Cast both input arrays to float32 ahead of the scaling step.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

In [71]:
# Flatten every image into a 784-element row; -1 lets the row count be inferred.
x_train = x_train.reshape((-1, 784))
x_test = x_test.reshape((-1, 784))

In [72]:
# Divide the inputs by 255. to rescale them.
# NOTE(review): presumably maps 0-255 pixel intensities into [0, 1] - confirm input range.
x_train = x_train / 255.
x_test = x_test / 255.

In [73]:
# One-hot encode the labels with a depth of 10 classes.
y_train = tf.one_hot(y_train, depth=10)
y_test = tf.one_hot(y_test, depth=10)

In [74]:
# --- Optimization settings ---
learning_rate = 1e-3   # step size handed to the optimizer
num_epochs = 30        # number of full passes over the training data
batch_size = 256       # examples per mini-batch
display_step = 1       # print the loss every `display_step` epochs

# --- Network dimensions ---
input_size = 784       # length of one flattened input row (28 * 28)
hidden1_size = 256     # units in the first hidden layer
hidden2_size = 256     # units in the second hidden layer
output_size = 10       # number of output logits (one per class)

In [75]:
# Build a dataset whose elements are (input row, label) pairs sliced along the first axis.
train_data = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)
)

In [76]:
# Shuffle with a buffer as large as the training set, then group into mini-batches.
# The buffer size is taken from the data (x_train.shape[0]) instead of a hard-coded
# 60000 so the shuffle keeps covering every example if the dataset size changes.
train_data = train_data.shuffle(x_train.shape[0]).batch(batch_size)

In [77]:
def random_normal_initializer_with_stddev_1():
  """Build a new RandomNormal initializer with mean 0.0, stddev 1.0 and no fixed seed.

  Returns:
    A fresh tf.keras.initializers.RandomNormal instance on every call.
  """
  initializer = tf.keras.initializers.RandomNormal(
      mean=0.0,
      stddev=1.0,
      seed=None,
  )
  return initializer

In [84]:
class ANN(tf.keras.Model):
  """Fully connected network: two ReLU hidden layers and a linear output layer.

  Layer widths come from the module-level hidden1_size, hidden2_size and
  output_size settings. Every kernel and bias is initialized with its own
  initializer from random_normal_initializer_with_stddev_1().
  """

  def __init__(self):
    super().__init__()

    def dense(units, activation):
      # Each Dense layer gets separate initializer instances for kernel and bias.
      return tf.keras.layers.Dense(
          units,
          activation=activation,
          kernel_initializer=random_normal_initializer_with_stddev_1(),
          bias_initializer=random_normal_initializer_with_stddev_1())

    # Created in forward order: hidden 1 -> hidden 2 -> output.
    self.hidden_layer_1 = dense(hidden1_size, 'relu')
    self.hidden_layer_2 = dense(hidden2_size, 'relu')
    # No activation on the output layer: it emits raw logits.
    self.output_layer = dense(output_size, None)

  def call(self, x):
    """Forward pass: return the logits produced for input batch x."""
    hidden = self.hidden_layer_1(x)
    hidden = self.hidden_layer_2(hidden)
    return self.output_layer(hidden)

In [79]:
@tf.function
def cross_entropy_loss(logits, y):
  """Mean softmax cross-entropy between logits and the labels y.

  Args:
    logits: unnormalized model outputs.
    y: labels, passed as `labels` to tf.nn.softmax_cross_entropy_with_logits.

  Returns:
    The cross-entropy averaged over all elements via tf.reduce_mean.
  """
  per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
  return tf.reduce_mean(per_example_loss)

In [80]:
# Adam optimizer shared by every training step, using the configured learning rate.
optimizer = tf.optimizers.Adam(learning_rate=learning_rate)

In [81]:
@tf.function
def train_step(model, x, y):
  """Run one optimization step on a single batch and return its loss.

  Args:
    model: callable mapping inputs to logits and exposing `trainable_variables`.
    x: batch of inputs fed to the model.
    y: labels for the batch, forwarded to cross_entropy_loss.

  Returns:
    The loss computed on this batch before the parameter update. Returning it
    lets callers log the training loss without a second forward pass; callers
    that ignore the return value are unaffected.
  """
  # Record the forward pass so gradients of the loss can be derived from it.
  with tf.GradientTape() as tape:
    y_pred = model(x)
    loss = cross_entropy_loss(y_pred, y)
  gradients = tape.gradient(loss, model.trainable_variables)
  # Uses the module-level optimizer to update the model's variables.
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))
  return loss

In [82]:
@tf.function
def compute_accuracy(y_pred, y):
  """Fraction of rows where the argmax of y_pred matches the argmax of y.

  Args:
    y_pred: per-class scores, compared along axis 1.
    y: reference labels with the same layout, compared along axis 1.

  Returns:
    The mean of the per-row matches as a float32 value.
  """
  predicted_class = tf.argmax(y_pred, axis=1)
  true_class = tf.argmax(y, axis=1)
  hits = tf.cast(tf.equal(predicted_class, true_class), tf.float32)
  return tf.reduce_mean(hits)

In [85]:
# Instantiate the network that the training loop below optimizes and evaluates.
ANN_model = ANN()

In [86]:
# Train for num_epochs passes over train_data, reporting the mean per-batch loss.
for epoch in range(num_epochs):
  loss_sum = 0.
  # Count the batches actually produced. int(x_train.shape[0] / batch_size)
  # truncates and omits the final partial batch, so dividing by it made the
  # reported average slightly too large.
  total_batch = 0

  for batch_x, batch_y in train_data:
    train_step(ANN_model, batch_x, batch_y)
    # The logged loss is measured on the same batch after the parameter update.
    current_loss = cross_entropy_loss(ANN_model(batch_x), batch_y)
    loss_sum += current_loss
    total_batch += 1

  # Unweighted mean of the per-batch losses; max() guards against an empty dataset.
  average_loss = loss_sum / max(total_batch, 1)

  if epoch % display_step == 0:
    print("반복: %d, 손실: %f" % ((epoch+1), average_loss ))

# Report classification accuracy on the test split once training has finished.
print("정확도 : %f" % compute_accuracy(ANN_model(x_test), y_test))

반복: 1, 손실: 211.122955
반복: 2, 손실: 57.533615
반복: 3, 손실: 37.536491
반복: 4, 손실: 27.683788
반복: 5, 손실: 21.439924
반복: 6, 손실: 17.162769
반복: 7, 손실: 13.837636
반복: 8, 손실: 11.334475
반복: 9, 손실: 9.230560
반복: 10, 손실: 7.524875
반복: 11, 손실: 6.210339
반복: 12, 손실: 5.080812
반복: 13, 손실: 4.105843
반복: 14, 손실: 3.361346
반복: 15, 손실: 2.684852
반복: 16, 손실: 2.162977
반복: 17, 손실: 1.751295
반복: 18, 손실: 1.357668
반복: 19, 손실: 1.066137
반복: 20, 손실: 0.816950
반복: 21, 손실: 0.631774
반복: 22, 손실: 0.504491
반복: 23, 손실: 0.350409
반복: 24, 손실: 0.304011
반복: 25, 손실: 0.249523
반복: 26, 손실: 0.153356
반복: 27, 손실: 0.126595
반복: 28, 손실: 0.135526
반복: 29, 손실: 0.118347
반복: 30, 손실: 0.086562
정확도 : 0.944300
