<a href="https://colab.research.google.com/github/lala991204/DL-self-study/blob/master/tensorflow/3_12_gradient_tape_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import numpy as np

In [None]:
# Load the MNIST digits and rescale the pixel values.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Divide both splits by the same constant (the training-set maximum) so the
# test inputs go through exactly the preprocessing the model was trained on.
pixel_max = x_train.max()
x_train = x_train / pixel_max
x_test = x_test / pixel_max

# Model definition: flatten the 28x28 input, three ReLU hidden layers,
# and a 10-unit softmax output.
model = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28,28)),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
])

# Loss function
loss_function = tf.keras.losses.SparseCategoricalCrossentropy()

# Optimizer (created once; it was previously instantiated a second time below
# the metrics, which only replaced this object with an identical one)
optimizer = tf.keras.optimizers.Adam()

# Metrics used to record loss and accuracy for the training and validation passes
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
valid_loss = tf.keras.metrics.Mean(name='valid_loss')
valid_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='valid_accuracy')

# 배치 생성 함수
def get_batches(x, y, batch_size=32):
    """Yield consecutive ``(x_batch, y_batch)`` pairs covering every row of ``x``.

    Args:
        x: Array-like with a ``shape`` attribute, sliced along its first axis.
        y: Array-like sliced with the same row ranges as ``x``.
        batch_size: Maximum number of rows per batch.

    Yields:
        Tuples of two ``numpy`` arrays. Every batch holds ``batch_size`` rows
        except possibly the last one, which holds the remaining rows when the
        row count is not a multiple of ``batch_size``.
    """
    # Step over start offsets instead of counting full batches, so the
    # trailing partial batch is emitted rather than silently dropped.
    for start in range(0, x.shape[0], batch_size):
        stop = start + batch_size
        yield np.asarray(x[start:stop]), np.asarray(y[start:stop])

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


TensorFlow가 2.0 버전으로 업데이트되면서 지연 실행(lazy execution) 모드 대신 즉시 실행(eager execution) 모드가 기본으로 활성화되도록 변경되었다. 함수에 @tf.function 데코레이터를 붙여주면 텐서플로가 해당 함수를 계산 그래프로 변환하여 지연 실행(그래프) 모드로 처리한다.

In [None]:
@tf.function
def train_step(images, labels):
    """Run one optimization step on a batch and record the training metrics.

    Operates on the module-level ``model``, ``loss_function``, ``optimizer``,
    ``train_loss`` and ``train_accuracy`` objects.

    Args:
        images: Batch of inputs passed to ``model``.
        labels: Target labels passed to the loss and the accuracy metric.
    """
    # Only the forward pass and the loss need to be recorded on the tape.
    with tf.GradientTape() as tape:
        prediction = model(images, training=True)
        loss = loss_function(labels, prediction)

    # Differentiate and apply the update after leaving the tape context, so
    # the backward pass and optimizer ops are not issued while it is active.
    gradients = tape.gradient(loss, model.trainable_variables)
    # Pair each gradient with its variable and let the optimizer update the weights.
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    # Accumulate this batch's loss and accuracy.
    train_loss(loss)
    train_accuracy(labels, prediction)

@tf.function
def valid_step(images, labels):
    """Evaluate one batch and accumulate the validation metrics.

    Calls the module-level ``model`` with ``training=False`` and feeds the
    result to ``loss_function``, ``valid_loss`` and ``valid_accuracy``.
    No gradients are computed and no optimizer step is taken here.

    Args:
        images: Batch of inputs passed to ``model``.
        labels: Target labels passed to the loss and the accuracy metric.
    """
    # Forward pass with the training flag switched off.
    probs = model(images, training=False)

    # Fold this batch's loss and accuracy into the running validation metrics.
    valid_loss(loss_function(labels, probs))
    valid_accuracy(labels, probs)

In [None]:
# Per-epoch report line; defined once because it does not change between epochs.
metric_template = 'epoch: {}, loss: {:.4f}, acc: {:.2f}%, val_loss:{:.4f}, val_acc: {:.2f}%'

# Epoch loop
for epoch in range(5):
    # Reset the metrics at the start of every epoch so the printed values
    # describe this epoch only, not a running average over all epochs so far.
    # NOTE(review): recent Keras releases spell this `reset_state()`; confirm
    # against the installed TensorFlow version.
    train_loss.reset_states()
    train_accuracy.reset_states()
    valid_loss.reset_states()
    valid_accuracy.reset_states()

    # One pass over the training batches
    for images, labels in get_batches(x_train, y_train):
        train_step(images, labels)

    # One pass over the held-out batches
    for images, labels in get_batches(x_test, y_test):
        valid_step(images, labels)

    # Report the metrics accumulated during this epoch
    print(metric_template.format(epoch+1, train_loss.result(), train_accuracy.result()*100,
                                 valid_loss.result(), valid_accuracy.result()*100))

epoch: 1, loss: 0.0475, acc: 98.58%, val_loss:0.1124, val_acc: 96.94%
epoch: 2, loss: 0.0434, acc: 98.65%, val_loss:0.1018, val_acc: 97.22%
epoch: 3, loss: 0.0393, acc: 98.76%, val_loss:0.0943, val_acc: 97.41%
epoch: 4, loss: 0.0363, acc: 98.84%, val_loss:0.0943, val_acc: 97.52%
epoch: 5, loss: 0.0339, acc: 98.92%, val_loss:0.0971, val_acc: 97.55%
