# Training Logic

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

np.random.seed(7777)
tf.random.set_seed(7777)

In [2]:
class Cifar10DataLoader():
    def __init__(self):
        # data load 
        (self.train_x, self.train_y), \
            (self.test_x, self.test_y) = tf.keras.datasets.cifar10.load_data()
        self.input_shape = self.train_x.shape[1:]

    def scale(self, x):

        return (x / 255.0).astype(np.float32)

    def preprocess_dataset(self, dataset):

        (feature, target) = dataset

        # scaling #
        scaled_x = np.array([self.scale(x) for x in feature])

        # label encoding #
        ohe_y = np.array([tf.keras.utils.to_categorical(
            y, num_classes=10) for y in target])
        
        return scaled_x, ohe_y.squeeze(1)

    def get_train_dataset(self):
        return self.preprocess_dataset((self.train_x, self.train_y))

    def get_test_dataset(self):
        return self.preprocess_dataset((self.test_x, self.test_y))

cifar10_loader = Cifar10DataLoader()
train_x, train_y = cifar10_loader.get_train_dataset()

print(train_x.shape, train_x.dtype)
print(train_y.shape, train_y.dtype)

test_x, test_y = cifar10_loader.get_test_dataset()

print(test_x.shape, test_x.dtype)
print(test_y.shape, test_y.dtype)

(50000, 32, 32, 3) float32
(50000, 10) float32
(10000, 32, 32, 3) float32
(10000, 10) float32


In [3]:
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, Flatten, Dense, Add

def build_resnet(input_shape):
    inputs = Input(input_shape)

    net = Conv2D(32, kernel_size=3, strides=2,
                 padding='same', activation='relu')(inputs)
    net = MaxPool2D()(net)
    
    net1 = Conv2D(64, kernel_size=1, padding='same', activation='relu')(net)
    net2 = Conv2D(64, kernel_size=3, padding='same', activation='relu')(net1)
    net3 = Conv2D(64, kernel_size=1, padding='same', activation='relu')(net2)
    
    net1_1 = Conv2D(64, kernel_size=1, padding='same')(net)
    net = Add()([net1_1, net3])
    
    net1 = Conv2D(64, kernel_size=1, padding='same', activation='relu')(net)
    net2 = Conv2D(64, kernel_size=3, padding='same', activation='relu')(net1)
    net3 = Conv2D(64, kernel_size=1, padding='same', activation='relu')(net2)
    
    net = Add()([net, net3])
    
    net = MaxPool2D()(net)
    
    net = Flatten()(net)
    net = Dense(10, activation="softmax")(net)

    model = tf.keras.Model(inputs=inputs, outputs=net, name='resnet')
    
    return model

model = build_resnet((32, 32, 3))
model.summary()

Model: "resnet"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 32, 32, 3)]  0           []                               
                                                                                                  
 conv2d (Conv2D)                (None, 16, 16, 32)   896         ['input_1[0][0]']                
                                                                                                  
 max_pooling2d (MaxPooling2D)   (None, 8, 8, 32)     0           ['conv2d[0][0]']                 
                                                                                                  
 conv2d_1 (Conv2D)              (None, 8, 8, 64)     2112        ['max_pooling2d[0][0]']          
                                                                                             

# 학습하는 과정을 직접 만들어보자!
- model.fit() 대신 학습 과정을 직접 코드로 작성
- 이 경우에는 model.compile() 과정이 필요하지 않다.
- 그래서 opt, loss, metric도 직접 실행시켜야 한다.

In [4]:
lr = 0.03
batch_size = 64

In [17]:
learning_rate = 0.03
opt = tf.keras.optimizers.Adam(learning_rate)
loss_fn = tf.keras.losses.categorical_crossentropy
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

- loss_fn 는 함수
- train_loss, train_acc 는 클래스로 만든 객체

In [6]:
loss_fn([1], [0.8])   

<tf.Tensor: shape=(), dtype=float32, numpy=1.192093e-07>

- 정답이 1, 에측값이 0.8, loss값이 1.192093e-07

In [18]:
def train_step(x, y) :
    with tf.GradientTape() as tape:
        pred = model(x)
        loss = loss_fn(y, pred)  #loss 값 구하기
        
    gradients = tape.gradient(loss, model.trainable_variables)   # gradients값 구하기
    opt.apply_gradients(zip(gradients, model.trainable_variables))   # 원하는 opt방식으로 gradients 적용
    
    train_loss(loss)
    train_accuracy(y, pred)

In [19]:
# 실제 학습

batch_size = 64

for epoch in range(1):
    
    for i in range(train_x.shape[0] // batch_size):
        idx = i * batch_size
        x, y = train_x[idx:idx+batch_size], train_y[idx:idx+batch_size]
        train_step(x, y)
        print("\r {} / {}".format(i, train_x.shape[0] // batch_size), end='\r')  # end='\r' : 출력을 덮어쓰기로 한다.
    
    fmt = 'epoch {} loss: {}, accuracy: {}'
    print(fmt.format(epoch+1, 
                          train_loss.result(),
                          train_accuracy.result() * 100)
         )
    train_loss.reset_states()  # 모든 epoch의 loss, acc값이 누적되지 않게 하기 위함 
    train_accuracy.reset_states()  # reset 안하면 최종 loss, acc는 모든 epoch의 loss의 평균값이다.

epoch 1 loss: 2.3527259826660156, accuracy: 9.917173385620117


---

In [None]:
@tf.function
def train_step(x, y) :
    with tf.GradientTape() as tape:
        pred = model(x)
        loss = loss_fn(y, pred)  #loss 값 구하기
        
    gradients = tape.gradient(loss, model.trainable_variables)   # gradients값 구하기
    opt.apply_gradients(zip(gradients, model.trainable_variables))   # 원하는 opt방식으로 gradients 적용
    
    train_loss(loss)
    train_accuracy(y, pred)

- tensorflow에서 @tf.function 을 사용하면 함수를 사용하는 속도가 빨라진다.
- GPU를 사용하는 경우에만 눈에 띄는 속도차이를 볼 수 있다.