<a href="https://colab.research.google.com/github/neSTORY/Deeplearning_Zero_to_All/blob/master/best_cnn_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

그 동안 배웠던 방법들을 통해 최적의 model을 만들어 보자.

<br>

[활용할 방법들]
- Data Augmentation
- Batch Normalization
- Model Ensemble
- Learning Rate Decay

In [1]:
# import
import numpy as np
import matplotlib.pyplot as plt
import os

from scipy import ndimage # for data augmentation

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical

In [2]:
# set hyperparameter
lr = 1e-3
epochs = 10
batch_size = 100

In [3]:
# Data Augmentation
# 나중에 keras의 데이터증강방법을 이용해보자
def data_augmentaion(images, labels):
  aug_images = []
  aug_labels = []

  for x, y in zip(images, labels):
    aug_images.append(x)
    aug_labels.append(y)

    bg_value = np.median(x)
    # augmentation할 때 shift, rotation을 하면 빈 공간이 생기는데
    # 이때 빈 공간을 채워주기 위해 중위수를 구해놓음

    for _ in range(4): # 4번 데이터 증강 6만 -> 30만
      angle = np.random.randint(-15, 15, 1)
      rot_img = ndimage.rotate(x, angle, reshape=False, cval=bg_value)

      shift = np.random.randint(-2,2,2)
      shift_img = ndimage.shift(rot_img, shift, cval=bg_value)

    aug_images = np.array(aug_images)
    aug_labels.append(y)
    
    return aug_images, aug_labels

\* 주의 \*

> ndimage.rotate(x, angle, reshape=False, cval=bgvalue)

여기서 수업에서 정의한 angle은 numpy 배열로 나온다.

그런데 rotate 파라미터 angle의 인자값은 int,float형태로 들어와야해서 이걸 벗겨줘야한다!!

angle[0]으로 진행했음

In [4]:
# dataset
(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()

train_images = train_images / 255.0
test_images = test_images / 255.0

train_images = np.expand_dims(train_images, axis=-1)
test_images = np.expand_dims(test_images, axis=-1)

train_labels = to_categorical(train_labels, 10)
test_labels = to_categorical(test_labels, 10)

train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels)).\
                                                  shuffle(buffer_size=500000).\
                                                  batch(batch_size)

test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).\
                                                  batch(batch_size)

In [5]:
from keras.layers import Conv2D, BatchNormalization, ReLU, MaxPool2D, Dense,Flatten, Dropout

In [6]:
# Build a Neural Network Model

class ConvBNRelu(tf.keras.Model):
    def __init__(self, filters, kernel_size=3, strides=1, padding='SAME'):
        super(ConvBNRelu, self).__init__()
        self.conv = keras.layers.Conv2D(filters=filters, kernel_size=kernel_size, strides=strides, 
                                        padding=padding, kernel_initializer='glorot_normal')
        self.batchnorm = tf.keras.layers.BatchNormalization()
    def call(self, inputs, training=False):
        layer = self.conv(inputs)
        layer = self.batchnorm(layer)
        layer = tf.nn.relu(layer)
        return layer   

class DenseBNRelu(tf.keras.Model):
    def __init__(self, units):
        super(DenseBNRelu, self).__init__()
        self.dense = keras.layers.Dense(units=units, kernel_initializer='glorot_normal')
        self.batchnorm = tf.keras.layers.BatchNormalization()
    def call(self, inputs, training=False):
        layer = self.dense(inputs)
        layer = self.batchnorm(layer)
        layer = tf.nn.relu(layer)
        return layer

class MNISTModel(tf.keras.Model):
    def __init__(self):
        super(MNISTModel, self).__init__()
        self.conv1 = ConvBNRelu(filters=32, kernel_size=[3, 3], padding='SAME')        
        self.pool1 = keras.layers.MaxPool2D(padding='SAME')
        self.conv2 = ConvBNRelu(filters=64, kernel_size=[3, 3], padding='SAME')
        self.pool2 = keras.layers.MaxPool2D(padding='SAME')
        self.conv3 = ConvBNRelu(filters=128, kernel_size=[3, 3], padding='SAME')
        self.pool3 = keras.layers.MaxPool2D(padding='SAME')
        self.pool3_flat = keras.layers.Flatten()
        self.dense4 = DenseBNRelu(units=256)
        self.drop4 = keras.layers.Dropout(rate=0.4)
        self.dense5 = keras.layers.Dense(units=10, kernel_initializer='glorot_normal')
    def call(self, inputs, training=False):
        net = self.conv1(inputs)        
        net = self.pool1(net)
        net = self.conv2(net)
        net = self.pool2(net)
        net = self.conv3(net)
        net = self.pool3(net)
        net = self.pool3_flat(net)
        net = self.dense4(net)
        net = self.drop4(net)
        net = self.dense5(net)
        return net

In [7]:
models = []
num_models = 5

for m in range(num_models):
  models.append(MNISTModel())

def loss_fn(model, images, labels):
  logit = model(images, training=False)
  loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=labels))

  return loss

def grad(model, iamges, labels):
  with tf.GradientTape() as tape:
    loss = loss_fn(model, images, labels)
  return tape.gradient(loss, model.trainable_variables)

In [8]:
train_images.shape

(60000, 28, 28, 1)

In [9]:
train_images.shape[0]/batch_size*num_models*5

15000.0

In [10]:
lr_decay = tf.keras.optimizers.schedules.ExponentialDecay(lr,
                                                          train_images.shape[0]/batch_size*num_models*5,
                                                          0.5,
                                                          staircase=True) # 5epoch을 의미함. staircase는 5epoch 이후 lr을 0.5배로 줄이는 것
optimizer = keras.optimizers.Adam(lr_decay)

def evaluate(models, images, labels):
  predictions = tf.zeros_like(labels)
  for model in models:
    logits = model(images, training=False)
    predictions += logits

  correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(labels,1))
  accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

  return accuracy

In [11]:
import time

In [13]:
for epoch in range(1,10):
  avg_loss, train_acc, test_acc, train_step, test_step = 0.,0.,0.,0,0
  start_time = time.time()

  print("[*]{}Epoch is started...".format(epoch))

  for images, labels in train_dataset:
    for model in models:
      grads = grad(model, images, labels)
      optimizer.apply_gradients(zip(grads, model.trainable_variables))
      loss = loss_fn(model, images, labels)
      avg_loss += loss / num_models
    
    acc = evaluate(models, images, labels)
    train_acc += acc
    train_step += 1

  avg_loss = avg_loss / train_step
  train_acc = train_acc / train_step

  for images, labels in test_dataset:
    acc = evaluate(models, images, labels)
    test_acc += acc
    test_step +=1

  test_acc = test_acc / test_step
  end_time = time.time()
  total_time = end_time - start_time

  print("[{}]Epochs, Loss : {:4.4f}, Time : {:4.1f} Train Accuracy : {:6.4f}, Test Accuracy : {:6.4f}".\
        format(epoch, avg_loss, total_time, train_acc, test_acc))

[*]1Epoch is started...
[1]Epochs, Loss : 0.1207, Time : 89.6 Train Accuracy : 0.9675, Test Accuracy : 0.9897
[*]2Epoch is started...
[2]Epochs, Loss : 0.0258, Time : 88.5 Train Accuracy : 0.9937, Test Accuracy : 0.9922
[*]3Epoch is started...
[3]Epochs, Loss : 0.0153, Time : 88.1 Train Accuracy : 0.9968, Test Accuracy : 0.9927
[*]4Epoch is started...
[4]Epochs, Loss : 0.0107, Time : 87.9 Train Accuracy : 0.9981, Test Accuracy : 0.9931
[*]5Epoch is started...
[5]Epochs, Loss : 0.0067, Time : 88.0 Train Accuracy : 0.9990, Test Accuracy : 0.9943
[*]6Epoch is started...
[6]Epochs, Loss : 0.0034, Time : 87.8 Train Accuracy : 0.9995, Test Accuracy : 0.9947
[*]7Epoch is started...
[7]Epochs, Loss : 0.0016, Time : 87.9 Train Accuracy : 0.9998, Test Accuracy : 0.9947
[*]8Epoch is started...
[8]Epochs, Loss : 0.0014, Time : 88.0 Train Accuracy : 0.9998, Test Accuracy : 0.9950
[*]9Epoch is started...
[9]Epochs, Loss : 0.0014, Time : 87.9 Train Accuracy : 0.9998, Test Accuracy : 0.9948


model.variables -> model.trainable_variables로 변경하니 warning이 사라짐