# 심층 신경망 - 모델 세부 설정-초기값, 규제, Dropout-MNIST

In [6]:
import tensorflow as tf
from tensorflow import keras

mnist = keras.datasets.mnist

# 데이터셋을 로드
(x_train,y_train) , (x_test, y_test) = mnist.load_data()

# # 로드된 데이터셋 확인
print('train set: ', x_train.shape, y_train.shape)
print('test  set: ', x_test.shape, y_test.shape)

# # 데이터 정규화


train set:  (60000, 28, 28) (60000,)
test  set:  (10000, 28, 28) (10000,)


## 초기값 설정

In [7]:
from keras.layers import Dense
dense = Dense(256,activation='relu')
dense.get_config()

{'name': 'dense',
 'trainable': True,
 'dtype': 'float32',
 'units': 256,
 'activation': 'relu',
 'use_bias': True,
 'kernel_initializer': {'class_name': 'GlorotUniform',
  'config': {'seed': None}},
 'bias_initializer': {'class_name': 'Zeros', 'config': {}},
 'kernel_regularizer': None,
 'bias_regularizer': None,
 'activity_regularizer': None,
 'kernel_constraint': None,
 'bias_constraint': None}

In [8]:
# he normal 초기화
dense = Dense(256,kernel_initializer='he_normal',activation='relu')

In [None]:
# 클래스 인스턴스 초기화


In [None]:
print("https://www.tensorflow.org/api_docs/python/tf/keras/initializers")

## 규제

In [9]:
# 기본 값
dense.get_config()

{'name': 'dense_1',
 'trainable': True,
 'dtype': 'float32',
 'units': 256,
 'activation': 'relu',
 'use_bias': True,
 'kernel_initializer': {'class_name': 'HeNormal', 'config': {'seed': None}},
 'bias_initializer': {'class_name': 'Zeros', 'config': {}},
 'kernel_regularizer': None,
 'bias_regularizer': None,
 'activity_regularizer': None,
 'kernel_constraint': None,
 'bias_constraint': None}

In [13]:
# l1 규제 적용
dense = Dense(256,kernel_regularizer='l1',activation='relu')
print(dense.get_config())

# 클래스 인스턴스 적용, alpha 값 변경
regularizer = tf.keras.regularizers.l1(l1=0.1)
dense = Dense(256, kernel_regularizer=regularizer,activation='relu')
dense.get_config()

{'name': 'dense_6', 'trainable': True, 'dtype': 'float32', 'units': 256, 'activation': 'relu', 'use_bias': True, 'kernel_initializer': {'class_name': 'GlorotUniform', 'config': {'seed': None}}, 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, 'kernel_regularizer': {'class_name': 'L1', 'config': {'l1': 0.009999999776482582}}, 'bias_regularizer': None, 'activity_regularizer': None, 'kernel_constraint': None, 'bias_constraint': None}


{'name': 'dense_7',
 'trainable': True,
 'dtype': 'float32',
 'units': 256,
 'activation': 'relu',
 'use_bias': True,
 'kernel_initializer': {'class_name': 'GlorotUniform',
  'config': {'seed': None}},
 'bias_initializer': {'class_name': 'Zeros', 'config': {}},
 'kernel_regularizer': {'class_name': 'L1',
  'config': {'l1': 0.10000000149011612}},
 'bias_regularizer': None,
 'activity_regularizer': None,
 'kernel_constraint': None,
 'bias_constraint': None}

## 드랍아웃

In [17]:
# Dropout 25% 비율 적용 (25%의 노드가 삭제)
from keras.layers import Dropout

import numpy as np


In [22]:
# DropOut 예제
data = np.arange(1,11).reshape(2,5).astype(np.float32)
layer = keras.layers.Dropout(0.3,input_shape=(2,))
output = layer(data,training=True)
print(output)

tf.Tensor(
[[ 0.         2.857143   4.285714   5.714286   7.1428576]
 [ 8.571428   0.        11.428572  12.857143  14.285715 ]], shape=(2, 5), dtype=float32)


##  배치 정규화

In [24]:
# Model A: Dense + ReLU

model_a = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28,28)),
    tf.keras.layers.Dense(64,activation='relu'),
    tf.keras.layers.Dense(32,activation='relu'),
    tf.keras.layers.Dense(10,activation='softmax')
])

model_a.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense_8 (Dense)             (None, 64)                50240     
                                                                 
 dense_9 (Dense)             (None, 32)                2080      
                                                                 
 dense_10 (Dense)            (None, 10)                330       
                                                                 
Total params: 52,650
Trainable params: 52,650
Non-trainable params: 0
_________________________________________________________________


In [25]:
# Model B: Dense + BatchNorm + ReLU
model_b = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28,28)),

    tf.keras.layers.Dense(64),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),


    tf.keras.layers.Dense(32),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),

    tf.keras.layers.Dense(10,activation='softmax')
])

model_b.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 dense_11 (Dense)            (None, 64)                50240     
                                                                 
 batch_normalization (BatchN  (None, 64)               256       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 64)                0         
                                                                 
 dense_12 (Dense)            (None, 32)                2080      
                                                                 
 batch_normalization_1 (Batc  (None, 32)               128       
 hNormalization)                                      

## 활성화 함수: relu 이외에 Keras가 지원하는 다른 활성화 함수 사용 가능

In [None]:
# LeakyReLU 기본 설정


# LeakyReLU, alpha=0.2 로 변경


In [None]:
# Model C: Dense + BatchNorm + LeakyReLU(0.2)

# 모델 요약


In [None]:

# Model A: Dense + ReLU

# Model B: Dense + BatchNorm + ReLU

# Model C: Dense + BatchNorm + LeakyReLU(0.2)


In [27]:
# 컴파일
model_a.compile(loss='sparse_categorical_crossentropy', metrics='accuracy', optimizer='adam')
model_b.compile(loss='sparse_categorical_crossentropy', metrics='accuracy', optimizer='adam')
# 학습
hist_a = model_a.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=10)
hist_b = model_b.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [32]:
import pandas as pd
print(pd.DataFrame(hist_a.history))
print('*'*45)
print(pd.DataFrame(hist_b.history))

       loss  accuracy  val_loss  val_accuracy
0  1.490590  0.676100  0.691864        0.8118
1  0.544237  0.858083  0.419000        0.9020
2  0.354699  0.912333  0.313420        0.9232
3  0.257143  0.935550  0.249124        0.9384
4  0.204108  0.949000  0.232952        0.9431
5  0.170465  0.955467  0.182710        0.9539
6  0.144280  0.962067  0.174134        0.9583
7  0.133009  0.964800  0.153957        0.9610
8  0.118928  0.968283  0.152994        0.9651
9  0.112073  0.970767  0.174087        0.9631
*********************************************
       loss  accuracy  val_loss  val_accuracy
0  0.312653  0.912850  0.132285        0.9579
1  0.142181  0.956883  0.101134        0.9695
2  0.107725  0.967317  0.084828        0.9731
3  0.092879  0.970650  0.083130        0.9727
4  0.081754  0.974383  0.085274        0.9750
5  0.072978  0.976567  0.081594        0.9759
6  0.067088  0.977933  0.074510        0.9773
7  0.063026  0.979483  0.076083        0.9773
8  0.054943  0.982050  0.073859   