# ResNet 학습(CIFAR-10 dataset)

ResNet은 2015년에 발표된 네트워크로, residual mapping을 학습하는 residual block을 사용합니다.   
네트워크가 깊은데도 성능이 좋다고 알려져 있어, CIFAR-10 dataset을 이용하여 정확도를 확인해 보겠습니다.

학습에 사용할 CIFAR-10 dataset을 가져옵니다.   
dataset download url : https://www.cs.toronto.edu/~kriz/cifar.html

In [None]:
import numpy as np

# Directory holding the extracted CIFAR-10 python batches, and the batch file
# names: the five training batches first, the test batch last.
cifar_dir = './cifar-10-batches-py/'
cifar_file = [f'data_batch_{idx}' for idx in range(1, 6)] + ['test_batch']

# Load one pickled batch file
def unpickle(file):
    """Read a pickled batch file and return the object stored in it.

    Args:
        file: Path of the pickle file to open.

    Returns:
        The unpickled object. Because of ``encoding='bytes'``, 8-bit strings
        written by Python 2 are returned as ``bytes``, which is why the
        batches are indexed with keys such as ``b'data'`` and ``b'labels'``.
    """
    import pickle

    # NOTE: unpickling can execute arbitrary code, so only load batch files
    # that come from a trusted source.
    with open(file, 'rb') as fo:
        return pickle.load(fo, encoding='bytes')
# Load every batch file listed in cifar_file (the last entry is the test batch)
dataset = [unpickle(cifar_dir + file) for file in cifar_file]

# Split the batches into training and test data
train_batches = dataset[:-1]
test_batch = dataset[-1]

# Stack the per-batch pixel arrays directly. Going through .tolist() would turn
# every pixel of every training image into an element of nested Python lists.
# (assumes each batch[b'data'] is a 2-D array with one flat image per row)
X_train_tmp = np.concatenate([batch[b'data'] for batch in train_batches])
y_train = [label for batch in train_batches for label in batch[b'labels']]
X_test_tmp = test_batch[b'data']
y_test = test_batch[b'labels']

# Image preprocessing
pixel = 1024  # number of values in one colour plane (32 * 32)


def _flat_rows_to_images(flat_rows):
    """Turn flat image rows into an array of shape (N, 32, 32, 3).

    Each row is read as three consecutive planes of ``pixel`` values (one per
    channel). The planes are interleaved so the channel becomes the last axis,
    and every run of 32 values becomes one image row.
    """
    planes = np.asarray(flat_rows).reshape(-1, 3, pixel)  # (N, 3, 1024)
    interleaved = planes.transpose(0, 2, 1)               # (N, 1024, 3)
    return interleaved.reshape(-1, 32, 32, 3)


X_train = _flat_rows_to_images(X_train_tmp)
X_test = _flat_rows_to_images(X_test_tmp)

# Scale the pixel values by 1/255 (maps 0..255 into 0..1)
X_train = X_train/255.0
X_test = X_test/255.0

# Label preprocessing: wrap every label in its own list so the label arrays
# become columns of shape (N, 1)
y_train = np.array([[label] for label in y_train])
y_test = np.array([[label] for label in y_test])

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(50000, 32, 32, 3) (50000, 1) (10000, 32, 32, 3) (10000, 1)


ResNet의 residual block을 간단하게 나타내면 input에 대해,   

    [input] -> (Conv) -> [c1]   
    [c1] -> (Conv) -> [c2]   
    [input] + [c2] -> [output]   
    
이러한 구조를 가집니다.

CIFAR-10 이미지는 32x32로 작은 데 반해 원래의 ResNet은 너무 크기 때문에, 모델을 줄여서 구현하고 학습을 진행합니다.

In [4]:
from keras import models, layers

# Input layer, followed by a first 3x3 convolution producing 64 channels
input_layer = layers.Input(shape=(32, 32, 3))
conv_opts = {'padding': 'same', 'activation': 'relu'}
res = layers.Conv2D(64, (3, 3), **conv_opts)(input_layer)

# Convolutional layers
# Three residual blocks with 64 filters: two 3x3 convolutions whose output is
# added back onto the block input.
for _block in range(3):
    branch = layers.Conv2D(64, (3, 3), **conv_opts)(res)
    branch = layers.Conv2D(64, (3, 3), **conv_opts)(branch)
    res = layers.Add()([res, branch])

# Two plain convolutions (no shortcut) that widen the features to 128 filters
for _layer in range(2):
    res = layers.Conv2D(128, (3, 3), **conv_opts)(res)

# Two residual blocks with 128 filters
for _block in range(2):
    branch = layers.Conv2D(128, (3, 3), **conv_opts)(res)
    branch = layers.Conv2D(128, (3, 3), **conv_opts)(branch)
    res = layers.Add()([res, branch])

# Classifier head: max-pooling, flatten, then three dense layers ending in a
# 10-way softmax
pooled = layers.MaxPooling2D(2)(res)
flat = layers.Flatten()(pooled)
hidden = layers.Dense(128, activation='relu')(flat)
hidden = layers.Dense(64, activation='relu')(hidden)
output_layer = layers.Dense(10, activation='softmax')(hidden)


model = models.Model(input_layer, output_layer)
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 32, 32, 3)]  0           []                               
                                                                                                  
 conv2d_13 (Conv2D)             (None, 32, 32, 64)   1792        ['input_2[0][0]']                
                                                                                                  
 conv2d_14 (Conv2D)             (None, 32, 32, 64)   36928       ['conv2d_13[0][0]']              
                                                                                                  
 conv2d_15 (Conv2D)             (None, 32, 32, 64)   36928       ['conv2d_14[0][0]']              
                                                                                            

In [5]:
# Labels are integer class ids, hence the sparse categorical cross-entropy loss
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs, then store the trained model for the final comparison
hist = model.fit(x=X_train, y=y_train, epochs=10)
model.save('./model/ResNet_cifar10_model.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  layer_config = serialize_layer_fn(layer)


In [6]:
# Evaluate on the test data; the displayed value is [loss, accuracy]
model.evaluate(X_test, y_test)



[1.4768956899642944, 0.6388000249862671]

# Pooling layer 추가

이번에는 pooling layer를 하나 더 추가해 파라미터 수를 줄인 뒤 학습을 진행해보겠습니다.

In [10]:
from keras import models, layers

# Input layer, followed by a first 3x3 convolution producing 64 channels
input_layer = layers.Input(shape=(32, 32, 3))
conv_opts = {'padding': 'same', 'activation': 'relu'}
res = layers.Conv2D(64, (3, 3), **conv_opts)(input_layer)

# Convolutional layers
# Three residual blocks with 64 filters: two 3x3 convolutions whose output is
# added back onto the block input.
for _block in range(3):
    branch = layers.Conv2D(64, (3, 3), **conv_opts)(res)
    branch = layers.Conv2D(64, (3, 3), **conv_opts)(branch)
    res = layers.Add()([res, branch])

# Two plain convolutions (no shortcut) that widen the features to 128 filters
for _layer in range(2):
    res = layers.Conv2D(128, (3, 3), **conv_opts)(res)

# Residual block with 128 filters, followed by the additional max-pooling layer
branch = layers.Conv2D(128, (3, 3), **conv_opts)(res)
branch = layers.Conv2D(128, (3, 3), **conv_opts)(branch)
res = layers.Add()([res, branch])
res = layers.MaxPooling2D(2)(res)

# Residual block applied to the pooled feature map
branch = layers.Conv2D(128, (3, 3), **conv_opts)(res)
branch = layers.Conv2D(128, (3, 3), **conv_opts)(branch)
res = layers.Add()([res, branch])

# Classifier head: max-pooling, flatten, then three dense layers ending in a
# 10-way softmax
pooled = layers.MaxPooling2D(2)(res)
flat = layers.Flatten()(pooled)
hidden = layers.Dense(128, activation='relu')(flat)
hidden = layers.Dense(64, activation='relu')(hidden)
output_layer = layers.Dense(10, activation='softmax')(hidden)


model = models.Model(input_layer, output_layer)
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_6 (InputLayer)           [(None, 32, 32, 3)]  0           []                               
                                                                                                  
 conv2d_61 (Conv2D)             (None, 32, 32, 64)   1792        ['input_6[0][0]']                
                                                                                                  
 conv2d_62 (Conv2D)             (None, 32, 32, 64)   36928       ['conv2d_61[0][0]']              
                                                                                                  
 conv2d_63 (Conv2D)             (None, 32, 32, 64)   36928       ['conv2d_62[0][0]']              
                                                                                            

pooling layer 하나만 추가했는데 파라미터 수가 520만 -> 200만 정도로 줄어든 것을 볼 수 있습니다.

In [11]:
# Labels are integer class ids, hence the sparse categorical cross-entropy loss
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs, then store the trained model for the final comparison
hist = model.fit(x=X_train, y=y_train, epochs=10)
model.save('./model/ResNet_pool_cifar10_model.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  layer_config = serialize_layer_fn(layer)


In [12]:
# Evaluate on the test data; the displayed value is [loss, accuracy]
model.evaluate(X_test, y_test)



[0.9882370233535767, 0.7307000160217285]

# Bottleneck 구조

bottleneck 구조를 이용해보겠습니다. bottleneck 구조는 ResNet 중에서 50 layer 이상을 가지는 네트워크에 사용되는 구조입니다. 여기서는 단순히 적용만 해서 결과가 어떻게 나오는지 보겠습니다. 다만 ResNet50 이상에서의 bottleneck 구조는 64->64->256처럼 마지막 1x1 conv layer의 filter 수가 4배가 되지만, 여기서는 filter 수를 그대로 유지하도록 구성하였습니다. bottleneck 구조는 파라미터 수와 연산량을 줄이기 위한 방법입니다.

In [13]:
from keras import models, layers

# Input layer, followed by a first 3x3 convolution producing 64 channels
input_layer = layers.Input(shape=(32, 32, 3))
conv_opts = {'padding': 'same', 'activation': 'relu'}
res = layers.Conv2D(64, (3, 3), **conv_opts)(input_layer)

# Convolutional layers
# Every block stacks a 1x1, a 3x3 and another 1x1 convolution.
block_kernels = ((1, 1), (3, 3), (1, 1))

# Three residual blocks with 64 filters: the block output is added back onto
# the block input.
for _block in range(3):
    branch = res
    for kernel in block_kernels:
        branch = layers.Conv2D(64, kernel, **conv_opts)(branch)
    res = layers.Add()([res, branch])

# One plain 1x1-3x3-1x1 stack (no shortcut) that widens the features to 128 filters
for kernel in block_kernels:
    res = layers.Conv2D(128, kernel, **conv_opts)(res)

# Residual block with 128 filters, followed by max-pooling
branch = res
for kernel in block_kernels:
    branch = layers.Conv2D(128, kernel, **conv_opts)(branch)
res = layers.Add()([res, branch])
res = layers.MaxPooling2D(2)(res)

# Residual block applied to the pooled feature map
branch = res
for kernel in block_kernels:
    branch = layers.Conv2D(128, kernel, **conv_opts)(branch)
res = layers.Add()([res, branch])

# Classifier head: max-pooling, flatten, then three dense layers ending in a
# 10-way softmax
pooled = layers.MaxPooling2D(2)(res)
flat = layers.Flatten()(pooled)
hidden = layers.Dense(128, activation='relu')(flat)
hidden = layers.Dense(64, activation='relu')(hidden)
output_layer = layers.Dense(10, activation='softmax')(hidden)


model = models.Model(input_layer, output_layer)
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_7 (InputLayer)           [(None, 32, 32, 3)]  0           []                               
                                                                                                  
 conv2d_74 (Conv2D)             (None, 32, 32, 64)   1792        ['input_7[0][0]']                
                                                                                                  
 conv2d_75 (Conv2D)             (None, 32, 32, 64)   4160        ['conv2d_74[0][0]']              
                                                                                                  
 conv2d_76 (Conv2D)             (None, 32, 32, 64)   36928       ['conv2d_75[0][0]']              
                                                                                            

bottleneck 구조를 적용하니 파라미터 수는 약 30만이 줄었습니다.

In [14]:
# Labels are integer class ids, hence the sparse categorical cross-entropy loss
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs, then store the trained model for the final comparison
hist = model.fit(x=X_train, y=y_train, epochs=10)
model.save('./model/ResNet_pool_bottle_cifar10_model.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  layer_config = serialize_layer_fn(layer)


In [15]:
# Evaluate on the test data; the displayed value is [loss, accuracy]
model.evaluate(X_test, y_test)



[1.0771136283874512, 0.6965000033378601]

# 결론

간단하게 ResNet을 구현하여 CIFAR-10 dataset을 이용하여 학습을 진행해보았습니다.   
기본 모델, pooling layer를 추가한 모델, bottleneck 구조를 가진 모델 이렇게 3가지의 모델을 학습해보았습니다.   
각 모델의 정확도와 손실(loss)을 보면,

In [3]:
import keras

# Reload the three saved models: basic, extra pooling, pooling + bottleneck
model_paths = (
    './model/ResNet_cifar10_model.h5',
    './model/ResNet_pool_cifar10_model.h5',
    './model/ResNet_pool_bottle_cifar10_model.h5',
)
resbasic, respool, resbottle = [keras.models.load_model(path) for path in model_paths]

# Evaluate every model on the test data; each result unpacks into (loss, accuracy)
scores = [net.evaluate(X_test, y_test) for net in (resbasic, respool, resbottle)]
(resbasic_loss, resbasic_acc), (respool_loss, respool_acc), (resbottle_loss, resbottle_acc) = scores

print('per model accuracy : ', resbasic_acc, respool_acc, resbottle_acc)
print('per model loss : ', resbasic_loss, respool_loss, resbottle_loss)

per model accuracy :  0.6388000249862671 0.7307999730110168 0.6965000033378601
per model loss :  1.476896047592163 0.9882370829582214 1.0771136283874512


pooling layer를 하나만 추가했지만 기본 모델과 비교했을 때 정확도가 약 9%p(0.64 -> 0.73) 증가하였습니다. 손실(loss) 또한 많이 줄었습니다.   
bottleneck 구조는 pooling layer를 추가한 모델에 적용하였지만 정확도는 오히려 떨어진 것을 확인할 수 있습니다.

ResNet이 깊은 모델이기 때문에 학습 시간이 오래 걸려 epoch를 10번으로 하였습니다. 그리고 학습에 이용한 dataset이 작은 이미지이기 때문에 모델을 기존 ResNet보다 얕게 구성하였습니다. 그러다보니 깊은 ResNet 모델에서 사용하는 bottleneck 구조의 경우 적용을 하여도 오히려 정확도가 낮아지는 것을 확인하였습니다. bottleneck에 관해 정확히 알기 위해서는 이후에 깊은 모델을 구성하여 bottleneck 구조의 유무에 따른 결과를 다시 한번 확인해보도록 하겠습니다.