# CIFAR-10

In [2]:
# Helper libraries
import numpy as np
import matplotlib.pyplot as plt

import tensorflow_datasets as tfds

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers



In [3]:
# Tensorflow가 활용할 GPU가 장착되어 있는지 확인
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

# ResNet block

In [4]:
# cnn block 함수
def conv_block(input_layer, channel, kernel_size, padding='same', strides=1, activation='relu'):
        x = keras.layers.Conv2D(filters=channel,
                                kernel_size=kernel_size,
                                kernel_initializer='he_normal',
                                kernel_regularizer=keras.regularizers.l2(1e-4),
                                padding='same',
                                strides=strides)(input_layer)
        x = keras.layers.BatchNormalization()(x)
        if activation:
            x = keras.layers.Activation(activation)(x)
    
        return x

In [5]:
#residual block 함수

def build_residual_block(input_layer, num_cnn=3, is_50=True, channel=64,block_num=0 ):
    
    x = input_layer
    
    #resnet-50
    
    if is_50:
        for i in range(num_cnn):
            if i ==0:
                shortcut = conv_block(x,channel*4,(1,1), strides=2, activation=None) #shortcut은 gradient가 잘 흐를 수 있도록 도와 줌
                x = conv_block(x, channel, (1,1), strides=2)
                x = conv_block(x, channel, (3,3))
                x = conv_block(x, channel*4, (1,1), activation =None)
            else:
                shortcut = x
                x = conv_block(x,channel,(1,1))
                x = conv_block(x,channel,(3,3))
                x = conv_block(x, channel*4, (1,1), activation =None)
        
            x = keras.layers.Add()([x, shortcut])
            x = keras.layers.Activation('relu')(x)
    
    
    #resnet-34
    
    else:
        for i in range(num_cnn):
            if block_num > 0 and i ==0:
                shortcut = conv_block(x, channel, (1,1), strides=2, activation=None)
                x = conv_block(x, channel, (3,3), strides=2)
                x = conv_block(x, channel, (3,3), activation=None)
            else:
                shortcut = x
                x = conv_block(x, channel, (3,3))
                x = conv_block(x, channel, (3,3), activation=None)
            
            x = keras.layers.Add()([x, shortcut])
            x = keras.layers.Activation('relu')(x)

    return x

# ResNet-34, ResNet-50 Complete Model

In [6]:
def build_resnet(num_cnn_list=[3,4,6,3], 
                 channel_list=[64,128,256,512],
                 input_shape=(32,32,3),
                 num_classes=10,
                 name='ResNet_50',
                 is_50=True,
                 activation='softmax'):
    #모델을 만들기 전에 config list들이 같은 길이인지 확인
    assert len(num_cnn_list) == len(channel_list)
    
    #input layer
    input_layer = keras.layers.Input(shape=input_shape)
    
    #first layer
    x = conv_block(input_layer, 64, (7,7), strides =2)
    x = keras.layers.MaxPool2D(pool_size=(2,2), strides =2)(x)
    
    # Residual block(config list들의 길이만큼 반복해서 블록을 생성)
    for block_num, (num_cnn, channel) in enumerate(zip(num_cnn_list, channel_list)):
        x = build_residual_block(x, 
                               num_cnn=num_cnn, 
                               channel=channel, 
                               block_num=block_num, 
                               is_50=is_50)
        
    x = keras.layers.GlobalAveragePooling2D()(x) #전역 영역의 평균값을 계산
    x = keras.layers.Dense(num_classes, 
                           activation=activation, 
                           kernel_initializer='he_normal'
                          )(x)
    
    model = keras.Model(inputs=input_layer, outputs=x, name=name)
    
    return model


In [7]:
resnet_34 = build_resnet(input_shape=(32,32,3), is_50=False)
resnet_34.summary()

Model: "ResNet_50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 16, 16, 64)   9472        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 16, 16, 64)   256         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 16, 16, 64)   0           batch_normalization[0][0]        
__________________________________________________________________________________________

In [8]:
resnet_50 = build_resnet(input_shape=(32,32,3), is_50=True)
resnet_50.summary()

Model: "ResNet_50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv2d_36 (Conv2D)              (None, 16, 16, 64)   9472        input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization_36 (BatchNo (None, 16, 16, 64)   256         conv2d_36[0][0]                  
__________________________________________________________________________________________________
activation_33 (Activation)      (None, 16, 16, 64)   0           batch_normalization_36[0][0]     
__________________________________________________________________________________________

# Plain network

In [9]:
#plain block 함수

def build_plain_block(input_layer, 
                      num_cnn=3, 
                      channel=64, 
                      block_num=0, 
                      is_50=True):
    
    x = input_layer
    
    # plain-50
    if is_50:
        for i in range(num_cnn):
            if block_num > 0 and i == 0:
                x = conv_block(x, channel, (1,1), strides=2)
                x = conv_block(x, channel, (3,3))
                x = conv_block(x, channel*4, (1,1))
            else:
                x = conv_block(x, channel, (1,1))
                x = conv_block(x, channel, (3,3))
                x = conv_block(x, channel*4, (1,1))
    
    # plain-34
    else:
        for i in range(num_cnn):
            if block_num > 0 and i == 0:
                x = conv_block(x, channel, (3,3), strides=2)
                x = conv_block(x, channel, (3,3))
            else:
                x = conv_block(x, channel, (3,3))
                x = conv_block(x, channel, (3,3))

    return x

In [10]:
def build_plain(num_cnn_list=[3,4,6,3], 
                   channel_list=[64,128,256,512],
                   input_shape=(32,32,3),
                   num_classes=10,
                   name='Plain_50',
                   is_50=True,
                   activation='softmax'):
    #모델을 만들기 전에 config list들이 같은 길이인지 확인
    assert len(num_cnn_list) == len(channel_list)
    
    input_layer = keras.layers.Input(shape=input_shape, name='Input')
    
   
    #first layer
    x = conv_block(input_layer, 64, (7,7), strides =2)
    x = keras.layers.MaxPool2D(pool_size=(2,2), strides =2)(x)
    
    # Residual block(config list들의 길이만큼 반복해서 블록을 생성)
    for block_num, (num_cnn, channel) in enumerate(zip(num_cnn_list, channel_list)):
        x = build_plain_block(x, 
                              num_cnn=num_cnn, 
                              channel=channel, 
                              block_num=block_num, 
                              is_50=is_50)
        
    x = keras.layers.GlobalAveragePooling2D()(x) #전역 영역의 평균값을 계산
    x = keras.layers.Dense(num_classes, 
                           activation=activation, 
                           kernel_initializer='he_normal'
                          )(x)
    
    model = keras.Model(inputs=input_layer, outputs=x, name=name)
    
    return model



In [11]:
#PlainNet 34
plain_34 = build_plain(is_50=False,
                       input_shape=(224,224,3))
plain_34.summary()

Model: "Plain_50"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input (InputLayer)           [(None, 224, 224, 3)]     0         
_________________________________________________________________
conv2d_89 (Conv2D)           (None, 112, 112, 64)      9472      
_________________________________________________________________
batch_normalization_89 (Batc (None, 112, 112, 64)      256       
_________________________________________________________________
activation_82 (Activation)   (None, 112, 112, 64)      0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 56, 56, 64)        0         
_________________________________________________________________
conv2d_90 (Conv2D)           (None, 56, 56, 64)        36928     
_________________________________________________________________
batch_normalization_90 (Batc (None, 56, 56, 64)        256

In [12]:
#PlainNet50
plain_50 = build_plain(is_50=True,
                       input_shape=(224,224,3)
                       )
plain_50.summary()

Model: "Plain_50"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input (InputLayer)           [(None, 224, 224, 3)]     0         
_________________________________________________________________
conv2d_122 (Conv2D)          (None, 112, 112, 64)      9472      
_________________________________________________________________
batch_normalization_122 (Bat (None, 112, 112, 64)      256       
_________________________________________________________________
activation_115 (Activation)  (None, 112, 112, 64)      0         
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 56, 56, 64)        0         
_________________________________________________________________
conv2d_123 (Conv2D)          (None, 56, 56, 64)        4160      
_________________________________________________________________
batch_normalization_123 (Bat (None, 56, 56, 64)        256

# Dataset

In [13]:
#Dataset
import urllib3
urllib3.disable_warnings()

#tfds.disable_progress_bar()   # 이 주석을 풀면 데이터셋 다운로드과정의 프로그레스바가 나타나지 않습니다.

(ds_train, ds_test), ds_info = tfds.load(
    'cats_vs_dogs',
    split=['train[:80%]', 'train[80%:]'],
    as_supervised=True,
    shuffle_files=True,
    with_info=True,
)

[1mDownloading and preparing dataset 786.68 MiB (download: 786.68 MiB, generated: Unknown size, total: 786.68 MiB) to /aiffel/tensorflow_datasets/cats_vs_dogs/4.0.0...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]





Generating splits...:   0%|          | 0/1 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/23262 [00:00<?, ? examples/s]



Shuffling cats_vs_dogs-train.tfrecord...:   0%|          | 0/23262 [00:00<?, ? examples/s]

[1mDataset cats_vs_dogs downloaded and prepared to /aiffel/tensorflow_datasets/cats_vs_dogs/4.0.0. Subsequent calls will reuse this data.[0m


In [14]:
print(ds_info.features)

FeaturesDict({
    'image': Image(shape=(None, None, 3), dtype=tf.uint8),
    'image/filename': Text(shape=(), dtype=tf.string),
    'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=2),
})


In [15]:
print(tf.data.experimental.cardinality(ds_train))
print(tf.data.experimental.cardinality(ds_test))

tf.Tensor(18610, shape=(), dtype=int64)
tf.Tensor(4652, shape=(), dtype=int64)


In [16]:
#Normalization
#머신러닝:scale이 큰 feature의 영향 커짐 방지 , 딥러닝:Local optimum에 빠질 위험 감소(학습 속도 향상)
def normalize_and_resize_img(image, label):
    image = tf.image.resize(image, (224,224)) # resize
    image = tf.cast(image, tf.float32) / 255.
    return image, label

In [17]:
def apply_normalize_on_dataset(ds, is_test=False, batch_size=16):
    ds = ds.map(
        normalize_and_resize_img, 
        num_parallel_calls=1
    )
    ds = ds.batch(batch_size)
    if not is_test:
        ds = ds.repeat()
        ds = ds.shuffle(200)
    ds = ds.prefetch(tf.data.experimental.AUTOTUNE)
    return ds

In [18]:
BATCH_SIZE = 32
EPOCH = 20

In [19]:
ds_train = apply_normalize_on_dataset(ds_train, batch_size=BATCH_SIZE)
ds_test = apply_normalize_on_dataset(ds_test, batch_size=BATCH_SIZE)

# ResNet-50 vs Plain-50 비교

In [20]:
#resnet_50 일 때 is_50=True 
resnet_50 = build_resnet(num_cnn_list = [3, 4, 6, 3],
                         channel_list=[64, 128, 256, 512],
                         is_50=True,
                         num_classes=1,
                         input_shape=(224,224,3),
                         name='ResNet_50')

In [21]:
resnet_50.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(lr=0.01), #momentum=0.9
    metrics=['accuracy'],
)


history_resnet_50 = resnet_50.fit(
    ds_train,
    steps_per_epoch=int(ds_info.splits['train[:80%]'].num_examples/BATCH_SIZE),
    validation_steps=int(ds_info.splits['train[80%:]'].num_examples/BATCH_SIZE),
    epochs=EPOCH,
    validation_data=ds_test,
    verbose=1,
    use_multiprocessing=True,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [24]:
plain_50 = build_plain(num_cnn_list = [3, 4, 6, 3],
                         channel_list=[64, 128, 256, 512],
                         is_50=True,
                         num_classes=1,
                         input_shape=(224,224,3),
                         name='Plain_50')

In [21]:
plain_50.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(lr=0.01),
    metrics=['accuracy'],
)

history_plain_50 = plain_50.fit(
    ds_train,
    steps_per_epoch=int(ds_info.splits['train[:80%]'].num_examples/BATCH_SIZE),
    validation_steps=int(ds_info.splits['train[80%:]'].num_examples/BATCH_SIZE),
    epochs=EPOCH,
    validation_data=ds_test,
    verbose=1,
    use_multiprocessing=True,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [28]:
#visualization
'''
import matplotlib.pyplot as plt

plt.figure(figsize=(15,5))
plt.subplot(1,2,1)
plt.plot(history_resnet_50.history['loss'],'b')
plt.plot(history_plain_50.history['loss'],'g')
plt.title('training loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['ResNet_50', 'Plain_50'], loc='upper right')

plt.subplot(1, 2, 2)
plt.plot(history_resnet_50.history['val_accuracy'], 'r')
plt.plot(history_plain_50.history['val_accuracy'], 'm')
plt.title('validation accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['ResNet_50', 'Plain_50'], loc='lower right')
plt.show()
'''

"\nimport matplotlib.pyplot as plt\n\nplt.figure(figsize=(15,5))\nplt.subplot(1,2,1)\nplt.plot(history_resnet_50.history['loss'],'b')\nplt.plot(history_plain_50.history['loss'],'g')\nplt.title('training loss')\nplt.ylabel('Loss')\nplt.xlabel('Epoch')\nplt.legend(['ResNet_50', 'Plain_50'], loc='upper right')\n\nplt.subplot(1, 2, 2)\nplt.plot(history_resnet_50.history['val_accuracy'], 'r')\nplt.plot(history_plain_50.history['val_accuracy'], 'm')\nplt.title('validation accuracy')\nplt.ylabel('Accuracy')\nplt.xlabel('Epoch')\nplt.legend(['ResNet_50', 'Plain_50'], loc='lower right')\nplt.show()\n"

# ResNet-34 vs Plain-34 비교

In [21]:
resnet_34 = build_resnet(num_cnn_list = [3, 4, 6, 3],
                         channel_list=[64, 128, 256, 512],
                         is_50=False,
                         num_classes=1,
                         input_shape=(224,224,3),
                         name='ResNet_34')

In [22]:
resnet_34.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(lr=0.01),
    metrics=['accuracy'],
)

history_resnet_34 = resnet_34.fit(
    ds_train,
    steps_per_epoch=int(ds_info.splits['train[:80%]'].num_examples/BATCH_SIZE),
    validation_steps=int(ds_info.splits['train[80%:]'].num_examples/BATCH_SIZE),
    epochs=EPOCH,
    validation_data=ds_test,
    verbose=1,
    use_multiprocessing=True,

)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [23]:
plain_34 = build_plain(num_cnn_list = [3, 4, 6, 3],
                         channel_list=[64, 128, 256, 512],
                         is_50=False,
                         num_classes=1,
                         input_shape=(224,224,3),
                         name='Plain_34')

In [24]:
plain_34.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(lr=0.01),
    metrics=['accuracy'],
)

history_plain_34 = plain_34.fit(
    ds_train,
    steps_per_epoch=int(ds_info.splits['train[:80%]'].num_examples/BATCH_SIZE),
    validation_steps=int(ds_info.splits['train[80%:]'].num_examples/BATCH_SIZE),
    epochs=EPOCH,
    validation_data=ds_test,
    verbose=1,
    use_multiprocessing=True,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [27]:
#visualization
'''
plt.figure(figsize=(15,5))
plt.subplot(1,2,1)
plt.plot(history_resnet_34.history['loss'],'b')
plt.plot(history_plain_34.history['loss'],'g')
plt.title('training loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['ResNet_34', 'Plain_34'], loc='upper right')

plt.subplot(1, 2, 2)
plt.plot(history_resnet_34.history['val_accuracy'], 'r')
plt.plot(history_plain_34.history['val_accuracy'], 'm')
plt.title('validation accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['ResNet_34', 'Plain_34'], loc='lower right')
plt.show()
'''

"\nplt.figure(figsize=(15,5))\nplt.subplot(1,2,1)\nplt.plot(history_resnet_34.history['loss'],'b')\nplt.plot(history_plain_34.history['loss'],'g')\nplt.title('training loss')\nplt.ylabel('Loss')\nplt.xlabel('Epoch')\nplt.legend(['ResNet_34', 'Plain_34'], loc='upper right')\n\nplt.subplot(1, 2, 2)\nplt.plot(history_resnet_34.history['val_accuracy'], 'r')\nplt.plot(history_plain_34.history['val_accuracy'], 'm')\nplt.title('validation accuracy')\nplt.ylabel('Accuracy')\nplt.xlabel('Epoch')\nplt.legend(['ResNet_34', 'Plain_34'], loc='lower right')\nplt.show()\n"

# 결과 
resnet model과 plain을 비교 했을 때 resnet 이 더 나은 결과를 보이고 있다.
찾아보니 is_plain과 같이  skip connection(=shortcut)을 하는 법이 있다고 한다 다음에 이용해서 모델을 만들어 봐야겠다.
그리고 파라미터를 보는 중에 꼭 optimizer SGD를 사용해야 하는 가 궁금증이 들어서 찾다가 Adam이 SGD 보다 좋은 성능을 낼 수 있다는 것을 알았다.  
https://www.sciencedirect.com/science/article/pii/S2405959519303455#fig2
      
* SGD 사용시 epoch 20이 베스트 하이퍼 파라미터의 조합이라고 한다.