In [None]:
import itertools
import math
import os
import random
import shutil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import torch
from PIL import Image
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import OneHotEncoder
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, Callback, LearningRateScheduler
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, BatchNormalization, Dropout, Reshape, Input
from tensorflow.keras.mixed_precision import experimental as mixed_precision
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.optimizers.schedules import CosineDecay
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import Sequence
from torchvision import transforms

In [None]:
# Prepare the GPU(s): request on-demand memory growth for every physical GPU
# and report how many physical/logical devices TensorFlow sees.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # Iterating an empty list is a no-op, so no separate emptiness check is needed here.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    if gpus:
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
except RuntimeError as e:
    # Report the error instead of aborting the notebook.
    print(e)

1 Physical GPUs, 1 Logical GPUs


2022-05-28 11:45:55.809663: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-28 11:45:55.967925: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-28 11:45:55.968671: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-28 11:45:55.976452: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

In [None]:
import wandb
from wandb.keras import WandbCallback

# `wandb_api_key` is not defined anywhere in this notebook, so a fresh kernel
# would raise NameError. Reuse it if an earlier cell/session defined it,
# otherwise read it from the environment (never hard-code the key here).
# With key=None, wandb falls back to already-stored credentials or a prompt.
wandb_api_key = globals().get("wandb_api_key") or os.environ.get("WANDB_API_KEY")

wandb.login(key=wandb_api_key)
wandb.init(project="resnet-fer-keras-0")

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mhhan14[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
!tar xzvf ../input/challenges-in-representation-learning-facial-expression-recognition-challenge/fer2013.tar.gz
%mkdir ./models

fer2013/fer2013.csv
fer2013/README
fer2013/fer2013.bib
fer2013/


In [None]:
# Path to the FER2013 CSV extracted from the tarball above.
DATA_PATH = "./fer2013/fer2013.csv"

In [None]:
# List the GPUs visible to this kernel (name and UUID).
!nvidia-smi -L

GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-7ed3589b-eafb-b891-ce82-e4610321b848)


In [None]:
# Enable mixed precision globally through the stable Keras API; the
# `mixed_precision.experimental` module used previously is deprecated.
# Layers created after this point compute in float16 and keep float32 variables.
policy = tf.keras.mixed_precision.Policy('mixed_float16')
tf.keras.mixed_precision.set_global_policy(policy)

2022-05-28 11:46:11.245911: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [None]:
# Reference: https://minimin2.tistory.com/100
# Dataloader class that re-applies data augmentation on every epoch.

class Dataloader(Sequence):
    """Keras ``Sequence`` that transforms images on the fly and flattens multi-crop output.

    Each sample is converted to a PIL image, passed through ``transform`` and
    returned as a stack of crops. All crops of all samples in a batch are
    concatenated along the first axis, and the sample's label is repeated once
    per crop so images and labels stay aligned.

    Args:
        x_set: Indexable collection of images (one 2-D array per sample).
        y_set: Indexable collection of labels, aligned with ``x_set``.
        transform: Optional callable applied to the PIL image. It may return a
            stack of crops shaped ``(ncrops, h, w, c)`` or a single image.
        batch_size: Number of samples (not crops) per batch.
        shuffle: Whether to reshuffle the sample order at the end of each epoch.
    """

    def __init__(self, x_set, y_set, transform=None, batch_size=64, shuffle=True):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.transform = transform  # torchvision-style transform used for data augmentation
        self.on_epoch_end()

    def img_preprocess(self, idx):
        """Return ``(crops, labels)`` for sample ``idx``; both have one row per crop."""
        img = Image.fromarray(np.array(self.x[idx]))
        if self.transform:
            img = self.transform(img)

        crops = np.asarray(img)
        if crops.ndim < 4:
            # No multi-crop transform was applied: treat the result as a single
            # crop. Assumes a channels-last (h, w[, c]) image in that case.
            crops = crops.reshape((1,) + crops.shape[:2] + (-1,))

        # Repeat the label once per crop actually produced instead of assuming 10.
        label = np.repeat(np.asarray(self.y[idx])[np.newaxis, ...], len(crops), axis=0)
        return crops, label

    def __len__(self):
        """Number of batches per epoch (the last batch may be smaller)."""
        # Integer ceiling division; avoids depending on the `math` module.
        return (len(self.x) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, idx):
        """Load batch ``idx`` and return ``(batch_x, batch_y)`` with all crops flattened."""
        indices = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        samples = [self.img_preprocess(i) for i in indices]

        # Concatenating the per-sample crop stacks merges the (sample, crop)
        # axes into a single leading axis.
        batch_x = np.concatenate([crops for crops, _ in samples], axis=0)
        batch_y = np.concatenate([label for _, label in samples], axis=0)

        return batch_x, batch_y

    def on_epoch_end(self):
        """Reset the sample order, reshuffling it when ``shuffle`` is enabled."""
        self.indices = np.arange(len(self.x))
        if self.shuffle:
            np.random.shuffle(self.indices)

In [None]:
# Reference: https://github.com/usef-kh/fer/blob/master/data/fer2013.py
def load_data(path=DATA_PATH):
    """Read the FER2013 CSV at ``path``.

    Returns:
        A tuple ``(dataframe, emotion_mapping)`` where ``emotion_mapping`` maps
        the integer label ids 0-6 to their emotion names.
    """
    emotion_names = ('Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral')
    frame = pd.read_csv(path)
    return frame, dict(enumerate(emotion_names))

# Extract images and labels from the DataFrame and return them as arrays.
def prepare_data(data, num_classes=7):
    """Convert FER2013 rows into an image array and one-hot labels.

    Args:
        data: DataFrame with an ``emotion`` column (integer class ids) and a
            ``pixels`` column (48*48 space-separated pixel values per row).
        num_classes: Width of the one-hot encoding. Fixed up front so every
            split gets the same number of columns even if a class is absent
            from it.

    Returns:
        ``(image_array, image_label)``: a float64 array of shape
        ``(len(data), 48, 48)`` and a float64 one-hot array of shape
        ``(len(data), num_classes)``.

    Raises:
        ValueError: If a label falls outside ``[0, num_classes)`` or a row does
            not contain exactly 48*48 pixel values.
    """
    labels = data['emotion'].to_numpy(dtype=int)
    # Guard explicitly: a negative id would otherwise silently index from the end.
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(
            "emotion labels must be in [0, {}), got range [{}, {}]".format(
                num_classes, labels.min(), labels.max()
            )
        )
    image_label = np.eye(num_classes, dtype=np.float64)[labels]

    image_array = np.zeros(shape=(len(data), 48, 48))
    for i, pixels in enumerate(data['pixels']):
        image_array[i] = np.array(pixels.split(), dtype=np.float64).reshape(48, 48)

    return image_array, image_label


def get_dataloaders(path=DATA_PATH, bs=64, augment=True):
    """Build the train/validation/test loaders for FER2013.

    Args:
        path: Location of the FER2013 CSV.
        bs: Number of samples per batch for all three loaders. Each sample
            expands to ten crops, so a batch holds ``bs * 10`` images.
        augment: When True the training loader uses random augmentation;
            otherwise it uses the same deterministic transform as
            validation/test.

    Returns:
        ``(trainloader, valloader, testloader)``. Training rows come from the
        ``Training`` split, validation from ``PrivateTest`` and test from
        ``PublicTest``.
    """
    fer2013, _ = load_data(path)

    xtrain, ytrain = prepare_data(fer2013[fer2013['Usage'] == 'Training'])
    xval, yval = prepare_data(fer2013[fer2013['Usage'] == 'PrivateTest'])
    xtest, ytest = prepare_data(fer2013[fer2013['Usage'] == 'PublicTest'])

    # Normalize with mean 0 / std 255.
    mu, st = 0, 255

    test_transform = transforms.Compose([
        transforms.TenCrop(40),
        transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
        # Normalize, then move channels last and convert to numpy for Keras.
        transforms.Lambda(lambda tensors: torch.stack([transforms.Normalize(mean=(mu,), std=(st,))(t) for t in tensors]).permute(0, 2, 3, 1).numpy()),
    ])

    if augment:
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(48, scale=(0.8, 1.2)), # randomly rescale
            transforms.RandomApply([transforms.RandomAffine(0, translate=(0.2, 0.2))], p=0.5), # randomly translate
            transforms.RandomHorizontalFlip(), # randomly horizontal flip
            transforms.RandomApply([transforms.RandomRotation(10)], p=0.5), # randomly rotate
            transforms.TenCrop(40), # ten-crop
            transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
            transforms.Lambda(lambda tensors: torch.stack([transforms.Normalize(mean=(mu,), std=(st,))(t) for t in tensors])), # normalize
            # Random erasing, then reorder to (ncrops, 40, 40, 1) to match the Keras model input.
            transforms.Lambda(lambda tensors: torch.stack([transforms.RandomErasing(p=0.5)(t) for t in tensors]).permute(0, 2, 3, 1).numpy()),
        ])
    else:
        train_transform = test_transform

    # Honor the requested batch size; it was previously ignored in favor of a
    # hard-coded 128.
    trainloader = Dataloader(xtrain, ytrain, transform=train_transform, batch_size=bs)
    valloader = Dataloader(xval, yval, transform=test_transform, batch_size=bs)
    testloader = Dataloader(xtest, ytest, transform=test_transform, shuffle=False, batch_size=bs)

    return trainloader, valloader, testloader

# Loads only the test-set labels, used later for accuracy measurement and
# for building the confusion matrix.
def get_test_labels():
    """Return the emotion mapping and the integer test labels, one per crop.

    The labels are produced through an unshuffled ``Dataloader`` so that they
    are repeated per crop exactly as the loader repeats them.

    Returns:
        ``(emotion_mapping, labels)`` where ``labels`` is a 1-D array of class
        ids obtained with ``argmax`` over the one-hot labels.
    """
    fer2013, emotion_mapping = load_data(DATA_PATH)
    x_test, y_test = prepare_data(fer2013[fer2013['Usage'] == 'PublicTest'])

    mu, st = 0, 255

    test_transform = transforms.Compose([
        transforms.TenCrop(40),
        transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
        transforms.Lambda(lambda tensors: torch.stack([transforms.Normalize(mean=(mu,), std=(st,))(t) for t in tensors]).permute(0, 2, 3, 1).numpy()),
    ])

    # One batch covering the whole split; sized from the data rather than the
    # previously hard-coded 3589 so it stays correct if the split size differs.
    testloader_cm = Dataloader(x_test, y_test, batch_size=len(x_test), transform=test_transform, shuffle=False)
    _, ytest = testloader_cm[0]

    return emotion_mapping, np.argmax(ytest, axis=1)

In [None]:
# Build the train / validation / test loaders, requesting 128 samples per batch.
trainloader, valloader, testloader = get_dataloaders(bs=128)

In [None]:
# Reference: https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1

class AccHistory(Callback):
    """Records the training accuracy and the optimizer's learning rate per epoch.

    Attributes are reset at the start of every ``fit`` call:
        acc: ``logs['accuracy']`` for each finished epoch.
        lr: learning rate read from the model's optimizer after each epoch.
    """

    def on_train_begin(self, logs=None):
        self.acc = []
        self.lr = []

    def on_epoch_end(self, batch, logs=None):
        # Keras passes the epoch index as the first positional argument; the
        # parameter keeps its original name for compatibility.
        logs = logs or {}
        self.acc.append(logs.get('accuracy'))
        # Report the rate the optimizer actually holds rather than a
        # hypothetical step-decay value that may not be applied to training.
        current_lr = self._current_lr()
        self.lr.append(current_lr)
        print('lr:', current_lr)

    def _current_lr(self):
        """Return the optimizer's current learning rate as a Python float."""
        optimizer = self.model.optimizer
        lr = optimizer.learning_rate
        if isinstance(lr, keras.optimizers.schedules.LearningRateSchedule):
            # Schedule objects must be evaluated at the current step.
            lr = lr(optimizer.iterations)
        # NOTE(review): a legacy `decay=` optimizer argument is presumably not
        # reflected in this value — confirm against the optimizer in use.
        return float(keras.backend.get_value(lr))

def step_decay(epoch):
    """Step-decay schedule: start at 0.1 and halve the rate every 10 epochs.

    Args:
        epoch: Epoch index used to select the decay step.

    Returns:
        The learning rate for ``epoch``.
    """
    base_lr, decay_factor, step_size = 0.1, 0.5, 10.0
    completed_steps = math.floor(epoch / step_size)
    return base_lr * math.pow(decay_factor, completed_steps)

# Callback instances used during training.
# NOTE(review): `lrate` is not included in the callbacks list passed to
# model.fit in this notebook, so step_decay does not drive the optimizer there.
acc_history = AccHistory()
lrate = LearningRateScheduler(step_decay)

In [None]:
# Reference paper: Deep Residual Learning for Image Recognition (https://arxiv.org/abs/1512.03385)
# Code: Hands-On Machine Learning, 2nd edition (book), chapter 14, section 5
# The network is a stack of residual units, so the residual unit layer is implemented first.
class Residual_Unit(keras.layers.Layer):
    """Residual unit: two 3x3 conv + batch-norm stages plus a skip connection.

    Args:
        filters: Number of output feature maps of both convolutions.
        strides: Stride of the first convolution. When greater than 1, the skip
            path gets a 1x1 convolution + batch norm with the same stride so its
            output shape matches the main path.
        activation: Activation (name or callable) applied after the first
            batch norm and after the residual addition.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, filters, strides = 1, activation = 'relu', **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can reproduce them.
        self.filters = filters
        self.strides = strides
        self.activation = keras.activations.get(activation)
        # Main path: the basic convolution + batch-normalization structure.
        self.main_layers = [
            keras.layers.Conv2D(filters, 3, strides = strides, padding = "same", use_bias = False),
            keras.layers.BatchNormalization(),
            self.activation,
            keras.layers.Conv2D(filters, 3, strides = 1, padding = "same", use_bias = False),
            keras.layers.BatchNormalization()
        ]
        # Skip path: convolution + batch normalization only when strides > 1,
        # i.e. when input and output sizes differ and the input could not be
        # added to the main-path output directly.
        self.skip_layers = []
        if strides > 1:
            self.skip_layers = [
                keras.layers.Conv2D(filters, 1, strides = strides, padding = "same", use_bias = False),
                keras.layers.BatchNormalization()
            ]

    def get_config(self):
        """Return the constructor arguments in serializable form.

        The config holds exactly what ``__init__`` accepts, so a saved model
        can rebuild this layer when it is supplied via ``custom_objects``.
        """
        config = super().get_config().copy()
        config.update({
            'filters': self.filters,
            'strides': self.strides,
            'activation': keras.activations.serialize(self.activation),
        })
        return config

    def call(self, inputs):
        """Run inputs through the main and skip paths, add them, then activate."""
        x = inputs
        for layer in self.main_layers:
            x = layer(x)
        skip = inputs
        for layer in self.skip_layers:
            skip = layer(skip)
        return self.activation(x + skip)

In [None]:
model = keras.models.Sequential()
# Residual_Unit is defined as a layer, so each unit can be treated as a single
# layer and the network assembled with the Sequential class.
input_shape = (40, 40, 1)
model.add(keras.layers.Input(shape=input_shape))

# 3 units with 32 feature maps, 4 with 64, 6 with 128 and 3 with 256.
# The stride is 1 while the filter count stays the same and 2 whenever it
# changes; prev_filters tracks the previous unit's filter count.
prev_filters = 32
for filters in [32]*3 + [64]*4 + [128]*6 + [256]*3:
    strides = 1 if filters == prev_filters else 2
    model.add(Residual_Unit(filters, strides = strides))
    prev_filters = filters
# Global average pooling already yields a flat (batch, channels) tensor, so no
# separate Flatten layer is needed.
model.add(keras.layers.GlobalAvgPool2D())
model.add(keras.layers.Dropout(0.2))
# Keep the softmax output in float32: under the mixed_float16 policy set
# earlier, a float16 softmax can be numerically unstable.
model.add(keras.layers.Dense(7, activation = "softmax", dtype = "float32"))
# Reference paper: Deep Residual Learning for Image Recognition (https://arxiv.org/abs/1512.03385)
# Code: Hands-On Machine Learning, 2nd edition (book), chapter 14, section 5

In [None]:
opt = SGD(learning_rate=0.01, momentum=0.9, nesterov=True, decay=0.0001) # use stochastic gradient descent (Nesterov momentum)
# Monitor the validation loss and reduce the learning rate (x0.75) when it
# has not improved for 5 epochs.
lr_schedule = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.75,
    patience=5,
    verbose=True
)

# Labels are one-hot encoded, hence categorical cross-entropy.
model.compile(optimizer=opt, loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])

In [None]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
res__unit (Res_Unit)         (None, 40, 40, 32)        9760      
_________________________________________________________________
res__unit_1 (Res_Unit)       (None, 40, 40, 32)        18688     
_________________________________________________________________
res__unit_2 (Res_Unit)       (None, 40, 40, 32)        18688     
_________________________________________________________________
res__unit_3 (Res_Unit)       (None, 20, 20, 64)        58112     
_________________________________________________________________
res__unit_4 (Res_Unit)       (None, 20, 20, 64)        74240     
_________________________________________________________________
res__unit_5 (Res_Unit)       (None, 20, 20, 64)        74240     
_________________________________________________________________
res__unit_6 (Res_Unit)       (None, 20, 20, 64)        7

In [None]:
acc_model_path = 'models/best_acc_model.h5'
EPOCHS = 150
BATCH_SIZE = 128  # logged only; the loaders above were built with bs=128

# Save the full model whenever validation accuracy improves, and stop once it
# has not improved for 10 epochs.
acc_checkpoint = ModelCheckpoint(filepath=acc_model_path, monitor='val_accuracy', save_best_only=True, save_weights_only=False, save_freq="epoch")
early = EarlyStopping(monitor='val_accuracy', patience=10, verbose=1, mode='auto')

# Update the active run's config; assigning to `wandb.config` would only
# rebind the module attribute. allow_val_change keeps the cell re-runnable.
wandb.config.update({
  "epochs": EPOCHS,
  "batch_size": BATCH_SIZE
}, allow_val_change=True)

# `batch_size` is not passed to fit: the Sequence loaders already yield
# complete batches, and Keras does not take the argument for such inputs.
hist = model.fit(
    trainloader,
    validation_data=valloader,
    epochs=EPOCHS,
    callbacks=[acc_checkpoint, early, acc_history, lr_schedule, WandbCallback()],
)

wandb.finish()

2022-05-28 11:46:23.786929: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/150


2022-05-28 11:46:29.378344: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


lr: 0.1
Epoch 2/150
lr: 0.1
Epoch 3/150
lr: 0.1
Epoch 4/150
lr: 0.1
Epoch 5/150
lr: 0.1
Epoch 6/150
lr: 0.1
Epoch 7/150
lr: 0.1
Epoch 8/150
lr: 0.1
Epoch 9/150
lr: 0.1
Epoch 10/150
lr: 0.05
Epoch 11/150
lr: 0.05
Epoch 12/150
lr: 0.05
Epoch 13/150
lr: 0.05
Epoch 14/150
lr: 0.05
Epoch 15/150
lr: 0.05
Epoch 16/150
lr: 0.05
Epoch 17/150
lr: 0.05
Epoch 18/150
lr: 0.05
Epoch 19/150
lr: 0.05

Epoch 00019: ReduceLROnPlateau reducing learning rate to 0.007499999832361937.
Epoch 20/150
lr: 0.025
Epoch 21/150
lr: 0.025
Epoch 22/150
lr: 0.025
Epoch 23/150
lr: 0.025
Epoch 24/150
lr: 0.025

Epoch 00024: ReduceLROnPlateau reducing learning rate to 0.005624999874271452.
Epoch 25/150
lr: 0.025
Epoch 26/150
lr: 0.025
Epoch 27/150
lr: 0.025
Epoch 28/150
lr: 0.025
Epoch 29/150
lr: 0.025
Epoch 30/150
lr: 0.0125
Epoch 31/150
lr: 0.0125
Epoch 32/150
lr: 0.0125

Epoch 00032: ReduceLROnPlateau reducing learning rate to 0.004218749818392098.
Epoch 33/150
lr: 0.0125
Epoch 34/150
lr: 0.0125
Epoch 35/150
lr: 0.012

In [None]:
# Evaluate on the test loader; with the compiled metrics this yields
# [loss, accuracy].
# NOTE(review): this rebinds `hist`, which previously held the value returned
# by model.fit — confirm nothing later expects the training history.
hist = model.evaluate(testloader)
print(hist)

[1.0370680093765259, 0.6714405417442322]
