In [None]:
import itertools
import math
import os
import random
import shutil

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, BatchNormalization, Dropout, Reshape,  \
                              GlobalAveragePooling2D, AveragePooling2D, Input, Concatenate, Layer

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.optimizers.schedules import CosineDecay
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler, Callback
from tensorflow.keras.preprocessing import image
from tensorflow.keras import layers
from tensorflow.keras.mixed_precision import experimental as mixed_precision
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import Sequence
from keras.utils import np_utils
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import OneHotEncoder

# Used by the Dataloader / get_dataloaders cells (PIL images + torchvision transforms).
import torch
from PIL import Image
from torchvision import transforms

In [None]:
# Prepare the GPU: enable memory growth on every physical GPU that TensorFlow sees,
# then report how many physical and logical GPUs are available.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Report the configuration error instead of aborting the notebook.
        print(e)

2022-05-28 19:50:02.975336: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-28 19:50:03.140439: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-28 19:50:03.141317: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


1 Physical GPUs, 1 Logical GPUs


2022-05-28 19:50:03.150974: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-05-28 19:50:03.151326: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-28 19:50:03.152069: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-28 19:50:03.152728: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA 

In [None]:
import wandb
from wandb.keras import WandbCallback

# Resolve the W&B API key without hard-coding it in the notebook: reuse
# `wandb_api_key` when an earlier cell already defined it, otherwise fall back to
# the WANDB_API_KEY environment variable (None lets wandb use stored credentials).
wandb_api_key = globals().get("wandb_api_key") or os.environ.get("WANDB_API_KEY")
wandb.login(key=wandb_api_key)
wandb.init(project="googlenet-fer-keras-3")

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mhhan14[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
!tar xzvf ../input/challenges-in-representation-learning-facial-expression-recognition-challenge/fer2013.tar.gz
%mkdir ./models

fer2013/fer2013.csv
fer2013/README
fer2013/fer2013.bib
fer2013/


In [None]:
# Location of the FER2013 CSV extracted from the archive above.
DATA_PATH = "./fer2013/fer2013.csv"

In [None]:
# List the GPUs available on this machine.
!nvidia-smi -L

GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-f27de642-7484-4a88-83b5-fe4547a9f948)


In [None]:
# Enable mixed precision for more efficient GPU use.
# Uses the stable tf.keras.mixed_precision API rather than the deprecated
# `experimental` one; the resulting global policy is the same 'mixed_float16'.
policy = tf.keras.mixed_precision.Policy('mixed_float16')
tf.keras.mixed_precision.set_global_policy(policy)

2022-05-28 19:50:18.328538: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [None]:
# Reference: https://minimin2.tistory.com/100
# Dataloader class that re-applies data augmentation on every epoch.

class Dataloader(Sequence):
    """Batch provider that runs the transform each time a sample is fetched.

    Every sample is turned into one or more crops by ``transform``; the crops of
    all samples in a batch are flattened into a single leading axis, and the
    sample's label is repeated once per crop so images and labels stay aligned.

    Args:
        x_set: indexable collection of 2-D image arrays.
        y_set: indexable collection of per-sample label vectors.
        transform: optional callable (a torchvision transform pipeline in this
            notebook) applied to the PIL image. It must return an array-like
            whose first axis enumerates crops. When omitted, the image is
            treated as a single crop with a trailing channel axis.
        batch_size: number of source samples per batch (before crop expansion).
        shuffle: whether to reshuffle the sample order at the end of each epoch.
    """

    def __init__(self, x_set, y_set, transform=None, batch_size=64, shuffle=True):
        super().__init__()
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.transform = transform  # transform pipeline used for data augmentation
        self.on_epoch_end()

    def img_preprocess(self, idx):
        """Return ``(crops, labels)`` for sample ``idx``.

        ``crops`` has the crop axis first; ``labels`` repeats the sample's label
        once per crop (previously hard-coded to 10, which only matched TenCrop).
        """
        img = Image.fromarray(np.array(self.x[idx]))
        if self.transform:
            crops = np.asarray(self.transform(img))
        else:
            # No transform: expose the image as one crop of shape (1, h, w, 1).
            crops = np.asarray(img)[np.newaxis, ..., np.newaxis]
        label = np.repeat(np.asarray(self.y[idx])[np.newaxis, ...], len(crops), axis=0)
        return crops, label

    def __len__(self):
        """Number of batches per epoch (the last batch may be smaller)."""
        return math.ceil(len(self.x) / self.batch_size)

    def __getitem__(self, idx):
        """Load batch ``idx`` and return the augmented images with their labels."""
        batch_indices = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        samples = [self.img_preprocess(i) for i in batch_indices]

        batch_x = np.array([crops for crops, _ in samples])
        batch_y = np.array([labels for _, labels in samples])

        # Merge the (sample, crop) axes into one batch axis.
        batch_x = batch_x.reshape((-1,) + batch_x.shape[2:])
        batch_y = batch_y.reshape((-1,) + batch_y.shape[2:])

        return batch_x, batch_y

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is enabled."""
        self.indices = np.arange(len(self.x))
        if self.shuffle:
            np.random.shuffle(self.indices)

In [None]:
# Reference: https://github.com/usef-kh/fer/blob/master/data/fer2013.py
def load_data(path=DATA_PATH):
    """Read the FER2013 CSV at ``path``.

    Returns:
        A ``(dataframe, emotion_mapping)`` tuple, where ``emotion_mapping`` maps
        each integer label id to its emotion name.
    """
    emotion_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
    emotion_mapping = dict(enumerate(emotion_names))
    return pd.read_csv(path), emotion_mapping

# Extract the images and labels from the dataframe and return them as arrays.
def prepare_data(data, num_classes=7):
    """Convert FER2013 rows into image and one-hot label arrays.

    Args:
        data: dataframe with an ``emotion`` column (integer class ids) and a
            ``pixels`` column (space-separated pixel values, 48*48 per row).
        num_classes: width of the one-hot encoding. It is fixed rather than
            inferred from the rows, so every split gets the same label width
            even when a class is absent from it.

    Returns:
        ``(image_array, image_label)``: a float array of shape ``(len(data), 48, 48)``
        and a float one-hot array of shape ``(len(data), num_classes)``.

    Raises:
        ValueError: if a label falls outside ``[0, num_classes)``.
    """
    labels = np.array(list(map(int, data['emotion'])), dtype=int)
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(
            "emotion labels must lie in [0, %d); got range [%d, %d]"
            % (num_classes, labels.min(), labels.max())
        )
    # Row i of the identity matrix is the one-hot vector for class i.
    image_label = np.eye(num_classes)[labels]

    image_array = np.zeros(shape=(len(data), 48, 48))
    # Iterate the column directly so a non-unique dataframe index cannot break the lookup.
    for i, pixels in enumerate(data['pixels']):
        image_array[i] = np.fromstring(pixels, dtype=int, sep=' ').reshape(48, 48)

    return image_array, image_label


def get_dataloaders(path=DATA_PATH, bs=64, augment=True):
    """Build the train / validation / test loaders for FER2013.

    The 'Training' rows feed the train loader, 'PrivateTest' the validation
    loader and 'PublicTest' the test loader.

    Args:
        path: CSV file to read.
        bs: number of source images per batch for all three loaders
            (previously ignored in favour of a hard-coded 128).
        augment: when True the train loader uses the random augmentation
            pipeline; otherwise it uses the same deterministic pipeline as
            validation/test.

    Returns:
        ``(trainloader, valloader, testloader)``.
    """
    fer2013, _ = load_data(path)

    xtrain, ytrain = prepare_data(fer2013[fer2013['Usage'] == 'Training'])
    xval, yval = prepare_data(fer2013[fer2013['Usage'] == 'PrivateTest'])
    xtest, ytest = prepare_data(fer2013[fer2013['Usage'] == 'PublicTest'])

    # Normalize with mean 0 / std 255, i.e. divide the pixel values by 255.
    mu, st = 0, 255

    test_transform = transforms.Compose([
        transforms.TenCrop(40),
        transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
        # Normalize each crop, then move channels last and convert to numpy.
        transforms.Lambda(lambda tensors: torch.stack([transforms.Normalize(mean=(mu,), std=(st,))(t) for t in tensors]).permute(0, 2, 3, 1).numpy()),
    ])

    if augment:
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(48, scale=(0.8, 1.2)), # randomly rescale
            transforms.RandomApply([transforms.RandomAffine(0, translate=(0.2, 0.2))], p=0.5), # randomly translate
            transforms.RandomHorizontalFlip(), # randomly horizontal flip
            transforms.RandomApply([transforms.RandomRotation(10)], p=0.5), # randomly rotate
            transforms.TenCrop(40), # ten-crop
            transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
            transforms.Lambda(lambda tensors: torch.stack([transforms.Normalize(mean=(mu,), std=(st,))(t) for t in tensors])), # normalize
            # Random erasing per crop, then permute to channels-last so the result
            # matches the Keras model input layout (batch size, 40, 40, 1).
            transforms.Lambda(lambda tensors: torch.stack([transforms.RandomErasing(p=0.5)(t) for t in tensors]).permute(0, 2, 3, 1).numpy()),
        ])
    else:
        train_transform = test_transform

    trainloader = Dataloader(xtrain, ytrain, transform=train_transform, batch_size=bs)
    valloader = Dataloader(xval, yval, transform=test_transform, batch_size=bs)
    testloader = Dataloader(xtest, ytest, transform=test_transform, shuffle=False, batch_size=bs)

    return trainloader, valloader, testloader

# Loads only the test-set labels, used later for accuracy measurement and the confusion matrix.
def get_test_labels():
    """Return ``(emotion_mapping, labels)`` for the 'PublicTest' split.

    The labels are produced through the same ten-crop loader used for testing
    (unshuffled), so the returned class-id array has one entry per crop, in the
    order the loader emits them.
    """
    fer2013, emotion_mapping = load_data(DATA_PATH)
    x_test, y_test = prepare_data(fer2013[fer2013['Usage'] == 'PublicTest'])

    # Normalize with mean 0 / std 255, i.e. divide the pixel values by 255.
    mu, st = 0, 255

    test_transform = transforms.Compose([
        transforms.TenCrop(40),
        transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
        transforms.Lambda(lambda tensors: torch.stack([transforms.Normalize(mean=(mu,), std=(st,))(t) for t in tensors]).permute(0, 2, 3, 1).numpy()),
    ])

    # One batch spanning the whole split; sized from the data instead of the
    # previously hard-coded 3589 so a different split size is not truncated.
    testloader_cm = Dataloader(x_test, y_test, batch_size=max(len(x_test), 1), transform=test_transform, shuffle=False)
    _, ytest = testloader_cm[0]

    return emotion_mapping, np.argmax(ytest, axis=1)

In [None]:
# Build the train / validation / test loaders, requesting 128 source images per batch.
trainloader, valloader, testloader = get_dataloaders(bs=128)

In [None]:
# Reference: https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1

class AccHistory(Callback):
    """Records the training accuracy and the scheduled learning rate of each epoch.

    Attributes:
        acc: per-epoch ``logs['accuracy']`` values.
        lr: per-epoch learning rate computed by ``step_decay``.
    """

    def on_train_begin(self, logs=None):
        """Reset the histories at the start of training."""
        self.acc = []
        self.lr = []

    def on_epoch_end(self, batch, logs=None):
        """Store and print the accuracy / learning rate of the epoch that just ended.

        ``batch`` keeps its original name for backward compatibility, but Keras
        passes the 0-based epoch index here. The rate is computed from that index
        so it is the one ``step_decay`` yields for this epoch; the previous
        ``len(self.acc)`` lookup reported the next epoch's rate instead.
        """
        logs = logs or {}
        epoch_lr = step_decay(batch)
        self.acc.append(logs.get('accuracy'))
        self.lr.append(epoch_lr)
        print('lr:', epoch_lr)

def step_decay(epoch):
    """Step learning-rate schedule: start at 0.1 and halve every 10 epochs.

    Args:
        epoch: epoch index the rate is requested for.

    Returns:
        The learning rate as a float.
    """
    base_rate, factor, period = 0.1, 0.5, 10.0
    completed_drops = math.floor(epoch / period)
    return base_rate * math.pow(factor, completed_drops)

# Callback that records and prints the learning-rate value
acc_history = AccHistory()
# Learning-rate schedule: step_decay multiplies the rate by 0.5 every 10 epochs
lrate = LearningRateScheduler(step_decay)

In [None]:
## Reference: https://github.com/hskang9/Googlenet/blob/master/keras/googlenet.py -- LRN code
## Reference: https://sike6054.github.io/blog/paper/second-post/
class LRN2D(Layer):
    """Local response normalization across the channel axis.

    This code is adapted from pylearn2.
    License at: https://github.com/lisa-lab/pylearn2/blob/master/LICENSE.txt

    The normalization is implemented in ``call`` so Keras actually applies it.
    The previous version only defined the legacy ``get_output`` hook, which
    tf.keras never invokes, so the layer passed its input through unchanged.

    Args:
        alpha: scale applied to the sum of squares.
        k: additive bias inside the normalization term.
        beta: exponent of the normalization term.
        n: odd window size across channels.
        name: optional layer name, forwarded to the Keras base class.
        **kwargs: remaining base ``Layer`` arguments (e.g. ``dtype``, ``trainable``),
            needed so ``from_config`` can rebuild the layer.

    Raises:
        NotImplementedError: if ``n`` is even.
    """

    def __init__(self, alpha=0.0001, k=1, beta=0.75, n=3, name=None, **kwargs):
        if n % 2 == 0:
            raise NotImplementedError("LRN2D only works with odd n. n provided: " + str(n))
        super(LRN2D, self).__init__(name=name, **kwargs)
        self.alpha = alpha
        self.k = k
        self.beta = beta
        self.n = n
        self.test = name  # kept for backward compatibility with code reading `.test`

    def call(self, inputs):
        """Apply LRN over a window of ``n`` channels centred on each channel."""
        # Compute in float32 and cast back, so reduced-precision inputs (e.g. under
        # a mixed_float16 policy) do not overflow when squared.
        x = tf.cast(inputs, tf.float32)
        normalized = tf.nn.local_response_normalization(
            x,
            depth_radius=self.n // 2,  # window of n channels = 2 * radius + 1
            bias=self.k,
            alpha=self.alpha,
            beta=self.beta,
        )
        return tf.cast(normalized, inputs.dtype)

    def get_config(self):
        """Return a config that ``from_config`` can turn back into an equal layer."""
        config = super(LRN2D, self).get_config()
        config.update({"alpha": self.alpha,
                       "k": self.k,
                       "beta": self.beta,
                       "n": self.n})
        return config


def _conv_bn(tensor, filters, kernel_size):
    """ReLU convolution (same padding, stride 1, he_normal init) followed by batch norm."""
    conv = Conv2D(filters, kernel_size=kernel_size, padding='same', strides=(1, 1),
                  activation='relu', kernel_initializer='he_normal')(tensor)
    return BatchNormalization()(conv)


def inception(input_tensor, filter_channels):
    """Build one inception module on ``input_tensor`` and return the concatenated branches.

    Args:
        input_tensor: tensor feeding all four branches.
        filter_channels: five filter counts, in order
            ``(1x1, 3x3 reduce, 5x5 reduce, 5x5, pool projection)``.

    Returns:
        The channel-wise concatenation of the four branch outputs.
    """
    n_1x1, n_3x3_reduce, n_5x5_reduce, n_5x5, n_pool_proj = filter_channels

    # Branch 1: 1x1 convolution.
    one_by_one = _conv_bn(input_tensor, n_1x1, (1, 1))

    # Branch 2: 1x1 "3x3 reduce" convolution only.
    # NOTE(review): no 3x3 convolution follows the reduce step here — confirm this is intended.
    three_reduce = _conv_bn(input_tensor, n_3x3_reduce, (1, 1))

    # Branch 3: 1x1 reduce followed by a 5x5 convolution.
    five_by_five = _conv_bn(input_tensor, n_5x5_reduce, (1, 1))
    five_by_five = _conv_bn(five_by_five, n_5x5, (5, 5))

    # Branch 4: 3x3 max pooling (stride 1) followed by a 1x1 projection.
    pooled = MaxPooling2D(pool_size=(3, 3), padding='same', strides=1)(input_tensor)
    pooled = _conv_bn(pooled, n_pool_proj, (1, 1))

    return Concatenate()([one_by_one, three_reduce, five_by_five, pooled])



# GoogLeNet modified to the CIPIIPIIPIIPF structure, following the paper.
# Seven inception modules are used.
# n starts at 32 and grows by 32 with every inception module.
# For a given n, the 1x1, 3x3 reduce, 5x5 reduce, 5x5 and pool-projection filter
# counts are 3/4n, 1/2n, 1/8n, 1/4n and 1/4n respectively.
def GoogLeNet(model_input, classes=7):
    """Build the modified GoogLeNet classifier on ``model_input``.

    Args:
        model_input: Keras input tensor.
        classes: number of output classes.

    Returns:
        A Keras ``Model`` mapping ``model_input`` to softmax class probabilities.
    """
    def widths(n):
        # (1x1, 3x3 reduce, 5x5 reduce, 5x5, pool projection) for a given n;
        # e.g. n=32 -> [24, 16, 4, 8, 8].
        return [3 * n // 4, n // 2, n // 8, n // 4, n // 4]

    # C: stem convolution, batch norm and local response normalization.
    stem = Conv2D(192, kernel_size=(3, 3), padding='same', activation='relu')(model_input)
    stem = BatchNormalization()(stem)
    stem = LRN2D(name='pool1/norm1')(stem)

    # I P
    x = inception(stem, widths(32))
    x = MaxPooling2D(pool_size=(3, 3), padding='same', strides=2)(x)

    # I I P
    x = inception(x, widths(64))
    x = inception(x, widths(96))
    x = MaxPooling2D(pool_size=(3, 3), padding='same', strides=2)(x)

    # I I P
    x = inception(x, widths(128))
    x = inception(x, widths(160))
    x = MaxPooling2D(pool_size=(3, 3), padding='same', strides=(2, 2))(x)

    # I I
    x = inception(x, widths(192))
    x = inception(x, widths(224))

    # F: global average pooling and the classifier head.
    x = GlobalAveragePooling2D()(x)
    x = Dense(1000, activation='relu')(x)
    x = Dropout(0.4)(x)
    # Keep the softmax in float32 so the probabilities stay numerically stable
    # when a mixed_float16 policy is active; a no-op under the default float32 policy.
    model_output = Dense(classes, activation='softmax', dtype='float32', name='main_classifier')(x)

    return Model(model_input, model_output)

# Input: 40x40 images with a single channel.
input_shape = (40, 40, 1)

model_input = Input(shape=input_shape)

# Build the network with 7 output classes.
model = GoogLeNet(model_input, 7)

In [None]:
# NOTE(review): `decay` is set on the optimizer while the training cell also passes a
# LearningRateScheduler callback — confirm both are meant to act on the learning rate.
opt = SGD(learning_rate=0.1, momentum=0.9, nesterov=True, decay=0.0001) # uses stochastic gradient descent (SGD)
model.compile(optimizer=opt, loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 40, 40, 1)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 40, 40, 192)  1920        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 40, 40, 192)  768         conv2d[0][0]                     
__________________________________________________________________________________________________
lr_n2d (LRN2D)                  (None, 40, 40, 192)  0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [None]:
acc_model_path = 'models/best_acc_model.h5'

# Save the full model whenever validation accuracy improves.
acc_checkpoint = ModelCheckpoint(filepath=acc_model_path, monitor='val_accuracy', save_best_only=True, save_weights_only=False, save_freq="epoch")
# Stop once validation accuracy has not improved for 10 epochs.
early = EarlyStopping(monitor='val_accuracy', patience=10, verbose=1, mode='auto')

train_config = {
  "epochs": 150,
  "batch_size": 128
}
# Record the hyperparameters on the active run. Assigning a dict to `wandb.config`
# only rebinds the module attribute and leaves the run's config untouched.
wandb.config.update(train_config)

# `batch_size` is not passed to fit: the loaders already yield complete batches.
hist = model.fit(
    trainloader,
    validation_data=valloader,
    epochs=train_config["epochs"],
    callbacks=[acc_checkpoint, early, WandbCallback(), acc_history, lrate]
)

wandb.finish()

2022-05-28 19:50:31.548469: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/150


2022-05-28 19:50:38.293368: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005
2022-05-28 19:50:46.874651: I tensorflow/stream_executor/cuda/cuda_driver.cc:732] failed to allocate 7.01G (7530610688 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory


lr: 0.1
Epoch 2/150
lr: 0.1
Epoch 3/150
lr: 0.1
Epoch 4/150
lr: 0.1
Epoch 5/150
lr: 0.1
Epoch 6/150
lr: 0.1
Epoch 7/150
lr: 0.1
Epoch 8/150
lr: 0.1
Epoch 9/150
lr: 0.1
Epoch 10/150
lr: 0.05
Epoch 11/150
lr: 0.05
Epoch 12/150
lr: 0.05
Epoch 13/150
lr: 0.05
Epoch 14/150
lr: 0.05
Epoch 15/150
lr: 0.05
Epoch 16/150
lr: 0.05
Epoch 17/150
lr: 0.05
Epoch 18/150
lr: 0.05
Epoch 19/150
lr: 0.05
Epoch 20/150
lr: 0.025
Epoch 21/150
lr: 0.025
Epoch 22/150
lr: 0.025
Epoch 23/150
lr: 0.025
Epoch 24/150
lr: 0.025
Epoch 25/150
lr: 0.025
Epoch 26/150
lr: 0.025
Epoch 27/150
lr: 0.025
Epoch 28/150
lr: 0.025
Epoch 29/150
lr: 0.025
Epoch 30/150
lr: 0.0125
Epoch 31/150
lr: 0.0125
Epoch 32/150
lr: 0.0125
Epoch 33/150
lr: 0.0125
Epoch 34/150
lr: 0.0125
Epoch 35/150
lr: 0.0125
Epoch 36/150
lr: 0.0125
Epoch 37/150
lr: 0.0125
Epoch 38/150
lr: 0.0125
Epoch 39/150
lr: 0.0125
Epoch 40/150
lr: 0.00625
Epoch 41/150
lr: 0.00625
Epoch 42/150
lr: 0.00625
Epoch 43/150
lr: 0.00625
Epoch 44/150
lr: 0.00625
Epoch 45/150
lr: 

VBox(children=(Label(value='7.156 MB of 7.156 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▃▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇██████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
loss,█▆▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▄▅▄▆▇▇▆▇▇▇▇▇█▇▇████████████████████████
val_loss,█▅▄▅▂▂▂▃▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.70085
best_epoch,36.0
best_val_loss,0.90695
epoch,49.0
loss,0.79276
val_accuracy,0.67459
val_loss,0.91333


In [None]:
# Evaluate on the test loader; the result holds the loss followed by the accuracy metric.
# NOTE(review): this rebinds `hist`, replacing the History object returned by
# model.fit — confirm nothing later still needs the training history.
hist = model.evaluate(testloader)
print(hist)

[0.9683228135108948, 0.6629144549369812]
