In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
import sys
import numpy as np
import random

In [3]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [4]:
import torchvision
import torchvision.transforms as transforms

In [5]:
from torch.utils.tensorboard import SummaryWriter

In [6]:
import warnings
warnings.filterwarnings(action='ignore')

## Data

In [7]:
path =  '../Preprocessing/Data/'

img_data = np.load(path + 'img_data.npy')
img_label = np.load(path + 'img_age.npy')

In [8]:
data_size = len(img_data)
rand_idx = np.random.choice(data_size , data_size , replace = False)

train_size = int(0.8*data_size)

In [9]:
img_data = img_data[rand_idx]
img_label = img_label[rand_idx]

In [10]:
img_train = img_data[:train_size]
label_train = img_label[:train_size]

In [11]:
img_test = img_data[train_size:]
label_test = img_label[train_size:]

In [12]:
print('Train Data Shape\n')

print(img_train.shape)
print(label_train.shape)

Train Data Shape

(2160, 7, 512, 384, 3)
(2160, 7, 1)


In [13]:
print('Test Data Shape\n')

print(img_test.shape)
print(label_test.shape)

Test Data Shape

(540, 7, 512, 384, 3)
(540, 7, 1)


## Reshape

In [14]:
height = 512
width = 384
channel = 3
a_class = 3

In [15]:
img_train = img_train.reshape(-1 , height , width , channel)
label_train = label_train.reshape(-1,)

img_test = img_test.reshape(-1 , height , width , channel)
label_test = label_test.reshape(-1,)

In [16]:
img_size = 288
re_size = 224
batch_size = 32

mid_h = int(height/2)
mid_w = int(width/2)
mid_s = int(img_size/2)

# crop image of center : image shape (18900 , 256 , 256 , 3)
img_train = img_train[: , 
                      mid_h - mid_s : mid_h + mid_s , 
                      mid_w - mid_s : mid_w + mid_s ,
                      :]

img_test = img_test[: , 
                    mid_h - mid_s : mid_h + mid_s , 
                    mid_w - mid_s : mid_w + mid_s ,
                    :]

## Data Loader

In [17]:
class ProjectDataset(Dataset) :

    def __init__(self , data , label , class_size) :

        super(Dataset , self).__init__()

        self.data = np.transpose(data , (0,3,1,2)) # change channel first (for pytorch)
        self.label = np.eye(class_size)[label.astype('int32')] # one hot vector

    def __len__(self) :

        data_len = self.data.shape[0]

        return data_len

    def __getitem__(self , idx) :

        data_idx = self.data[idx]
        label_idx = self.label[idx]
        
        # return dict type
        sample_idx = {'image' : data_idx , 'label' : label_idx}
        
        return sample_idx

In [18]:
# train dataset
image_train_dset = ProjectDataset(img_train, label_train , a_class)

# train data loader
image_train_loader = DataLoader(image_train_dset, 
                                batch_size=batch_size,
                                shuffle=True ,
                                num_workers=4 ,
                                drop_last=True)

# test dataset
image_test_dset = ProjectDataset(img_test, label_test , a_class)

# test data loader
image_test_loader = DataLoader(image_test_dset,  
                               batch_size=batch_size,
                               shuffle=False , 
                               num_workers=4 ,
                               drop_last=True)

## Device

In [19]:
USE_CUDA = torch.cuda.is_available()
random.seed(20210905)
torch.cuda.manual_seed_all(20210905)

device = torch.device("cuda" if USE_CUDA else "cpu") 

## Model Parameter

In [20]:
# 논문 기반 

# Resnet Convolution layer 수
layer_dim = [2,3,5,2]
# Resnet Convolution Filter 갯수
ch_dim = [64,128,256,512]
start_kernal = 7 # 시작 Kernal 수
kernal_size = 3 # Convolution Kernal 수

## Model

In [21]:
class Conv2ResidualSame(nn.Module) :

    # input channel and output channel are smae
    def __init__(self, ch_size , k_size) :

        super(Conv2ResidualSame , self).__init__()

        pad_size = int(k_size/2)
        # 출력 모양와 입력 모양이 같게 하기 위해 Padding Size를 Kernal 사이즈의 절반으로 설정
        self.c_res = nn.Sequential(nn.Conv2d(ch_size,ch_size,k_size,padding=pad_size,stride=1),
                                   nn.ReLU(),
                                   nn.Conv2d(ch_size,ch_size,k_size,padding=pad_size,stride=1))
        
        self.init_param()

    # 파라미터 초기화
    def init_param(self) :

        for m in self.modules() :
            if isinstance(m , nn.Conv2d) :

                nn.init.kaiming_normal_(m.weight)
                nn.init.zeros_(m.bias)


    def forward(self , in_tensor) :
        
        out_tensor = self.c_res(in_tensor)
        # 입력과 출력결과를 더함
        res_tensor = torch.add(out_tensor , in_tensor)

        return res_tensor

In [22]:
class Conv2ResidualDiff(nn.Module) :

    # input channel and output channel are different
    def __init__(self, in_ch , out_ch , k_size) :

        super(Conv2ResidualDiff , self).__init__()

        pad_size = int(k_size/2)

        # stride size is 2 and channel size is doubled 
        self.c_res = nn.Sequential(nn.Conv2d(in_ch,out_ch,k_size,padding=pad_size,stride=2),
                                   nn.ReLU(),
                                   nn.Conv2d(out_ch,out_ch,k_size,padding=pad_size,stride=1))
        

        # 1 X 1 Convolution to match input dimension and out dimension
        self.conv11 = nn.Conv2d(in_ch,out_ch,1,stride=2) 

        self.init_param()

    def init_param(self) :

        for m in self.modules() :
            if isinstance(m , nn.Conv2d) :

                nn.init.kaiming_normal_(m.weight)
                nn.init.zeros_(m.bias)

    def forward(self , in_tensor) :

        # 출력 모양의 채널 갯수가 커지고 입출력 길이가 절반으로 감소
        out_tensor = self.c_res(in_tensor)
        # 입력과 출력의 모양을 맞추기 위해서 1X1 Convolution을 이용해서 채널 수와 입출력 모양을 일치시킴
        project_tensor = self.conv11(in_tensor)

        # 입력과 출력을 더함
        res_tensor = torch.add(out_tensor , project_tensor)

        return res_tensor

In [23]:
class Conv2Block(nn.Module) :

    def __init__(self, layer_size , in_ch , ch_size , k_size) :

        super(Conv2Block , self).__init__()

        self.layer_size = layer_size

        self.c_block = nn.ModuleList()
        # Convolutional Residual block (output channel is doubled and feature map size is halvec) 
        self.c_block.append(Conv2ResidualDiff(in_ch,ch_size,k_size))

        # Convolutional Residual block (input channel and output channel is same and also with feature map size)
        for i in range(layer_size) :
            self.c_block.append(Conv2ResidualSame(ch_size,k_size))

    def forward(self , in_tensor) :

        tensor_ptr = in_tensor

        for i in range(self.layer_size) :

            tensor_ptr = self.c_block[i](tensor_ptr)

        return tensor_ptr

In [24]:
class ResNet(nn.Module) :

    def __init__(self, layer_list , image_size , ch_list , 
                 in_kernal , kernal_size , class_size) :

        super(ResNet , self).__init__()

        self.ch_list = ch_list
        self.resnet = nn.ModuleList()

        size_ptr = image_size
        # first convolutional layer
        self.resnet.append(nn.Conv2d(3,ch_dim[0],in_kernal,stride=2,padding=int(in_kernal/2))) 
        size_ptr /= 2

        ch_ptr = ch_dim[0]
        for i in range(len(layer_list)) :

            # convolution residual block
            convblock = Conv2Block(layer_list[i],ch_ptr,ch_dim[i],kernal_size)
            self.resnet.append(convblock) 
            self.resnet.append(nn.BatchNorm2d(ch_dim[i])) # batch normalization

            size_ptr /= 2
            ch_ptr = ch_dim[i]

        self.avg_pool = nn.AvgPool2d(int(size_ptr)) # final average pooling layer
        self.o_layer = nn.Linear(ch_list[-1] , class_size)
        
        self.layer_size = len(self.resnet)

        self.init_param()

    def init_param(self) :

        nn.init.kaiming_normal_(self.o_layer.weight)
        nn.init.zeros_(self.o_layer.bias) 

    def forward(self , in_tensor) :

        batch_size = in_tensor.shape[0]
        tensor_ptr = in_tensor

        for i in range(self.layer_size) :

            tensor_ptr = self.resnet[i](tensor_ptr)

        avg_tensor = self.avg_pool(tensor_ptr)
        avg_tensor = torch.reshape(avg_tensor , (batch_size , self.ch_list[-1]))

        o_tensor = self.o_layer(avg_tensor)
    
        return o_tensor

In [25]:
epoch_size = 200
min_loss = 1e+7
init_lr = 1e-4
early_count = 0
log_count = 0

scalor = transforms.Compose([transforms.Resize((re_size,re_size)),
                             transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.2, 0.2, 0.2))])

resnet = ResNet(layer_dim , re_size , ch_dim , start_kernal , kernal_size , a_class).to(device)

optimizer = optim.Adam(resnet.parameters() , lr = init_lr)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 3 , gamma = 0.8)

### Accuracy Function

In [26]:
def acc_fn(y_output , y_label) :

    # get max argument of output 
    _ , y_out_arg = torch.max(y_output , dim = -1)
    _ , y_label_arg = torch.max(y_label , dim = -1)

    # check if output max argument if same as output label
    y_acc = (y_out_arg == y_label_arg).float()    
    y_acc = torch.mean(y_acc)

    return y_acc

### Loss Function

In [27]:
def loss_fn(y_output , y_label) :
    
    y_prob = F.softmax(y_output , dim = -1)
    y_log = -torch.log(y_prob + 1e-12)
    
    y_loss = torch.multiply(y_log , y_label)
    y_loss = torch.sum(y_loss , dim = -1)
    
    y_loss = torch.mean(y_loss)
    
    return y_loss

## Writer

In [28]:
writer = SummaryWriter('runs/resnet/normal/')

## Training

In [29]:
def progressLearning(value, endvalue, loss , acc , bar_length=50):
      
    percent = float(value + 1) / endvalue
    arrow = '-' * int(round(percent * bar_length)-1) + '>'
    spaces = ' ' * (bar_length - len(arrow))

    sys.stdout.write("\rPercent: [{0}] {1}/{2} \t Loss : {3:.3f} , Acc : {4:.3f}".format(arrow + spaces, value+1 , endvalue , loss , acc))
    sys.stdout.flush()

In [30]:
def evaluate(model , scalor  , test_loader , device) :

    loss = 0.0
    acc = 0.0

    with torch.no_grad() :

        model.eval()

        for img_data in test_loader :
            
            img_in , img_label = img_data['image'] , img_data['label']

            img_in = img_in.float().to(device) / 255
            img_in = scalor(img_in) # scalor
            
            img_label = img_label.float().to(device)

            img_output = model(img_in)

            loss_idx = loss_fn(img_output , img_label)
            acc_idx = acc_fn(img_output , img_label)

            loss += loss_idx
            acc += acc_idx

        model.train()

    loss /= len(test_loader)
    acc /= len(test_loader)

    return loss , acc

In [31]:
for epoch in range(epoch_size) :

    idx = 0
    
    print('Epoch : %d \t Learning Rate : %e' %(epoch , optimizer.param_groups[0]['lr'])) 
    
    for img_data in image_train_loader : 

        img_in , img_label = img_data['image'] , img_data['label']
        
        img_in = img_in.float().to(device) / 255
        img_in = scalor(img_in) # scalor
        
        optimizer.zero_grad()
        
        img_label = img_label.float().to(device)
        
        img_output = resnet(img_in) 
        
        loss = loss_fn(img_output , img_label)  
        acc = acc_fn(img_output , img_label) 
        
        loss.backward()
        optimizer.step()

        if (idx + 1) % 10 == 0 :
            
            writer.add_scalar('train/loss' , loss.item() , log_count)
            writer.add_scalar('train/acc' , acc.item() , log_count)
            log_count += 1
        
        progressLearning(idx , len(image_train_loader) , loss, acc) 

        idx += 1 

    test_loss, test_acc = evaluate(resnet, scalor , image_test_loader , device) 
    
    writer.add_scalar('test/loss' , test_loss.item() , epoch)
    writer.add_scalar('test/acc' , test_acc.item() , epoch)
    
    if test_loss < min_loss :
        
        min_loss = test_loss
        torch.save({'epoch' : (epoch) ,  
                    'model_state_dict' : resnet.state_dict() , 
                    'loss' : test_loss.item() , 
                    'acc' : test_acc.item()} , 
                    f'./Model/checkpoint_resnet_normal.pt')        
        early_count = 0 
        
    else :
                
        early_count += 1
        if early_count >= 5 :      
            print('\nTraining Stopped')
            break

    scheduler.step()
    print('\nValidation Loss : %.4f \t Validation Acc : %.4f\n' %(test_loss , test_acc))




Epoch : 0 	 Learning Rate : 1.000000e-04
Percent: [------------------------------------------------->] 472/472 	 Loss : 0.495 , Acc : 0.812
Validation Loss : 1.0957 	 Validation Acc : 0.5776

Epoch : 1 	 Learning Rate : 1.000000e-04
Percent: [------------------------------------------------->] 472/472 	 Loss : 0.388 , Acc : 0.875
Validation Loss : 0.4144 	 Validation Acc : 0.8382

Epoch : 2 	 Learning Rate : 1.000000e-04
Percent: [------------------------------------------------->] 472/472 	 Loss : 0.163 , Acc : 0.938
Validation Loss : 0.4084 	 Validation Acc : 0.8700

Epoch : 3 	 Learning Rate : 8.000000e-05
Percent: [------------------------------------------------->] 472/472 	 Loss : 0.190 , Acc : 0.906
Validation Loss : 0.5133 	 Validation Acc : 0.8385

Epoch : 4 	 Learning Rate : 8.000000e-05
Percent: [------------------------------------------------->] 472/472 	 Loss : 0.083 , Acc : 0.938
Validation Loss : 0.5086 	 Validation Acc : 0.8493

Epoch : 5 	 Learning Rate : 8.000000e-05

In [32]:
!tensorboard --logdir=./runs/resnet/ --port=6006 --bind_all

2021-09-05 23:01:56.862055: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-05 23:01:56.867681: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcusolver.so.11'; dlerror: libcusolver.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2021-09-05 23:01:56.868442: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2021-09-05 23:01:56.868477: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1835] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if 