In [1]:
import os
from glob import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torchvision import models

import random
from tqdm import tqdm
import cv2

base_path = '/home/lab17/jupyter_home/Data/product_image'
files = os.listdir(base_path)
print(files)

['Validation', 'Training']


# 하이퍼 파라미터

In [2]:
# 학습 변수
# device = torch.device("cuda:0")
device = torch.device('cuda')
batch_size = 8
class_n = len(os.listdir('/home/lab17/jupyter_home/Data/product_image/Training/image'))
# class_n = len(train_total['disease_code'].unique())
learning_rate = 5e-5
epochs = 300
save_path = 'model.pt'

# 이미지 전처리

In [3]:
base_path = '/home/lab17/jupyter_home/Data/product_image'

train_jpg = sorted(glob(os.path.join(base_path, 'Training/image/**/*.jpg')))
valid_jpg = sorted(glob(os.path.join(base_path, 'Validation/image/**/*.jpg')))

train_label = [int(t_img[len(base_path+'/Training/image/'):len(base_path+'/Training/image/')+5]) for t_img in train_jpg]
valid_label = [int(v_img[len(base_path+'/Validation/image/'):len(base_path+'/Validation/image/')+5]) for v_img in valid_jpg]

print('train_label count', len(set(train_label)))
print('valid_label count', len(set(valid_label)))
print('train_label length' , len(train_label))
print('valid_label length' , len(valid_label))


train_label count 76
valid_label count 76
train_label length 8664
valid_label length 1140


# 데이터셋 구성

In [4]:
# class Custom_dataset(Dataset):
#     def __init__(self, img_paths, labels, mode='train'):
#         self.img_paths = img_paths
#         self.labels = labels
#         self.mode=mode
#     def __len__(self):
#         return len(self.img_paths)
#     def __getitem__(self, idx):
#         img = self.img_paths[idx]
#         img = img.resize((256,256))
# #         if self.mode == 'train':
# #             train_transform = transforms.Compose([
# #                     transforms.ToTensor(),
# #                     transforms.Normalize(mean = [0.433038, 0.403458, 0.394151],
# #                                         std = [0.181572, 0.174035, 0.163234]),
# #                     transforms.RandomAffine((-45, 45)),

# #                     transforms.RandomVerticalFlip(p=0.5),   # - 이미지를 랜덤으로 수직으로 뒤집는다. p =0이면 뒤집지 않는다.
# #                     transforms.RandomHorizontalFlip(p=0.5), # - 이미지를 랜덤으로 수평으로 뒤집는다.
# #                     transforms.RandomRotation((0,80))       #  이미지를 랜덤으로 degrees 각도로 회전한다.

# #                 ])
# #                 img = train_transform(img)
# #         if self.mode == 'test':
# #           test_transform = transforms.Compose([
# #                 transforms.ToTensor(),
# #                 transforms.Normalize(mean = [0.418256, 0.393101, 0.386632],
# #                                      std = [0.195055, 0.190053, 0.185323]),
              
# #             ])
# #           img = test_transform(img)

#         label = self.labels[idx]
#         return img, label

In [45]:
class Custom_dataset(Dataset):
    def __init__(self, files, labels=None, mode='train'):
        self.mode = mode
        self.files = files
        if mode == 'train':
            self.labels = labels
            
    def __len__(self):
        return len(self.files)
    
    def __getitem__(self, i):
        if self.mode == 'train':
            img = cv2.imread(self.files[i])
            img = cv2.resize(img, dsize=(256, 256), interpolation=cv2.INTER_AREA)
            img = img.astype(np.float32)/255
            
            # 0~3 사이의 임의의 정수 생성
            rand = random.randrange(0,4)
            
#             if rand == 0:
#               img = img
#             elif rand == 1:
#               img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE) # 시계방향으로 90도 회전
#             elif rand == 2:
#               img = cv2.rotate(img, cv2.ROTATE_180) # 180도 회전       
#             elif rand == 3:
#               img = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE) # 시계방향으로 270도 회전
#             else:
#               img=img
       
            img = np.transpose(img, (2,0,1))
            return {
                'img' : torch.tensor(img, dtype=torch.float32),
                'label' : torch.tensor(self.labels[i], dtype=torch.long)
            }
        else:
            img = cv2.imread('test_imgs/'+self.files[i])
            img = cv2.resize(img, dsize=(256, 256), interpolation=cv2.INTER_AREA)
            img = img.astype(np.float32)/255

            img = np.transpose(img, (2,0,1))
            return {
                'img' : torch.tensor(img, dtype=torch.float32),
            }

In [46]:
# Train
train_dataset = Custom_dataset(train_jpg, train_label, mode='train')
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

# Val
val_dataset = Custom_dataset(valid_jpg, valid_label, mode='test')
val_loader = DataLoader(val_dataset, shuffle=True, batch_size=batch_size)

In [47]:
# def score_function(real, pred):
#     score = f1_score(real, pred, average="macro")
#     return score

In [48]:
def main(seed = 2022):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = True
    
main(2022)

# 모델

In [49]:
class Network(nn.Module):
    def __init__(self,mode = 'train'):
        super(Network, self).__init__()
        self.mode = mode
        if self.mode == 'train':
          self.model = timm.create_model('efficientnet_b4', pretrained=True, num_classes=class_n, drop_path_rate = 0.2)
        if self.mode == 'test':
          self.model = timm.create_model('efficientnet_b4', pretrained=True, num_classes=class_n, drop_path_rate = 0)
        
    def forward(self, x):
        x = self.model(x)
        return x

class CNN_Model(nn.Module):
    def __init__(self, class_n, rate=0.1):
        super(CNN_Model, self).__init__()
        self.model = models.resnet50(pretrained=True)
        self.dropout = nn.Dropout(rate)
        self.output_layer = nn.Linear(in_features=1000, out_features=class_n, bias=True)
    
    def forward(self, inputs):
        output = self.output_layer(self.dropout(self.model(inputs)))
        return output

In [50]:
model = CNN_Model(class_n).to(device)

In [51]:
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# 학습

In [52]:
def train_step(batch_item, epoch, batch, training):
    img = batch_item['img'].to(device)
    label = batch_item['label'].to(device)
    if training is True:
        model.train()
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            output = model(img)
            loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        
        return loss
    else:
        model.eval()
        with torch.no_grad():
            output = model(img)
            loss = criterion(output, label)
            
        return loss

In [53]:
loss_plot, val_loss_plot = [], []

for epoch in range(epochs):
    total_loss, total_val_loss = 0, 0
    
    tqdm_dataset = tqdm(enumerate(train_loader))
    training = True
    for batch, batch_item in tqdm_dataset:
        batch_loss = train_step(batch_item, epoch, batch, training)
        total_loss += batch_loss
        
        tqdm_dataset.set_postfix({
            'Epoch': epoch + 1,
            'Loss': '{:06f}'.format(batch_loss.item()),
            'Total Loss' : '{:06f}'.format(total_loss/(batch+1))
        })
    loss_plot.append(total_loss/(batch+1))
    
    tqdm_dataset = tqdm(enumerate(val_loader))
    training = False
    for batch, batch_item in tqdm_dataset:
        batch_loss = train_step(batch_item, epoch, batch, training)
        total_val_loss += batch_loss
        
        tqdm_dataset.set_postfix({
            'Epoch': epoch + 1,
            'Val Loss': '{:06f}'.format(batch_loss.item()),
            'Total Val Loss' : '{:06f}'.format(total_val_loss/(batch+1))
        })
    val_loss_plot.append(total_val_loss/(batch+1))
    
    if val_loss_plot[-1]<0.04:
        torch.save(model, 'models/model'+str(epoch+1)+str(val_loss_plot[-1])+'.pt')
    if min(val_loss_plot) == val_loss_plot[-1]:
        torch.save(model, 'model.pt')

0it [00:02, ?it/s]
../aten/src/ATen/native/cuda/Loss.cu:257: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [0,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:257: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [1,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:257: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [2,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:257: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [3,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:257: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [4,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:257: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [5,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/Loss

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.