# Library

In [1]:
!nvidia-smi

Fri Jun  3 14:06:09 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.142.00   Driver Version: 450.142.00   CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            On   | 00000000:00:1E.0 Off |                    0 |
| N/A   47C    P0    26W /  70W |   6841MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [2]:
import os
import cv2
import time
import random
import logging  # 로그 출력
import easydict  # 속성으로 dict 값에 access할 수 있음
import numpy as np
import pandas as pd
from tqdm import tqdm  # process bar
from os.path import join as opj
from ptflops import get_model_complexity_info
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score

import timm
import torch
import torch.nn as nn
import torch_optimizer as optim
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, grad_scaler
from torchvision import transforms

import warnings
warnings.filterwarnings('ignore')

# Config

Hyper-parameter 정의

In [3]:
# Central experiment configuration. EasyDict exposes every key as an attribute
# (e.g. args.batch_size), so all tunable values live in this one place.
args = easydict.EasyDict(dict(
    exp_num='0',

    # Path settings
    # example class folder: /home/lab18/Data/product_image/Training/image/10268_아넬라사과디저트_2입/
    data_path='/home/lab16/jupyter_home/Data/product_image/',
    Kfold=1,
    model_path='results/',

    # Model parameter settings
    encoder_name='regnety_064',
    drop_path_rate=0.2,

    # Training parameter settings
    ## Base Parameter
    img_size=224,
    batch_size=16,
    epochs=50,
    optimizer='Lamb',
    initial_lr=5e-6,
    weight_decay=1e-3,

    ## Augmentation
    aug_ver=2,

    ## Scheduler (OnecycleLR)
    scheduler='cycle',
    warm_epoch=5,
    max_lr=1e-3,

    ### Cosine Annealing
    min_lr=5e-6,
    tmax=145,

    ## etc.
    patience=50,
    clipping=None,

    # Hardware settings
    amp=True,
    multi_gpu=False,
    logging=False,
    num_workers=0,
    seed=42,
))

# Utils for training and Logging

In [4]:
# Warmup Learning rate scheduler
from torch.optim.lr_scheduler import _LRScheduler
class WarmUpLR(_LRScheduler):
    """Linear warm-up learning-rate scheduler.

    Args:
        optimizer: wrapped optimizer (e.g. SGD)
        total_iters: total number of iterations in the warm-up phase
        last_epoch: index of the last step, forwarded to the base scheduler
    """
    def __init__(self, optimizer, total_iters, last_epoch=-1):
        # Store total_iters before delegating to the base class; in PyTorch the
        # base constructor may already call get_lr(), which reads it.
        self.total_iters = total_iters
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return base_lr * m / total_iters for every parameter group,
        where m (self.last_epoch) is the number of steps taken so far.
        """
        # The small epsilon keeps the division defined when total_iters is 0.
        denominator = self.total_iters + 1e-8
        warmed_up = []
        for base_lr in self.base_lrs:
            warmed_up.append(base_lr * self.last_epoch / denominator)
        return warmed_up

# Logging
def get_root_logger(logger_name='basicsr',
                    log_level=logging.INFO,
                    log_file=None):
    """Return the logger named ``logger_name``, configuring it on first use.

    Args:
        logger_name: name passed to ``logging.getLogger``.
        log_level: level used for ``logging.basicConfig`` and for the file handler.
        log_file: optional path; when given, a ``FileHandler`` opened in 'w' mode
            (the file is truncated) is attached to the logger.

    Returns:
        The ``logging.Logger`` instance for ``logger_name``.
    """
    logger = logging.getLogger(logger_name)
    # Only handlers attached to *this* logger mean it was initialised before.
    # ``hasHandlers()`` also inspects ancestor loggers, so as soon as the root
    # logger owned a handler (e.g. after ``basicConfig``) every newly named
    # logger returned early and never received its file handler.
    if logger.handlers:
        return logger

    format_str = '%(asctime)s %(levelname)s: %(message)s'
    # No-op when the root logger is already configured.
    logging.basicConfig(format=format_str, level=log_level)

    if log_file is not None:
        file_handler = logging.FileHandler(log_file, 'w')
        file_handler.setFormatter(logging.Formatter(format_str))
        file_handler.setLevel(log_level)
        logger.addHandler(file_handler)

    return logger

class AvgMeter(object):
    """Tracks the latest value, weighted sum, sample count, running average
    and the history of every value passed to ``update``.
    """
    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics and the value history."""
        self.val = self.avg = self.sum = self.count = 0
        self.losses = []

    def update(self, val, n=1):
        """Record ``val`` observed over ``n`` samples and refresh the average."""
        self.val = val
        weighted = val * n
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count
        self.losses.append(val)

# Data Preprocessing
- 원본 이미지 사이즈보다 작은 (256,256)로 resize하여 데이터를 새롭게 저장

In [4]:
# df = pd.read_csv('./data/train_df.csv')

# # Resize Train Images
# # save_path = './data/train_256_new'  # 새로 저장할 폴더 경로
# save_path = '/home/lab18/Data/0602_apple_256'
# os.makedirs(save_path, exist_ok=True)
# for img in tqdm(df['file_name']):  # train_df의 'file_name' 컬럼을 참고하여
#     name = os.path.basename(img)
#     img = cv2.imread(opj('./data/train/', img))  # 해당 경로에 있는 png 이미지 읽어서
#     img = cv2.resize(img, dsize=(256, 256), interpolation=cv2.INTER_AREA)
#     img = cv2.imwrite(opj(save_path, name), img)  # 새 폴더에 저장

# # Resize Test Images
# df = pd.read_csv('./data/test_df.csv')
# save_path = './data/test_256_new'
# os.makedirs(save_path, exist_ok=True)
# for img in tqdm(df['file_name']):
#     name = os.path.basename(img)
#     img = cv2.imread(opj('./data/test/', img))
#     img = cv2.resize(img, dsize=(256, 256), interpolation=cv2.INTER_AREA)
#     img = cv2.imwrite(opj(save_path, name), img)

### label(class) 정렬

In [5]:
import os
from PIL import Image

In [55]:
# List the training label directory, then show the first entry's full path
# followed by the number of entries.
training_img_folder_path = '/home/lab16/jupyter_home/Data/product_image/Training/label/'
trainig_files = os.listdir(training_img_folder_path)
training_img_path = os.path.join(training_img_folder_path, trainig_files[0])
print(training_img_path, len(trainig_files), sep='\n')

/home/lab16/jupyter_home/Data/product_image/Training/label/35102_삼양사)건포도150G
76


In [56]:
# List the validation label directory, then show the first entry's full path
# followed by the number of entries.
validation_img_folder_path = '/home/lab16/jupyter_home/Data/product_image/Validation/label/'
validation_files = os.listdir(validation_img_folder_path)
validation_img_path = os.path.join(validation_img_folder_path, validation_files[0])
print(validation_img_path, len(validation_files), sep='\n')

/home/lab16/jupyter_home/Data/product_image/Validation/label/35102_삼양사)건포도150G
76


In [70]:
# Integer product codes: the first five characters of every entry in the
# training label directory. `trainig_files` is the listing created earlier in
# this section, so the cell does not depend on names defined further down.
labels = [int(entry[:5]) for entry in trainig_files]
print(labels)

[35102, 55701, 65753, 66304, 35192, 45661, 35954, 35189, 35191, 25607, 66309, 35196, 45678, 35211, 45659, 35193, 35270, 35274, 35953, 25222, 35272, 35194, 35265, 35273, 65738, 35584, 45204, 45203, 35563, 46020, 55034, 55798, 35190, 35955, 65754, 45660, 45470, 55845, 65742, 65752, 45679, 35266, 35195, 66308, 35583, 45471, 65736, 35587, 65740, 35269, 10268, 46018, 35585, 70208, 35271, 45658, 55702, 45468, 35586, 46019, 45469, 45030, 45657, 65744, 35268, 45662, 35267, 45467, 10242, 25228, 60195, 46017, 65739, 35188, 45680, 25223]


In [57]:
# Split every label-folder name "<5-digit code>_<product name>" into its two parts.
keys = [int(entry[:5]) for entry in trainig_files]   # numeric product code
values = [entry[6:] for entry in trainig_files]      # product name after the separator
print(keys)
print(values)

[35102, 55701, 65753, 66304, 35192, 45661, 35954, 35189, 35191, 25607, 66309, 35196, 45678, 35211, 45659, 35193, 35270, 35274, 35953, 25222, 35272, 35194, 35265, 35273, 65738, 35584, 45204, 45203, 35563, 46020, 55034, 55798, 35190, 35955, 65754, 45660, 45470, 55845, 65742, 65752, 45679, 35266, 35195, 66308, 35583, 45471, 65736, 35587, 65740, 35269, 10268, 46018, 35585, 70208, 35271, 45658, 55702, 45468, 35586, 46019, 45469, 45030, 45657, 65744, 35268, 45662, 35267, 45467, 10242, 25228, 60195, 46017, 65739, 35188, 45680, 25223]
['삼양사)건포도150G', '쁘띠첼요거젤리밀감', '매일데르뜨복숭아3개입90G_3', '매일데르뜨자몽130G', '신선에프앤브이)파인애플컵400G', '씨제이)쁘티첼(요거젤리블루베리)', '팜팩토리)아넬라사과_자두디저트', 'CJ쟈뎅)쁘띠첼과일젤리포도90G', 'CJ쟈뎅)쁘띠첼과일젤리복숭아90G', '돌코리아애플팝496ML', '풍림푸드피코크포도젤리90G', '엠디에스코리아)사과푸딩220G', '롯데제과)디저뜨와(구운치즈케이크타르트)', '매일유업)데르뜨130G', '씨제이)쁘티첼(요거젤리딸기)', '신선에프앤브이)파인애플컵100G', '홈플러스)마이프루타애플앤망고100G', '홈플러스)마이프루타애플앤블루베리100G', '팜팩토리)아넬라사과_바나나디저트', '대만)망고케익184g', '홈플러스)마이프루타애플앤페어100G', 'CJ쟈뎅)쁘띠첼과일젤리밀감270G', '홈플러스)마이프루타애플앤스트로베리200G', '홈플러스)마이

In [58]:
# Mapping: integer product code -> product name.
labels = {code: name for code, name in zip(keys, values)}

In [59]:
labels

{35102: '삼양사)건포도150G',
 55701: '쁘띠첼요거젤리밀감',
 65753: '매일데르뜨복숭아3개입90G_3',
 66304: '매일데르뜨자몽130G',
 35192: '신선에프앤브이)파인애플컵400G',
 45661: '씨제이)쁘티첼(요거젤리블루베리)',
 35954: '팜팩토리)아넬라사과_자두디저트',
 35189: 'CJ쟈뎅)쁘띠첼과일젤리포도90G',
 35191: 'CJ쟈뎅)쁘띠첼과일젤리복숭아90G',
 25607: '돌코리아애플팝496ML',
 66309: '풍림푸드피코크포도젤리90G',
 35196: '엠디에스코리아)사과푸딩220G',
 45678: '롯데제과)디저뜨와(구운치즈케이크타르트)',
 35211: '매일유업)데르뜨130G',
 45659: '씨제이)쁘티첼(요거젤리딸기)',
 35193: '신선에프앤브이)파인애플컵100G',
 35270: '홈플러스)마이프루타애플앤망고100G',
 35274: '홈플러스)마이프루타애플앤블루베리100G',
 35953: '팜팩토리)아넬라사과_바나나디저트',
 25222: '대만)망고케익184g',
 35272: '홈플러스)마이프루타애플앤페어100G',
 35194: 'CJ쟈뎅)쁘띠첼과일젤리밀감270G',
 35265: '홈플러스)마이프루타애플앤스트로베리200G',
 35273: '홈플러스)마이프루타애플앤블루베리200G',
 65738: '풍림푸드망고젤리4개입90G_4',
 35584: '매일데르뜨파인애플90G',
 45204: '세방유통)애플망고슬라이스인망고쥬스370G',
 45203: '세방유통)골든파인애플슬라이스인시럽370G',
 35563: '금광약초)대추',
 46020: '팜팩토리)하넬라사과_망고디저트',
 55034: '돌트로피칼666G',
 55798: '이멕스무역)혼합푸딩(ASSORTEDPUDDING)4개입',
 35190: 'CJ쟈뎅)쁘띠첼과일젤리복숭아270G',
 35955: '팜팩토리)아넬라배_사과디저트',
 65754: '매일데르뜨자몽3개입130G_3',
 45660: '

In [71]:
# Split every label-folder name "<5-digit code>_<product name>" into its two parts.
# Reads `trainig_files` (the Training/label listing created earlier in this
# section) so the cell runs on a fresh kernel from top to bottom.
keys = [int(entry[:5]) for entry in trainig_files]   # numeric product code
values = [entry[6:] for entry in trainig_files]      # product name after the separator
print(keys)
print(values)

[35102, 55701, 65753, 66304, 35192, 45661, 35954, 35189, 35191, 25607, 66309, 35196, 45678, 35211, 45659, 35193, 35270, 35274, 35953, 25222, 35272, 35194, 35265, 35273, 65738, 35584, 45204, 45203, 35563, 46020, 55034, 55798, 35190, 35955, 65754, 45660, 45470, 55845, 65742, 65752, 45679, 35266, 35195, 66308, 35583, 45471, 65736, 35587, 65740, 35269, 10268, 46018, 35585, 70208, 35271, 45658, 55702, 45468, 35586, 46019, 45469, 45030, 45657, 65744, 35268, 45662, 35267, 45467, 10242, 25228, 60195, 46017, 65739, 35188, 45680, 25223]
['삼양사)건포도150G', '쁘띠첼요거젤리밀감', '매일데르뜨복숭아3개입90G_3', '매일데르뜨자몽130G', '신선에프앤브이)파인애플컵400G', '씨제이)쁘티첼(요거젤리블루베리)', '팜팩토리)아넬라사과_자두디저트', 'CJ쟈뎅)쁘띠첼과일젤리포도90G', 'CJ쟈뎅)쁘띠첼과일젤리복숭아90G', '돌코리아애플팝496ML', '풍림푸드피코크포도젤리90G', '엠디에스코리아)사과푸딩220G', '롯데제과)디저뜨와(구운치즈케이크타르트)', '매일유업)데르뜨130G', '씨제이)쁘티첼(요거젤리딸기)', '신선에프앤브이)파인애플컵100G', '홈플러스)마이프루타애플앤망고100G', '홈플러스)마이프루타애플앤블루베리100G', '팜팩토리)아넬라사과_바나나디저트', '대만)망고케익184g', '홈플러스)마이프루타애플앤페어100G', 'CJ쟈뎅)쁘띠첼과일젤리밀감270G', '홈플러스)마이프루타애플앤스트로베리200G', '홈플러스)마이

In [72]:
# Mapping: integer product code -> product name.
labels = {code: name for code, name in zip(keys, values)}

In [73]:
labels

{35102: '삼양사)건포도150G',
 55701: '쁘띠첼요거젤리밀감',
 65753: '매일데르뜨복숭아3개입90G_3',
 66304: '매일데르뜨자몽130G',
 35192: '신선에프앤브이)파인애플컵400G',
 45661: '씨제이)쁘티첼(요거젤리블루베리)',
 35954: '팜팩토리)아넬라사과_자두디저트',
 35189: 'CJ쟈뎅)쁘띠첼과일젤리포도90G',
 35191: 'CJ쟈뎅)쁘띠첼과일젤리복숭아90G',
 25607: '돌코리아애플팝496ML',
 66309: '풍림푸드피코크포도젤리90G',
 35196: '엠디에스코리아)사과푸딩220G',
 45678: '롯데제과)디저뜨와(구운치즈케이크타르트)',
 35211: '매일유업)데르뜨130G',
 45659: '씨제이)쁘티첼(요거젤리딸기)',
 35193: '신선에프앤브이)파인애플컵100G',
 35270: '홈플러스)마이프루타애플앤망고100G',
 35274: '홈플러스)마이프루타애플앤블루베리100G',
 35953: '팜팩토리)아넬라사과_바나나디저트',
 25222: '대만)망고케익184g',
 35272: '홈플러스)마이프루타애플앤페어100G',
 35194: 'CJ쟈뎅)쁘띠첼과일젤리밀감270G',
 35265: '홈플러스)마이프루타애플앤스트로베리200G',
 35273: '홈플러스)마이프루타애플앤블루베리200G',
 65738: '풍림푸드망고젤리4개입90G_4',
 35584: '매일데르뜨파인애플90G',
 45204: '세방유통)애플망고슬라이스인망고쥬스370G',
 45203: '세방유통)골든파인애플슬라이스인시럽370G',
 35563: '금광약초)대추',
 46020: '팜팩토리)하넬라사과_망고디저트',
 55034: '돌트로피칼666G',
 55798: '이멕스무역)혼합푸딩(ASSORTEDPUDDING)4개입',
 35190: 'CJ쟈뎅)쁘띠첼과일젤리복숭아270G',
 35955: '팜팩토리)아넬라배_사과디저트',
 65754: '매일데르뜨자몽3개입130G_3',
 45660: '

In [74]:
# img_folder_path = '/home/lab18/Data/product_image/Training/label/'
# files = os.listdir(img_folder_path)

# for i in range(10):
#     img_path = os.path.join(img_folder_path, files[i])
#     img = Image.open(img_path).resize((256, 256))
#     display(img)

In [8]:
from glob import glob

In [9]:
# Number of training .jpg files located one folder level below image/
# (glob is called without recursive=True, so `**` matches like `*`).
len(glob('/home/lab16/jupyter_home/Data/product_image/Training/image/**/*.jpg'))

8664

In [10]:
# Number of validation .jpg files located one folder level below image/
# (glob is called without recursive=True, so `**` matches like `*`).
len(glob('/home/lab16/jupyter_home/Data/product_image/Validation/image/**/*.jpg'))

1140

In [80]:
# Paths of all training images, one per-product sub-folder deep.
total_images_path = glob('/home/lab16/jupyter_home/Data/product_image/Training/image/**/*.jpg')

In [81]:
total_images_path

['/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_30_s_6.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_60_m_13.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_30_s_9.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_30_m_17.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_60_s_8.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_00_s_20.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_60_s_9.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_30_s_11.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_30_m_4.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_00_s_11.jpg',
 '/home/lab16/j

In [82]:
# Flat destination folder that will receive every training image.
save_path = '/home/lab16/jupyter_home/Data/product_image/Training/total_image/'

In [83]:
total_images_path

['/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_30_s_6.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_60_m_13.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_30_s_9.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_30_m_17.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_60_s_8.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_00_s_20.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_60_s_9.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_30_s_11.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_30_m_4.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Training/image/35102_삼양사)건포도150G/35102_00_s_11.jpg',
 '/home/lab16/j

In [84]:
os.path.basename(total_images_path[0])

'35102_30_s_6.jpg'

### 한 폴더에 병합

In [11]:
# Paths of all validation images, one per-product sub-folder deep.
total_v_images_path = glob('/home/lab16/jupyter_home/Data/product_image/Validation/image/**/*.jpg')

In [12]:
total_v_images_path

['/home/lab16/jupyter_home/Data/product_image/Validation/image/35102_삼양사)건포도150G/35102_60_m_15.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Validation/image/35102_삼양사)건포도150G/35102_30_m_16.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Validation/image/35102_삼양사)건포도150G/35102_60_m_3.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Validation/image/35102_삼양사)건포도150G/35102_60_s_20.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Validation/image/35102_삼양사)건포도150G/35102_60_s_2.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Validation/image/35102_삼양사)건포도150G/35102_30_m_20.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Validation/image/35102_삼양사)건포도150G/35102_60_m_19.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Validation/image/35102_삼양사)건포도150G/35102_30_m_7.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Validation/image/35102_삼양사)건포도150G/35102_30_s_23.jpg',
 '/home/lab16/jupyter_home/Data/product_image/Validation/image/35102_삼양사)건포도150G/35102_00_s_13

In [14]:
# Flat destination folder that will receive every validation image.
v_save_path = '/home/lab16/jupyter_home/Data/product_image/Validation/total_image/'

In [None]:
# Flatten the per-product training folders into the single directory `save_path`.
os.makedirs(save_path, exist_ok=True)  # loop-invariant: create the target folder once
for src_path in total_images_path:
    # The context manager closes the source file as soon as it has been re-saved.
    with Image.open(src_path) as img:
        # os.path.join also works when save_path has no trailing separator.
        # NOTE(review): Image.save re-encodes the image; if a byte-identical
        # copy is wanted instead, a plain file copy would avoid that - confirm.
        img.save(os.path.join(save_path, os.path.basename(src_path)))

In [16]:
# Flatten the per-product validation folders into the single directory `v_save_path`.
os.makedirs(v_save_path, exist_ok=True)  # loop-invariant: create the target folder once
for src_path in total_v_images_path:
    # The context manager closes the source file as soon as it has been re-saved.
    with Image.open(src_path) as img:
        # os.path.join also works when v_save_path has no trailing separator.
        # NOTE(review): Image.save re-encodes the image; if a byte-identical
        # copy is wanted instead, a plain file copy would avoid that - confirm.
        img.save(os.path.join(v_save_path, os.path.basename(src_path)))

## 데이터 확인

In [1]:
import os
from PIL import Image
from glob import glob

In [4]:
# Paths of the training images inside the flat, merged folder.
total_images_path = glob('/home/lab16/jupyter_home/Data/product_image/Training/total_image/*.jpg')

In [5]:
len(total_images_path)

8664

In [6]:
# List the training label directory and show the full path of its first entry.
img_folder_path = '/home/lab16/jupyter_home/Data/product_image/Training/label/'
files = os.listdir(img_folder_path)  # entry names look like "<5-digit code>_<product name>"
img_path = os.path.join(img_folder_path, files[0])
print(img_path)

/home/lab16/jupyter_home/Data/product_image/Training/label/35102_삼양사)건포도150G


In [8]:
# Integer product codes taken from the first five characters of each entry name.
labels = [int(entry[:5]) for entry in files]
print(labels)
print(len(labels))

[35102, 55701, 65753, 66304, 35192, 45661, 35954, 35189, 35191, 25607, 66309, 35196, 45678, 35211, 45659, 35193, 35270, 35274, 35953, 25222, 35272, 35194, 35265, 35273, 65738, 35584, 45204, 45203, 35563, 46020, 55034, 55798, 35190, 35955, 65754, 45660, 45470, 55845, 65742, 65752, 45679, 35266, 35195, 66308, 35583, 45471, 65736, 35587, 65740, 35269, 10268, 46018, 35585, 70208, 35271, 45658, 55702, 45468, 35586, 46019, 45469, 45030, 45657, 65744, 35268, 45662, 35267, 45467, 10242, 25228, 60195, 46017, 65739, 35188, 45680, 25223]
76


In [13]:
import numpy as np

In [15]:
# Base names (without directories) of every training image.
total_images_path = glob('/home/lab16/jupyter_home/Data/product_image/Training/image/**/*.jpg')
file_names = np.array([os.path.basename(path) for path in total_images_path])
print(file_names)
print(len(file_names))

['35102_30_s_6.jpg' '35102_60_m_13.jpg' '35102_30_s_9.jpg' ...
 '25223_30_s_1.jpg' '25223_00_s_24.jpg' '25223_00_m_18.jpg']
8664


In [16]:
# Per-image label: the five-character code prefix of each file name (kept as a string).
each_label = np.array([os.path.basename(path)[:5] for path in total_images_path])
print(each_label)
print(len(each_label))

['35102' '35102' '35102' ... '25223' '25223' '25223']
8664


# Dataset & Loader

In [5]:
from torchvision.datasets import ImageFolder
import torch.utils.data as data
from torchvision import transforms
from glob import glob

In [6]:
import os
from PIL import Image
from glob import glob

In [7]:
# Build the mapping integer product code -> product name from the entry names
# ("<5-digit code>_<product name>") of the training label directory.
training_img_folder_path = '/home/lab16/jupyter_home/Data/product_image/Training/label/'
trainig_files = os.listdir(training_img_folder_path)
training_img_path = os.path.join(training_img_folder_path, trainig_files[0])

keys = [int(entry[:5]) for entry in trainig_files]
values = [entry[6:] for entry in trainig_files]
labels = dict(zip(keys, values))
print(labels)

{35102: '삼양사)건포도150G', 55701: '쁘띠첼요거젤리밀감', 65753: '매일데르뜨복숭아3개입90G_3', 66304: '매일데르뜨자몽130G', 35192: '신선에프앤브이)파인애플컵400G', 45661: '씨제이)쁘티첼(요거젤리블루베리)', 35954: '팜팩토리)아넬라사과_자두디저트', 35189: 'CJ쟈뎅)쁘띠첼과일젤리포도90G', 35191: 'CJ쟈뎅)쁘띠첼과일젤리복숭아90G', 25607: '돌코리아애플팝496ML', 66309: '풍림푸드피코크포도젤리90G', 35196: '엠디에스코리아)사과푸딩220G', 45678: '롯데제과)디저뜨와(구운치즈케이크타르트)', 35211: '매일유업)데르뜨130G', 45659: '씨제이)쁘티첼(요거젤리딸기)', 35193: '신선에프앤브이)파인애플컵100G', 35270: '홈플러스)마이프루타애플앤망고100G', 35274: '홈플러스)마이프루타애플앤블루베리100G', 35953: '팜팩토리)아넬라사과_바나나디저트', 25222: '대만)망고케익184g', 35272: '홈플러스)마이프루타애플앤페어100G', 35194: 'CJ쟈뎅)쁘띠첼과일젤리밀감270G', 35265: '홈플러스)마이프루타애플앤스트로베리200G', 35273: '홈플러스)마이프루타애플앤블루베리200G', 65738: '풍림푸드망고젤리4개입90G_4', 35584: '매일데르뜨파인애플90G', 45204: '세방유통)애플망고슬라이스인망고쥬스370G', 45203: '세방유통)골든파인애플슬라이스인시럽370G', 35563: '금광약초)대추', 46020: '팜팩토리)하넬라사과_망고디저트', 55034: '돌트로피칼666G', 55798: '이멕스무역)혼합푸딩(ASSORTEDPUDDING)4개입', 35190: 'CJ쟈뎅)쁘띠첼과일젤리복숭아270G', 35955: '팜팩토리)아넬라배_사과디저트', 65754: '매일데르뜨자몽3개입130G_3', 45660: '씨제이)쁘티첼(요거젤리화이트코코)', 45470: '호남샤니)제

In [63]:
# Paths of all validation images, one per-product sub-folder deep.
# Note: this rebinds `total_images_path`, which earlier cells used for the training set.
total_images_path = glob('/home/lab16/jupyter_home/Data/product_image/Validation/image/**/*.jpg')

In [68]:
# Five-character code prefix (string) of every file name in `total_images_path`.
each_label = [os.path.basename(path)[:5] for path in total_images_path]
# Inverse of `labels`: product name -> integer product code.
# NOTE(review): `each_label` holds code strings, not product names, so this
# mapping has no matching keys for them - confirm the intended direction.
label_decoder = {name: code for code, name in labels.items()}

In [72]:
# Wrap the list in a Series so `.replace` can be applied.
# Rebinding the same name makes this cell non-idempotent when re-run.
each_label = pd.Series(each_label)

In [73]:
# each_label = np.array(each_label)
# NOTE(review): label_decoder is keyed by product name while each_label holds
# code strings such as '35102', so this replace leaves the values unchanged
# (see the displayed output) - confirm what mapping was intended.
each_label = each_label.replace(label_decoder)
# each_label = np.array(each_label)
each_label

0       35102
1       35102
2       35102
3       35102
4       35102
        ...  
1135    25223
1136    25223
1137    25223
1138    25223
1139    25223
Length: 1140, dtype: object

In [74]:
# Convert the Series to a NumPy array; the displayed result has dtype=object (strings).
each_label = np.array(each_label)
each_label

array(['35102', '35102', '35102', ..., '25223', '25223', '25223'],
      dtype=object)

In [None]:
total_images_path = glob('/home/lab16/jupyter_home/Data/product_image/Validation/image/**/*.jpg')

# Five-character code prefix (string) of every validation file name.
each_label = [os.path.basename(path)[:5] for path in total_images_path]
# Inverse of `labels`: product name -> integer product code.
label_decoder = {name: code for code, name in labels.items()}

# NOTE(review): label_decoder is keyed by product name while each_label holds
# code strings, so the replace below has nothing to match - confirm the intent.
each_label = np.array(pd.Series(each_label).replace(label_decoder))
each_label

In [18]:
# validation용 추가
class Valid_Dataset(Dataset):
    """Validation images paired with their product-code labels.

    File names are collected with ``image_glob`` while the pixels are read from
    the flat ``image_dir`` folder, so both locations must hold files with the
    same base names.

    Args:
        transform: optional callable applied to the CHW float tensor of an image.
        image_glob: glob pattern that enumerates the image files.
        image_dir: directory the images are actually loaded from.

    NOTE(review): ``target`` contains the first five characters of each file
    name (strings such as '35102'), not integer class indices - confirm that
    the consumer converts them before computing a loss.
    """
    def __init__(self, transform=None,
                 image_glob='/home/lab16/jupyter_home/Data/product_image/Validation/image/**/*.jpg',
                 image_dir='/home/lab16/jupyter_home/Data/product_image/Validation/total_image/'):
        base_names = [os.path.basename(path) for path in glob(image_glob)]

        self.image_dir = image_dir
        self.file_name = np.array(base_names)
        # Label = five-character code prefix of the file name.
        self.target = np.array([name[:5] for name in base_names])
        self.transform = transform

        print(f'Dataset size:{len(self.file_name)}')

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        The image is read with OpenCV, converted to float32 RGB and scaled by
        1/255. With a transform it is passed on as a CHW tensor; without one the
        HWC NumPy array is returned as is.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        image_path = opj(self.image_dir, self.file_name[idx])
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = image.astype(np.float32)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0  # BGR => RGB

        target = self.target[idx]

        if self.transform is not None:
            # HWC => CHW layout
            image = self.transform(torch.from_numpy(image.transpose(2,0,1)))

        return image, target

    def __len__(self):
        return len(self.file_name)

In [19]:
class Train_Dataset(Dataset):
    """Training images paired with their product-code labels.

    File names are collected with ``image_glob`` while the pixels are read from
    the flat ``image_dir`` folder, so both locations must hold files with the
    same base names.

    Args:
        transform: optional callable applied to the CHW float tensor of an image.
        image_glob: glob pattern that enumerates the image files.
        image_dir: directory the images are actually loaded from.

    NOTE(review): ``target`` contains the first five characters of each file
    name (strings such as '35102'), not integer class indices - confirm that
    the consumer converts them before computing a loss.
    """
    def __init__(self, transform=None,
                 image_glob='/home/lab16/jupyter_home/Data/product_image/Training/image/**/*.jpg',
                 image_dir='/home/lab16/jupyter_home/Data/product_image/Training/total_image/'):
        base_names = [os.path.basename(path) for path in glob(image_glob)]

        self.image_dir = image_dir
        self.file_name = np.array(base_names)
        # Label = five-character code prefix of the file name.
        self.target = np.array([name[:5] for name in base_names])
        self.transform = transform

        print(f'Dataset size:{len(self.file_name)}')

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        The image is read with OpenCV, converted to float32 RGB and scaled by
        1/255. With a transform it is passed on as a CHW tensor; without one the
        HWC NumPy array is returned as is.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        image_path = opj(self.image_dir, self.file_name[idx])
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = image.astype(np.float32)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0  # BGR => RGB

        target = self.target[idx]

        if self.transform is not None:
            # HWC => CHW layout
            image = self.transform(torch.from_numpy(image.transpose(2,0,1)))

        return image, target

    def __len__(self):
        return len(self.file_name)

class Test_dataset(Dataset):
    """Unlabelled test images listed in the 'file_name' column of ``df``.

    Args:
        df: table with a 'file_name' column holding the image file names.
        transform: optional callable applied to the CHW float tensor of an image.
        image_dir: directory the images are loaded from.
    """
    def __init__(self, df, transform=None,
                 image_dir='/home/lab16/jupyter_home/Data/product_image/Test/total_image/'):
        self.test_file_name = df['file_name'].values
        self.transform = transform
        self.image_dir = image_dir

        print(f'Test Dataset size:{len(self.test_file_name)}')

    def __getitem__(self, idx):
        """Return the image for sample ``idx``.

        The image is read with OpenCV, converted to float32 RGB and scaled by
        1/255. With a transform it is passed on as a CHW tensor; without one the
        HWC NumPy array is returned as is.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        image_path = opj(self.image_dir, self.test_file_name[idx])
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = image.astype(np.float32)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0  # BGR => RGB

        if self.transform is not None:
            # HWC => CHW layout
            image = self.transform(torch.from_numpy(image.transpose(2,0,1)))

        return image

    def __len__(self):
        return len(self.test_file_name)

# def get_loader(df, phase: str, batch_size, shuffle, num_workers, transform):
def get_loader(phase: str, batch_size, shuffle, num_workers, transform, df=None):
    """Build the DataLoader for one phase of the experiment.

    Args:
        phase: 'test' -> Test_dataset, 'validation' -> Valid_Dataset,
            any other value -> Train_Dataset.
        batch_size: number of samples per batch.
        shuffle: whether the loader reshuffles the data.
        num_workers: number of subprocesses used for data loading.
        transform: callable handed to the dataset.
        df: required only for phase == 'test'; table with a 'file_name' column
            that Test_dataset reads.

    Returns:
        A DataLoader with pin_memory=True that keeps the last, possibly
        smaller, batch.

    Raises:
        ValueError: if phase == 'test' and ``df`` is not given.
    """
    if phase == 'test':
        if df is None:
            # Test_dataset takes the file table as its first argument; passing
            # only the transform would bind it to `df`.
            raise ValueError("phase='test' requires `df` with a 'file_name' column")
        dataset = Test_dataset(df, transform)
    elif phase == 'validation':
        # Validation must read the validation split, not the training images.
        dataset = Valid_Dataset(transform)
    else:
        dataset = Train_Dataset(transform)

    # drop_last=False is the DataLoader default, so every phase keeps the final batch.
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
                      num_workers=num_workers, pin_memory=True, drop_last=False)

def get_train_augmentation(img_size, ver):
    """Build the torchvision transform pipeline for an augmentation version.

    Args:
        img_size: Requested image size. Currently unused: the ``Resize`` step
            is disabled and the random crop in version 2 is fixed at 224.
        ver: Augmentation version.
            1 -> normalisation only (deterministic; used for validation/test).
            2 -> random flips, 224x224 random crop, perspective, affine,
                 rotation, then normalisation.

    Returns:
        A ``transforms.Compose`` with the selected steps.

    Raises:
        ValueError: If ``ver`` is neither 1 nor 2. (Previously this fell
            through to ``return transform`` with the local never assigned.)
    """
    if ver not in (1, 2):
        raise ValueError(f'Unsupported augmentation version: {ver!r} (expected 1 or 2)')

    # Channel statistics shared by both versions.
    # NOTE(review): no ToTensor/Resize step is applied here, so the dataset
    # presumably already yields float tensors of a usable size - confirm.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    if ver == 1:  # for validset
        return transforms.Compose([normalize])

    return transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomCrop(224),
        transforms.RandomPerspective(),
        transforms.RandomAffine(20),  # degrees=20: random rotation within +/-20 degrees
        transforms.RandomRotation(90),
        normalize,
    ])

### ImageFolder

In [10]:
# from torchvision.datasets import ImageFolder
# import torch.utils.data as data
# from torchvision import transforms

# ### ImageFolder 작성
# train_imgs = ImageFolder(args.data_path + 'Training/total_image/',
#                          transform=transform)

# validation_imgs = ImageFolder(args.data_path + 'Validation/total_image/',
#                          transform=transform)

# test_imgs = ImageFolder(args.data_path,  # 설정 필요
#                         transform=transform)

# data_loader = data.DataLoader(train_imgs, batch_size=64, shuffle=True)
# data_loader = data.DataLoader(validation_imgs, batch_size=64, shuffle=True)
# data_loader = data.DataLoader(test_imgs, batch_size=64, shuffle=True)

# Network

In [20]:
def _build_encoder(encoder_name, drop_path_rate, num_classes=76, pretrained=True):
    """Create a timm backbone and replace its classifier with a fresh linear head.

    Args:
        encoder_name: timm model name. Only names containing ``'regnet'`` or
            ``'efficient'`` are supported, because the location of the
            classifier layer differs per architecture family.
        drop_path_rate: Stochastic-depth rate forwarded to ``timm.create_model``.
        num_classes: Output size of the new classification layer.
        pretrained: Whether timm should load its pretrained weights.

    Returns:
        The timm model with its final linear layer replaced.

    Raises:
        ValueError: If the encoder family is not supported. Raised before the
            model is created so no weights are downloaded for a run that
            would otherwise silently keep the original classification head.
    """
    is_regnet = 'regnet' in encoder_name
    is_efficient = 'efficient' in encoder_name
    if not (is_regnet or is_efficient):
        raise ValueError(
            f"Unsupported encoder '{encoder_name}': expected a name containing "
            "'regnet' or 'efficient'")

    encoder = timm.create_model(encoder_name, pretrained=pretrained,
                                drop_path_rate=drop_path_rate)

    if is_regnet:
        # RegNet keeps its classifier at head.fc
        encoder.head.fc = nn.Linear(encoder.head.fc.in_features, num_classes)
    else:
        # EfficientNet keeps its classifier at .classifier
        encoder.classifier = nn.Linear(encoder.classifier.in_features, num_classes)
    return encoder


class Network(nn.Module):
    """Training-time classifier: pretrained timm encoder with a new linear head.

    Args:
        args: Config object providing ``encoder_name`` and ``drop_path_rate``.
            An optional ``num_classes`` attribute overrides the default of 76.
    """
    def __init__(self, args):
        super().__init__()
        # Use a pretrained model as the backbone
        self.encoder = _build_encoder(args.encoder_name,
                                      drop_path_rate=args.drop_path_rate,
                                      num_classes=getattr(args, 'num_classes', 76))

    def forward(self, x):
        """Return the class logits produced by the encoder for batch ``x``."""
        x = self.encoder(x)
        return x


class Network_test(nn.Module):
    """Inference-time counterpart of ``Network`` (stochastic depth disabled).

    Args:
        encoder_name: timm model name (see ``_build_encoder`` for supported families).
        num_classes: Output size of the classification layer; must match the
            checkpoint that will be loaded into this model.
        pretrained: Whether timm loads its pretrained weights first. Kept at
            ``True`` by default for backward compatibility; pass ``False``
            when a checkpoint is loaded right afterwards to skip the download.
    """
    def __init__(self, encoder_name, num_classes=76, pretrained=True):
        super().__init__()
        self.encoder = _build_encoder(encoder_name, drop_path_rate=0,
                                      num_classes=num_classes,
                                      pretrained=pretrained)

    def forward(self, x):
        """Return the class logits produced by the encoder for batch ``x``."""
        x = self.encoder(x)
        return x

# Trainer for Training & Validation

In [21]:
class Trainer():
    """Runs a complete training experiment.

    Constructing the object performs the whole run: it builds the data
    loaders, model, loss, optimizer and LR schedulers, then trains for up to
    ``args.epochs`` epochs with early stopping, saving the checkpoint with the
    lowest validation loss to ``<save_path>/best_model.pth``.
    """
    def __init__(self, args, save_path):
        '''
        args: arguments (hyper-parameter / path configuration object)
        save_path: directory where the log file and the best model weights are saved

        Raises:
            ValueError: if args.scheduler is not 'step', 'cos' or 'cycle'.
        '''
        super(Trainer, self).__init__()

        # Fail fast: an unknown scheduler used to surface only as an
        # AttributeError when the first checkpoint was saved.
        if args.scheduler not in ('step', 'cos', 'cycle'):
            raise ValueError(
                f"Unsupported scheduler '{args.scheduler}': expected 'step', 'cos' or 'cycle'")

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f'device:{self.device}')

        # Logging
        log_file = os.path.join(save_path, 'log_0603.log')
        self.logger = get_root_logger(logger_name='IR', log_level=logging.INFO, log_file=log_file)
        self.logger.info(args)

        # Augmentation
        self.train_transform = get_train_augmentation(img_size=args.img_size, ver=args.aug_ver)
        self.test_transform = get_train_augmentation(img_size=args.img_size, ver=1)

        # Data loaders (the earlier CSV + StratifiedKFold split is disabled).
        # NOTE(review): the validation loader is built with phase='train' and
        # the *training* transform, so validation metrics are computed with
        # random augmentation; the disabled K-fold version used
        # self.test_transform here. Confirm which is intended.
        self.train_loader = get_loader(phase='train', batch_size=args.batch_size, shuffle=True,
                                       num_workers=args.num_workers, transform=self.train_transform)
        self.val_loader = get_loader(phase='train', batch_size=args.batch_size, shuffle=False,
                                     num_workers=args.num_workers, transform=self.train_transform)

        # Network
        self.model = Network(args).to(self.device)
        macs, params = get_model_complexity_info(self.model, (3, args.img_size, args.img_size), as_strings=True,
                                                 print_per_layer_stat=False, verbose=False)
        self.logger.info('{:<30}  {:<8}'.format('Computational complexity: ', macs))
        self.logger.info('{:<30}  {:<8}'.format('Number of parameters: ', params))

        # Loss
        self.criterion = nn.CrossEntropyLoss()

        # Optimizer & Scheduler
        self.optimizer = optim.Lamb(self.model.parameters(), lr=args.initial_lr, weight_decay=args.weight_decay)

        iter_per_epoch = len(self.train_loader)
        self.warmup_scheduler = WarmUpLR(self.optimizer, iter_per_epoch * args.warm_epoch)

        if args.scheduler == 'step':
            self.scheduler = torch.optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=args.milestone, gamma=args.lr_factor, verbose=True)
        elif args.scheduler == 'cos':
            tmax = args.tmax # half-cycle
            self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max = tmax, eta_min=args.min_lr, verbose=True)
        else:  # 'cycle' (validated above)
            self.scheduler = torch.optim.lr_scheduler.OneCycleLR(self.optimizer, max_lr=args.max_lr, steps_per_epoch=iter_per_epoch, epochs=args.epochs)

        # One gradient scaler for the whole run, so the dynamic loss scale
        # carries over between epochs instead of being reset every epoch.
        self.scaler = grad_scaler.GradScaler(enabled=bool(args.amp))

        if args.multi_gpu:
            self.model = nn.DataParallel(self.model).to(self.device)

        # Train / Validate
        best_loss = np.inf
        best_acc = 0
        best_f1 = 0  # initialised so reporting works even if val loss never improves
        best_epoch = 0
        early_stopping = 0
        start = time.time()
        for epoch in range(1, args.epochs+1):
            self.epoch = epoch

            # Epoch-level schedulers advance once per epoch after warm-up.
            # ('step' was previously created but never stepped.)
            if args.scheduler in ('step', 'cos'):
                if epoch > args.warm_epoch:
                    self.scheduler.step()

            # Training
            train_loss, train_acc, train_f1 = self.training(args)

            # Model weight in Multi_GPU or Single GPU
            state_dict= self.model.module.state_dict() if args.multi_gpu else self.model.state_dict()

            # Validation
            val_loss, val_acc, val_f1 = self.validate(args, phase='val')

            # Save models
            if val_loss < best_loss:
                early_stopping = 0
                best_epoch = epoch
                best_loss = val_loss
                best_acc = val_acc
                best_f1 = val_f1

                torch.save({'epoch':epoch,
                            'state_dict':state_dict,
                            'optimizer': self.optimizer.state_dict(),
                            'scheduler': self.scheduler.state_dict(),
                    }, os.path.join(save_path, 'best_model.pth'))
                self.logger.info(f'-----------------SAVE:{best_epoch}epoch----------------')
            else:
                early_stopping += 1

            # Early Stopping
            if early_stopping == args.patience:
                break

            print(f'\nbest epoch:{best_epoch}/loss:{best_loss:.4f}/f1:{best_f1:.4f}')

        self.logger.info(f'\nBest Val Epoch:{best_epoch} | Val Loss:{best_loss:.4f} | Val Acc:{best_acc:.4f} | Val F1:{best_f1:.4f}')
        end = time.time()
        self.logger.info(f'Total Process time:{(end - start) / 60:.3f}Minute')

    # Training
    def training(self, args):
        """Run one training epoch over ``self.train_loader``.

        Returns:
            (average loss, accuracy, macro F1) for the epoch.
        """
        self.model.train()
        train_loss = AvgMeter()
        train_acc = 0
        preds_list = []
        targets_list = []

        for images, targets in tqdm(self.train_loader):
            # as_tensor moves/casts without the extra copy (and warning) that
            # torch.tensor() incurs when given an existing tensor.
            images = torch.as_tensor(images, device=self.device, dtype=torch.float32)
            targets = torch.as_tensor(targets, device=self.device, dtype=torch.long)

            # Per-iteration LR warm-up during the first warm_epoch epochs
            if self.epoch <= args.warm_epoch:
                self.warmup_scheduler.step()

            self.model.zero_grad(set_to_none=True)
            if args.amp:
                with autocast():
                    preds = self.model(images)
                    loss = self.criterion(preds, targets)
                self.scaler.scale(loss).backward()

                # Gradient Clipping (gradients must be unscaled first)
                if args.clipping is not None:
                    self.scaler.unscale_(self.optimizer)
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), args.clipping)

                self.scaler.step(self.optimizer)
                self.scaler.update()

            else:
                preds = self.model(images)
                loss = self.criterion(preds, targets)
                loss.backward()
                # Same guard as the AMP path: clipping is optional (None disables it)
                if args.clipping is not None:
                    nn.utils.clip_grad_norm_(self.model.parameters(), args.clipping)
                self.optimizer.step()

            # OneCycleLR advances once per iteration after warm-up
            if args.scheduler == 'cycle':
                if self.epoch > args.warm_epoch:
                    self.scheduler.step()

            # Metric
            pred_labels = preds.argmax(dim=1)
            train_acc += (pred_labels == targets).sum().item()
            preds_list.extend(pred_labels.cpu().detach().numpy())
            targets_list.extend(targets.cpu().detach().numpy())
            # log
            train_loss.update(loss.item(), n=images.size(0))

        train_acc /= len(self.train_loader.dataset)
        train_f1 = f1_score(np.array(targets_list), np.array(preds_list), average='macro')

        self.logger.info(f'Epoch:[{self.epoch:03d}/{args.epochs:03d}]')
        self.logger.info(f'Train Loss:{train_loss.avg:.3f} | Acc:{train_acc:.4f} | F1:{train_f1:.4f}')
        return train_loss.avg, train_acc, train_f1

    # Validation or Dev
    def validate(self, args, phase='val'):
        """Evaluate the model on ``self.val_loader`` without gradient tracking.

        Args:
            args: Config object (not read here; kept for a uniform signature).
            phase: Label used as the prefix of the logged metrics line.

        Returns:
            (average loss, accuracy, macro F1) over the validation loader.
        """
        self.model.eval()
        with torch.no_grad():
            val_loss = AvgMeter()
            val_acc = 0
            preds_list = []
            targets_list = []

            for images, targets in self.val_loader:
                images = torch.as_tensor(images, device=self.device, dtype=torch.float32)
                targets = torch.as_tensor(targets, device=self.device, dtype=torch.long)

                preds = self.model(images)
                loss = self.criterion(preds, targets)

                # Metric
                pred_labels = preds.argmax(dim=1)
                val_acc += (pred_labels == targets).sum().item()
                preds_list.extend(pred_labels.cpu().detach().numpy())
                targets_list.extend(targets.cpu().detach().numpy())

                # log
                val_loss.update(loss.item(), n=images.size(0))
            val_acc /= len(self.val_loader.dataset)
            val_f1 = f1_score(np.array(targets_list), np.array(preds_list), average='macro')

            self.logger.info(f'{phase} Loss:{val_loss.avg:.3f} | Acc:{val_acc:.4f} | F1:{val_f1:.4f}')
        return val_loss.avg, val_acc, val_f1

# Main

In [22]:
def main(args):
    """Seed all RNGs, create the experiment directory and run one training job.

    Args:
        args: Config object providing ``seed``, ``model_path`` and ``exp_num``
            (plus everything ``Trainer`` reads). ``exp_num`` may be a string
            or an integer; it is zero-padded to three characters.

    Returns:
        Path of the experiment directory (``<model_path>/<exp_num padded>``)
        that ``Trainer`` was given as its save path.
    """
    print('<---- Training Params ---->')

    # Random Seed
    seed = args.seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # NOTE(review): cudnn.benchmark=True lets cuDNN pick algorithms per run,
    # which can make results differ between runs despite the fixed seeds.
    torch.backends.cudnn.benchmark = True

    # str() so an integer experiment number works as well as a string one
    save_path = os.path.join(args.model_path, str(args.exp_num).zfill(3))

    # Create model directory
    os.makedirs(save_path, exist_ok=True)
    Trainer(args, save_path)  # the constructor runs the full training loop

    return save_path

# Inference

In [23]:
def predict(encoder_name, test_loader, device, model_path):
    """Load the best checkpoint of one experiment and predict class probabilities.

    Args:
        encoder_name: timm encoder name passed to ``Network_test``.
        test_loader: Iterable yielding batches of images (no labels).
        device: Device the model and the batches are placed on.
        model_path: Directory containing ``best_model.pth``.

    Returns:
        ``np.ndarray`` with one row of softmax probabilities per input sample.
    """
    model = Network_test(encoder_name).to(device)
    # map_location lets a checkpoint saved on a GPU be restored on whatever
    # device inference runs on (e.g. a CPU-only machine).
    checkpoint = torch.load(opj(model_path, 'best_model.pth'), map_location=device)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    probabilities = []
    with torch.no_grad():
        for images in tqdm(test_loader):
            images = torch.as_tensor(images, device=device, dtype=torch.float32)
            batch_probs = torch.softmax(model(images), dim=1)  # logits -> probabilities
            probabilities.extend(batch_probs.cpu().tolist())

    return np.array(probabilities)

def ensemble_5fold(model_path_list, test_loader, device):
    """Average the predicted probabilities of several checkpoints.

    Despite the name, any non-zero number of checkpoints is accepted; the
    result is the element-wise mean of all per-model predictions. (The
    previous version indexed exactly five predictions, so it failed for
    fewer and ignored any beyond the fifth while still dividing by the total.)

    Args:
        model_path_list: Directories that each contain a ``best_model.pth``.
        test_loader: Iterable yielding image batches, forwarded to ``predict``.
        device: Device used for inference.

    Returns:
        Array of averaged class probabilities, one row per sample.

    Raises:
        ValueError: If ``model_path_list`` is empty.
    """
    model_paths = list(model_path_list)
    if not model_paths:
        raise ValueError('model_path_list must contain at least one model path')

    predict_list = [
        predict(encoder_name='regnety_064',
                test_loader=test_loader, device=device, model_path=model_path)
        for model_path in model_paths
    ]
    # Left-to-right sum, then divide by the number of models actually used
    return sum(predict_list) / len(predict_list)

In [24]:
# Single-model inference (no folds)
def result(model_path_list, test_loader, device):
    """Return the predictions of the first checkpoint in ``model_path_list``.

    Only the first entry is used, so only that model is loaded and run.
    (The previous version ran inference for every path and then discarded
    all but the first result.)

    Args:
        model_path_list: Directories containing ``best_model.pth``; only the
            first one is evaluated.
        test_loader: Iterable yielding image batches, forwarded to ``predict``.
        device: Device used for inference.

    Returns:
        Array of class probabilities from the first model, one row per sample.

    Raises:
        ValueError: If ``model_path_list`` is empty.
    """
    model_paths = list(model_path_list)
    if not model_paths:
        raise ValueError('model_path_list must contain at least one model path')

    return predict(encoder_name='regnety_064',
                   test_loader=test_loader, device=device, model_path=model_paths[0])

# Train & Inference

In [25]:
# Image size and compute device for the inference cells
img_size = 224
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # GPU when available, else CPU

In [10]:
# sub = pd.read_csv('./data/sample_submission.csv')
# df_train = pd.read_csv('./data/train_df.csv')
# df_test = pd.read_csv('./data/test_df.csv')

In [53]:
# test_transform = get_train_augmentation(img_size=img_size, ver=1)
# test_dataset = Test_dataset(df_test, test_transform)
# test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)

In [26]:
# Single run without folds: train once and record where the checkpoint was written
models_path = []
args.fold = 0
args.exp_num = str(0)
save_path = main(args)  # runs the Trainer and returns the experiment directory
models_path.append(save_path)

2022-06-03 14:12:50,121 INFO: {'exp_num': '0', 'data_path': '/home/lab16/jupyter_home/Data/product_image/', 'Kfold': 1, 'model_path': 'results/', 'encoder_name': 'regnety_064', 'drop_path_rate': 0.2, 'img_size': 224, 'batch_size': 16, 'epochs': 50, 'optimizer': 'Lamb', 'initial_lr': 5e-06, 'weight_decay': 0.001, 'aug_ver': 2, 'scheduler': 'cycle', 'warm_epoch': 5, 'max_lr': 0.001, 'min_lr': 5e-06, 'tmax': 145, 'patience': 50, 'clipping': None, 'amp': True, 'multi_gpu': False, 'logging': False, 'num_workers': 0, 'seed': 42, 'fold': 0}


<---- Training Params ---->
device:cuda
Dataset size:8664
Dataset size:8664


2022-06-03 14:12:50,980 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_064-0a48325c.pth)


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [None]:
# start = 0 # first time : Only Trainset
# models_path = []
# for s_fold in range(5): # 5fold
#     args.fold = s_fold
#     args.exp_num = str(s_fold)
#     save_path = main(args)
#     models_path.append(save_path)

In [None]:
# Display the collected experiment directories
models_path

In [None]:
# ensemble = ensemble_5fold(models_path, test_loader, device)

In [None]:
# Single-model inference (no folds).
# NOTE(review): `test_loader` is only built in a commented-out cell earlier in
# the notebook; it must be defined before this cell can run.
# The output is stored under its own name: assigning it to `result` would
# overwrite the `result` function and make this cell fail when run again.
predictions = result(models_path, test_loader, device)

In [None]:
# # For submission
# sub.iloc[:, 1] = ensemble.argmax(axis=1)
# labels = ['bottle-broken_large', 'bottle-broken_small', 'bottle-contamination', 'bottle-good', 'cable-bent_wire', 'cable-cable_swap', 'cable-combined', 'cable-cut_inner_insulation', 'cable-cut_outer_insulation', 'cable-good', 'cable-missing_cable', 'cable-missing_wire', 'cable-poke_insulation', 'capsule-crack', 'capsule-faulty_imprint', 'capsule-good', 'capsule-poke', 'capsule-scratch', 'capsule-squeeze', 'carpet-color', 'carpet-cut', 'carpet-good', 'carpet-hole', 'carpet-metal_contamination', 'carpet-thread', 'grid-bent', 'grid-broken', 'grid-glue', 'grid-good', 'grid-metal_contamination', 'grid-thread', 'hazelnut-crack', 'hazelnut-cut', 'hazelnut-good', 'hazelnut-hole', 'hazelnut-print', 'leather-color', 'leather-cut', 'leather-fold', 'leather-glue', 'leather-good', 'leather-poke', 'metal_nut-bent', 'metal_nut-color', 'metal_nut-flip', 'metal_nut-good', 'metal_nut-scratch', 'pill-color', 'pill-combined', 'pill-contamination', 'pill-crack', 'pill-faulty_imprint', 'pill-good', 'pill-pill_type', 'pill-scratch', 'screw-good', 'screw-manipulated_front', 'screw-scratch_head', 'screw-scratch_neck', 'screw-thread_side', 'screw-thread_top', 'tile-crack', 'tile-glue_strip', 'tile-good', 'tile-gray_stroke', 'tile-oil', 'tile-rough', 'toothbrush-defective', 'toothbrush-good', 'transistor-bent_lead', 'transistor-cut_lead', 'transistor-damaged_case', 'transistor-good', 'transistor-misplaced', 'wood-color', 'wood-combined', 'wood-good', 'wood-hole', 'wood-liquid', 'wood-scratch', 'zipper-broken_teeth', 'zipper-combined', 'zipper-fabric_border', 'zipper-fabric_interior', 'zipper-good', 'zipper-rough', 'zipper-split_teeth', 'zipper-squeezed_teeth']
# original_labels = dict(zip(range(len(labels)),labels))
# sub['label'] = sub['label'].replace(original_labels)
# sub

In [26]:
# sub.to_csv('./data/submission.csv', index=False)

In [None]:
# # 정상 샘플 개수
# good_cnt = 0
# for i in range(len(sub)):
#     if sub['label'][i][-4:] == 'good':
#         good_cnt += 1
# print(good_cnt)

In [None]:
# Report the batch_size, epochs, img_size and patience the model was trained with
print('\n'.join(
    f'{field} = {getattr(args, field)}'
    for field in ('batch_size', 'epochs', 'img_size', 'patience')
))

In [None]:
# Report which encoder architecture the configuration selects
print('model =', args.encoder_name)