In [2]:
import cv2
import os
import timm
import numpy as np
import pandas as pd
import albumentations as A

from glob import glob
from tqdm import tqdm
from easydict import EasyDict
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import f1_score

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR

# custom dataset

In [8]:
class CustomDataset(Dataset):
    def __init__(self, img_list, label_list=None, transforms=None, mode="train") :
        self.img_list = img_list
        
        if mode == "train" : 
            self.label_list = self.label_encoder(label_list)
            
        self.transforms = transforms
        self.mode = mode
    def __len__(self):
        return len(self.img_list)
    
    def __getitem__(self, idx):
        img_path = self.img_list[idx]
        
        img = cv2.imread(img_path)
        
        try :
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        except :
            print(img_path)
            img = cv2.imread(img_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            print(img)
            print(img.shape)
            
        if self.transforms:            
            img = self.transforms(image=img)['image']
        
        if self.mode == "train" :
            label = self.label_list[idx]
            return img, torch.tensor(label)
        
        elif self.mode == "test" :
            return img
    
    def label_encoder(self, label_list) :
        label_enc = {k : i for i, k in enumerate(sorted(list(set(label_list))))}
#         display(label_enc)
        return [label_enc[label] for label in label_list]

#### test code

In [9]:
# df = pd.read_csv('../data/aug_train_df.csv')
# transforms = A.Compose([
#     A.Resize(224,224),
#     A.Normalize(),
#     A.Rotate(),
#     ToTensorV2()
# ])
# db = CustomDataset(list(df['file_name']), list(df['label']), transforms, mode="train")
# db_loader = DataLoader(db, batch_size=16, shuffle=True)
# for img, label in db_loader : 
#     print(img.shape)
#     print(label.shape)
#     print(label)
#     break

# Focal Loss

In [4]:
# class FocalLoss(nn.Module) :
#     def __init__(self, alpha=2, gamma=2, logits=False, reduction='none') :
#         super(FocalLoss, self).__init__()
#         self.alpha = alpha
#         self.gamma = gamma
#         self.logits = logits
#         self.reduction = reduction

#     def forward(self, inputs, targets) :
#         ce_loss = nn.CrossEntropyLoss(reduction=self.reduction)(inputs, targets)
#         pt = torch.exp(-ce_loss)
#         F_loss = self.alpha * (1-pt)**self.gamma * ce_loss

#         if self.reduction :
#             return torch.mean(F_loss)
#         else :
#             return F_loss

# model

In [5]:
class CNN(nn.Module) :
    def __init__(self, model_name, num_classes) :
        super(CNN, self).__init__()
        self.model = timm.create_model(model_name=model_name, num_classes=num_classes, pretrained=True)
    
    def forward(self, x) :
        output = self.model(x)
        return output

# Custom SwinTransformer

In [None]:
class BackBone(nn.Module) :
    def __init__(self, model_name, backbone_output) :
        super(BackBone, self).__init__()
        self.model = timm.create_model(model_name=model_name, num_classes=backbone_output, pretrained=True)
    
    def forward(self, x) :
        output = self.model(x)
        return output
    
class MLP(nn.Module) :
    def __init__(self, in_features, dropout_rate, num_state) :
        super(MLP, self).__init__()
        #forward_features 시 LayerNorm까지 통과한 결과임
        # 따라서 LayerNorm 와 AdaptiveAvgPool1d는 필요없음    

        self.linear_1 = nn.Linear(in_features, in_features//2, bias=True)
        self.gelu = nn.GELU()
        self.dropout = nn.Dropout(p=dropout_rate, inplace=False)
        self.linear_2 = nn.Linear(in_features//2, num_state, bias=True)
        
    def forward(self, x):
        x = self.linear_1(x)
        x = self.gelu(x)
        x = self.dropout(x)
        x = self.linear_2(x)
        return x
    
class CustomSwinTransformer(nn.Module) :
    def __init__(self, 
                 model_path, 
                 model_name, 
                 backbone_output, 
                 num_class, 
                 num_state,
                 dropout_rate=0.5) :
        super(CustomSwinTransformer, self).__init__()
        
        self.backbone = self.get_backbone(model_path,
                                         model_name,
                                         backbone_output)
        
        # num_state + 1을 해준 이유 = None Class를 추가할 예정이기 때문
        self.mlps = nn.ModuleList([MLP(in_features=1024, 
                         dropout_rate=dropout_rate, 
                         num_state = num_state[i] + 1) for i in range(num_class)])
        
    def forward(self, x) :
        preds = []
        feature_map = self.backbone.forward_features(x)
        for mlp in self.mlps :
            preds.append(mlp(feature_map))
        return preds
    
    def get_backbone(self, model_path, model_name, backbone_output) :
        checkpoint = torch.load(model_path)
        backbone = BackBone(model_name, backbone_output)
        backbone.load_state_dict(checkpoint["model_state_dict"])
        return backbone.model

#### test code

In [6]:
# model = CNN("efficientnetv2_s", 88)
# test_data = torch.randn(1, 3, 666,666)
# output = model(test_data)
# output = F.softmax(output)
# output

# Training

In [7]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

def cutmix(imgs, labels):
    lam = np.random.beta(1.0, 1.0)
    rand_index = torch.randperm(imgs.size()[0]).cuda()
    target_a = labels
    target_b = labels[rand_index]
    bbx1, bby1, bbx2, bby2 = rand_bbox(imgs.size(), lam)
    imgs[:, :, bbx1:bbx2, bby1:bby2] = imgs[rand_index, :, bbx1:bbx2, bby1:bby2]

    # adjust lambda to exactly match pixel ratio
    lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (imgs.size()[-1] * imgs.size()[-2]))

    return imgs, target_a, target_b, lam

def mixup(imgs, labels, alpha=1.0) :
    lam = np.random.beta(alpha, alpha)
    rand_index = torch.randperm(imgs.size()[0]).cuda()
    mixed_imgs = lam * imgs + (1 - lam) * imgs[rand_index, :]
    target_a, target_b = labels, labels[rand_index]
    
    return mixed_imgs, lam, target_a, target_b

def accuracy_function(real, pred):    
    real = real.cpu()
    pred = torch.argmax(pred, dim=1).cpu()
    score = f1_score(real, pred, average='macro')
    return score

def training(model, train_loader, valid_loader, opt) :
        
    optimizer = torch.optim.AdamW(model.parameters(), lr= opt.learning_rate,
                                 weight_decay=opt.weight_decay)
    scheduler = CosineAnnealingLR(optimizer, 
                                  T_max=opt.cosine_lr_Tmax, 
                                  eta_min=opt.cosine_lr_eta_min)
    
    criterion = nn.CrossEntropyLoss()
#     criterion = FocalLoss()
    
    if opt.resume : 
        model_checkpoint = torch.load(opt.model_path)
        model.load_state_dict(model_checkpoint["model_state_dict"])
        optimizer.load_state_dict(model_checkpoint["optimizer_state_dict"])
        scheduler = CosineAnnealingLR(optimizer, 
                                  T_max=opt.cosine_lr_Tmax, 
                                  eta_min=opt.cosine_lr_eta_min)
        
        opt.start_epoch = model_checkpoint["epoch"]
    else : 
        opt.start_epoch = 0
        
    early_stop_step = 0
    best_loss = 10
    for E in range(opt.start_epoch + 1, opt.epochs + 1) :
            # train
            tqdm_train = tqdm(train_loader)
            train_loss, train_f1 = 0, 0
            for batch, (img, label) in enumerate(tqdm_train, start=1) :
                model.train()
                optimizer.zero_grad()
                
                img = img.to(opt.device)
                label = label.to(opt.device)
                
                if opt.cutmix :
                    imgs, target_a, target_b, lam = cutmix(img, label)
                    output = model(imgs)
                    loss = criterion(output, target_a) * lam + criterion(output, target_b) * (1. - lam)
                
                elif opt.mixup :
                    mixed_imgs, lam, target_a, target_b = mixup(img, label)
                    output = model(mixed_imgs)
                    loss = criterion(output, target_a) * lam + criterion(output, target_b) * (1. - lam)                    
                
                else:
                    output = model(img)
                    loss = criterion(output, label)
                
                loss.backward()
                optimizer.step()
                
                score = accuracy_function(label, output)
                train_loss += loss.item()
                train_f1 += score
                tqdm_train.set_postfix({"Epoch" : E,
                                "Mean train loss" : "{:06f}".format(train_loss / (batch)),
                                "Mean train f1" : "{:06f}".format(train_f1 / (batch))
                               })
            # validation
            tqdm_valid = tqdm(valid_loader)
            valid_loss, valid_f1 = 0, 0
            for batch, (img, label) in enumerate(tqdm_valid, start=1) :
                model.eval()
                
                img = img.to(opt.device)
                label = label.to(opt.device)
                
                with torch.no_grad() :
                    output = model(img)
                    loss = criterion(output, label)
                    
                score = accuracy_function(label, output)
                valid_loss += loss.item()
                valid_f1 += score
                tqdm_valid.set_postfix({
                    "Mean valid loss": "{:06f}".format(valid_loss / (batch)),
                    "Mean valid f1": "{:06f}".format(valid_f1 / (batch))
                    })
            
            # scheduler
            scheduler.step()
            
            mean_valid_loss = valid_loss / batch
            if mean_valid_loss < best_loss :
                early_stop_step = 0
                best_loss = mean_valid_loss
                torch.save({
                    "epoch" : E,
                    "model_state_dict" : model.state_dict(),
                    "optimizer_state_dict" : optimizer.state_dict()
                }, 
                           os.path.join(opt.save_path, f'{E}E_{mean_valid_loss:0.4f}_{opt.model_name}.pt'))
            
            elif mean_valid_loss > best_loss : 
                early_stop_step += 1
                print(f"Early Stopping Step : [{early_stop_step} / {opt.early_stopping}]")
            
            if early_stop_step == opt.early_stopping :
                print("=== Early Stop ===")
                break

# Run - efficientNetV2_s

In [15]:
# opt = {
#     "df_path" : "../data/aug_train_df.csv",
#     "save_path" : "../model",
#     "model_name" : "efficientnetv2_s",
#     "num_classes" : 88,
#     "resize" : 300,
#     "device" : "cuda:0",
#     "early_stopping" : 5,
#     "epochs" : 100,
#     "batch_size" : 32,
#     "learning_rate" : 1e-4,
#     "cosine_lr_Tmax" : 20,
#     "cosine_lr_eta_min" : 1e-5,
#     "cutmix" : False,
#     "resume" : False,
#     "model_path" : "../model/6E_0.1491_coat_mini.pt"
# }
# opt = EasyDict(opt)
# os.makedirs(opt.save_path, exist_ok=True)

# t_transforms = A.Compose([
#     A.Normalize(),
#     A.Resize(opt.resize, opt.resize),
#     A.Blur(p=0.7),
#     A.Rotate(limit=(-270, 270), p=1),
#     A.OneOf([
#         A.HorizontalFlip(),
#         A.VerticalFlip()
#     ], p=1),
#     ToTensorV2()
# ])

# v_transforms = A.Compose([
#     A.Normalize(),
#     A.Resize(opt.resize, opt.resize),
#     ToTensorV2()
# ])

# train_df = pd.read_csv(opt.df_path)
# t_imgs, v_imgs, t_labels, v_labels = train_test_split(
#     list(train_df['file_name']),
#     list(train_df['label']),
#     train_size=0.8,
#     shuffle=True,
#     random_state=51,
#     stratify=list(train_df['label']))


# train_data = CustomDataset(t_imgs, t_labels, t_transforms)
# valid_data = CustomDataset(v_imgs, v_labels, v_transforms)

# train_loader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True)
# valid_loader = DataLoader(valid_data, batch_size=opt.batch_size, shuffle=True)

# model = CNN(opt.model_name, opt.num_classes).to(opt.device)
# training(model, train_loader, valid_loader, opt)

# Run - coat_mini

In [16]:
# opt = {
#     "df_path" : "../data/aug_train_df.csv",
#     "save_path" : "../model/coat_mini",
#     "model_name" : "coat_mini",
#     "num_classes" : 88,
#     "resize" : 224,
#     "device" : "cuda:0",
#     "early_stopping" : 5,
#     "epochs" : 25,
#     "batch_size" : 32,
#     "learning_rate" : 1e-4,
#     "cosine_lr_Tmax" : 20,
#     "cosine_lr_eta_min" : 1e-5,
#     "cutmix" : False,
#     "resume" : True,
#     "model_path" : "../model/coat_mini/9E_0.0738_coat_mini.pt"
# }
# opt = EasyDict(opt)
# os.makedirs(opt.save_path, exist_ok=True)

# t_transforms = A.Compose([
#     A.Normalize(),
#     A.Resize(opt.resize, opt.resize),
#     A.Blur(p=0.7),
#     A.Rotate(limit=(-270, 270), p=1),
#     A.OneOf([
#         A.HorizontalFlip(),
#         A.VerticalFlip()
#     ], p=1),
#     ToTensorV2()
# ])

# v_transforms = A.Compose([
#     A.Normalize(),
#     A.Resize(opt.resize, opt.resize),
#     ToTensorV2()
# ])

# train_df = pd.read_csv(opt.df_path)
# t_imgs, v_imgs, t_labels, v_labels = train_test_split(
#     list(train_df['file_name']),
#     list(train_df['label']),
#     train_size=0.8,
#     shuffle=True,
#     random_state=51,
#     stratify=list(train_df['label']))


# train_data = CustomDataset(t_imgs, t_labels, t_transforms)
# valid_data = CustomDataset(v_imgs, v_labels, v_transforms)

# train_loader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True)
# valid_loader = DataLoader(valid_data, batch_size=opt.batch_size, shuffle=True)

# model = CNN(opt.model_name, opt.num_classes).to(opt.device)
    
# training(model, train_loader, valid_loader, opt)

In [17]:
# import gc

# model.cpu()
# del model
# del train_data
# del valid_data
# del train_loader
# del valid_loader

# gc.collect()
# torch.cuda.empty_cache()

# Run - coat_mini w/ aug_v2

In [11]:
# opt = {
#     "df_path" : "../data/aug_v2_train_df.csv",
#     "save_path" : "../model/coat_mini_aug_v2_CEL",
#     "model_name" : "coat_mini",
#     "num_classes" : 88,
#     "resize" : 224,
#     "device" : "cuda:0",
#     "early_stopping" : 5,
#     "epochs" : 15,
#     "batch_size" : 32,
#     "learning_rate" : 1e-4,
#     "cosine_lr_Tmax" : 20,
#     "cosine_lr_eta_min" : 1e-5,
#     "cutmix" : False,
#     "resume" : True,
#     "model_path" : "../model/coat_mini_aug_v2_CEL/5E_0.0848_coat_mini.pt"
# }
# opt = EasyDict(opt)
# os.makedirs(opt.save_path, exist_ok=True)

# t_transforms = A.Compose([
#     A.Normalize(),
#     A.Resize(opt.resize, opt.resize),
#     A.Blur(p=0.7),
#     A.Rotate(limit=(-270, 270), p=1),
#     A.OneOf([
#         A.HorizontalFlip(),
#         A.VerticalFlip()
#     ], p=1),
#     ToTensorV2()
# ])

# v_transforms = A.Compose([
#     A.Normalize(),
#     A.Resize(opt.resize, opt.resize),
#     ToTensorV2()
# ])

# train_df = pd.read_csv(opt.df_path)
# t_imgs, v_imgs, t_labels, v_labels = train_test_split(
#     list(train_df['file_name']),
#     list(train_df['label']),
#     train_size=0.8,
#     shuffle=True,
#     random_state=51,
#     stratify=list(train_df['label']))


# train_data = CustomDataset(t_imgs, t_labels, t_transforms)
# valid_data = CustomDataset(v_imgs, v_labels, v_transforms)

# train_loader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True)
# valid_loader = DataLoader(valid_data, batch_size=opt.batch_size, shuffle=True)

# model = CNN(opt.model_name, opt.num_classes).to(opt.device)
    
# training(model, train_loader, valid_loader, opt)

# print("==== Complete ====")

100%|█████████████████████| 441/441 [11:52<00:00,  1.61s/it, Epoch=6, Mean train loss=0.094364, Mean train f1=0.948913]
100%|██████████████████████████████| 111/111 [01:57<00:00,  1.06s/it, Mean valid loss=0.062684, Mean valid f1=0.964179]
100%|█████████████████████| 441/441 [11:41<00:00,  1.59s/it, Epoch=7, Mean train loss=0.086221, Mean train f1=0.952773]
100%|██████████████████████████████| 111/111 [01:58<00:00,  1.07s/it, Mean valid loss=0.080635, Mean valid f1=0.954950]


Early Stopping Step : [1 / 5]


100%|█████████████████████| 441/441 [11:52<00:00,  1.62s/it, Epoch=8, Mean train loss=0.071946, Mean train f1=0.962618]
100%|██████████████████████████████| 111/111 [02:00<00:00,  1.09s/it, Mean valid loss=0.074153, Mean valid f1=0.958979]


Early Stopping Step : [2 / 5]


100%|█████████████████████| 441/441 [11:52<00:00,  1.61s/it, Epoch=9, Mean train loss=0.068379, Mean train f1=0.961057]
100%|██████████████████████████████| 111/111 [02:00<00:00,  1.08s/it, Mean valid loss=0.053328, Mean valid f1=0.971511]
100%|████████████████████| 441/441 [11:54<00:00,  1.62s/it, Epoch=10, Mean train loss=0.070953, Mean train f1=0.963441]
 86%|██████████████████████████▊    | 96/111 [01:44<00:16,  1.09s/it, Mean valid loss=0.070124, Mean valid f1=0.969440]

../data/train\aug_v2\screw-thread_side\mixup_2_10019.png
[[[201 201 201]
  [203 203 203]
  [201 201 201]
  ...
  [210 210 210]
  [210 210 210]
  [209 209 209]]

 [[200 200 200]
  [202 202 202]
  [200 200 200]
  ...
  [210 210 210]
  [209 209 209]
  [208 208 208]]

 [[200 200 200]
  [201 201 201]
  [202 202 202]
  ...
  [209 209 209]
  [208 208 208]
  [207 207 207]]

 ...

 [[205 205 205]
  [206 206 206]
  [206 206 206]
  ...
  [199 199 199]
  [199 199 199]
  [199 199 199]]

 [[206 206 206]
  [207 207 207]
  [206 206 206]
  ...
  [200 200 200]
  [200 200 200]
  [199 199 199]]

 [[209 209 209]
  [208 208 208]
  [207 207 207]
  ...
  [199 199 199]
  [200 200 200]
  [200 200 200]]]
(1024, 1024, 3)


100%|██████████████████████████████| 111/111 [02:00<00:00,  1.08s/it, Mean valid loss=0.071714, Mean valid f1=0.968272]


Early Stopping Step : [1 / 5]


100%|████████████████████| 441/441 [11:52<00:00,  1.62s/it, Epoch=11, Mean train loss=0.054935, Mean train f1=0.968551]
100%|██████████████████████████████| 111/111 [01:59<00:00,  1.08s/it, Mean valid loss=0.046592, Mean valid f1=0.978910]
100%|████████████████████| 441/441 [11:52<00:00,  1.61s/it, Epoch=12, Mean train loss=0.046091, Mean train f1=0.975138]
100%|██████████████████████████████| 111/111 [01:57<00:00,  1.06s/it, Mean valid loss=0.042580, Mean valid f1=0.976637]
100%|████████████████████| 441/441 [12:03<00:00,  1.64s/it, Epoch=13, Mean train loss=0.036532, Mean train f1=0.981590]
100%|██████████████████████████████| 111/111 [02:04<00:00,  1.12s/it, Mean valid loss=0.037757, Mean valid f1=0.983826]
100%|████████████████████| 441/441 [12:04<00:00,  1.64s/it, Epoch=14, Mean train loss=0.029939, Mean train f1=0.985718]
100%|██████████████████████████████| 111/111 [02:00<00:00,  1.09s/it, Mean valid loss=0.052639, Mean valid f1=0.973456]


Early Stopping Step : [1 / 5]


100%|████████████████████| 441/441 [11:55<00:00,  1.62s/it, Epoch=15, Mean train loss=0.030142, Mean train f1=0.982964]
100%|██████████████████████████████| 111/111 [02:00<00:00,  1.08s/it, Mean valid loss=0.024858, Mean valid f1=0.985774]


# Run - coat_mini w/ aug_v3

In [8]:
# opt = {
#     "df_path" : "../data/aug_v3_train_df.csv",
#     "save_path" : "../model/coat_mini_aug_v3_CEL",
#     "model_name" : "coat_mini",
#     "num_classes" : 88,
#     "resize" : 224,
#     "device" : "cuda:0",
#     "early_stopping" : 5,
#     "epochs" : 20,
#     "batch_size" : 32,
#     "learning_rate" : 1e-4,
#     "cosine_lr_Tmax" : 20,
#     "cosine_lr_eta_min" : 1e-5,
#     "cutmix" : False,
#     "resume" : False,
#     "model_path" : "../model/coat_mini_aug_v2_CEL/5E_0.0848_coat_mini.pt"
# }
# opt = EasyDict(opt)
# os.makedirs(opt.save_path, exist_ok=True)

# t_transforms = A.Compose([
#     A.Normalize(),
#     A.Resize(opt.resize, opt.resize),
# #     A.Blur(p=0.7),
#     A.Rotate(limit=(45), p=1),
#     A.OneOf([
#         A.HorizontalFlip(),
#         A.VerticalFlip()
#     ], p=1),
#     ToTensorV2()
# ])

# v_transforms = A.Compose([
#     A.Normalize(),
#     A.Resize(opt.resize, opt.resize),
#     ToTensorV2()
# ])

# train_df = pd.read_csv(opt.df_path)
# t_imgs, v_imgs, t_labels, v_labels = train_test_split(
#     list(train_df['file_name']),
#     list(train_df['label']),
#     train_size=0.8,
#     shuffle=True,
#     random_state=51,
#     stratify=list(train_df['label']))


# train_data = CustomDataset(t_imgs, t_labels, t_transforms)
# valid_data = CustomDataset(v_imgs, v_labels, v_transforms)

# train_loader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True)
# valid_loader = DataLoader(valid_data, batch_size=opt.batch_size, shuffle=True)

# model = CNN(opt.model_name, opt.num_classes).to(opt.device)
    
# training(model, train_loader, valid_loader, opt)

# print("==== Complete ====")

100%|█████████████████████| 512/512 [14:15<00:00,  1.67s/it, Epoch=1, Mean train loss=1.133451, Mean train f1=0.570951]
100%|██████████████████████████████| 128/128 [02:30<00:00,  1.17s/it, Mean valid loss=0.338896, Mean valid f1=0.846434]
100%|█████████████████████| 512/512 [13:46<00:00,  1.61s/it, Epoch=2, Mean train loss=0.189016, Mean train f1=0.912570]
100%|██████████████████████████████| 128/128 [02:18<00:00,  1.08s/it, Mean valid loss=0.251873, Mean valid f1=0.879343]
100%|█████████████████████| 512/512 [13:40<00:00,  1.60s/it, Epoch=3, Mean train loss=0.095582, Mean train f1=0.951464]
100%|██████████████████████████████| 128/128 [02:18<00:00,  1.08s/it, Mean valid loss=0.126928, Mean valid f1=0.928785]
100%|█████████████████████| 512/512 [13:38<00:00,  1.60s/it, Epoch=4, Mean train loss=0.058838, Mean train f1=0.971937]
100%|██████████████████████████████| 128/128 [02:19<00:00,  1.09s/it, Mean valid loss=0.168830, Mean valid f1=0.912340]


Early Stopping Step : [1 / 5]


100%|█████████████████████| 512/512 [13:37<00:00,  1.60s/it, Epoch=5, Mean train loss=0.042170, Mean train f1=0.978022]
100%|██████████████████████████████| 128/128 [02:18<00:00,  1.08s/it, Mean valid loss=0.091073, Mean valid f1=0.955261]
100%|█████████████████████| 512/512 [13:37<00:00,  1.60s/it, Epoch=6, Mean train loss=0.040044, Mean train f1=0.980119]
100%|██████████████████████████████| 128/128 [02:18<00:00,  1.08s/it, Mean valid loss=0.056357, Mean valid f1=0.967770]
100%|█████████████████████| 512/512 [13:39<00:00,  1.60s/it, Epoch=7, Mean train loss=0.025724, Mean train f1=0.987839]
100%|██████████████████████████████| 128/128 [02:18<00:00,  1.08s/it, Mean valid loss=0.061417, Mean valid f1=0.966731]


Early Stopping Step : [1 / 5]


100%|█████████████████████| 512/512 [13:38<00:00,  1.60s/it, Epoch=8, Mean train loss=0.026787, Mean train f1=0.987034]
100%|██████████████████████████████| 128/128 [02:18<00:00,  1.08s/it, Mean valid loss=0.065773, Mean valid f1=0.960392]


Early Stopping Step : [2 / 5]


100%|█████████████████████| 512/512 [13:37<00:00,  1.60s/it, Epoch=9, Mean train loss=0.026573, Mean train f1=0.987955]
100%|██████████████████████████████| 128/128 [02:18<00:00,  1.08s/it, Mean valid loss=0.063512, Mean valid f1=0.968032]


Early Stopping Step : [3 / 5]


100%|████████████████████| 512/512 [13:37<00:00,  1.60s/it, Epoch=10, Mean train loss=0.018340, Mean train f1=0.991154]
100%|██████████████████████████████| 128/128 [02:18<00:00,  1.08s/it, Mean valid loss=0.079449, Mean valid f1=0.955973]


Early Stopping Step : [4 / 5]


100%|████████████████████| 512/512 [13:37<00:00,  1.60s/it, Epoch=11, Mean train loss=0.008917, Mean train f1=0.995069]
100%|██████████████████████████████| 128/128 [02:18<00:00,  1.08s/it, Mean valid loss=0.044160, Mean valid f1=0.972664]
100%|████████████████████| 512/512 [13:37<00:00,  1.60s/it, Epoch=12, Mean train loss=0.008879, Mean train f1=0.996316]
 74%|███████████████████████        | 95/128 [01:42<00:35,  1.09s/it, Mean valid loss=0.037978, Mean valid f1=0.978461]

../data/train\aug_v3\cable-poke_insulation\mixup_1_11637.png
[[[ 82 112 139]
  [ 83 113 144]
  [ 84 113 147]
  ...
  [ 75 103 127]
  [ 76 104 129]
  [ 78 105 132]]

 [[ 83 113 141]
  [ 83 113 140]
  [ 84 112 143]
  ...
  [ 78 102 128]
  [ 78 104 130]
  [ 78 105 132]]

 [[ 84 114 139]
  [ 85 114 139]
  [ 84 113 140]
  ...
  [ 78 103 132]
  [ 78 104 131]
  [ 79 106 126]]

 ...

 [[ 62  80 101]
  [ 63  83 103]
  [ 66  85 105]
  ...
  [ 97 139 168]
  [ 95 137 172]
  [ 96 137 170]]

 [[ 64  83 102]
  [ 64  84 102]
  [ 65  86 107]
  ...
  [ 98 138 167]
  [ 97 138 172]
  [ 96 137 167]]

 [[ 66  85 105]
  [ 66  86 105]
  [ 67  87 111]
  ...
  [ 97 137 167]
  [ 96 137 176]
  [ 95 134 166]]]
(1024, 1024, 3)


100%|██████████████████████████████| 128/128 [02:18<00:00,  1.08s/it, Mean valid loss=0.039812, Mean valid f1=0.977612]
100%|████████████████████| 512/512 [13:38<00:00,  1.60s/it, Epoch=13, Mean train loss=0.006019, Mean train f1=0.997383]
100%|██████████████████████████████| 128/128 [02:18<00:00,  1.08s/it, Mean valid loss=0.038480, Mean valid f1=0.979249]
100%|████████████████████| 512/512 [13:37<00:00,  1.60s/it, Epoch=14, Mean train loss=0.009647, Mean train f1=0.996660]
100%|██████████████████████████████| 128/128 [02:18<00:00,  1.08s/it, Mean valid loss=0.050805, Mean valid f1=0.970108]


Early Stopping Step : [1 / 5]


100%|████████████████████| 512/512 [13:37<00:00,  1.60s/it, Epoch=15, Mean train loss=0.003375, Mean train f1=0.998583]
100%|██████████████████████████████| 128/128 [02:18<00:00,  1.08s/it, Mean valid loss=0.042192, Mean valid f1=0.978982]


Early Stopping Step : [2 / 5]


100%|████████████████████| 512/512 [13:37<00:00,  1.60s/it, Epoch=16, Mean train loss=0.005947, Mean train f1=0.997608]
100%|██████████████████████████████| 128/128 [02:18<00:00,  1.08s/it, Mean valid loss=0.037625, Mean valid f1=0.977259]
100%|████████████████████| 512/512 [13:39<00:00,  1.60s/it, Epoch=17, Mean train loss=0.002849, Mean train f1=0.998541]
100%|██████████████████████████████| 128/128 [02:19<00:00,  1.09s/it, Mean valid loss=0.030221, Mean valid f1=0.981207]
100%|████████████████████| 512/512 [13:44<00:00,  1.61s/it, Epoch=18, Mean train loss=0.001050, Mean train f1=0.999720]
100%|██████████████████████████████| 128/128 [02:18<00:00,  1.08s/it, Mean valid loss=0.032288, Mean valid f1=0.981493]


Early Stopping Step : [1 / 5]


100%|████████████████████| 512/512 [13:38<00:00,  1.60s/it, Epoch=19, Mean train loss=0.001040, Mean train f1=0.999552]
100%|██████████████████████████████| 128/128 [02:18<00:00,  1.08s/it, Mean valid loss=0.041419, Mean valid f1=0.978189]


Early Stopping Step : [2 / 5]


100%|████████████████████| 512/512 [13:38<00:00,  1.60s/it, Epoch=20, Mean train loss=0.000972, Mean train f1=0.999907]
100%|██████████████████████████████| 128/128 [02:18<00:00,  1.08s/it, Mean valid loss=0.037915, Mean valid f1=0.981300]

Early Stopping Step : [3 / 5]
==== Complete ====





In [9]:
import gc

model.cpu()
del model
del train_data
del valid_data
del train_loader
del valid_loader

gc.collect()
torch.cuda.empty_cache()

# Run - SwinTransformer w/ aug_v5

In [8]:
opt = {
    "df_path" : "../data/aug_v5_train_df.csv",
    "save_path" : "../model/swin_aug_v5_mixup",
    "model_name" : "swin_base_patch4_window7_224_in22k",
    "num_classes" : 88,
    "resize" : 224,
    "device" : "cuda:0",
    "early_stopping" : 5,
    "epochs" : 30,
    "batch_size" : 32,
    "learning_rate" : 1e-4,
    "weight_decay" : 0.01,
    "cosine_lr_Tmax" : 20,
    "cosine_lr_eta_min" : 1e-5,
    "cutmix" : False,
    "mixup" : True,
    "resume" : False,
    "model_path" : "../model/swin_aug_v4_mixup/21E_0.0382_swin_base_patch4_window7_224_in22k.pt"
}
opt = EasyDict(opt)
os.makedirs(opt.save_path, exist_ok=True)

t_transforms = A.Compose([
    A.Normalize(),
    A.Resize(opt.resize, opt.resize),
    A.Blur(p=0.7),#blur_limit=(7, 7), p=0.7),
    A.Rotate(limit=(45), p=1),
    A.OneOf([
        A.HorizontalFlip(),
        A.VerticalFlip()
    ], p=1),
    ToTensorV2()
])

v_transforms = A.Compose([
    A.Normalize(),
    A.Resize(opt.resize, opt.resize),
    ToTensorV2()
])

train_df = pd.read_csv(opt.df_path)
t_imgs, v_imgs, t_labels, v_labels = train_test_split(
    list(train_df['file_name']),
    list(train_df['label']),
    train_size=0.8,
    shuffle=True,
    random_state=51,
    stratify=list(train_df['label']))


train_data = CustomDataset(t_imgs, t_labels, t_transforms)
valid_data = CustomDataset(v_imgs, v_labels, v_transforms)

train_loader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=opt.batch_size, shuffle=True)

model = CNN(opt.model_name, opt.num_classes).to(opt.device)
    
training(model, train_loader, valid_loader, opt)

print("==== Complete ====")

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
100%|███████████████████| 1018/1018 [23:28<00:00,  1.38s/it, Epoch=1, Mean train loss=1.751818, Mean train f1=0.255129]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.397987, Mean valid f1=0.809560]
 12%|██▍                 | 127/1018 [02:53<20:17,  1.37s/it, Epoch=2, Mean train loss=1.305180, Mean train f1=0.339653]

../data/train\aug_v5\bottle-contamination\aug_18_10759.png
[[[  0   0   0]
  [  0   0   0]
  [  0   0   0]
  ...
  [  0   0   0]
  [  0   0   0]
  [  0   0   0]]

 [[  0   0   0]
  [  0   0   0]
  [  0   0   0]
  ...
  [  0   0   0]
  [  0   0   0]
  [  0   0   0]]

 [[  0   0   0]
  [  0   0   0]
  [  0   0   0]
  ...
  [  0   0   0]
  [  0   0   0]
  [  0   0   0]]

 ...

 [[117  75  62]
  [124  77  63]
  [124  79  60]
  ...
  [  0   0   0]
  [  0   0   0]
  [  0   0   0]]

 [[113  76  62]
  [120  78  62]
  [119  80  61]
  ...
  [  0   0   0]
  [  0   0   0]
  [  0   0   0]]

 [[111  76  64]
  [114  78  63]
  [115  79  63]
  ...
  [  0   0   0]
  [  0   0   0]
  [  0   0   0]]]
(900, 900, 3)


100%|███████████████████| 1018/1018 [23:22<00:00,  1.38s/it, Epoch=2, Mean train loss=1.241543, Mean train f1=0.372965]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.253121, Mean valid f1=0.872212]
100%|███████████████████| 1018/1018 [23:24<00:00,  1.38s/it, Epoch=3, Mean train loss=1.118839, Mean train f1=0.398421]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.174458, Mean valid f1=0.913646]
100%|███████████████████| 1018/1018 [23:23<00:00,  1.38s/it, Epoch=4, Mean train loss=1.036602, Mean train f1=0.402771]
100%|██████████████████████████████| 255/255 [04:09<00:00,  1.02it/s, Mean valid loss=0.161981, Mean valid f1=0.924002]
100%|███████████████████| 1018/1018 [23:21<00:00,  1.38s/it, Epoch=5, Mean train loss=1.000440, Mean train f1=0.438458]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.129871, Mean valid f1=0.934118]
100%|███████████████████| 1018/1018 [23:

Early Stopping Step : [1 / 5]


100%|███████████████████| 1018/1018 [23:23<00:00,  1.38s/it, Epoch=9, Mean train loss=0.850858, Mean train f1=0.443841]
100%|██████████████████████████████| 255/255 [04:07<00:00,  1.03it/s, Mean valid loss=0.076973, Mean valid f1=0.974431]
100%|██████████████████| 1018/1018 [23:22<00:00,  1.38s/it, Epoch=10, Mean train loss=0.823434, Mean train f1=0.471229]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.084904, Mean valid f1=0.966505]


Early Stopping Step : [1 / 5]


100%|██████████████████| 1018/1018 [23:21<00:00,  1.38s/it, Epoch=11, Mean train loss=0.793115, Mean train f1=0.453100]
100%|██████████████████████████████| 255/255 [04:09<00:00,  1.02it/s, Mean valid loss=0.064208, Mean valid f1=0.975928]
100%|██████████████████| 1018/1018 [23:25<00:00,  1.38s/it, Epoch=12, Mean train loss=0.763741, Mean train f1=0.476994]
100%|██████████████████████████████| 255/255 [04:09<00:00,  1.02it/s, Mean valid loss=0.050924, Mean valid f1=0.979366]
100%|██████████████████| 1018/1018 [23:25<00:00,  1.38s/it, Epoch=13, Mean train loss=0.759044, Mean train f1=0.485229]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.050954, Mean valid f1=0.981786]


Early Stopping Step : [1 / 5]


100%|██████████████████| 1018/1018 [23:21<00:00,  1.38s/it, Epoch=14, Mean train loss=0.746793, Mean train f1=0.474233]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.041918, Mean valid f1=0.987184]
100%|██████████████████| 1018/1018 [23:22<00:00,  1.38s/it, Epoch=15, Mean train loss=0.734085, Mean train f1=0.486780]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.038461, Mean valid f1=0.987341]
100%|██████████████████| 1018/1018 [23:22<00:00,  1.38s/it, Epoch=16, Mean train loss=0.713862, Mean train f1=0.482083]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.030427, Mean valid f1=0.990257]
100%|██████████████████| 1018/1018 [23:22<00:00,  1.38s/it, Epoch=17, Mean train loss=0.689480, Mean train f1=0.472212]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.029990, Mean valid f1=0.990974]
100%|██████████████████| 1018/1018 [23:2

Early Stopping Step : [1 / 5]


100%|██████████████████| 1018/1018 [23:22<00:00,  1.38s/it, Epoch=19, Mean train loss=0.681280, Mean train f1=0.496608]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.030392, Mean valid f1=0.989260]


Early Stopping Step : [2 / 5]


100%|██████████████████| 1018/1018 [23:22<00:00,  1.38s/it, Epoch=20, Mean train loss=0.672969, Mean train f1=0.508400]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.029551, Mean valid f1=0.990935]
100%|██████████████████| 1018/1018 [23:26<00:00,  1.38s/it, Epoch=21, Mean train loss=0.677278, Mean train f1=0.489742]
100%|██████████████████████████████| 255/255 [04:09<00:00,  1.02it/s, Mean valid loss=0.027530, Mean valid f1=0.992056]
100%|██████████████████| 1018/1018 [23:25<00:00,  1.38s/it, Epoch=22, Mean train loss=0.669404, Mean train f1=0.475047]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.026936, Mean valid f1=0.992148]
100%|██████████████████| 1018/1018 [23:22<00:00,  1.38s/it, Epoch=23, Mean train loss=0.666457, Mean train f1=0.482464]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.029484, Mean valid f1=0.992262]


Early Stopping Step : [1 / 5]


100%|██████████████████| 1018/1018 [23:21<00:00,  1.38s/it, Epoch=24, Mean train loss=0.654196, Mean train f1=0.499701]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.030132, Mean valid f1=0.991238]


Early Stopping Step : [2 / 5]


100%|██████████████████| 1018/1018 [23:21<00:00,  1.38s/it, Epoch=25, Mean train loss=0.674930, Mean train f1=0.502389]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.029178, Mean valid f1=0.992022]


Early Stopping Step : [3 / 5]


100%|██████████████████| 1018/1018 [23:23<00:00,  1.38s/it, Epoch=26, Mean train loss=0.678675, Mean train f1=0.487835]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.027453, Mean valid f1=0.992319]


Early Stopping Step : [4 / 5]


100%|██████████████████| 1018/1018 [23:21<00:00,  1.38s/it, Epoch=27, Mean train loss=0.683802, Mean train f1=0.479744]
100%|██████████████████████████████| 255/255 [04:08<00:00,  1.03it/s, Mean valid loss=0.029503, Mean valid f1=0.991907]

Early Stopping Step : [5 / 5]
=== Early Stop ===
==== Complete ====



