In [1]:
import cv2
import os
import timm
import numpy as np
import pandas as pd
import albumentations as A

from glob import glob
from tqdm import tqdm
from easydict import EasyDict
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import f1_score

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR

# custom dataset

In [2]:
class CustomDataset(Dataset):
    def __init__(self, img_list, label_list=None, transforms=None, mode="train") :
        self.img_list = img_list
        
        if mode == "train" : 
            self.label_list = self.label_encoder(label_list)
            
        self.transforms = transforms
        self.mode = mode
    def __len__(self):
        return len(self.img_list)
    
    def __getitem__(self, idx):
        img_path = self.img_list[idx]
        print(img_path)
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        
        if self.transforms:            
            img = self.transforms(image=img)['image']
        
        if self.mode == "train" :
            label = self.label_list[idx]
            return img, torch.tensor(label)
        
        elif self.mode == "test" :
            return img
    
    def label_encoder(self, label_list) :
        label_enc = {k : i for i, k in enumerate(sorted(list(set(label_list))))}
        return [label_enc[label] for label in label_list]

#### test code

In [3]:
# df = pd.read_csv('../data/aug_train_df.csv')
# transforms = A.Compose([
#     A.Resize(224,224),
#     A.Normalize(),
#     A.Rotate(),
#     ToTensorV2()
# ])
# db = CustomDataset(list(df['file_name']), list(df['label']), transforms, mode="train")
# db_loader = DataLoader(db, batch_size=16, shuffle=True)
# for img, label in db_loader : 
#     print(img.shape)
#     print(label.shape)
#     print(label)
#     break

# Focal Loss

In [4]:
class FocalLoss(nn.Module) :
    def __init__(self, alpha=2, gamma=2, logits=False, reduction='none') :
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.logits = logits
        self.reduction = reduction

    def forward(self, inputs, targets) :
        ce_loss = nn.CrossEntropyLoss(reduction=self.reduction)(inputs, targets)
        pt = torch.exp(-ce_loss)
        F_loss = self.alpha * (1-pt)**self.gamma * ce_loss

        if self.reduction :
            return torch.mean(F_loss)
        else :
            return F_loss

# model

In [5]:
class CNN(nn.Module) :
    def __init__(self, model_name, num_classes) :
        super(CNN, self).__init__()
        self.model = timm.create_model(model_name=model_name, num_classes=num_classes, pretrained=True)
    
    def forward(self, x) :
        output = self.model(x)
        return output

#### test code

In [6]:
# model = CNN("efficientnetv2_s", 88)
# test_data = torch.randn(1, 3, 666,666)
# output = model(test_data)
# output = F.softmax(output)
# output

# Training

In [7]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

def cutmix(imgs, labels):
    lam = np.random.beta(1.0, 1.0)
    rand_index = torch.randperm(imgs.size()[0])
    target_a = labels
    target_b = labels[rand_index]
    bbx1, bby1, bbx2, bby2 = rand_bbox(imgs.size(), lam)
    imgs[:, :, bbx1:bbx2, bby1:bby2] = imgs[rand_index, :, bbx1:bbx2, bby1:bby2]

    # adjust lambda to exactly match pixel ratio
    lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (imgs.size()[-1] * imgs.size()[-2]))

    return imgs, target_a, target_b, lam

def accuracy_function(real, pred):    
    real = real.cpu()
    pred = torch.argmax(pred, dim=1).cpu()
    score = f1_score(real, pred, average='macro')
    return score

def training(model, train_loader, valid_loader, opt) :
        
    optimizer = torch.optim.AdamW(model.parameters(), lr= opt.learning_rate)
    scheduler = CosineAnnealingLR(optimizer, 
                                  T_max=opt.cosine_lr_Tmax, 
                                  eta_min=opt.cosine_lr_eta_min)
    
    #criterion = nn.CrossEntropyLoss()
    criterion = FocalLoss()
    
    if opt.resume : 
        model_checkpoint = torch.load(opt.model_path)
        model.load_state_dict(model_checkpoint["model_state_dict"])
        optimizer.load_state_dict(model_checkpoint["optimizer_state_dict"])
        
    early_stop_step = 0
    best_loss = 10
    for E in range(1, opt.epochs + 1) :
            # train
            tqdm_train = tqdm(train_loader)
            train_loss, train_f1 = 0, 0
            for batch, (img, label) in enumerate(tqdm_train, start=1) :
                model.train()
                optimizer.zero_grad()
                
                img = img.to(opt.device)
                label = label.to(opt.device)
                
                if opt.cutmix :
                    imgs, target_a, target_b, lam = cutmix(img, label)
                    output = model(imgs)
                    loss = criterion(output, target_a) * lam + criterion(output, target_b) * (1. - lam)
                else:
                    output = model(img)
                    loss = criterion(output, label)
                
                loss.backward()
                optimizer.step()
                
                score = accuracy_function(label, output)
                train_loss += loss.item()
                train_f1 += score
                tqdm_train.set_postfix({"Epoch" : E,
                                "Mean train loss" : "{:06f}".format(train_loss / (batch)),
                                "Mean train f1" : "{:06f}".format(train_f1 / (batch))
                               })
            # validation
            tqdm_valid = tqdm(valid_loader)
            valid_loss, valid_f1 = 0, 0
            for batch, (img, label) in enumerate(tqdm_valid, start=1) :
                model.eval()
                
                img = img.to(opt.device)
                label = label.to(opt.device)
                
                with torch.no_grad() :
                    output = model(img)
                    loss = criterion(output, label)
                    
                score = accuracy_function(label, output)
                valid_loss += loss.item()
                valid_f1 += score
                tqdm_valid.set_postfix({
                    "Mean valid loss": "{:06f}".format(valid_loss / (batch)),
                    "Mean valid f1": "{:06f}".format(valid_f1 / (batch))
                    })
            
            # scheduler
            scheduler.step()
            
            mean_valid_loss = valid_loss / batch
            if mean_valid_loss < best_loss :
                early_stop_step = 0
                best_loss = mean_valid_loss
                torch.save({
                    "epoch" : E,
                    "model_state_dict" : model.state_dict(),
                    "optimizer_state_dict" : optimizer.state_dict()
                }, 
                           os.path.join(opt.save_path, f'{E}E_{mean_valid_loss:0.4f}_{opt.model_name}.pt'))
            
            elif mean_valid_loss > best_loss : 
                early_stop_step += 1
                print(f"Early Stopping Step : [{early_stop_step} / {opt.early_stopping}]")
            
            if early_stop_step == opt.early_stopping :
                print("=== Early Stop ===")
                break

# Run - efficientNetV2_s

In [8]:
# opt = {
#     "df_path" : "../data/aug_train_df.csv",
#     "save_path" : "../model",
#     "model_name" : "efficientnetv2_s",
#     "num_classes" : 88,
#     "resize" : 300,
#     "device" : "cuda:0",
#     "early_stopping" : 5,
#     "epochs" : 100,
#     "batch_size" : 32,
#     "learning_rate" : 1e-4,
#     "cosine_lr_Tmax" : 20,
#     "cosine_lr_eta_min" : 1e-5,
#     "cutmix" : False,
#     "resume" : False,
#     "model_path" : "../model/6E_0.1491_coat_mini.pt"
# }
# opt = EasyDict(opt)
# os.makedirs(opt.save_path, exist_ok=True)

# t_transforms = A.Compose([
#     A.Normalize(),
#     A.Resize(opt.resize, opt.resize),
#     A.Blur(p=0.7),
#     A.Rotate(limit=(-270, 270), p=1),
#     A.OneOf([
#         A.HorizontalFlip(),
#         A.VerticalFlip()
#     ], p=1),
#     ToTensorV2()
# ])

# v_transforms = A.Compose([
#     A.Normalize(),
#     A.Resize(opt.resize, opt.resize),
#     ToTensorV2()
# ])

# train_df = pd.read_csv(opt.df_path)
# t_imgs, v_imgs, t_labels, v_labels = train_test_split(
#     list(train_df['file_name']),
#     list(train_df['label']),
#     train_size=0.8,
#     shuffle=True,
#     random_state=51,
#     stratify=list(train_df['label']))


# train_data = CustomDataset(t_imgs, t_labels, t_transforms)
# valid_data = CustomDataset(v_imgs, v_labels, v_transforms)

# train_loader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True)
# valid_loader = DataLoader(valid_data, batch_size=opt.batch_size, shuffle=True)

# model = CNN(opt.model_name, opt.num_classes).to(opt.device)
# training(model, train_loader, valid_loader, opt)

# Run - coat_mini

In [9]:
opt = {
    "df_path" : "../data/aug_train_df.csv",
    "save_path" : "../model/coat_mini",
    "model_name" : "coat_mini",
    "num_classes" : 88,
    "resize" : 224,
    "device" : "cuda:0",
    "early_stopping" : 5,
    "epochs" : 25,
    "batch_size" : 32,
    "learning_rate" : 1e-4,
    "cosine_lr_Tmax" : 20,
    "cosine_lr_eta_min" : 1e-5,
    "cutmix" : False,
    "resume" : True,
    "model_path" : "../model/coat_mini/3E_0.4225_coat_mini.pt"
}
opt = EasyDict(opt)
os.makedirs(opt.save_path, exist_ok=True)

t_transforms = A.Compose([
    A.Normalize(),
    A.Resize(opt.resize, opt.resize),
    A.Blur(p=0.7),
    A.Rotate(limit=(-270, 270), p=1),
    A.OneOf([
        A.HorizontalFlip(),
        A.VerticalFlip()
    ], p=1),
    ToTensorV2()
])

v_transforms = A.Compose([
    A.Normalize(),
    A.Resize(opt.resize, opt.resize),
    ToTensorV2()
])

train_df = pd.read_csv(opt.df_path)
t_imgs, v_imgs, t_labels, v_labels = train_test_split(
    list(train_df['file_name']),
    list(train_df['label']),
    train_size=0.8,
    shuffle=True,
    random_state=51,
    stratify=list(train_df['label']))


train_data = CustomDataset(t_imgs, t_labels, t_transforms)
valid_data = CustomDataset(v_imgs, v_labels, v_transforms)

train_loader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=opt.batch_size, shuffle=True)

model = CNN(opt.model_name, opt.num_classes).to(opt.device)
    
training(model, train_loader, valid_loader, opt)

  0%|                                                                                          | 0/205 [00:00<?, ?it/s]

../data/train\aug\pill-combined\aug_3_13088.png
../data/train\aug\transistor-cut_lead\aug_1_13015.png
../data/train\11560.png
../data/train\aug\hazelnut-crack\aug_0_13704.png
../data/train\aug\leather-glue\aug_4_11842.png
../data/train\aug\pill-pill_type\aug_2_10013.png
../data/train\14029.png
../data/train\aug\carpet-thread\aug_2_10956.png
../data/train\10118.png
../data/train\aug\screw-scratch_head\aug_3_13369.png
../data/train\aug\zipper-fabric_interior\aug_2_11455.png
../data/train\11429.png
../data/train\aug\tile-glue_strip\aug_4_13799.png
../data/train\aug\metal_nut-scratch\aug_0_10016.png
../data/train\10312.png
../data/train\13295.png
../data/train\aug\wood-liquid\aug_1_12610.png
../data/train\11254.png
../data/train\11108.png
../data/train\aug\leather-color\aug_3_11900.png
../data/train\aug\capsule-poke\aug_3_13009.png
../data/train\12468.png
../data/train\aug\capsule-squeeze\mixup_12311.png
../data/train\10375.png
../data/train\12738.png
../data/train\aug\toothbrush-defective

  0%|                       | 1/205 [00:03<12:13,  3.60s/it, Epoch=1, Mean train loss=0.148526, Mean train f1=0.866667]

../data/train\aug\zipper-fabric_interior\aug_3_14205.png
../data/train\12856.png
../data/train\13028.png
../data/train\11379.png
../data/train\10261.png
../data/train\10510.png
../data/train\aug\grid-broken\aug_0_10292.png
../data/train\13764.png
../data/train\10527.png
../data/train\aug\carpet-hole\mixup_10307.png
../data/train\aug\wood-scratch\aug_3_10635.png
../data/train\aug\screw-scratch_neck\aug_3_13182.png
../data/train\13376.png
../data/train\aug\metal_nut-bent\mixup_11439.png
../data/train\12300.png
../data/train\11677.png
../data/train\12659.png
../data/train\10954.png
../data/train\13650.png
../data/train\aug\screw-scratch_neck\aug_2_11157.png
../data/train\12081.png
../data/train\aug\zipper-fabric_border\aug_1_14211.png
../data/train\12338.png
../data/train\12357.png
../data/train\aug\leather-poke\aug_1_12909.png
../data/train\aug\leather-glue\aug_2_11895.png
../data/train\aug\transistor-misplaced\aug_3_14203.png
../data/train\11316.png
../data/train\12099.png
../data/train

  0%|                       | 1/205 [00:05<17:57,  5.28s/it, Epoch=1, Mean train loss=0.148526, Mean train f1=0.866667]


KeyboardInterrupt: 

In [None]:
import gc

model.cpu()
del model
del train_data
del valid_data
del train_loader
del valid_loader

gc.collect()
torch.cuda.empty_cache()

# Run - coat_mini w/ aug_v2

In [None]:
opt = {
    "df_path" : "../data/aug_v2_train_df.csv",
    "save_path" : "../model/coat_mini_aug_v2",
    "model_name" : "coat_mini",
    "num_classes" : 88,
    "resize" : 224,
    "device" : "cuda:0",
    "early_stopping" : 5,
    "epochs" : 25,
    "batch_size" : 32,
    "learning_rate" : 1e-4,
    "cosine_lr_Tmax" : 20,
    "cosine_lr_eta_min" : 1e-5,
    "cutmix" : False,
    "resume" : False,
    "model_path" : "../model/coat_mini/3E_0.4225_coat_mini.pt"
}
opt = EasyDict(opt)
os.makedirs(opt.save_path, exist_ok=True)

t_transforms = A.Compose([
    A.Normalize(),
    A.Resize(opt.resize, opt.resize),
    A.Blur(p=0.7),
    A.Rotate(limit=(-270, 270), p=1),
    A.OneOf([
        A.HorizontalFlip(),
        A.VerticalFlip()
    ], p=1),
    ToTensorV2()
])

v_transforms = A.Compose([
    A.Normalize(),
    A.Resize(opt.resize, opt.resize),
    ToTensorV2()
])

train_df = pd.read_csv(opt.df_path)
t_imgs, v_imgs, t_labels, v_labels = train_test_split(
    list(train_df['file_name']),
    list(train_df['label']),
    train_size=0.8,
    shuffle=True,
    random_state=51,
    stratify=list(train_df['label']))


train_data = CustomDataset(t_imgs, t_labels, t_transforms)
valid_data = CustomDataset(v_imgs, v_labels, v_transforms)

train_loader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=opt.batch_size, shuffle=True)

model = CNN(opt.model_name, opt.num_classes).to(opt.device)
    
training(model, train_loader, valid_loader, opt)