### 라이브러리 불러오기

In [1]:
from glob import glob
import pandas as pd
import numpy as np 
from tqdm.auto import tqdm
import cv2

import os
import timm
import random

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.transforms as transforms
from sklearn.metrics import f1_score, accuracy_score
from sklearn.model_selection import KFold, StratifiedKFold
import time
import albumentations as albu
from albumentations.pytorch import ToTensorV2

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
# Hyper-parameters shared by the cells below, looked up by upper-case key.
CFG = dict(
    IMG_SIZE=512,
    EPOCHS=30,
    LEARNING_RATE=1e-2,
    BATCH_SIZE=8,
    SEED=42,
)

In [3]:
# Collect the PNG paths of both splits as NumPy arrays, each sorted by path
# so the ordering is deterministic across runs.
all_img_path, test_img_path = (
    np.array(sorted(glob(pattern)))
    for pattern in ('../data/train/*.png', '../data/test/*.png')
)

In [4]:
# Training annotations; only the "label" column is read in this cell.
train_y = pd.read_csv("../data/train_df.csv")
train_labels = train_y["label"]

# Assign each distinct label a consecutive integer id, in sorted label order.
label_unique = {
    name: idx for idx, name in enumerate(sorted(np.unique(train_labels)))
}

# Integer-encoded label for every CSV row.
# NOTE(review): these are later paired with all_img_path purely by position,
# which assumes the CSV row order matches the sorted file order - confirm.
all_label = np.array([label_unique[name] for name in train_labels])

In [5]:
class CustomDataset(Dataset):
    """Dataset that loads images from disk with OpenCV on each access.

    Args:
        img_path_list: Indexable collection of image file paths.
        label_list: Indexable collection of labels aligned with
            ``img_path_list``. Only read when ``train_mode`` is true, so it
            may be ``None`` otherwise.
        train_mode: When true, ``__getitem__`` returns ``(image, label)``;
            otherwise it returns only the image.
        transforms: Optional callable invoked as ``transforms(image=img)``;
            its result is indexed with ``"image"``. When ``None`` the raw
            array returned by ``cv2.imread`` is passed through unchanged.
    """

    def __init__(self, img_path_list, label_list, train_mode=True, transforms=None):
        self.transforms = transforms
        self.train_mode = train_mode
        self.img_path_list = img_path_list
        self.label_list = label_list

    def __getitem__(self, index):
        """Return the sample at ``index``.

        Raises:
            FileNotFoundError: If OpenCV could not read the image file.
        """
        img_path = self.img_path_list[index]

        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than deep inside a transform.
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # NOTE(review): cv2.imread returns channels in BGR order; pretrained
        # backbones usually expect RGB - confirm whether a conversion is wanted.

        # Previously `image` was only bound inside this branch, so a dataset
        # built without transforms crashed with UnboundLocalError.
        if self.transforms is not None:
            image = self.transforms(image=image)["image"]

        if self.train_mode:
            return image, self.label_list[index]
        return image

    def __len__(self):
        """Return the number of image paths."""
        return len(self.img_path_list)
    
class Network(nn.Module):
    """Classifier that delegates to a backbone created by ``timm``.

    The defaults reproduce the previous hard-coded configuration, so
    ``Network()`` builds the same model as before and existing checkpoints
    keep the same ``model.*`` parameter names.

    Args:
        model_name: Architecture name forwarded to ``timm.create_model``.
        num_classes: Number of output classes forwarded to ``timm.create_model``.
        pretrained: Forwarded as ``pretrained`` to ``timm.create_model``.
    """

    def __init__(self, model_name='efficientnet_b0', num_classes=88, pretrained=True):
        super().__init__()
        self.model = timm.create_model(
            model_name, pretrained=pretrained, num_classes=num_classes
        )

    def forward(self, x):
        """Return the backbone's output for the input batch ``x``."""
        return self.model(x)

In [6]:
def _resize_to_tensor():
    """Build a new pipeline: resize to IMG_SIZE x IMG_SIZE, then ToTensorV2."""
    side = CFG['IMG_SIZE']
    return albu.Compose([albu.Resize(side, side), ToTensorV2()])


# All three stages currently use the same two steps. The steps below were
# disabled in the original cell and are kept here only for reference.
#
# train (between Resize and ToTensorV2):
#     albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1,
#                           rotate_limit=30, interpolation=1, border_mode=0,
#                           value=0, p=0.5)
#     albu.HorizontalFlip(p=0.5)
#     albu.VerticalFlip(p=0.5)
#     albu.RandomRotate90(p=1.0)
#     albu.CLAHE(clip_limit=2, p=0.25)
#     albu.Sharpen(p=0.25)
#     albu.RandomBrightnessContrast(brightness_limit=(-0.1, 0.1),
#                                   contrast_limit=(-0.1, 0.1), p=0.25)
#     albu.RandomResizedCrop(height=CFG['IMG_SIZE'], width=CFG['IMG_SIZE'],
#                            scale=(0.5, 1.0), ratio=(0.75, 1.3333333333333333),
#                            interpolation=1, p=1.0)
#     albu.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD,
#                    max_pixel_value=255.0, p=1.0)
# valid: HorizontalFlip(p=0.5), VerticalFlip(p=0.5), and the same Normalize.
# test:  the same Normalize.
train_transform = _resize_to_tensor()
valid_transform = _resize_to_tensor()
test_transform = _resize_to_tensor()

### 모델 학습

In [7]:
def train(model, optimizer, train_loader, vali_loader, scheduler, device,
          save_path='../model/best_model.pth'):
    """Train ``model`` for ``CFG["EPOCHS"]`` epochs, checkpointing the best one.

    After every epoch the model is scored with ``validation``; whenever that
    score beats the best seen so far, the model's ``state_dict`` is written to
    ``save_path``.

    Args:
        model: Network to optimise; moved to ``device``.
        optimizer: Optimizer stepped once per batch.
        train_loader: Iterable of ``(img, label)`` training batches.
        vali_loader: Loader handed to ``validation`` after each epoch.
        scheduler: Optional scheduler; when not ``None`` it is stepped with
            the validation score after each epoch.
        device: Device the model and batches are moved to.
        save_path: Checkpoint destination. The default is the path the rest
            of the notebook loads from.

    Returns:
        The best validation score observed (``-inf`` if no epoch ran).
    """
    model.to(device)

    # Loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # torch.save does not create missing directories.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # Start below any reachable score so the first epoch is always saved.
    # Starting at 0 with a strict comparison meant a run whose score never
    # left 0 wrote no checkpoint, leaving a stale or missing file to load.
    best_score = float('-inf')

    for epoch in range(1, CFG["EPOCHS"] + 1):
        train_pred = []
        train_true = []
        model.train()
        for img, label in tqdm(train_loader):
            img = img.float().to(device)
            # CrossEntropyLoss takes integer class indices; cast directly
            # instead of going through float first.
            label = label.to(device=device, dtype=torch.int64)

            optimizer.zero_grad()

            # Data -> Model -> Output
            logit = model(img)
            loss = criterion(logit, label)

            # Backpropagation
            loss.backward()
            optimizer.step()

            train_pred += logit.argmax(1).detach().cpu().numpy().tolist()
            train_true += label.detach().cpu().numpy().tolist()

        # Evaluate on the validation set
        vali_score = validation(model, vali_loader, device)

        # The scheduler receives the validation score so it can react when
        # the score stops improving.
        if scheduler is not None:
            scheduler.step(vali_score)

        print(f'Epoch [{epoch}] Train Score : [{score_function(train_true, train_pred):.5f}] Validation Score : [{vali_score:.5f}]\n')

        # Keep only the best-scoring weights on disk.
        if best_score < vali_score:
            best_score = vali_score
            torch.save(model.state_dict(), save_path)
            print('Model Saved.')

    return best_score

In [8]:
def validation(model, vali_loader, device):
    """Score ``model`` on ``vali_loader`` and return ``score_function``'s result.

    The model is switched to eval mode and gradients are disabled. Each
    batch's argmax over dimension 1 is collected together with its labels.

    Args:
        model: Network to evaluate.
        vali_loader: Iterable of ``(img, label)`` batches.
        device: Device the batches are moved to.

    Returns:
        ``score_function(labels, predictions)`` over all batches.
    """
    model.eval()
    predictions, targets = [], []
    with torch.no_grad():
        for img, label in tqdm(iter(vali_loader)):
            batch = img.float().to(device)
            target = label.float().to(device).to(torch.int64)

            predicted = model(batch).argmax(1)
            predictions.extend(predicted.detach().cpu().numpy().tolist())
            targets.extend(target.detach().cpu().numpy().tolist())

    return score_function(targets, predictions)

In [9]:
# Label-free dataset over the test images, served unshuffled so the outputs
# stay in the same order as test_img_path.
test_dataset = CustomDataset(
    img_path_list=test_img_path,
    label_list=None,
    train_mode=False,
    transforms=test_transform,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

def predict(model, test_loader, device):
    """Run ``model`` over ``test_loader`` and return its raw outputs.

    Args:
        model: Network to run; switched to eval mode.
        test_loader: Iterable yielding image batches only (no labels).
        device: Device the batches are moved to.

    Returns:
        A list with one entry per sample, each the model's output row
        converted to a plain Python list.
    """
    model.eval()
    outputs = []
    with torch.no_grad():
        for batch in tqdm(iter(test_loader)):
            scores = model(batch.float().to(device))
            outputs += scores.detach().cpu().tolist()
    return outputs

def score_function(real, pred):
    """Return the macro-averaged F1 score of ``pred`` against ``real``."""
    return f1_score(y_true=real, y_pred=pred, average="macro")

In [10]:
# Number of rows in the provided train / test data.
rows_train, rows_test = len(all_img_path), len(test_img_path)

num_trial = 100  # how many hyper-parameter tuning trials to run
splits_hp, splits_tr = 5, 15  # k-fold splits: tuning / model training
basic_seed = 42  # default seed
num_seed_tr = 10  # number of training seeds
sel_seed = 3  # number of seeds to select
num_classes = 88

# Out-of-fold and test prediction stores.
pred_dict, pred_test_dict = {}, {}

In [11]:
# K-fold training. Each fold trains a fresh model, reloads the best checkpoint
# written by train(), stores its outputs for the held-out rows in `cv`, and
# adds its share of the test outputs to `pred_test`.
# Author's note: keep increasing the number of CV folds.
# NOTE(review): KFold ignores the labels passed to split(); StratifiedKFold is
# imported but unused - confirm whether stratified folds were intended.
kfold = KFold(n_splits=splits_tr, random_state=basic_seed, shuffle=True)
cv = np.zeros((rows_train, num_classes))
pred_test = np.zeros((rows_test, num_classes))
for n, (train_idx, val_idx) in enumerate(kfold.split(all_img_path, all_label)):

    train_img_path, vali_img_path = all_img_path[train_idx], all_img_path[val_idx]
    train_label, vali_label = all_label[train_idx], all_label[val_idx]

    # Build the data loaders for this fold
    train_dataset = CustomDataset(train_img_path.tolist(), train_label.tolist(), train_mode=True, transforms=train_transform)
    train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

    vali_dataset = CustomDataset(vali_img_path.tolist(), vali_label.tolist(), train_mode=True, transforms=valid_transform)
    vali_loader = DataLoader(vali_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

    model = Network().to(device)
    optimizer = torch.optim.SGD(params = model.parameters(), lr = CFG["LEARNING_RATE"])
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='max', patience=3, factor=0.2)

    train(model, optimizer, train_loader, vali_loader, scheduler, device)

    # Reload the best checkpoint of this fold; map_location keeps the load
    # working when the checkpoint was written on a different device.
    checkpoint = torch.load('../model/best_model.pth', map_location=device)
    model = Network().to(device)
    model.load_state_dict(checkpoint)

    # Rebuild the validation loader without labels so predict() can consume it.
    vali_dataset = CustomDataset(vali_img_path.tolist(), vali_label.tolist(), train_mode=False, transforms=test_transform)
    vali_loader = DataLoader(vali_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

    cv[val_idx, :] = predict(model, vali_loader, device)
    pred_test += np.array(predict(model, test_loader, device)) / splits_tr
    print(f"Fold {n+1} Score: {score_function(vali_label, cv[val_idx].argmax(axis=1))}")

# `seed` was never defined, so these two lines raised NameError after all
# folds had finished; key the results by the seed actually used for the split.
pred_dict['eff_b0'+str(basic_seed)] = cv
pred_test_dict['eff_b0'+str(basic_seed)] = pred_test

  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [1] Train MAE : [0.15332] Validation Score : [0.26602]

Model Saved.


  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [2] Train MAE : [0.17544] Validation Score : [0.34674]

Model Saved.


  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [3] Train MAE : [0.21542] Validation Score : [0.36092]

Model Saved.


  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [4] Train MAE : [0.28237] Validation Score : [0.38623]

Model Saved.


  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [5] Train MAE : [0.37982] Validation Score : [0.42281]

Model Saved.


  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [6] Train MAE : [0.43880] Validation Score : [0.41747]



  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [7] Train MAE : [0.49281] Validation Score : [0.49163]

Model Saved.


  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [8] Train MAE : [0.57403] Validation Score : [0.47470]



  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [9] Train MAE : [0.64257] Validation Score : [0.47706]



  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [10] Train MAE : [0.71628] Validation Score : [0.57462]

Model Saved.


  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [11] Train MAE : [0.74021] Validation Score : [0.54129]



  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [12] Train MAE : [0.78938] Validation Score : [0.54720]



  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [13] Train MAE : [0.82457] Validation Score : [0.58654]

Model Saved.


  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [14] Train MAE : [0.86391] Validation Score : [0.60087]

Model Saved.


  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [15] Train MAE : [0.89125] Validation Score : [0.62192]

Model Saved.


  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [16] Train MAE : [0.90878] Validation Score : [0.67698]

Model Saved.


  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [17] Train MAE : [0.91683] Validation Score : [0.62442]



  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [18] Train MAE : [0.94593] Validation Score : [0.68322]

Model Saved.


  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [19] Train MAE : [0.96324] Validation Score : [0.62419]



  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [20] Train MAE : [0.96153] Validation Score : [0.72363]

Model Saved.


  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [21] Train MAE : [0.96070] Validation Score : [0.63185]



  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [22] Train MAE : [0.97860] Validation Score : [0.70450]



  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [23] Train MAE : [0.97601] Validation Score : [0.67512]



  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [24] Train MAE : [0.98668] Validation Score : [0.74363]

Model Saved.


  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [25] Train MAE : [0.99014] Validation Score : [0.68476]



  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [26] Train MAE : [0.98974] Validation Score : [0.72759]



  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [27] Train MAE : [0.99044] Validation Score : [0.66366]



  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [28] Train MAE : [0.99492] Validation Score : [0.72431]



  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [29] Train MAE : [0.98844] Validation Score : [0.67867]



  0%|          | 0/499 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch [30] Train MAE : [0.99687] Validation Score : [0.72882]



  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/270 [00:00<?, ?it/s]

Fold 1 Score: 0.7436334072431634


  0%|          | 0/499 [00:00<?, ?it/s]

KeyboardInterrupt: 

### 추론

### 제출물 생성

In [41]:
# Invert the label -> id mapping, then turn each test row's highest-scoring
# class id back into its original label.
label_decoder = dict(zip(label_unique.values(), label_unique.keys()))
pred_final = [label_decoder[class_id] for class_id in pred_test.argmax(axis=1)]

In [43]:
from datetime import datetime

# Fill the sample submission with the decoded predictions and write it under
# a timestamped file name.
submission = pd.read_csv('../data/sample_submission.csv')
submission["label"] = pred_final

# The previous format omitted the hour (%H), so files written at the same
# minute and second of different hours shared a name and overwrote each other.
submission_time = datetime.today().strftime('%Y-%m-%d-%H-%M-%S')

# to_csv does not create missing directories.
os.makedirs('../submission', exist_ok=True)
submission.to_csv(f'../submission/{submission_time}.csv', index = False)