In [1]:
import cv2
import os
import timm
import numpy as np
import pandas as pd
import albumentations as A
import logging
import json
from datetime import datetime

from glob import glob
from tqdm import tqdm
from easydict import EasyDict
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import f1_score, accuracy_score
from torch.utils.tensorboard import SummaryWriter

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR

# Dataset

In [2]:
class CustomDataset(Dataset):
    """Dataset that reads images from disk and optionally pairs them with labels.

    Args:
        img_list: Indexable collection of image file paths.
        label_list: Optional indexable collection of labels aligned with
            ``img_list``. When ``None`` the dataset yields images only.
        transforms: Optional callable invoked as ``transforms(image=img)``;
            the ``'image'`` entry of its result replaces the image.
    """

    def __init__(self, img_list, label_list=None, transforms=None) :
        self.img_list = img_list
        self.label_list = label_list
        self.transforms = transforms

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.img_list)

    def __getitem__(self, idx):
        """Load, colour-convert and transform the image at position ``idx``.

        Returns:
            ``(img, torch.tensor(label))`` when labels were supplied,
            otherwise just ``img``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_path = self.img_list[idx]

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports a missing/corrupt file by returning None rather
            # than raising; fail here with the offending path instead of letting
            # cvtColor raise an opaque error.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transforms:
            img = self.transforms(image=img)['image']

        # test / inference: no labels available
        if self.label_list is None:
            return img

        # training / validation
        return img, torch.tensor(self.label_list[idx])
        

# CNN

In [3]:
class CNN(nn.Module):
    """Thin wrapper around a timm classification model.

    Args:
        model_name: Architecture name forwarded to ``timm.create_model``.
        num_classes: Size of the classification head. Defaults to 10, the
            value that used to be hard-coded.
        pretrained: Forwarded to ``timm.create_model``. Defaults to True as
            before; pass False when the weights are about to be replaced by
            ``load_state_dict`` anyway.
    """

    def __init__(self, model_name, num_classes=10, pretrained=True) :
        super().__init__()
        self.model = timm.create_model(
            model_name=model_name,
            num_classes=num_classes,
            pretrained=pretrained,
        )

    def forward(self, x) :
        """Return the wrapped model's output for the batch ``x``."""
        return self.model(x)

# Augmentation

In [4]:
def rand_bbox(size, lam):
    """Sample a random box used to cut a patch out of an image batch.

    Args:
        size: Shape of the batch; only ``size[2]`` and ``size[3]`` are read.
        lam: Mixing ratio. Before clipping, the box side lengths are
            ``sqrt(1 - lam)`` times the corresponding extents.

    Returns:
        ``(bbx1, bby1, bbx2, bby2)``. The ``x`` pair is clipped to
        ``[0, size[2]]`` and the ``y`` pair to ``[0, size[3]]``.
    """
    extent_x, extent_y = size[2], size[3]

    # Scaling each side by sqrt(1 - lam) makes the box area scale by (1 - lam).
    side_ratio = np.sqrt(1. - lam)
    half_x = np.int32(extent_x * side_ratio) // 2
    half_y = np.int32(extent_y * side_ratio) // 2

    # Box centre is drawn uniformly; x is sampled before y.
    centre_x = np.random.randint(extent_x)
    centre_y = np.random.randint(extent_y)

    # Clip so the box never leaves the image.
    lower_x = np.clip(centre_x - half_x, 0, extent_x)
    lower_y = np.clip(centre_y - half_y, 0, extent_y)
    upper_x = np.clip(centre_x + half_x, 0, extent_x)
    upper_y = np.clip(centre_y + half_y, 0, extent_y)

    return lower_x, lower_y, upper_x, upper_y

def cutmix(imgs, labels):
    """Apply CutMix to a batch: paste a random box from shuffled samples.

    The batch is shuffled with a random permutation and one box, shared by
    the whole batch, is copied from each sample's shuffled partner.

    Args:
        imgs: Image batch tensor; it is modified in place. Dimensions 2 and 3
            are the ones the box is cut along.
        labels: Tensor of labels indexed along its first dimension.

    Returns:
        ``(imgs, lam, target_a, target_b)`` where ``imgs`` is the same (now
        mixed) tensor, ``lam`` is the fraction of each image left untouched
        (recomputed from the clipped box), ``target_a`` are the original
        labels and ``target_b`` the labels of the pasted partners.
    """
    lam = np.random.beta(1.0, 1.0)

    # Move the permutation to wherever the batch lives. The previous
    # unconditional .cuda() crashed on CPU-only machines.
    rand_index = torch.randperm(imgs.size()[0]).to(imgs.device)

    target_a = labels
    target_b = labels[rand_index.to(labels.device)]

    bbx1, bby1, bbx2, bby2 = rand_bbox(imgs.size(), lam)
    # The right-hand side uses advanced indexing and is therefore a copy, so
    # the in-place write cannot read already-overwritten pixels.
    imgs[:, :, bbx1:bbx2, bby1:bby2] = imgs[rand_index, :, bbx1:bbx2, bby1:bby2]

    # Clipping may have shrunk the box; recompute lam from its real area.
    lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (imgs.size()[-1] * imgs.size()[-2]))

    return imgs, lam, target_a, target_b

def mixup(imgs, labels) :
    """Apply MixUp to a batch: blend every sample with a shuffled partner.

    Args:
        imgs: Image batch tensor (left unmodified; a new tensor is returned).
        labels: Tensor of labels indexed along its first dimension.

    Returns:
        ``(mixed_imgs, lam, target_a, target_b)`` where
        ``mixed_imgs = lam * imgs + (1 - lam) * imgs[perm]``, ``target_a`` are
        the original labels and ``target_b`` the partners' labels.
    """
    lam = np.random.beta(1.0, 1.0)

    # Move the permutation to wherever the batch lives. The previous
    # unconditional .cuda() crashed on CPU-only machines.
    rand_index = torch.randperm(imgs.size()[0]).to(imgs.device)

    mixed_imgs = lam * imgs + (1 - lam) * imgs[rand_index, :]
    target_a, target_b = labels, labels[rand_index.to(labels.device)]

    return mixed_imgs, lam, target_a, target_b

# Util

In [5]:
def score(true_labels, model_preds) :
    """Accuracy of the arg-max predictions against the true labels.

    Args:
        true_labels: Tensor of ground-truth labels.
        model_preds: Tensor of per-class scores; the predicted class is the
            arg-max along dimension 1.

    Returns:
        Whatever ``accuracy_score(true, predicted)`` returns for the two
        label lists.
    """
    predicted = model_preds.detach().argmax(dim=1).cpu().tolist()
    expected = true_labels.detach().cpu().tolist()
    return accuracy_score(expected, predicted)

In [6]:
def img_parser(data_path, div, training=True) :
    """List image files in numeric file-name order and optionally split them.

    File names are expected to start with an integer followed by a dot
    (e.g. ``123.png``); that integer is the sort key.

    Args:
        data_path: Glob pattern selecting the image files.
        div: Index at which the sorted list is split. Only used when
            ``training`` is true.
        training: When true return ``(paths[:div], paths[div:])``, otherwise
            return the full sorted list.

    Raises:
        ValueError: If a matched file name does not start with an integer.
    """
    def _numeric_stem(file_path):
        # Accept both '/' and '\\' as separators. Splitting on '\\' alone only
        # isolated the file name on Windows-style paths and made int() fail
        # on POSIX paths.
        file_name = file_path.replace('\\', '/').rsplit('/', 1)[-1]
        return int(file_name.split('.')[0])

    path = sorted(glob(data_path), key=_numeric_stem)

    if training:
        return path[:div], path[div:]
    return path

In [7]:
def transform_parser(grid_shuffle_p=0.8) :
    """Build the albumentations pipeline applied to every image.

    Args:
        grid_shuffle_p: Probability passed to ``A.RandomGridShuffle``;
            0 disables the shuffle.

    Returns:
        An ``A.Compose`` of rotate, 2x2 grid shuffle, normalize and
        tensor conversion, in that order.
    """
    # NOTE(review): Rotate runs with p=1, and this same pipeline is handed to
    # the validation and test loaders elsewhere in the notebook, so inference
    # images are randomly rotated too -- confirm this is intended.
    pipeline = [
        A.Rotate(limit=(45), p=1),
        A.RandomGridShuffle(p=grid_shuffle_p, grid=(2,2)),
        A.Normalize(),
        ToTensorV2(),
    ]
    return A.Compose(pipeline)

In [8]:
def image_label_dataset(df_path, div=0.8, grid_shuffle_p=0.8, training=True) :
    """Collect image paths, labels and the transform for a CSV of labels.

    Image paths always come from ``CFG['DATA_PATH']``; labels come from the
    ``'label'`` column of the CSV at ``df_path``.

    Args:
        df_path: Path of the CSV file to read.
        div: Fraction of rows (from the top) assigned to the training part.
            Ignored when ``training`` is false.
        grid_shuffle_p: Forwarded to ``transform_parser``.
        training: Selects between the split and the unsplit return shape.

    Returns:
        * training: ``((train_paths, valid_paths), (train_labels, valid_labels), transform)``
        * otherwise: ``(np.array(paths), all_labels, transform)``
    """
    all_df = pd.read_csv(df_path)
    transform = transform_parser(grid_shuffle_p=grid_shuffle_p)

    if not training:
        # No split: every image path plus every label found in the CSV.
        img = img_parser(CFG['DATA_PATH'], div=None, training=training)
        return np.array(img), all_df['label'].values, transform

    # Sequential (unshuffled) split: the first `div` share of rows is training.
    split_at = int(len(all_df) * div)
    train_labels = all_df.iloc[:split_at]['label'].values
    valid_labels = all_df.iloc[split_at:]['label'].values

    # The image list is cut at the same index as the label table.
    train_img, valid_img = img_parser(CFG['DATA_PATH'], split_at, training)
    return (train_img, valid_img), (train_labels, valid_labels), transform

In [9]:
def custom_dataload(img_set, label_set, transform, shuffle) :
    """Wrap images (and optional labels) in a ``DataLoader``.

    Args:
        img_set: Image paths handed to ``CustomDataset``.
        label_set: Labels handed to ``CustomDataset``; may be ``None``.
        transform: Transform handed to ``CustomDataset``.
        shuffle: Forwarded to ``DataLoader``.

    Returns:
        A ``DataLoader`` using ``CFG['BATCH_SIZE']`` and no worker processes.
    """
    return DataLoader(
        CustomDataset(img_set, label_set, transform),
        batch_size=CFG['BATCH_SIZE'],
        shuffle=shuffle,
        num_workers=0,
    )

In [10]:
def train_and_valid_dataload(img_set, label_set, transform) :
    """Create the training and validation loaders from paired splits.

    Args:
        img_set: ``img_set[0]`` holds training images, ``img_set[1]``
            validation images.
        label_set: Labels laid out like ``img_set``.
        transform: Transform shared by both loaders.

    Returns:
        ``(train_loader, val_loader)``; only the training loader shuffles.
    """
    # (index into the split, shuffle flag) for train and validation in turn
    return tuple(
        custom_dataload(img_set[part], label_set[part], transform, shuffled)
        for part, shuffled in ((0, True), (1, False))
    )

In [11]:
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Notebook-wide settings. In the cells visible here only BATCH_SIZE
# (custom_dataload), DATA_PATH (image_label_dataset) and model_name (CNN
# construction) are read; the remaining entries are not referenced by them.
CFG = dict(
    EPOCHS=70,
    LEARNING_RATE=1e-4,
    BATCH_SIZE=64,
    DATA_PATH='./data/img/224img_test/*',

    output='scratch-weigt_freeze10E-mixup25E-grid_shuffle35E-focal-effib0.pth',
    LOG="./tensorboard/PCA_img/weigt_freeze10E-mixup25E-grid_shuffle35E",
    reg_step=[36, 61],
    focal_gamma=2,
    focal_alpha=2,

    kfold=5,

    model_name='efficientnet_b0',

    reuse=False,
    clssifier_freeze=False,
    checkpoint='./ckpt/23E-val0.9369-scratch-weigt_freeze5E-mixup15E-grid_shuffle25E-focal-effib0.pth',
)


In [12]:
def ensemble_predict_hardVoting(models, test_loader, device):
    """Predict one label per sample by following the most confident model.

    For every sample each model proposes its top class together with the raw
    output value of that class; the label proposed by the model with the
    highest value is kept.

    NOTE(review): despite the name this is max-confidence selection, not a
    majority vote, and the models' raw outputs are compared without any
    normalisation -- confirm this is the intended ensemble rule.

    Args:
        models: Sequence of models called as ``model(img)`` and returning
            per-class scores with classes along dimension 1.
        test_loader: Iterable yielding image batches (no labels).
        device: Device every batch is moved to before inference.

    Returns:
        Flat list with one predicted class index per sample, in loader order.
    """
    # Switching to eval mode once is enough; it does not need repeating per batch.
    for model in models:
        model.eval()

    model_preds = []
    with torch.no_grad():
        for img in tqdm(iter(test_loader)):
            img = img.to(device)

            batch_preds_score = []
            batch_preds_label = []
            for model in models:
                top_score, top_label = model(img).max(1)
                batch_preds_score.append(top_score.detach().cpu().numpy())
                batch_preds_label.append(top_label.detach().cpu().numpy())

            # Both arrays have shape (n_models, batch).
            batch_preds_score = np.stack(batch_preds_score)
            batch_preds_label = np.stack(batch_preds_label)

            # Index by the real batch length. Using the configured batch size
            # raised IndexError on a shorter final batch.
            sample_idx = np.arange(batch_preds_label.shape[1])
            best_model = np.argmax(batch_preds_score, axis=0)
            model_preds += batch_preds_label[best_model, sample_idx].tolist()

    return model_preds

In [21]:
def ensemble_predict_softVoting(models, test_loader, device):
    """Average the raw outputs of all models for every sample.

    NOTE: the result holds per-class averaged scores, not class labels; take
    the arg-max over the class axis to obtain labels. The averaged values are
    the models' raw outputs (no softmax is applied here).

    Args:
        models: Sequence of models called as ``model(img)``.
        test_loader: Iterable yielding image batches (no labels).
        device: Device every batch is moved to before inference.

    Returns:
        List with one entry per sample, in loader order; each entry is the
        list of that sample's scores averaged over the models.
    """
    # Switching to eval mode once is enough; it does not need repeating per batch.
    for model in models:
        model.eval()

    model_preds = []
    with torch.no_grad():
        for img in tqdm(iter(test_loader)):
            img = img.to(device)

            # Stacked shape: (n_models, batch, n_classes).
            batch_preds_score = np.stack(
                [model(img).detach().cpu().numpy() for model in models]
            )
            # Mean over the model axis -> (batch, n_classes).
            model_preds += np.mean(batch_preds_score, axis=0).tolist()

    return model_preds

In [18]:
# Submission template; its 'label' column is overwritten after inference.
test_df = pd.read_csv('./data/sample_submission.csv')

# training=False returns every image under CFG['DATA_PATH'] without a split.
# The labels read from the CSV are discarded, and grid_shuffle_p=0 switches the
# grid shuffle off.
img_set, _, transform = image_label_dataset(
    df_path='./data/train.csv',
    div=0.8,
    grid_shuffle_p=0,
    training=False,
)

# Unlabelled, unshuffled loader so predictions stay in image order.
test_loader = custom_dataload(img_set, label_set=None, transform=transform, shuffle=False)

In [19]:

# One checkpoint file per ensemble member, in the order model_1 .. model_5.
FOLD_CKPT_PATHS = [
    './ckpt/69E-val0.9611-4fold_3fold_2fold_1fold_0fold_scratch-weigt_freeze10E-mixup25E-grid_shuffle35E-focal-effib0.pth',
    './ckpt/69E-val0.9571-3fold_2fold_1fold_0fold_scratch-weigt_freeze10E-mixup25E-grid_shuffle35E-focal-effib0.pth',
    './ckpt/65E-val0.9589-2fold_1fold_0fold_scratch-weigt_freeze10E-mixup25E-grid_shuffle35E-focal-effib0.pth',
    './ckpt/69E-val0.9545-1fold_0fold_scratch-weigt_freeze10E-mixup25E-grid_shuffle35E-focal-effib0.pth',
    './ckpt/67E-val0.9615-0fold_scratch-weigt_freeze10E-mixup25E-grid_shuffle35E-focal-effib0.pth',
]


def _load_fold_model(ckpt_path):
    """Load one checkpoint and return ``(checkpoint, model)`` with weights restored."""
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine,
    # matching the CPU fallback used when `device` is chosen.
    checkpoint = torch.load(ckpt_path, map_location=device)
    model = CNN(CFG['model_name']).to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    return checkpoint, model


# Keep the numbered names: later cells reference model_1 .. model_5 directly.
(
    (checkpoint_1, model_1),
    (checkpoint_2, model_2),
    (checkpoint_3, model_3),
    (checkpoint_4, model_4),
    (checkpoint_5, model_5),
) = [_load_fold_model(ckpt_path) for ckpt_path in FOLD_CKPT_PATHS]




<All keys matched successfully>

In [19]:
# Max-confidence ensemble over the five fold models: one label per test image.
preds = ensemble_predict_hardVoting(
    models=[model_1, model_2, model_3, model_4, model_5],
    test_loader=test_loader,
    device=device,
)

100%|████████████████████████████████████████████████████████████████████████████████| 625/625 [03:47<00:00,  2.75it/s]


In [22]:
# Averaged per-class scores over the five fold models. This rebinds `preds`,
# replacing whatever an earlier cell stored under that name.
preds = ensemble_predict_softVoting(
    models=[model_1, model_2, model_3, model_4, model_5],
    test_loader=test_loader,
    device=device,
)

100%|████████████████████████████████████████████████████████████████████████████████| 625/625 [04:17<00:00,  2.42it/s]


In [23]:
# `preds` is either one class index per sample (hard voting) or one row of
# per-class scores per sample (soft voting). Score rows must be reduced to a
# class index first; otherwise the 'label' column would hold score lists
# rather than labels.
test_df['label'] = np.argmax(preds, axis=1) if np.ndim(preds) == 2 else preds

test_df.to_csv('./submission/ensemble_softVoting-5fold-4fval0.9611-3fval0.9571-2fval0.9589-1fval0.9545-0fval0.9615.csv', index=False)

In [23]:
# Convert the prediction list into an array; the bare trailing expression
# displays its shape as the cell output.
np_preds = np.array(preds)
np_preds.shape

(40000, 10)

In [28]:
# Persist the prediction array to a .npy file in the current working directory.
np.save("ensemble_softVoting-5fold.npy", np_preds)