In [1]:
#label_slr = (label_slr*n + label_r)/(n+1)
#n = 3

#ensemble learner: 0.7*trainPi + 0.3*oofPi

## Prepare Lib

In [2]:
import os
# Sanity check: list the input directory so missing archives/CSVs are noticed early.
print(os.listdir("../../input"))
# Expose only GPU index 1 to this process. This runs before the cell that imports torch,
# i.e. before CUDA can be initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = '1'

['regular-fundus-training.7z', 'Readme.txt', 'regular-fundus-training.csv', 'ultra-widefield-training.7z', 'ultra-widefield-validation', 'ultra-widefield-validation.csv', 'regular-fundus-validation.7z', 'ultra-widefield-validation.7z', 'old', 'Readme.docx', 'ultra-widefield-training', 'regular-fundus-validation.csv', 'regular-fundus-validation', 'ultra-widefield-training.csv', 'regular-fundus-training']


In [3]:
import cv2
import matplotlib.pyplot as plt
from os.path import isfile
import torch.nn.init as init
import torch
import torch.nn as nn
import numpy as np
import pandas as pd 
from PIL import Image, ImageFilter
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder, OneHotEncoder
from torch.utils.data import Dataset
from torchvision import transforms
from torch.optim import Adam, SGD, RMSprop
import time
import math
from torch.nn.parameter import Parameter
from torch.autograd import Variable
import torch.functional as F
from tqdm import tqdm
from sklearn import metrics
import urllib
import pickle
import cv2
import torch.nn.functional as F
from torchvision import models
import seaborn as sns
import random
import sys
import shutil
import albumentations
from albumentations import pytorch as AT

from apex import amp
from efficientnet_pytorch import EfficientNet
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

from torchcontrib.optim import SWA

# torch.backends.cudnn.benchmark = True

In [4]:
import scipy.special

# Global random seed shared by the fold splitter and all RNGs.
SEED = 42
# Root directory of the competition data; image paths from the CSVs are appended to it.
base_dir = '../../input'


def seed_everything(seed=SEED):
    """Seed every random number generator used by the notebook.

    Args:
        seed: integer seed applied to `random`, NumPy and PyTorch (CPU and CUDA).

    Side effects:
        Exports PYTHONHASHSEED and switches cuDNN to deterministic mode.
    """
    random.seed(seed)
    # Exported for child processes (e.g. DataLoader workers started via spawn);
    # it cannot change hashing in the already-running interpreter.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Cover every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Auto-tuning picks kernels non-deterministically; keep it off for reproducibility.
    torch.backends.cudnn.benchmark = False


seed_everything(SEED)

def l2_norm(input,axis=1):
    """Divide `input` by its L2 norm taken along `axis` (norm kept as a size-1 dim for broadcasting)."""
    magnitude = torch.norm(input, p=2, dim=axis, keepdim=True)
    return input / magnitude

def sigmoid(x):
    """Logistic function; thin wrapper around `scipy.special.expit`."""
    return scipy.special.expit(x)

In [5]:
# visualize tools
def visualize(**images):
    """Show the given images side by side in a single row.

    Each keyword argument becomes one panel; the keyword (underscores turned
    into spaces, title-cased) is used as the panel title.
    """
    panel_count = len(images)
    plt.figure(figsize=(16, 5))
    for position, (label, picture) in enumerate(images.items(), start=1):
        plt.subplot(1, panel_count, position)
        # Hide axis ticks: only the picture matters here.
        plt.xticks([])
        plt.yticks([])
        plt.title(label.replace('_', ' ').title())
        plt.imshow(picture)
    plt.show()
    
def test_transform(img_path, transform):
    """Load one image, apply `transform` to it and display the result.

    Args:
        img_path: path of the image file to read.
        transform: callable invoked as `transform(image=...)` that returns a
            mapping with an 'image' entry.

    Raises:
        FileNotFoundError: if the image cannot be read from `img_path`.

    Note: a later cell rebinds the name `test_transform` to an augmentation
    pipeline, after which this helper is no longer reachable under this name.
    """
    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError('Unable to read image: ' + str(img_path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = transform(image = img)['image']
    visualize(image = img)
    
def write_aug(img_path, transform, num=30):
    """Write `num` augmented variants of one image to ./aug/<i>.jpg.

    Args:
        img_path: path of the source image.
        transform: callable invoked as `transform(image=...)` that returns a
            mapping with an 'image' entry.
        num: number of augmented samples to write.

    Raises:
        FileNotFoundError: if the image cannot be read from `img_path`.
    """
    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError('Unable to read image: ' + str(img_path))
    # cv2.imwrite does not create missing directories; make sure the target exists.
    os.makedirs('./aug', exist_ok=True)
    # The image is kept in OpenCV's BGR order on purpose: cv2.imwrite expects BGR.
    for i in range(num):
        t = transform(image = img)['image']
        cv2.imwrite('./aug/'+str(i)+'.jpg',t)

## Param

In [6]:
# Number of cross-validation folds (FOLD == 1 means: split 5 ways, use only the first split).
FOLD = 5
# Mini-batch size for every DataLoader.
BATCH_SIZE = 8
# Gradient accumulation: the optimizer steps once every ACCUMULATE mini-batches.
ACCUMULATE = 4
# Peak learning rate handed to the scheduler (eta_max); AdamW itself starts at LR/10.
LR = 1e-3
# Training epochs per fold.
EPOCH = 25
# Images are resized to IMG_SIZE x IMG_SIZE by every transform pipeline.
IMG_SIZE = 384

In [7]:
# Experiment id: checkpoints and the log file live under ./exp/exp<EXP>/.
EXP = 6
# Disabled: pick the next unused experiment id and create its directory.
# while os.path.exists('./exp/exp%d'%EXP):
#     EXP+=1
# os.makedirs('./exp/exp%d'%EXP)

## Prepare Data

In [8]:
# Per-image metadata for the regular-fundus training set (used for cross-validation).
train_df = pd.read_csv( base_dir + '/regular-fundus-training.csv')
# Per-image metadata for the regular-fundus validation set (used as the hold-out test set below).
val_df = pd.read_csv( base_dir + '/regular-fundus-validation.csv')

In [9]:
# Stack both CSVs for inspection; sort=True orders the columns alphabetically.
# NOTE(review): `df` is not referenced by any later cell visible here — confirm it is still needed.
df = pd.concat([train_df,val_df],axis=0,sort=True)
df.head()

Unnamed: 0,Artifact,Clarity,Field definition,Overall quality,image_id,image_path,left_eye_DR_Level,patient_DR_Level,patient_id,right_eye_DR_Level
0,4,8,8,0,1_l1,/regular-fundus-training/1/1_l1.jpg,0.0,0,1,
1,0,8,8,0,1_l2,/regular-fundus-training/1/1_l2.jpg,0.0,0,1,
2,4,8,8,0,1_r1,/regular-fundus-training/1/1_r1.jpg,,0,1,0.0
3,0,10,8,0,1_r2,/regular-fundus-training/1/1_r2.jpg,,0,1,0.0
4,6,10,8,0,2_l1,/regular-fundus-training/2/2_l1.jpg,2.0,2,2,


In [10]:
# Each row carries a DR grade for only one eye; the other eye's column is NaN.
# Replacing NaN with False lets the dataset classes compute `left + right`,
# where False behaves as 0, to obtain the grade of the eye shown in the image.
train_df.fillna(False, inplace=True)
val_df.fillna(False, inplace=True)
df.fillna(False, inplace=True)
df.head()

Unnamed: 0,Artifact,Clarity,Field definition,Overall quality,image_id,image_path,left_eye_DR_Level,patient_DR_Level,patient_id,right_eye_DR_Level
0,4,8,8,0,1_l1,/regular-fundus-training/1/1_l1.jpg,0,0,1,False
1,0,8,8,0,1_l2,/regular-fundus-training/1/1_l2.jpg,0,0,1,False
2,4,8,8,0,1_r1,/regular-fundus-training/1/1_r1.jpg,False,0,1,0
3,0,10,8,0,1_r2,/regular-fundus-training/1/1_r2.jpg,False,0,1,0
4,6,10,8,0,2_l1,/regular-fundus-training/2/2_l1.jpg,2,2,2,False


In [11]:
class DRDataset(Dataset):
    """Image dataset yielding `(image, label)` pairs from a fundus metadata frame.

    The frame must provide the columns `left_eye_DR_Level`,
    `right_eye_DR_Level` and `image_path`. The label is the sum of both eye
    columns, wrapped in a length-1 array (the notebook fills the missing eye
    with False so the sum equals the grade of the eye in the image).

    Args:
        dataframe: metadata frame; rows are addressed by position.
        transform: optional callable invoked as `transform(image=...)` that
            returns a mapping with an 'image' entry. When omitted the RGB
            array is returned unchanged.
    """

    def __init__(self, dataframe, transform=None):
        self.df = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return `(image, label)` for the row at position `idx`.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        label_l = self.df.left_eye_DR_Level.values[idx]
        label_r = self.df.right_eye_DR_Level.values[idx]
        label = np.expand_dims(label_l + label_r, -1)

        # Positional lookup, consistent with the labels above; label-based
        # indexing would break for frames without a 0..n-1 index.
        path = base_dir + self.df.image_path.values[idx]
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError('Unable to read image: ' + path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Only unpack the transform output when a transform was actually applied.
        if self.transform is not None:
            image = self.transform(image=image)['image']

        return image, label
    
class DRDataset_refine(Dataset):
    """Image dataset that pairs each image with an externally supplied (refined) label.

    Args:
        dataframe: metadata frame with an `image_path` column; rows are
            addressed by position.
        refine: indexable of labels aligned position-by-position with
            `dataframe`; `refine[idx]` is returned as the label.
        transform: optional callable invoked as `transform(image=...)` that
            returns a mapping with an 'image' entry. When omitted the RGB
            array is returned unchanged.
    """

    def __init__(self, dataframe, refine, transform=None):
        self.df = dataframe
        self.refine = refine
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return `(image, refined_label)` for the row at position `idx`.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        label = self.refine[idx]

        # Positional lookup so the image always matches `refine[idx]`,
        # whatever index the frame carries.
        path = base_dir + self.df.image_path.values[idx]
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError('Unable to read image: ' + path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Only unpack the transform output when a transform was actually applied.
        if self.transform is not None:
            image = self.transform(image=image)['image']

        return image, label

In [12]:
# Training-time augmentation: resize, random 90-degree rotations / transposes / flips,
# one optional photometric jitter, one optional noise-or-sharpen step, a random affine
# shift/scale/rotate, then ImageNet mean/std normalisation and conversion to a tensor.
train_transform = albumentations.Compose([
    albumentations.Resize(IMG_SIZE, IMG_SIZE),
    albumentations.RandomRotate90(p=0.5),
    albumentations.Transpose(p=0.5),
    albumentations.Flip(p=0.5),
    # With probability 0.5 apply exactly one of the colour jitters below.
    albumentations.OneOf([
        albumentations.RandomBrightness(0.15, p=1), 
        albumentations.RandomContrast(0.15, p=1),
        albumentations.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, p=1),
    ], p=0.5), 
    # With probability 0.5 apply exactly one of sensor noise / sharpening.
    albumentations.OneOf([
        albumentations.ISONoise(color_shift=(0.01, 0.03), intensity=(0.1, 0.3)),
        albumentations.IAASharpen(alpha=(0.1, 0.3), lightness=(0.5, 1.0)),
    ], p=0.5), 
    albumentations.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=45, border_mode=1, p=0.5),
    albumentations.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    AT.ToTensor(),
    ])

# Same augmentations as `train_transform`, but the mean/std normalisation is replaced by
# a Lambda that maps pixels through `img * 2.0 - 1.0`.
# NOTE(review): this pipeline is not referenced by any cell visible here.
# NOTE(review): the Lambda runs before ToTensor; pixels look like they are still in the
# 0..255 range at that point, in which case the result is not in [-1, 1] — confirm.
train_transform_advprop = albumentations.Compose([
    albumentations.Resize(IMG_SIZE, IMG_SIZE),
    albumentations.RandomRotate90(p=0.5),
    albumentations.Transpose(p=0.5),
    albumentations.Flip(p=0.5),
    albumentations.OneOf([
        albumentations.RandomBrightness(0.15, p=1), 
        albumentations.RandomContrast(0.15, p=1),
        albumentations.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, p=1),
    ], p=0.5), 
    albumentations.OneOf([
        albumentations.ISONoise(color_shift=(0.01, 0.03), intensity=(0.1, 0.3)),
        albumentations.IAASharpen(alpha=(0.1, 0.3), lightness=(0.5, 1.0)),
    ], p=0.5), 
    albumentations.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=45, border_mode=1, p=0.5),
    albumentations.Lambda(lambda img: img * 2.0 - 1.0),
    AT.ToTensor(),
    ])


# Deterministic evaluation pipeline: resize + ImageNet normalisation only.
# This assignment rebinds `test_transform`, which an earlier cell defined as a
# helper function; from here on the name refers to this pipeline.
test_transform = albumentations.Compose([
    albumentations.Resize(IMG_SIZE, IMG_SIZE),
    albumentations.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    AT.ToTensor(),
    ])

  "Using lambda is incompatible with multiprocessing. "


In [13]:
# Patient-level stratified split. Rows are taken in groups of four consecutive
# images per patient, so folds are built on patient indices (stratified by
# patient_DR_Level) and then expanded back to the four row positions.
# FOLD == 1 means: split five ways but keep only the first split.
n_splits = 5 if FOLD == 1 else FOLD
sfolder = StratifiedKFold(n_splits=n_splits,random_state=SEED,shuffle=True)

tr_idx = []
val_idx = []

patient_levels = list(train_df[::4].patient_DR_Level)
for train, test in sfolder.split(range(len(train_df)//4), patient_levels):
    # Expand each patient index p into its four image rows 4p .. 4p+3.
    tr = [p*4 + k for p in train for k in range(4)]
    val = [p*4 + k for p in test for k in range(4)]
    tr_idx.append(tr)
    val_idx.append(val)
    print('Train: %s | test: %s' % (len(tr), len(val)))
    if FOLD==1:
        break

Train: 960 | test: 240
Train: 960 | test: 240
Train: 960 | test: 240
Train: 960 | test: 240
Train: 960 | test: 240


## Modeling

In [14]:
from utils.ranger import RangerVA 
from utils.lr_scheduler import CosineAnnealingWarmUpRestarts

In [15]:
def gem(x, p=3, eps=1e-6):
    """Generalized-mean pooling over the last two dimensions of `x`.

    Values are clamped to at least `eps`, raised to the power `p`, averaged
    over the full spatial window and the result is raised to `1/p`.
    """
    window = (x.size(-2), x.size(-1))
    powered = x.clamp(min=eps).pow(p)
    pooled = F.avg_pool2d(powered, window)
    return pooled.pow(1./p)

class GeM(nn.Module):
    """Generalized-mean pooling layer whose exponent `p` is a learnable parameter."""

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        # One-element parameter initialised to p; it is trained with the rest of the model.
        self.p = Parameter(torch.ones(1)*p)
        self.eps = eps

    def forward(self, x):
        return gem(x, p=self.p, eps=self.eps)

    def __repr__(self):
        return '{}(p={:.4f}, eps={})'.format(
            self.__class__.__name__, self.p.data.tolist()[0], self.eps)

In [16]:
import scipy as sp
from functools import partial
from sklearn import metrics
from collections import Counter
import json

class OptimizedRounder(object):
    """Turn continuous regression outputs into the five grades 0..4 using four cut points.

    `fit` searches cut points that maximise quadratic-weighted kappa;
    `predict` applies a given set of cut points.
    """

    def __init__(self):
        # Replaced by the scipy optimisation result once `fit` has run.
        self.coef_ = 0

    def _kappa_loss(self, coef, X, y):
        """Negative quadratic-weighted kappa of `X` bucketed with `coef` against `y`."""
        X_p = self.predict(X, coef)
        ll = metrics.cohen_kappa_score(y, X_p, weights='quadratic')
        return -ll

    def fit(self, X, y):
        """Optimise the four cut points with Nelder-Mead, starting from [0.5, 1.5, 2.5, 3.5]."""
        # Import the submodule explicitly: `import scipy` alone does not
        # guarantee that `scipy.optimize` is loaded.
        from scipy import optimize

        loss_partial = partial(self._kappa_loss, X=X, y=y)
        initial_coef = [0.5, 1.5, 2.5, 3.5]
        self.coef_ = optimize.minimize(loss_partial, initial_coef, method='nelder-mead')

    def predict(self, X, coef):
        """Bucket every value of `X` into 0..4 using the cut points `coef`.

        Args:
            X: array-like of predictions, any shape.
            coef: sequence of four cut points.

        Returns:
            A copy of `X` (same shape and dtype) holding the grades. Values
            matching none of the four intervals, including NaN, map to 4.
        """
        X_p = np.copy(X)
        # The intervals are tested in order, first match wins. This mirrors an
        # if/elif chain and stays well-defined even when the optimiser
        # proposes cut points that are not sorted.
        conditions = [
            X_p < coef[0],
            (X_p >= coef[0]) & (X_p < coef[1]),
            (X_p >= coef[1]) & (X_p < coef[2]),
            (X_p >= coef[2]) & (X_p < coef[3]),
        ]
        X_p[...] = np.select(conditions, [0, 1, 2, 3], default=4)
        return X_p

    def coefficients(self):
        """Return the fitted cut points; only valid after `fit` has been called."""
        return self.coef_['x']
    
def find_threshold(valid_predictions, targets):
    """Round predictions to grades with the fixed cut points [0.5, 1.5, 2.5, 3.5].

    Fitting the cut points on `targets` is currently disabled, so `targets`
    is accepted but not used.

    Returns:
        (coefficients, rounded_predictions)
    """
    coefficients = [0.5,1.5,2.5,3.5]
    rounded = OptimizedRounder().predict(valid_predictions, coefficients)
    return coefficients, rounded

from sklearn.metrics import cohen_kappa_score
def quadratic_kappa(y_hat, y):
    """Quadratic-weighted kappa between the row-wise argmax of `y_hat` and of `y`."""
    predicted_classes = np.argmax(y_hat, 1)
    reference_classes = np.argmax(y, 1)
    return cohen_kappa_score(predicted_classes, reference_classes, weights='quadratic')

def quadratic_kappa_v2(y_hat, y):
    """Quadratic-weighted kappa between two sequences of class labels."""
    score = cohen_kappa_score(y_hat, y, weights='quadratic')
    return score

In [17]:
def train_model(epoch):
    """Run one training epoch over the global `train_loader` and return the mean batch loss.

    Uses the notebook-level `model_conv`, `optimizer`, `scheduler` and
    `criterion`. Gradients are accumulated and the optimizer/scheduler step
    once every ACCUMULATE batches. `epoch` is accepted but not used.
    """
    model_conv.train()
    batches_per_epoch = len(train_loader)
    avg_loss = 0.
    optimizer.zero_grad()
    for step, (imgs, labels) in enumerate(train_loader, start=1):
        batch_imgs = imgs.cuda()
        batch_labels = labels.float().cuda()
        loss = criterion(model_conv(batch_imgs), batch_labels)
        # Mixed precision: backpropagate through apex's scaled loss.
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        if step % ACCUMULATE == 0:
            torch.nn.utils.clip_grad_norm_(model_conv.parameters(), max_norm=5.0, norm_type=2)
            optimizer.step()
            optimizer.zero_grad()
            scheduler.step()
        avg_loss += loss.item() / batches_per_epoch
    return avg_loss

def test_model():
    """Evaluate the global `model_conv` on the global `val_loader`.

    Returns:
        (avg_val_loss, metric_val): mean of `criterion_test` over the batches
        and the quadratic-weighted kappa of the rounded predictions.
    """
    avg_val_loss = 0.
    model_conv.eval()
    y_pred_val = np.zeros((len(valset), 1))
    y_true_val = np.zeros((len(valset), 1))
    # Running write position: independent of the loader's batch size.
    offset = 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs_vaild, labels_vaild = imgs.cuda(), labels.float().cuda()
            output_test = model_conv(imgs_vaild)
            avg_val_loss += (criterion_test(output_test, labels_vaild).item() / len(val_loader))
            # Built-in int instead of the `np.int` alias, which NumPy removed in 1.24.
            a = labels_vaild.detach().cpu().numpy().astype(int)
            b = output_test.detach().cpu().numpy()

            y_pred_val[offset:offset+b.shape[0]] = b
            y_true_val[offset:offset+b.shape[0]] = a
            offset += b.shape[0]

    thres, y_pred_val = find_threshold(y_pred_val, y_true_val)
    print('threshould:', thres)
    metric_val = quadratic_kappa_v2(np.squeeze(y_pred_val), np.squeeze(y_true_val))
    print('qwk:', metric_val)
    return avg_val_loss, metric_val

In [18]:
def train(fold):
    """Train the global `model_conv` for EPOCH epochs and checkpoint the best-QWK weights.

    Args:
        fold: fold number, used only in the checkpoint file name.

    Returns:
        (saved_loss, best_avg_qwk): validation loss at the epoch whose weights
        were saved, and the best validation QWK. `saved_loss` is NaN if no
        epoch produced a comparable (non-NaN) QWK.
    """
    best_avg_loss = 100.0
    # Start below any valid kappa so the first epoch is always checkpointed;
    # otherwise `saved_loss` stays undefined when QWK never exceeds 0.
    best_avg_qwk = float('-inf')
    saved_loss = float('nan')

    ckpt_dir = './exp/exp' + str(EXP)
    ckpt_path = ckpt_dir + '/efficientnet-b5-best' + str(fold) + '.pth'
    # torch.save does not create missing directories.
    os.makedirs(ckpt_dir, exist_ok=True)

    ### training
    for epoch in range(EPOCH):
        print('lr:', scheduler.get_lr()[0])
        start_time   = time.time()
        avg_loss     = train_model(epoch)
        avg_val_loss, avg_val_qwk = test_model()
        elapsed_time = time.time() - start_time
        print('Epoch {}/{} \t loss={:.4f} \t val_loss={:.4f} \t time={:.2f}s'.format(
            epoch + 1, EPOCH, avg_loss, avg_val_loss, elapsed_time))

        if avg_val_loss < best_avg_loss:
            best_avg_loss = avg_val_loss

        # Model selection is driven by QWK, not by the loss.
        if avg_val_qwk > best_avg_qwk:
            best_avg_qwk = avg_val_qwk
            torch.save(model_conv.state_dict(), ckpt_path)
            print('model saved!')
            saved_loss = avg_val_loss

        print('=================================')

    print('best loss:', best_avg_loss)
    print('best qwk:', best_avg_qwk)
    print('p:', model_conv._avg_pooling.p)

    return saved_loss, best_avg_qwk

In [19]:
# The directory-creation code next to EXP is commented out, so make sure the
# experiment directory exists before opening the log inside it.
os.makedirs('./exp/exp' + str(EXP), exist_ok=True)
# Mode 'w' truncates an existing log every time this cell is run.
log = open('./exp/exp' + str(EXP) +'/log.txt', 'w')
# cv_losses = []
# cv_metrics = []

# for fold in range(FOLD):
#     print('\n ********** Fold %d **********\n'%fold)
#     ###################### Dataset #######################
#     trainset     = DRDataset(train_df.iloc[tr_idx[fold]].reset_index(), transform =train_transform)
#     train_loader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

#     valset       = DRDataset(train_df.iloc[val_idx[fold]].reset_index(), transform   =test_transform)
#     val_loader   = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

#     ####################### Model ########################
#     model_conv = EfficientNet.from_pretrained('efficientnet-b5')
#     #model_conv = EfficientNet.from_pretrained("efficientnet-b5", advprop=True)
#     #model_conv.load_state_dict(torch.load(''))
#     #model_conv.cuda()
#     model_conv._avg_pooling = GeM(p=3.0)
#     model_conv._fc = nn.Linear(2048, 1)
#     model_conv._dropout = nn.Dropout(p=0.5)
#     model_conv.cuda()

#     ###################### Optim ########################
#     optimizer = torch.optim.AdamW(model_conv.parameters(), lr=LR/10.0, weight_decay=1e-4)
#     #optimizer = torch.optim.SGD(model_conv.parameters(), lr=LR, weight_decay=1e-5)
#     #optimizer = RangerVA(model_conv.parameters(), lr=LR*10, weight_decay=1e-5)

#     #optimizer = SWA(optimizer, swa_start=1600, swa_freq=50, swa_lr=2e-5)

#     criterion = nn.SmoothL1Loss()
#     criterion_test = nn.SmoothL1Loss()
#     #criterion_test = nn.MSELoss()

#     T = len(train_loader)//ACCUMULATE * 5 # cycle
#     #lr = CosineAnnealingWarmUpRestarts(0, T_0=T, T_warmup=T//10, gamma=0.66)
#     #optimizer.param_groups[0]['lr'] = lr_rate * lr
#     scheduler = CosineAnnealingWarmUpRestarts(optimizer, T_0=T, T_mult=1, eta_max=LR, T_up=T//5, gamma=0.8)
#     scheduler.step()
#     # print(scheduler.get_lr()[0])

#     model_conv, optimizer = amp.initialize(model_conv, optimizer, opt_level="O1",verbosity=0)
    
#     val_loss, val_qwk = train(fold)
    
#     cv_losses.append(val_loss)
#     cv_metrics.append(val_qwk)
#     log.write('[Fold%d] val loss:%.5f, val qwk:%.5f; \n'%(fold, val_loss, val_qwk))

# cv_loss = sum(cv_losses)/FOLD
# cv_qwk = sum(cv_metrics)/FOLD    
# print('CV loss:%.6f  CV qwk:%.6f'%(cv_loss, cv_qwk))
# log.write('CV loss:%.6f  CV qwk:%.6f\n\n'%(cv_loss, cv_qwk))

## Test

In [20]:
# Release cached GPU memory held by PyTorch's allocator before the evaluation cells.
torch.cuda.empty_cache()

In [21]:
# Hold-out evaluation data: the regular-fundus validation CSV with the deterministic transform.
valset       = DRDataset(val_df, transform   =test_transform)
# shuffle=False keeps predictions aligned with the row order of val_df.
val_loader   = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

In [22]:
# Score every fold's best checkpoint on the hold-out validation set and log the QWK.
pred_list = []
metric_val_list = []
for fold in range(FOLD):
    print('*********Fold%d************'%fold)
    # Rebuild the architecture used for training, then load the saved weights.
    model_conv = EfficientNet.from_name('efficientnet-b5')
    model_conv._avg_pooling = GeM()
    model_conv._fc = nn.Linear(2048, 1)
    model_conv._dropout = nn.Dropout(p=0.5)

    model_conv.load_state_dict(torch.load('./exp/exp' + str(EXP) + '/efficientnet-b5-best' + str(fold) + '.pth'))
    model_conv.cuda()
    model_conv.eval()

    y_pred_val = np.zeros((len(val_df), 1))
    y_true_val = np.zeros((len(val_df), 1))
    # Running write position: independent of the loader's batch size.
    offset = 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs_vaild, labels_vaild = imgs.cuda(), labels.float().cuda()
            output_test = model_conv(imgs_vaild)
            # Built-in int instead of the `np.int` alias, which NumPy removed in 1.24.
            a = labels_vaild.detach().cpu().numpy().astype(int)
            b = output_test.detach().cpu().numpy()

            y_pred_val[offset:offset+b.shape[0]] = b
            y_true_val[offset:offset+b.shape[0]] = a
            offset += b.shape[0]

    # Keep the raw (unrounded) predictions of each fold.
    pred_list.append(y_pred_val)
    thres, pred = find_threshold(y_pred_val, y_true_val)
    print('threshould:', thres)
    metric_val = quadratic_kappa_v2(np.squeeze(pred), np.squeeze(y_true_val))
    print('qwk:', metric_val)
    log.write('Test model %d... qwk:%.5f \n'%(fold, metric_val))
    metric_val_list.append(metric_val)

avg_qwk = sum(metric_val_list)/FOLD
log.write('================\n Test average qwk is %.6f\n'%avg_qwk)
print('Average test qwk;',avg_qwk)

log.write('\n\n')

*********Fold0************


Traceback (most recent call last):
  File "/home/fangxi/miniconda3/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/home/fangxi/miniconda3/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/fangxi/miniconda3/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/home/fangxi/miniconda3/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


KeyboardInterrupt: 

# refinery

In [23]:
# Out-of-fold predictions: each fold's best checkpoint predicts the rows it did
# not train on. Entry `fold` of `oof_list` / `gt_list` is aligned with
# `val_idx[fold]`, row for row.
oof_list = []
gt_list = []
for fold in range(FOLD):
    valset       = DRDataset(train_df.iloc[val_idx[fold]].reset_index(), transform   =test_transform)
    val_loader   = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
    print('*********Fold%d************'%fold)
    model_conv = EfficientNet.from_name('efficientnet-b5')
    model_conv._avg_pooling = GeM()
    model_conv._fc = nn.Linear(2048, 1)
    model_conv._dropout = nn.Dropout(p=0.5)

    model_conv.load_state_dict(torch.load('./exp/exp' + str(EXP) + '/efficientnet-b5-best' + str(fold) + '.pth'))
    model_conv.cuda()
    model_conv.eval()

    # Size the buffers for this fold's rows only; the loop never fills more than that.
    y_pred_val = np.zeros((len(valset), 1))
    y_true_val = np.zeros((len(valset), 1))
    # Running write position: independent of the loader's batch size.
    offset = 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs_vaild, labels_vaild = imgs.cuda(), labels.float().cuda()
            output_test = model_conv(imgs_vaild)
            # Built-in int instead of the `np.int` alias, which NumPy removed in 1.24.
            a = labels_vaild.detach().cpu().numpy().astype(int)
            b = output_test.detach().cpu().numpy()

            y_pred_val[offset:offset+b.shape[0]] = b
            y_true_val[offset:offset+b.shape[0]] = a
            offset += b.shape[0]

    oof_list.append(y_pred_val)
    gt_list.append(y_true_val)

*********Fold0************
*********Fold1************
*********Fold2************
*********Fold3************
*********Fold4************


In [24]:
# Scatter the per-fold predictions / labels back to their row positions in train_df.
oof = np.zeros((len(train_df), 1))
gt = np.zeros((len(train_df), 1))
for fold in range(FOLD):
    fold_rows = val_idx[fold]
    row_count = len(fold_rows)
    # Only the first `row_count` entries of each per-fold array belong to this fold.
    oof[fold_rows] = oof_list[fold][:row_count]
    gt[fold_rows] = gt_list[fold][:row_count]

In [25]:
# First label refinement: weighted mean of ground truth (weight 3) and out-of-fold prediction (weight 1).
refine1 = (oof + 3.0*gt)/(3.0+1)

In [26]:
# Cross-validated training on the refined labels `refine1`.
# Validation still uses the original labels (DRDataset), so QWK stays comparable.
# `train(fold)` writes to the same checkpoint file names that the loading cells
# read from, so checkpoints already stored for this EXP are overwritten.
cv_losses = []
cv_metrics = []

for fold in range(FOLD):
    print('\n ********** Fold %d **********\n'%fold)
    ###################### Dataset #######################
    # refine1 is indexed with tr_idx[fold] so the labels line up with the re-indexed training rows.
    trainset     = DRDataset_refine(train_df.iloc[tr_idx[fold]].reset_index(), refine1[tr_idx[fold]], transform =train_transform)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

    valset       = DRDataset(train_df.iloc[val_idx[fold]].reset_index(), transform   =test_transform)
    val_loader   = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    ####################### Model ########################
    # Pretrained EfficientNet-B5 with GeM pooling and a single-output regression head.
    model_conv = EfficientNet.from_pretrained('efficientnet-b5')
    #model_conv = EfficientNet.from_pretrained("efficientnet-b5", advprop=True)
    #model_conv.load_state_dict(torch.load(''))
    #model_conv.cuda()
    model_conv._avg_pooling = GeM(p=3.0)
    model_conv._fc = nn.Linear(2048, 1)
    model_conv._dropout = nn.Dropout(p=0.5)
    model_conv.cuda()

    ###################### Optim ########################
    # AdamW starts at LR/10; the scheduler below raises it up to eta_max=LR.
    optimizer = torch.optim.AdamW(model_conv.parameters(), lr=LR/10.0, weight_decay=1e-4)
    #optimizer = torch.optim.SGD(model_conv.parameters(), lr=LR, weight_decay=1e-5)
    #optimizer = RangerVA(model_conv.parameters(), lr=LR*10, weight_decay=1e-5)

    #optimizer = SWA(optimizer, swa_start=1600, swa_freq=50, swa_lr=2e-5)

    criterion = nn.SmoothL1Loss()
    criterion_test = nn.SmoothL1Loss()
    #criterion_test = nn.MSELoss()

    # Cycle length in optimizer steps: (batches per epoch // ACCUMULATE) steps per epoch, times 5 epochs.
    T = len(train_loader)//ACCUMULATE * 5 # cycle
    #lr = CosineAnnealingWarmUpRestarts(0, T_0=T, T_warmup=T//10, gamma=0.66)
    #optimizer.param_groups[0]['lr'] = lr_rate * lr
    scheduler = CosineAnnealingWarmUpRestarts(optimizer, T_0=T, T_mult=1, eta_max=LR, T_up=T//5, gamma=0.8)
    # The scheduler is stepped once before training starts.
    scheduler.step()
    # print(scheduler.get_lr()[0])

    # Mixed-precision setup (apex, opt level O1); done after the scheduler has been created.
    model_conv, optimizer = amp.initialize(model_conv, optimizer, opt_level="O1",verbosity=0)
    
    val_loss, val_qwk = train(fold)
    
    cv_losses.append(val_loss)
    cv_metrics.append(val_qwk)
    log.write('[Fold%d] val loss:%.5f, val qwk:%.5f; \n'%(fold, val_loss, val_qwk))

# Averages assume one entry per fold.
cv_loss = sum(cv_losses)/FOLD
cv_qwk = sum(cv_metrics)/FOLD    
print('CV loss:%.6f  CV qwk:%.6f'%(cv_loss, cv_qwk))
log.write('CV loss:%.6f  CV qwk:%.6f\n\n'%(cv_loss, cv_qwk))

# Score the freshly trained checkpoints of every fold on the hold-out validation set.
torch.cuda.empty_cache()
valset       = DRDataset(val_df, transform   =test_transform)
val_loader   = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
pred_list = []
metric_val_list = []
for fold in range(FOLD):
    print('*********Fold%d************'%fold)
    # Rebuild the architecture used for training, then load the saved weights.
    model_conv = EfficientNet.from_name('efficientnet-b5')
    model_conv._avg_pooling = GeM()
    model_conv._fc = nn.Linear(2048, 1)
    model_conv._dropout = nn.Dropout(p=0.5)

    model_conv.load_state_dict(torch.load('./exp/exp' + str(EXP) + '/efficientnet-b5-best' + str(fold) + '.pth'))
    model_conv.cuda()
    model_conv.eval()

    y_pred_val = np.zeros((len(val_df), 1))
    y_true_val = np.zeros((len(val_df), 1))
    # Running write position: independent of the loader's batch size.
    offset = 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs_vaild, labels_vaild = imgs.cuda(), labels.float().cuda()
            output_test = model_conv(imgs_vaild)
            # Built-in int instead of the `np.int` alias, which NumPy removed in 1.24.
            a = labels_vaild.detach().cpu().numpy().astype(int)
            b = output_test.detach().cpu().numpy()

            y_pred_val[offset:offset+b.shape[0]] = b
            y_true_val[offset:offset+b.shape[0]] = a
            offset += b.shape[0]

    # Keep the raw (unrounded) predictions of each fold.
    pred_list.append(y_pred_val)
    thres, pred = find_threshold(y_pred_val, y_true_val)
    print('threshould:', thres)
    metric_val = quadratic_kappa_v2(np.squeeze(pred), np.squeeze(y_true_val))
    print('qwk:', metric_val)
    # Fixed: the original appended the misspelled name `metirc_val`.
    metric_val_list.append(metric_val)
    log.write('Test model %d... qwk:%.5f \n'%(fold, metric_val))

avg_qwk = sum(metric_val_list)/FOLD
log.write('================\n Test average qwk is %.6f\n'%avg_qwk)
print('Average test qwk;',avg_qwk)

log.write('\n\n')

# Kept for compatibility; neither object is used by the code in this cell.
trainset_     = DRDataset(train_df, transform = test_transform)
train_loader_ = torch.utils.data.DataLoader(trainset_, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

# Out-of-fold predictions of the round-1 models; entry `fold` of each list is
# aligned with `val_idx[fold]`, row for row.
oof_list = []
gt_list = []
for fold in range(FOLD):
    valset       = DRDataset(train_df.iloc[val_idx[fold]].reset_index(), transform   =test_transform)
    val_loader   = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
    print('*********Fold%d************'%fold)
    model_conv = EfficientNet.from_name('efficientnet-b5')
    model_conv._avg_pooling = GeM()
    model_conv._fc = nn.Linear(2048, 1)
    model_conv._dropout = nn.Dropout(p=0.5)

    model_conv.load_state_dict(torch.load('./exp/exp' + str(EXP) + '/efficientnet-b5-best' + str(fold) + '.pth'))
    model_conv.cuda()
    model_conv.eval()

    # Size the buffers for this fold's rows only; the loop never fills more than that.
    y_pred_val = np.zeros((len(valset), 1))
    y_true_val = np.zeros((len(valset), 1))
    # Running write position: independent of the loader's batch size.
    offset = 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs_vaild, labels_vaild = imgs.cuda(), labels.float().cuda()
            output_test = model_conv(imgs_vaild)
            # Built-in int instead of the `np.int` alias, which NumPy removed in 1.24.
            a = labels_vaild.detach().cpu().numpy().astype(int)
            b = output_test.detach().cpu().numpy()

            y_pred_val[offset:offset+b.shape[0]] = b
            y_true_val[offset:offset+b.shape[0]] = a
            offset += b.shape[0]

    oof_list.append(y_pred_val)
    gt_list.append(y_true_val)

# Scatter the per-fold results back to their row positions in train_df.
oof = np.zeros((len(train_df), 1))
gt = np.zeros((len(train_df), 1))
for fold in range(FOLD):
    for i,idx in enumerate(val_idx[fold]):
        oof[idx] = oof_list[fold][i]
        gt[idx] = gt_list[fold][i]

# Second refinement: previous refined label (weight 3) blended with the new OOF prediction (weight 1).
refine2 = (oof + 3.0*refine1)/(3.0+1)

# Drop the reference to the last loaded model.
del model_conv


 ********** Fold 0 **********

Loaded pretrained weights for efficientnet-b5
lr: 0.0001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.6156382477626001
Epoch 1/25 	 loss=0.5965 	 val_loss=0.4450 	 time=63.99s
model saved!
lr: 0.001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.6904444713576359
Epoch 2/25 	 loss=0.3759 	 val_loss=0.3377 	 time=64.06s
model saved!
lr: 0.0008681980515339464
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.805783340526543
Epoch 3/25 	 loss=0.2959 	 val_loss=0.2378 	 time=63.97s
model saved!
lr: 0.0005500000000000001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8049615055603079
Epoch 4/25 	 loss=0.2832 	 val_loss=0.2331 	 time=63.89s
lr: 0.00023180194846605365
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8288949465296708
Epoch 5/25 	 loss=0.2170 	 val_loss=0.2244 	 time=64.00s
model saved!
lr: 0.0001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7648248830446327
Epoch 6/25 	 loss=0.1880 	 val_loss=0.2802 	 time=63.96s
lr: 0.0008
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7507745266781412
Epoch 7/

threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8440754627149913
Epoch 22/25 	 loss=0.0965 	 val_loss=0.2100 	 time=64.70s
model saved!
lr: 0.00036426012972767764
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8440975325836158
Epoch 23/25 	 loss=0.0965 	 val_loss=0.2161 	 time=64.44s
model saved!
lr: 0.00025480000000000007
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8269777050873719
Epoch 24/25 	 loss=0.0944 	 val_loss=0.2127 	 time=64.38s
lr: 0.00014533987027232247
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8447100661859133
Epoch 25/25 	 loss=0.0730 	 val_loss=0.2072 	 time=64.35s
model saved!
best loss: 0.2031598331406712
best qwk: 0.8447100661859133
p: Parameter containing:
tensor([2.9435], device='cuda:0', requires_grad=True)

 ********** Fold 2 **********

Loaded pretrained weights for efficientnet-b5
lr: 0.0001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.5479358580689184
Epoch 1/25 	 loss=0.5991 	 val_loss=0.4266 	 time=64.76s
model saved!
lr: 0.001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.5014320921867714

threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7779905972488247
Epoch 17/25 	 loss=0.1177 	 val_loss=0.2822 	 time=65.01s
lr: 0.00045166399692442885
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7138964577656676
Epoch 18/25 	 loss=0.1146 	 val_loss=0.3076 	 time=65.07s
lr: 0.0003060000000000001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8251904975347377
Epoch 19/25 	 loss=0.1293 	 val_loss=0.2395 	 time=65.03s
model saved!
lr: 0.00016033600307557123
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.757828439634525
Epoch 20/25 	 loss=0.0805 	 val_loss=0.2772 	 time=65.30s
lr: 0.0001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7407458755025649
Epoch 21/25 	 loss=0.0826 	 val_loss=0.3139 	 time=64.77s
lr: 0.0004096000000000001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7530633242914602
Epoch 22/25 	 loss=0.0919 	 val_loss=0.2795 	 time=65.36s
lr: 0.00036426012972767764
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7844883210736869
Epoch 23/25 	 loss=0.0957 	 val_loss=0.2622 	 time=65.03s
lr: 0.00025480000000000007
threshould: 

In [27]:
# Persist the round-2 refined labels so they can be reloaded without recomputing them.
np.save('./exp/exp' + str(EXP) + '/refine2.npy',refine2)

In [22]:
# Reload the refined labels from disk.
# NOTE(review): the execution counter restarts at 22 from this cell on, which suggests these
# cells ran in a different kernel session from the cells above — confirm that Run All still works.
refine2 = np.load('./exp/exp' + str(EXP) + '/refine2.npy')

In [23]:
refine2

array([[ 0.0163333 ],
       [-0.00443362],
       [ 0.00442136],
       ...,
       [ 0.82298497],
       [ 0.23720761],
       [ 0.29910083]])

In [24]:
# Cross-validated training on the second-round refined labels `refine2`.
# Validation still uses the original labels (DRDataset).
# `train(fold)` writes to the same checkpoint file names as the previous round,
# so the round-1 checkpoints of this EXP are overwritten.
cv_losses = []
cv_metrics = []

for fold in range(FOLD):
    print('\n ********** Fold %d **********\n'%fold)
    ###################### Dataset #######################
    # refine2 is indexed with tr_idx[fold] so the labels line up with the re-indexed training rows.
    trainset     = DRDataset_refine(train_df.iloc[tr_idx[fold]].reset_index(), refine2[tr_idx[fold]], transform =train_transform)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

    valset       = DRDataset(train_df.iloc[val_idx[fold]].reset_index(), transform   =test_transform)
    val_loader   = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    ####################### Model ########################
    # Pretrained EfficientNet-B5 with GeM pooling and a single-output regression head.
    model_conv = EfficientNet.from_pretrained('efficientnet-b5')
    #model_conv = EfficientNet.from_pretrained("efficientnet-b5", advprop=True)
    #model_conv.load_state_dict(torch.load(''))
    #model_conv.cuda()
    model_conv._avg_pooling = GeM(p=3.0)
    model_conv._fc = nn.Linear(2048, 1)
    model_conv._dropout = nn.Dropout(p=0.5)
    model_conv.cuda()

    ###################### Optim ########################
    # AdamW starts at LR/10; the scheduler below raises it up to eta_max=LR.
    optimizer = torch.optim.AdamW(model_conv.parameters(), lr=LR/10.0, weight_decay=1e-4)
    #optimizer = torch.optim.SGD(model_conv.parameters(), lr=LR, weight_decay=1e-5)
    #optimizer = RangerVA(model_conv.parameters(), lr=LR*10, weight_decay=1e-5)

    #optimizer = SWA(optimizer, swa_start=1600, swa_freq=50, swa_lr=2e-5)

    criterion = nn.SmoothL1Loss()
    criterion_test = nn.SmoothL1Loss()
    #criterion_test = nn.MSELoss()

    # Cycle length in optimizer steps: (batches per epoch // ACCUMULATE) steps per epoch, times 5 epochs.
    T = len(train_loader)//ACCUMULATE * 5 # cycle
    #lr = CosineAnnealingWarmUpRestarts(0, T_0=T, T_warmup=T//10, gamma=0.66)
    #optimizer.param_groups[0]['lr'] = lr_rate * lr
    scheduler = CosineAnnealingWarmUpRestarts(optimizer, T_0=T, T_mult=1, eta_max=LR, T_up=T//5, gamma=0.8)
    # The scheduler is stepped once before training starts.
    scheduler.step()
    # print(scheduler.get_lr()[0])

    # Mixed-precision setup (apex, opt level O1); done after the scheduler has been created.
    model_conv, optimizer = amp.initialize(model_conv, optimizer, opt_level="O1",verbosity=0)
    
    val_loss, val_qwk = train(fold)
    
    cv_losses.append(val_loss)
    cv_metrics.append(val_qwk)
    log.write('[Fold%d] val loss:%.5f, val qwk:%.5f; \n'%(fold, val_loss, val_qwk))

# Averages assume one entry per fold.
cv_loss = sum(cv_losses)/FOLD
cv_qwk = sum(cv_metrics)/FOLD    
print('CV loss:%.6f  CV qwk:%.6f'%(cv_loss, cv_qwk))
log.write('CV loss:%.6f  CV qwk:%.6f\n\n'%(cv_loss, cv_qwk))

torch.cuda.empty_cache()
valset       = DRDataset(val_df, transform   =test_transform)
val_loader   = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
pred_list = []
metric_val_list = []
for fold in range(FOLD):
    print('*********Fold%d************'%fold)
    model_conv = EfficientNet.from_name('efficientnet-b5')
    model_conv._avg_pooling = GeM()
    model_conv._fc = nn.Linear(2048, 1)
    model_conv._dropout = nn.Dropout(p=0.5)

    model_conv.load_state_dict(torch.load('./exp/exp' + str(EXP) + '/efficientnet-b5-best' + str(fold) + '.pth'))
    model_conv.cuda()
    model_conv.eval()

    y_pred_val = np.zeros((len(val_df), 1))
    y_true_val = np.zeros((len(val_df), 1))
    with torch.no_grad():
        for idx, (imgs, labels) in enumerate(val_loader):
            imgs_vaild, labels_vaild = imgs.cuda(), labels.float().cuda()
            output_test = model_conv(imgs_vaild) 
            a = labels_vaild.detach().cpu().numpy().astype(np.int)
            b = output_test.detach().cpu().numpy()

            y_pred_val[idx*BATCH_SIZE:idx*BATCH_SIZE+b.shape[0]] = b
            y_true_val[idx*BATCH_SIZE:idx*BATCH_SIZE+b.shape[0]] = a

    pred_list.append(y_pred_val)
    thres, pred = find_threshold(y_pred_val, y_true_val)
    print('threshould:', thres)
    metric_val = quadratic_kappa_v2(np.squeeze(pred), np.squeeze(y_true_val))
    print('qwk:', metric_val)
    metric_val_list.append(metirc_val)
    log.write('Test model %d... qwk:%.5f \n'%(fold, metric_val))    
    
avg_qwk = sum(metric_val_list)/FOLD        
log.write('================\n Test average qwk is %.6f\n'%avg_qwk)
print('Average test qwk;',avg_qwk)

log.write('\n\n')

trainset_     = DRDataset(train_df, transform = test_transform)
train_loader_ = torch.utils.data.DataLoader(trainset_, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

oof_list = []
gt_list = []
for fold in range(FOLD):
    valset       = DRDataset(train_df.iloc[val_idx[fold]].reset_index(), transform   =test_transform)
    val_loader   = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
    print('*********Fold%d************'%fold)
    model_conv = EfficientNet.from_name('efficientnet-b5')
    model_conv._avg_pooling = GeM()
    model_conv._fc = nn.Linear(2048, 1)
    model_conv._dropout = nn.Dropout(p=0.5)

    model_conv.load_state_dict(torch.load('./exp/exp' + str(EXP) + '/efficientnet-b5-best' + str(fold) + '.pth'))
    model_conv.cuda()
    model_conv.eval()

    y_pred_val = np.zeros((len(train_df), 1))
    y_true_val = np.zeros((len(train_df), 1))
    with torch.no_grad():
        for idx, (imgs, labels) in enumerate(val_loader):
            imgs_vaild, labels_vaild = imgs.cuda(), labels.float().cuda()
            output_test = model_conv(imgs_vaild) 
            a = labels_vaild.detach().cpu().numpy().astype(np.int)
            b = output_test.detach().cpu().numpy()

            y_pred_val[idx*BATCH_SIZE:idx*BATCH_SIZE+b.shape[0]] = b
            y_true_val[idx*BATCH_SIZE:idx*BATCH_SIZE+b.shape[0]] = a
    
    oof_list.append(y_pred_val)
    gt_list.append(y_true_val)
    
oof = np.zeros((len(train_df), 1))
gt = np.zeros((len(train_df), 1))
for fold in range(FOLD):
    for i,idx in enumerate(val_idx[fold]):
        oof[idx] = oof_list[fold][i]
        gt[idx] = gt_list[fold][i]
        
refine3 = (oof + 3.0*refine2)/(3.0+1)

del model_conv


 ********** Fold 0 **********

Loaded pretrained weights for efficientnet-b5
lr: 0.0001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.56047197640118
Epoch 1/25 	 loss=0.5881 	 val_loss=0.5070 	 time=62.91s
model saved!
lr: 0.001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7144636850629127
Epoch 2/25 	 loss=0.3736 	 val_loss=0.2861 	 time=62.98s
model saved!
lr: 0.0008681980515339464
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8062111801242237
Epoch 3/25 	 loss=0.2522 	 val_loss=0.2219 	 time=63.05s
model saved!
lr: 0.0005500000000000001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.804165771956195
Epoch 4/25 	 loss=0.1977 	 val_loss=0.2243 	 time=63.37s
lr: 0.00023180194846605365
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8427835051546392
Epoch 5/25 	 loss=0.1738 	 val_loss=0.1955 	 time=63.35s
model saved!
lr: 0.0001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7532871145111164
Epoch 6/25 	 loss=0.1427 	 val_loss=0.2719 	 time=63.38s
lr: 0.0008
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8190843905129619
Epoch 7/25

threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.841643657625467
Epoch 22/25 	 loss=0.0653 	 val_loss=0.2102 	 time=64.04s
lr: 0.00036426012972767764
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8232097256476854
Epoch 23/25 	 loss=0.0717 	 val_loss=0.2106 	 time=64.35s
lr: 0.00025480000000000007
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8588063455998701
Epoch 24/25 	 loss=0.0572 	 val_loss=0.1829 	 time=64.32s
model saved!
lr: 0.00014533987027232247
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8582362325572003
Epoch 25/25 	 loss=0.0493 	 val_loss=0.1874 	 time=64.09s
best loss: 0.17777867281499007
best qwk: 0.8588063455998701
p: Parameter containing:
tensor([2.9476], device='cuda:0', requires_grad=True)

 ********** Fold 2 **********

Loaded pretrained weights for efficientnet-b5
lr: 0.0001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.5063910796845255
Epoch 1/25 	 loss=0.6240 	 val_loss=0.5384 	 time=64.64s
model saved!
lr: 0.001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7485361353974493
Epoch 2/25 	 loss=0.3547 

threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8331891758309651
Epoch 17/25 	 loss=0.0983 	 val_loss=0.2261 	 time=65.24s
model saved!
lr: 0.00045166399692442885
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8383754662246167
Epoch 18/25 	 loss=0.0841 	 val_loss=0.2315 	 time=65.08s
model saved!
lr: 0.0003060000000000001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8220171390903098
Epoch 19/25 	 loss=0.0754 	 val_loss=0.2371 	 time=65.17s
lr: 0.00016033600307557123
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8161748728868802
Epoch 20/25 	 loss=0.0564 	 val_loss=0.2415 	 time=64.79s
lr: 0.0001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8322886667796037
Epoch 21/25 	 loss=0.0614 	 val_loss=0.2300 	 time=65.02s
lr: 0.0004096000000000001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.814563149357766
Epoch 22/25 	 loss=0.0747 	 val_loss=0.2398 	 time=64.81s
lr: 0.00036426012972767764
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7952727437067836
Epoch 23/25 	 loss=0.0547 	 val_loss=0.2584 	 time=65.01s
lr: 0.00025480000000000007

In [25]:
# Persist the blended round-3 targets in this experiment's folder.
refine3_path = './exp/exp' + str(EXP) + '/refine3.npy'
np.save(refine3_path, refine3)

In [26]:
# log.close()  # deliberately disabled: the next refinement round still writes to `log`

In [23]:
# Reload the refined-target array stored as refine3.npy in this experiment's folder.
refine3_path = './exp/exp' + str(EXP) + '/refine3.npy'
refine3 = np.load(refine3_path)

In [24]:
# Refinement round: refine3 -> refine4.
#   1. Train one EfficientNet-B5 regressor per fold, using the soft targets in
#      `refine3` for the training rows of that fold.
#   2. Score each fold's saved checkpoint on the hold-out set `val_df`.
#   3. Collect out-of-fold (OOF) predictions on `train_df` and blend them with
#      `refine3` to obtain `refine4`.
# NOTE(review): train() is defined elsewhere in the notebook and only receives
# the fold index, so it presumably reads model_conv / optimizer / scheduler /
# criterion / criterion_test / train_loader / val_loader from notebook globals.
# Those names are therefore kept exactly as they were.
cv_losses = []
cv_metrics = []

for fold in range(FOLD):
    print('\n ********** Fold %d **********\n'%fold)
    ###################### Dataset #######################
    # Training rows get the refined targets; validation rows use the plain DRDataset.
    trainset     = DRDataset_refine(train_df.iloc[tr_idx[fold]].reset_index(), refine3[tr_idx[fold]], transform=train_transform)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

    valset       = DRDataset(train_df.iloc[val_idx[fold]].reset_index(), transform=test_transform)
    val_loader   = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    ####################### Model ########################
    # ImageNet-pretrained backbone with GeM pooling and a single-output head.
    model_conv = EfficientNet.from_pretrained('efficientnet-b5')
    model_conv._avg_pooling = GeM(p=3.0)
    model_conv._fc = nn.Linear(2048, 1)
    model_conv._dropout = nn.Dropout(p=0.5)
    model_conv.cuda()

    ###################### Optim ########################
    optimizer = torch.optim.AdamW(model_conv.parameters(), lr=LR/10.0, weight_decay=1e-4)

    criterion = nn.SmoothL1Loss()
    criterion_test = nn.SmoothL1Loss()

    # Scheduler cycle length, derived from the number of batches per epoch.
    # NOTE(review): assumes one scheduler step per ACCUMULATE batches, i.e. a
    # 5-epoch cycle -- confirm against train().
    T = len(train_loader)//ACCUMULATE * 5 # cycle
    scheduler = CosineAnnealingWarmUpRestarts(optimizer, T_0=T, T_mult=1, eta_max=LR, T_up=T//5, gamma=0.8)
    scheduler.step()

    # Mixed precision (apex O1); must wrap the model and optimizer before training.
    model_conv, optimizer = amp.initialize(model_conv, optimizer, opt_level="O1", verbosity=0)

    val_loss, val_qwk = train(fold)

    cv_losses.append(val_loss)
    cv_metrics.append(val_qwk)
    log.write('[Fold%d] val loss:%.5f, val qwk:%.5f; \n'%(fold, val_loss, val_qwk))

cv_loss = sum(cv_losses)/FOLD
cv_qwk = sum(cv_metrics)/FOLD
print('CV loss:%.6f  CV qwk:%.6f'%(cv_loss, cv_qwk))
log.write('CV loss:%.6f  CV qwk:%.6f\n\n'%(cv_loss, cv_qwk))

################## Hold-out evaluation ##################
torch.cuda.empty_cache()
valset       = DRDataset(val_df, transform=test_transform)
val_loader   = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
pred_list = []
metric_val_list = []
for fold in range(FOLD):
    print('*********Fold%d************'%fold)
    # Rebuild the architecture without pretrained weights, then restore the
    # fold checkpoint (presumably written by train() -- the log prints "model saved!").
    model_conv = EfficientNet.from_name('efficientnet-b5')
    model_conv._avg_pooling = GeM()
    model_conv._fc = nn.Linear(2048, 1)
    model_conv._dropout = nn.Dropout(p=0.5)

    model_conv.load_state_dict(torch.load('./exp/exp' + str(EXP) + '/efficientnet-b5-best' + str(fold) + '.pth'))
    model_conv.cuda()
    model_conv.eval()

    y_pred_val = np.zeros((len(val_df), 1))
    y_true_val = np.zeros((len(val_df), 1))
    with torch.no_grad():
        for idx, (imgs, labels) in enumerate(val_loader):
            output_test = model_conv(imgs.cuda())
            # Labels never need the GPU; builtin `int` replaces the removed `np.int` alias.
            a = labels.float().numpy().astype(int)
            b = output_test.detach().cpu().numpy()

            # shuffle=False, so batch `idx` maps to a contiguous row range.
            start = idx*BATCH_SIZE
            y_pred_val[start:start+b.shape[0]] = b
            y_true_val[start:start+b.shape[0]] = a

    pred_list.append(y_pred_val)
    thres, pred = find_threshold(y_pred_val, y_true_val)
    print('threshould:', thres)
    metric_val = quadratic_kappa_v2(np.squeeze(pred), np.squeeze(y_true_val))
    print('qwk:', metric_val)
    # Fixed: the original appended the undefined name `metirc_val` (NameError).
    metric_val_list.append(metric_val)
    log.write('Test model %d... qwk:%.5f \n'%(fold, metric_val))

avg_qwk = sum(metric_val_list)/FOLD
log.write('================\n Test average qwk is %.6f\n'%avg_qwk)
print('Average test qwk;',avg_qwk)

log.write('\n\n')

################## Out-of-fold predictions ##################
# NOTE(review): trainset_ / train_loader_ are not used in this cell; kept in
# case other cells rely on them.
trainset_     = DRDataset(train_df, transform=test_transform)
train_loader_ = torch.utils.data.DataLoader(trainset_, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

oof_list = []
gt_list = []
for fold in range(FOLD):
    valset       = DRDataset(train_df.iloc[val_idx[fold]].reset_index(), transform=test_transform)
    val_loader   = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
    print('*********Fold%d************'%fold)
    model_conv = EfficientNet.from_name('efficientnet-b5')
    model_conv._avg_pooling = GeM()
    model_conv._fc = nn.Linear(2048, 1)
    model_conv._dropout = nn.Dropout(p=0.5)

    model_conv.load_state_dict(torch.load('./exp/exp' + str(EXP) + '/efficientnet-b5-best' + str(fold) + '.pth'))
    model_conv.cuda()
    model_conv.eval()

    # Buffers are sized for the whole training frame, but only the first
    # len(val_idx[fold]) rows are filled (row i <-> val_idx[fold][i]).
    y_pred_val = np.zeros((len(train_df), 1))
    y_true_val = np.zeros((len(train_df), 1))
    with torch.no_grad():
        for idx, (imgs, labels) in enumerate(val_loader):
            output_test = model_conv(imgs.cuda())
            a = labels.float().numpy().astype(int)
            b = output_test.detach().cpu().numpy()

            start = idx*BATCH_SIZE
            y_pred_val[start:start+b.shape[0]] = b
            y_true_val[start:start+b.shape[0]] = a

    oof_list.append(y_pred_val)
    gt_list.append(y_true_val)

# Scatter each fold's predictions back to their original row positions.
oof = np.zeros((len(train_df), 1))
gt = np.zeros((len(train_df), 1))
for fold in range(FOLD):
    n_fold = len(val_idx[fold])
    oof[val_idx[fold]] = oof_list[fold][:n_fold]
    gt[val_idx[fold]] = gt_list[fold][:n_fold]

# Weighted blend: 1 part fresh OOF prediction, 3 parts previous refined target.
refine4 = (oof + 3.0*refine3)/(3.0+1)

del model_conv


 ********** Fold 0 **********

Loaded pretrained weights for efficientnet-b5
lr: 0.0001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.3736654804270463
Epoch 1/25 	 loss=0.5491 	 val_loss=0.6899 	 time=60.88s
model saved!
lr: 0.001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7729580064605445
Epoch 2/25 	 loss=0.3413 	 val_loss=0.2546 	 time=61.47s
model saved!
lr: 0.0008681980515339464
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.818482941371315
Epoch 3/25 	 loss=0.2252 	 val_loss=0.2301 	 time=61.85s
model saved!
lr: 0.0005500000000000001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7150630719762552
Epoch 4/25 	 loss=0.1964 	 val_loss=0.3093 	 time=61.75s
lr: 0.00023180194846605365
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8105975197294251
Epoch 5/25 	 loss=0.1403 	 val_loss=0.2130 	 time=61.98s
lr: 0.0001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7735304724125496
Epoch 6/25 	 loss=0.1325 	 val_loss=0.2588 	 time=61.90s
lr: 0.0008
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7240522063393412
Epoch 7/25 	 loss=0.1

threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8340267038400383
Epoch 22/25 	 loss=0.0666 	 val_loss=0.2013 	 time=63.18s
lr: 0.00036426012972767764
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8453446216391793
Epoch 23/25 	 loss=0.0673 	 val_loss=0.1922 	 time=62.89s
lr: 0.00025480000000000007
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8302852330879019
Epoch 24/25 	 loss=0.0416 	 val_loss=0.1967 	 time=62.69s
lr: 0.00014533987027232247
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8489413512732675
Epoch 25/25 	 loss=0.0404 	 val_loss=0.1860 	 time=62.68s
best loss: 0.18604900360805918
best qwk: 0.8555585276023128
p: Parameter containing:
tensor([2.9502], device='cuda:0', requires_grad=True)

 ********** Fold 2 **********

Loaded pretrained weights for efficientnet-b5
lr: 0.0001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.44592434736281295
Epoch 1/25 	 loss=0.5667 	 val_loss=0.6796 	 time=63.49s
model saved!
lr: 0.001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7712019524100061
Epoch 2/25 	 loss=0.3103 	 val_loss=

threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8140141936536422
Epoch 17/25 	 loss=0.0763 	 val_loss=0.2398 	 time=64.00s
model saved!
lr: 0.00045166399692442885
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8046797932367913
Epoch 18/25 	 loss=0.0722 	 val_loss=0.2675 	 time=63.67s
lr: 0.0003060000000000001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8326653720613147
Epoch 19/25 	 loss=0.0690 	 val_loss=0.2361 	 time=63.81s
model saved!
lr: 0.00016033600307557123
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.797861827329081
Epoch 20/25 	 loss=0.0497 	 val_loss=0.2553 	 time=63.66s
lr: 0.0001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7970584384410597
Epoch 21/25 	 loss=0.0471 	 val_loss=0.2491 	 time=63.21s
lr: 0.0004096000000000001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8246221798813493
Epoch 22/25 	 loss=0.0614 	 val_loss=0.2384 	 time=63.49s
lr: 0.00036426012972767764
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7878285165458832
Epoch 23/25 	 loss=0.0529 	 val_loss=0.2676 	 time=63.76s
lr: 0.00025480000000000007

In [25]:
# log.close() is intentionally disabled here: the refine4 -> refine5 training
# cell further down still calls log.write(), which raises ValueError on a
# closed file when the notebook is run top to bottom. The log is closed by the
# log.close() cell that follows the refine5 save.

In [27]:
# Persist the blended round-4 targets in this experiment's folder.
refine4_path = './exp/exp' + str(EXP) + '/refine4.npy'
np.save(refine4_path, refine4)

In [25]:
# Reload the refined-target array stored as refine4.npy in this experiment's folder.
refine4_path = './exp/exp' + str(EXP) + '/refine4.npy'
refine4 = np.load(refine4_path)

In [26]:
# Refinement round: refine4 -> refine5.
#   1. Train one EfficientNet-B5 regressor per fold, using the soft targets in
#      `refine4` for the training rows of that fold.
#   2. Score each fold's saved checkpoint on the hold-out set `val_df`.
#   3. Collect out-of-fold (OOF) predictions on `train_df` and blend them with
#      `refine4` to obtain `refine5`.
# NOTE(review): train() is defined elsewhere in the notebook and only receives
# the fold index, so it presumably reads model_conv / optimizer / scheduler /
# criterion / criterion_test / train_loader / val_loader from notebook globals.
# Those names are therefore kept exactly as they were.
cv_losses = []
cv_metrics = []

for fold in range(FOLD):
    print('\n ********** Fold %d **********\n'%fold)
    ###################### Dataset #######################
    # Training rows get the refined targets; validation rows use the plain DRDataset.
    trainset     = DRDataset_refine(train_df.iloc[tr_idx[fold]].reset_index(), refine4[tr_idx[fold]], transform=train_transform)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

    valset       = DRDataset(train_df.iloc[val_idx[fold]].reset_index(), transform=test_transform)
    val_loader   = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    ####################### Model ########################
    # ImageNet-pretrained backbone with GeM pooling and a single-output head.
    model_conv = EfficientNet.from_pretrained('efficientnet-b5')
    model_conv._avg_pooling = GeM(p=3.0)
    model_conv._fc = nn.Linear(2048, 1)
    model_conv._dropout = nn.Dropout(p=0.5)
    model_conv.cuda()

    ###################### Optim ########################
    optimizer = torch.optim.AdamW(model_conv.parameters(), lr=LR/10.0, weight_decay=1e-4)

    criterion = nn.SmoothL1Loss()
    criterion_test = nn.SmoothL1Loss()

    # Scheduler cycle length, derived from the number of batches per epoch.
    # NOTE(review): assumes one scheduler step per ACCUMULATE batches, i.e. a
    # 5-epoch cycle -- confirm against train().
    T = len(train_loader)//ACCUMULATE * 5 # cycle
    scheduler = CosineAnnealingWarmUpRestarts(optimizer, T_0=T, T_mult=1, eta_max=LR, T_up=T//5, gamma=0.8)
    scheduler.step()

    # Mixed precision (apex O1); must wrap the model and optimizer before training.
    model_conv, optimizer = amp.initialize(model_conv, optimizer, opt_level="O1", verbosity=0)

    val_loss, val_qwk = train(fold)

    cv_losses.append(val_loss)
    cv_metrics.append(val_qwk)
    log.write('[Fold%d] val loss:%.5f, val qwk:%.5f; \n'%(fold, val_loss, val_qwk))

cv_loss = sum(cv_losses)/FOLD
cv_qwk = sum(cv_metrics)/FOLD
print('CV loss:%.6f  CV qwk:%.6f'%(cv_loss, cv_qwk))
log.write('CV loss:%.6f  CV qwk:%.6f\n\n'%(cv_loss, cv_qwk))

################## Hold-out evaluation ##################
torch.cuda.empty_cache()
valset       = DRDataset(val_df, transform=test_transform)
val_loader   = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
pred_list = []
metric_val_list = []
for fold in range(FOLD):
    print('*********Fold%d************'%fold)
    # Rebuild the architecture without pretrained weights, then restore the
    # fold checkpoint (presumably written by train() -- the log prints "model saved!").
    model_conv = EfficientNet.from_name('efficientnet-b5')
    model_conv._avg_pooling = GeM()
    model_conv._fc = nn.Linear(2048, 1)
    model_conv._dropout = nn.Dropout(p=0.5)

    model_conv.load_state_dict(torch.load('./exp/exp' + str(EXP) + '/efficientnet-b5-best' + str(fold) + '.pth'))
    model_conv.cuda()
    model_conv.eval()

    y_pred_val = np.zeros((len(val_df), 1))
    y_true_val = np.zeros((len(val_df), 1))
    with torch.no_grad():
        for idx, (imgs, labels) in enumerate(val_loader):
            output_test = model_conv(imgs.cuda())
            # Labels never need the GPU; builtin `int` replaces the removed `np.int` alias.
            a = labels.float().numpy().astype(int)
            b = output_test.detach().cpu().numpy()

            # shuffle=False, so batch `idx` maps to a contiguous row range.
            start = idx*BATCH_SIZE
            y_pred_val[start:start+b.shape[0]] = b
            y_true_val[start:start+b.shape[0]] = a

    pred_list.append(y_pred_val)
    thres, pred = find_threshold(y_pred_val, y_true_val)
    print('threshould:', thres)
    metric_val = quadratic_kappa_v2(np.squeeze(pred), np.squeeze(y_true_val))
    print('qwk:', metric_val)
    # Fixed: the original appended the undefined name `metirc_val` (NameError).
    metric_val_list.append(metric_val)
    log.write('Test model %d... qwk:%.5f \n'%(fold, metric_val))

avg_qwk = sum(metric_val_list)/FOLD
log.write('================\n Test average qwk is %.6f\n'%avg_qwk)
print('Average test qwk;',avg_qwk)

log.write('\n\n')

################## Out-of-fold predictions ##################
# NOTE(review): trainset_ / train_loader_ are not used in this cell; kept in
# case other cells rely on them.
trainset_     = DRDataset(train_df, transform=test_transform)
train_loader_ = torch.utils.data.DataLoader(trainset_, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

oof_list = []
gt_list = []
for fold in range(FOLD):
    valset       = DRDataset(train_df.iloc[val_idx[fold]].reset_index(), transform=test_transform)
    val_loader   = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
    print('*********Fold%d************'%fold)
    model_conv = EfficientNet.from_name('efficientnet-b5')
    model_conv._avg_pooling = GeM()
    model_conv._fc = nn.Linear(2048, 1)
    model_conv._dropout = nn.Dropout(p=0.5)

    model_conv.load_state_dict(torch.load('./exp/exp' + str(EXP) + '/efficientnet-b5-best' + str(fold) + '.pth'))
    model_conv.cuda()
    model_conv.eval()

    # Buffers are sized for the whole training frame, but only the first
    # len(val_idx[fold]) rows are filled (row i <-> val_idx[fold][i]).
    y_pred_val = np.zeros((len(train_df), 1))
    y_true_val = np.zeros((len(train_df), 1))
    with torch.no_grad():
        for idx, (imgs, labels) in enumerate(val_loader):
            output_test = model_conv(imgs.cuda())
            a = labels.float().numpy().astype(int)
            b = output_test.detach().cpu().numpy()

            start = idx*BATCH_SIZE
            y_pred_val[start:start+b.shape[0]] = b
            y_true_val[start:start+b.shape[0]] = a

    oof_list.append(y_pred_val)
    gt_list.append(y_true_val)

# Scatter each fold's predictions back to their original row positions.
oof = np.zeros((len(train_df), 1))
gt = np.zeros((len(train_df), 1))
for fold in range(FOLD):
    n_fold = len(val_idx[fold])
    oof[val_idx[fold]] = oof_list[fold][:n_fold]
    gt[val_idx[fold]] = gt_list[fold][:n_fold]

# Weighted blend: 1 part fresh OOF prediction, 3 parts previous refined target.
refine5 = (oof + 3.0*refine4)/(3.0+1)

del model_conv


 ********** Fold 0 **********

Loaded pretrained weights for efficientnet-b5
lr: 0.0001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.664167916041979
Epoch 1/25 	 loss=0.6118 	 val_loss=0.3656 	 time=62.67s
model saved!
lr: 0.001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.5553571428571429
Epoch 2/25 	 loss=0.2912 	 val_loss=0.5297 	 time=63.03s
lr: 0.0008681980515339464
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7800943706792495
Epoch 3/25 	 loss=0.2528 	 val_loss=0.2745 	 time=63.15s
model saved!
lr: 0.0005500000000000001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8179519595448799
Epoch 4/25 	 loss=0.1699 	 val_loss=0.2216 	 time=63.31s
model saved!
lr: 0.00023180194846605365
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7906434048227581
Epoch 5/25 	 loss=0.1409 	 val_loss=0.2245 	 time=63.39s
lr: 0.0001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8265510340226818
Epoch 6/25 	 loss=0.1189 	 val_loss=0.2092 	 time=63.22s
model saved!
lr: 0.0008
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.772887323943662
Epoch 7/2

threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8543289632022294
Epoch 22/25 	 loss=0.0597 	 val_loss=0.1891 	 time=63.88s
lr: 0.00036426012972767764
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.842091638622832
Epoch 23/25 	 loss=0.0482 	 val_loss=0.1957 	 time=63.98s
lr: 0.00025480000000000007
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8444366320970293
Epoch 24/25 	 loss=0.0376 	 val_loss=0.1915 	 time=63.71s
lr: 0.00014533987027232247
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8589167113567922
Epoch 25/25 	 loss=0.0349 	 val_loss=0.1818 	 time=63.61s
best loss: 0.18179612176803256
best qwk: 0.8650306748466258
p: Parameter containing:
tensor([2.9494], device='cuda:0', requires_grad=True)

 ********** Fold 2 **********

Loaded pretrained weights for efficientnet-b5
lr: 0.0001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.680733437759145
Epoch 1/25 	 loss=0.6362 	 val_loss=0.3726 	 time=64.25s
model saved!
lr: 0.001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.795757488892074
Epoch 2/25 	 loss=0.2984 	 val_loss=0.31

threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7889892323723515
Epoch 17/25 	 loss=0.0559 	 val_loss=0.2463 	 time=64.60s
lr: 0.00045166399692442885
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8024559530165509
Epoch 18/25 	 loss=0.0610 	 val_loss=0.2441 	 time=64.82s
lr: 0.0003060000000000001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.797812988172283
Epoch 19/25 	 loss=0.0484 	 val_loss=0.2431 	 time=64.56s
lr: 0.00016033600307557123
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7862048114715156
Epoch 20/25 	 loss=0.0374 	 val_loss=0.2429 	 time=64.64s
lr: 0.0001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8032911713693472
Epoch 21/25 	 loss=0.0454 	 val_loss=0.2575 	 time=64.59s
lr: 0.0004096000000000001
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.7705696202531646
Epoch 22/25 	 loss=0.0475 	 val_loss=0.2696 	 time=64.50s
lr: 0.00036426012972767764
threshould: [0.5, 1.5, 2.5, 3.5]
qwk: 0.8286991062562066
Epoch 23/25 	 loss=0.0445 	 val_loss=0.2351 	 time=64.40s
model saved!
lr: 0.00025480000000000007
threshould: 

In [27]:
# Persist the blended round-5 targets in this experiment's folder.
refine5_path = './exp/exp' + str(EXP) + '/refine5.npy'
np.save(refine5_path, refine5)

In [28]:
# Close the experiment log now that the refine5 round has finished writing to it.
log.close()