In [2]:
import numpy as np
import pandas as pd
import random
from glob import glob
import os, shutil
from tqdm import tqdm
tqdm.pandas()
import time
import copy
import joblib
from collections import defaultdict
import gc
from IPython import display as ipd
import math
# visualization
import cv2
from glob import glob
# Sklearn
from sklearn.model_selection import StratifiedKFold, KFold, StratifiedGroupKFold
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score, confusion_matrix, roc_curve
import timm
# PyTorch 
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler
import torch.nn.functional as F
from torch.optim.swa_utils import AveragedModel, SWALR
from transformers import get_cosine_schedule_with_warmup
from collections import defaultdict
# import matplotlib.pyplot as plt
# Albumentations for augmentations
import albumentations as A
import albumentations
import albumentations as albu
from albumentations.pytorch import ToTensorV2
from datetime import datetime
import warnings
warnings.filterwarnings("ignore")

In [3]:
class CFG:
    """Experiment-wide settings, kept as plain class attributes."""
    seed = 1  # NOTE(review): no seeding call is visible in this part of the notebook — confirm it is applied
    model_name = "tf_efficientnetv2_b2"  # presumably the timm architecture name handed to Model — verify at the call site
    train_bs = 12  # presumably the training batch size — verify against the DataLoader
    valid_bs = 48  # presumably the validation batch size — verify against the DataLoader
    image_size = 1024
    epochs = 25
    # Use the first CUDA device when one is available, otherwise fall back to the CPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print(CFG.device)

cuda:0


In [4]:
# Load the per-image training metadata. The preview shows a `cancer` label column and a
# `fold` column; the `Unnamed: 0` column suggests the CSV was saved with its index.
df = pd.read_csv("train_5folds.csv")
df.head()

Unnamed: 0,site_id,patient_id,image_id,laterality,view,age,cancer,biopsy,invasive,BIRADS,implant,density,machine_id,difficult_negative_case,fold
0,2,10006,462822612,L,CC,61.0,0,0,0,,0,,29,False,1
1,2,10006,1459541791,L,MLO,61.0,0,0,0,,0,,29,False,1
2,2,10006,1864590858,R,MLO,61.0,0,0,0,,0,,29,False,1
3,2,10006,1874946579,R,CC,61.0,0,0,0,,0,,29,False,1
4,2,10011,220375232,L,CC,55.0,0,0,0,0.0,0,,21,True,0


In [5]:
# Oversample the positive class: append one extra copy of every cancer == 1 row.
# pd.concat replaces DataFrame.append, which was deprecated and removed in pandas 2.0;
# the resulting frame (row order and fresh 0..n-1 index) is the same.
is_hol = df['cancer'] == 1
df_try = df[is_hol]
df1 = pd.concat([df, df_try], ignore_index=True)
print(len(df1))

55864


In [6]:
def init_logger(log_file='train3.log'):
    """Return this module's logger, echoing bare messages to the console and to a file.

    Args:
        log_file: Path of the log file that receives a copy of every message.

    Returns:
        The ``logging.Logger`` named after this module, set to INFO level, with
        exactly one stream handler and one file handler attached.
    """
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    # getLogger returns the same object on every call, so re-running this function
    # (e.g. re-executing the cell) would stack handlers and duplicate every message.
    # Drop and close whatever a previous call attached before adding fresh handlers.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

# Create the shared logger and stamp the log with the wall-clock time of this run.
LOGGER = init_logger()
now = datetime.now()
datetime_now = now.strftime("%m/%d/%Y, %H:%M:%S")
LOGGER.info(f"Date :{datetime_now}")

Date :02/24/2023, 11:39:14


In [7]:
from albumentations import DualTransform
# Reference side length in pixels; the Cutout holes below are sized as a fraction of it.
# NOTE(review): duplicates CFG.image_size — confirm the two are meant to stay equal.
image_size = 1024
def isotropically_resize_image(img, size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC):
    """Rescale ``img`` so that its longer side equals ``size``, keeping the aspect ratio.

    Args:
        img: Image array whose first two dimensions are (height, width).
        size: Target length of the longer side, in pixels.
        interpolation_down: OpenCV interpolation flag used when the image is not enlarged.
        interpolation_up: OpenCV interpolation flag used when the image is enlarged.

    Returns:
        ``img`` itself when the longer side already equals ``size``; otherwise the
        resized image. The shorter side is truncated to an integer.
    """
    height, width = img.shape[:2]
    # Nothing to do when the longer side is already at the requested length.
    if max(width, height) == size:
        return img

    if width > height:
        scale = size / width
        new_width, new_height = size, height * scale
    else:
        scale = size / height
        new_width, new_height = width * scale, size

    if scale > 1:
        chosen_interpolation = interpolation_up
    else:
        chosen_interpolation = interpolation_down
    # cv2.resize expects the target size as (width, height).
    return cv2.resize(img, (int(new_width), int(new_height)), interpolation=chosen_interpolation)


class IsotropicResize(DualTransform):
    """Albumentations transform that resizes the longer image side to ``max_side``.

    The aspect ratio is preserved (see ``isotropically_resize_image``). Masks are
    resized with nearest-neighbour interpolation.

    Args:
        max_side: Target length of the longer side, in pixels.
        interpolation_down: OpenCV flag used when the image is not enlarged.
        interpolation_up: OpenCV flag used when the image is enlarged.
        always_apply: Forwarded to ``DualTransform``.
        p: Probability of applying the transform, forwarded to ``DualTransform``.
    """

    def __init__(self, max_side, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC,
                 always_apply=False, p=1):
        super(IsotropicResize, self).__init__(always_apply, p)
        self.max_side = max_side
        self.interpolation_down = interpolation_down
        self.interpolation_up = interpolation_up

    def apply(self, img, interpolation_down=None, interpolation_up=None, **params):
        """Resize ``img``; interpolation flags left as None fall back to the instance settings."""
        # Previously the flags configured in __init__ were never consulted here, so a custom
        # interpolation had no effect. Compare against None explicitly: cv2.INTER_NEAREST is 0,
        # so a truthiness test would wrongly discard it.
        if interpolation_down is None:
            interpolation_down = self.interpolation_down
        if interpolation_up is None:
            interpolation_up = self.interpolation_up
        return isotropically_resize_image(img, size=self.max_side, interpolation_down=interpolation_down,
                                          interpolation_up=interpolation_up)

    def apply_to_mask(self, img, **params):
        """Resize a mask with nearest-neighbour interpolation in both directions."""
        return self.apply(img, interpolation_down=cv2.INTER_NEAREST, interpolation_up=cv2.INTER_NEAREST, **params)

    def get_transform_init_args_names(self):
        """Names of the constructor arguments reported for this transform."""
        return ("max_side", "interpolation_down", "interpolation_up")
    
# Albumentations pipelines keyed by split name ("train" / "valid").
# Neither pipeline currently resizes, pads or normalizes: those steps are commented out,
# so images keep the size and value range they were loaded with.
data_transforms = {
    "train": A.Compose([
        # A.Resize(image_size, image_size),
        # IsotropicResize(max_side = image_size),
        # A.PadIfNeeded(min_height=image_size, min_width=image_size, border_mode=cv2.BORDER_CONSTANT),
        # A.RandomBrightnessContrast(),
        # A.VerticalFlip(p=0.5),   
        # A.ColorJitter(),
        # A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.05, rotate_limit=10, p=0.5),
        # A.HorizontalFlip(p=0.5),
        # A.Cutout(max_h_size=int(image_size * 0.1), max_w_size=int(image_size * 0.1), num_holes=5, p=0.5),
        # Active augmentations: flips, colour jitter, a mild shift/scale/rotate and
        # cutout with 5 holes of at most 10% of image_size per side.
        A.VerticalFlip(p=0.5),   
        A.ColorJitter(),
        A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.05, rotate_limit=10, p=0.5),
        A.HorizontalFlip(p=0.5),
        A.Cutout(max_h_size=int(image_size * 0.1), max_w_size=int(image_size * 0.1), num_holes=5, p=0.5),
        # A.OneOf([ 
        # A.OpticalDistortion(distort_limit=1.0), 
        # A.GridDistortion(num_steps=5, distort_limit=1.),
        # A.ElasticTransform(alpha=3), ], p=0.2),
        # A.OneOf([
            # A.GaussNoise(var_limit=[10, 50]),
            # A.GaussianBlur(),
            # A.MotionBlur(),
            # A.MedianBlur(), ], p=0.2),
        # A.OneOf([
        #     A.GridDistortion(num_steps=5, distort_limit=0.05, p=1.0),
        #     A.OpticalDistortion(distort_limit=0.05, shift_limit=0.05, p=1.0),
        #     A.ElasticTransform(alpha=1, sigma=50, alpha_affine=50, p=1.0)
        # ], p=0.25),
        # A.CoarseDropout(max_holes=8, max_height=image_size//20, max_width=image_size//20,
        #                  min_holes=5, fill_value=0, mask_fill_value=0, p=0.5),
        # A.Normalize(mean=0, std=1),
        ToTensorV2(),], p=1.0),
    
    # Validation applies no augmentation; the image is only converted to a tensor.
    "valid": A.Compose([
        # IsotropicResize(max_side =image_size),
        # A.PadIfNeeded(min_height=image_size, min_width=image_size, border_mode=cv2.BORDER_CONSTANT),
        # A.Normalize(mean=0, std=1),
        # A.Resize(image_size, image_size),
        ToTensorV2(),
        ], p=1.0)
}

# Record the exact training pipeline in the log so the run can be reproduced.
LOGGER.info(f"train transform{data_transforms['train']}")


train transformCompose([
  VerticalFlip(always_apply=False, p=0.5),
  ColorJitter(always_apply=False, p=0.5, brightness=[0.8, 1.2], contrast=[0.8, 1.2], saturation=[0.8, 1.2], hue=[-0.2, 0.2]),
  ShiftScaleRotate(always_apply=False, p=0.5, shift_limit_x=(-0.0625, 0.0625), shift_limit_y=(-0.0625, 0.0625), scale_limit=(-0.050000000000000044, 0.050000000000000044), rotate_limit=(-10, 10), interpolation=1, border_mode=4, value=None, mask_value=None, rotate_method='largest_box'),
  HorizontalFlip(always_apply=False, p=0.5),
  Cutout(always_apply=False, p=0.5, num_holes=5, max_h_size=102, max_w_size=102),
  ToTensorV2(always_apply=True, p=1.0, transpose_mask=False),
], p=1.0, bbox_params=None, keypoint_params=None, additional_targets={})


In [8]:
# from albumentations import DualTransform
# image_size = 1024
# def isotropically_resize_image(img, size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC):
#     h, w = img.shape[:2]
#     if max(w, h) == size:
#         return img
#     if w > h:
#         scale = size / w
#         h = h * scale
#         w = size
#     else:
#         scale = size / h
#         w = w * scale
#         h = size
#     interpolation = interpolation_up if scale > 1 else interpolation_down
#     resized = cv2.resize(img, (int(w), int(h)), interpolation=interpolation)
#     return resized


# class IsotropicResize(DualTransform):
#     def __init__(self, max_side, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC,
#                  always_apply=False, p=1):
#         super(IsotropicResize, self).__init__(always_apply, p)
#         self.max_side = max_side
#         self.interpolation_down = interpolation_down
#         self.interpolation_up = interpolation_up

#     def apply(self, img, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC, **params):
#         return isotropically_resize_image(img, size=self.max_side, interpolation_down=interpolation_down,
#                                           interpolation_up=interpolation_up)

#     def apply_to_mask(self, img, **params):
#         return self.apply(img, interpolation_down=cv2.INTER_NEAREST, interpolation_up=cv2.INTER_NEAREST, **params)

#     def get_transform_init_args_names(self):
#         return ("max_side", "interpolation_down", "interpolation_up")
    
# data_transforms = {
#     "train": A.Compose([
# #         A.Resize(image_size, image_size),
#         # IsotropicResize(max_side = image_size),
#        A.PadIfNeeded(min_width=image_size, border_mode=cv2.BORDER_CONSTANT),
#         albumentations.HorizontalFlip(p=0.5),
#         albumentations.VerticalFlip(p=0.5),
#         # albumentations.RandomBrightness(limit=0.2, p=0.75),
#         # albumentations.RandomContrast(limit=0.2, p=0.75),

#         # albumentations.OneOf([
#         #     albumentations.OpticalDistortion(distort_limit=1.),
#         #     albumentations.GridDistortion(num_steps=5, distort_limit=1.),
#         # ], p=0.75),

#         # albumentations.HueSaturationValue(hue_shift_limit=40, sat_shift_limit=40, val_shift_limit=0, p=0.75),
#         albumentations.ShiftScaleRotate(p = 0.5),
#         A.Cutout(always_apply=False, p=0.5, num_holes=5, max_h_size=image_size//10, max_w_size=image_size//10),
#         # A.RandomBrightnessContrast(),
#         # A.VerticalFlip(p=0.5),   
#         A.ColorJitter(p = 0.7),
#         # A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.05, rotate_limit=10, p=0.5),
#         # A.HorizontalFlip(p=0.5),
#         # A.Cutout(max_h_size=int(image_size * 0.1), max_w_size=int(image_size * 0.1), num_holes=5, p=0.5),
#         # albumentations.RandomBrightness(limit=0.2, p=0.75),
#         # albumentations.RandomContrast(limit=0.2, p=0.75),

#         # albumentations.OneOf([
#         #     albumentations.OpticalDistortion(distort_limit=1.),
#         #     albumentations.GridDistortion(num_steps=5, distort_limit=1.),
#         # ], p=0.75),

#         # albumentations.HueSaturationValue(hue_shift_limit=40, sat_shift_limit=40, val_shift_limit=0, p=0.75),
#         # albumentations.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.3, rotate_limit=30, border_mode=0, p=0.75),
#         # A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=10, val_shift_limit=10, p=0.7),
#         # A.RandomBrightnessContrast(brightness_limit=(-0.2,0.2), contrast_limit=(-0.2, 0.2), p=0.7),
#         # A.CLAHE(p=0.5),
#         # albumentations.OneOf([
#         # albumentations.OpticalDistortion(distort_limit=1.),
#         # albumentations.GridDistortion(num_steps=5, distort_limit=1.),
#         # ], p=0.75),
#         # A.OneOf([
#         # A.GaussianBlur(),
#         # A.MotionBlur(),
#         # A.MedianBlur(), ], p=0.5),
#         # A.IAASharpen(p = 0.2),
#         # A.JpegCompression(p=0.2),
#         # A.Downscale(scale_min=0.5, scale_max=0.75),
#         # A.OneOf([ A.JpegCompression(), A.Downscale(scale_min=0.1, scale_max=0.15), ], p=0.2), 
#         # A.IAAPiecewiseAffine(),
# #         A.OneOf([ 
# #         A.OpticalDistortion(distort_limit=1.0), 
# #         A.GridDistortion(num_steps=5, distort_limit=1.),
# #         A.ElasticTransform(alpha=3), ], p=0.2),
# #         A.OneOf([
# #             A.GaussNoise(var_limit=[10, 50]),
# #             A.GaussianBlur(),
# #             A.MotionBlur(),
# #             A.MedianBlur(), ], p=0.2),
#         # A.OneOf([
#         #     A.GridDistortion(num_steps=5, distort_limit=0.05, p=1.0),
#         #     A.OpticalDistortion(distort_limit=0.05, shift_limit=0.05, p=1.0),
#         #     A.ElasticTransform(alpha=1, sigma=50, alpha_affine=50, p=1.0)
#         # ], p=0.25),
#         # A.CoarseDropout(max_holes=8, max_height=image_size//20, max_width=image_size//20,
#         #                  min_holes=5, fill_value=0, mask_fill_value=0, p=0.5),
#         # A.Normalize(mean=0, std=1),
#         ToTensorV2(),], p=1.0),
    
#     "valid": A.Compose([
#         # IsotropicResize(max_side = image_size),
#         A.PadIfNeeded(min_height=image_size, min_width=image_size, border_mode=cv2.BORDER_CONSTANT),
#         # A.Normalize(mean=0, std=1),
# #         A.Resize(image_size, image_size),
#         ToTensorV2(),
#         ], p=1.0)
# }

# LOGGER.info(f"train transform{data_transforms['train']}")


In [9]:
def pad(array, target_shape):
    """Zero-pad ``array`` at the end of every axis until it has ``target_shape``.

    Args:
        array: NumPy array to pad.
        target_shape: Desired size per axis; must provide an entry for every axis
            of ``array`` and must not be smaller than the current size.

    Returns:
        A new array with the original data in the leading corner and constant
        (zero) padding after it along each axis.
    """
    # One (before, after) pair per axis: nothing in front, the shortfall behind.
    trailing_pad = [
        (0, target_shape[axis] - extent)
        for axis, extent in enumerate(array.shape)
    ]
    return np.pad(array, trailing_pad, "constant")
    
def load_img(img_path):
    """Read an image file with OpenCV and return it in RGB channel order.

    No resizing, padding or intensity scaling is applied.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The decoded image, converted from OpenCV's BGR order to RGB.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file (missing or undecodable).
    """
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising, which would
        # otherwise surface later as an opaque cvtColor assertion error.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
class BreastDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs for the rows of a metadata frame.

    Each row must provide ``patient_id``, ``image_id`` and ``cancer``. The image is
    read from ``{image_dir}/{patient_id}_{image_id}.png`` via ``load_img``.

    Args:
        df: DataFrame with one row per image.
        transforms: Optional callable invoked as ``transforms(image=img)`` and
            returning a mapping with an ``'image'`` entry (albumentations style).
            When None, the loaded image is returned untransformed.
        image_dir: Directory containing the PNG files. Defaults to ``"flip"``.
    """

    def __init__(self, df, transforms=None, image_dir="flip"):
        self.df = df
        self.transforms = transforms
        self.image_dir = image_dir

    def __getitem__(self, index):
        """Return the image as a float tensor and the ``cancer`` label as a long tensor."""
        row = self.df.iloc[index]
        img_path = f"{self.image_dir}/{row.patient_id}_{row.image_id}.png"
        img = load_img(img_path)
        label = row['cancer']
        # transforms defaults to None, so it must not be called unconditionally.
        if self.transforms is not None:
            img = self.transforms(image=img)['image']
        # as_tensor accepts both arrays and tensors without the extra copy (and the
        # copy-construct warning) that torch.tensor(tensor) triggers.
        return torch.as_tensor(img).float(), torch.tensor(label).long()

    def __len__(self):
        """Number of rows (images) in the dataset."""
        return len(self.df)
    
# Smoke test: build a dataset from the fold-0 rows and inspect one transformed sample
# (tensor shape, label and maximum pixel value).
fold0 = df[df['fold']==0]
train_dataset = BreastDataset(fold0, transforms = data_transforms['train'])
image, label = train_dataset[0]
print(image.shape, label)
print(image.max())

torch.Size([3, 1344, 840]) tensor(0)
tensor(255.)


In [10]:

# from pylab import rcParams

# f, axarr = plt.subplots(1,15, figsize = (20, 20))
# imgs = []
# for p in range(15):
#     img, label = train_dataset[p]
#     img = img.transpose(0, 1).transpose(1,2).cpu().numpy()
#     img = img.astype(np.uint8)
#     imgs.append(img)
#     axarr[p].imshow(img)


In [11]:
class Model(nn.Module):
    """timm backbone whose classifier is replaced by dropout plus a linear head.

    The backbone must expose a ``classifier`` attribute with ``in_features``; that
    layer is swapped for ``nn.Identity`` so the backbone returns pooled features.

    Args:
        model_name: Architecture name passed to ``timm.create_model``.
        num_classes: Number of output logits of the new head.
        pretrained: Whether timm should load pretrained weights.
        drop_rate: ``drop_rate`` forwarded to ``timm.create_model``.
        drop_path_rate: ``drop_path_rate`` forwarded to ``timm.create_model``.
        head_dropout: Dropout probability applied to the features before the head.

    The defaults reproduce the previously hard-coded configuration.
    """

    def __init__(self, model_name, num_classes=2, pretrained=True,
                 drop_rate=0.3, drop_path_rate=0.2, head_dropout=0.5):
        super().__init__()
        self.backbone = timm.create_model(model_name, pretrained=pretrained,
                                          drop_rate=drop_rate, drop_path_rate=drop_path_rate)
        # Read the feature width before the original classifier is discarded.
        self.fc = nn.Linear(self.backbone.classifier.in_features, num_classes)
        self.backbone.classifier = nn.Identity()
        self.dropout = nn.Dropout(head_dropout)

    def forward(self, x):
        """Return the head's logits for a batch of images ``x``."""
        features = self.backbone(x)
        return self.fc(self.dropout(features))

In [12]:
class AverageMeter(object):
    """Track the latest value of a metric together with its weighted running mean.

    Attributes set by ``reset``/``update``: ``val`` (last value), ``sum``
    (weighted total), ``count`` (total weight) and ``avg`` (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed with weight ``n`` (e.g. the batch size)."""
        self.val = val
        self.count += n
        self.sum += val * n
        # Refresh the mean from the accumulated totals.
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration of ``s`` seconds as ``'<minutes>m <seconds>s'``."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Report elapsed time since ``since`` and the estimated time remaining.

    Args:
        since: Start timestamp as returned by ``time.time()``.
        percent: Fraction of the work completed so far; must be non-zero.

    Returns:
        A string of the form ``'<elapsed> (remain <remaining>)'`` where both
        parts are formatted by ``asMinutes``.
    """
    elapsed = time.time() - since
    # Extrapolate the total duration from the completed fraction.
    projected_total = elapsed / (percent)
    remaining = projected_total - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.optimizer import Optimizer, required
import math

class AdamP(Optimizer):
    """Adam variant that projects the update for multi-dimensional parameters.

    For parameters with more than one dimension, when the gradient is nearly
    orthogonal to the weights (absolute cosine similarity below
    ``delta / sqrt(dim)``, checked per output channel and then for the whole
    tensor), the component of the update along the weights is removed and the
    weight decay is scaled by ``wd_ratio``.

    Args:
        params: Iterable of parameters or parameter-group dicts.
        lr: Learning rate.
        betas: Coefficients of the running averages of the gradient and its square.
        eps: Term added to denominators for numerical stability.
        weight_decay: Decoupled weight-decay coefficient.
        delta: Threshold factor of the cosine-similarity test.
        wd_ratio: Weight-decay multiplier used when the projection is applied.
        nesterov: Whether to use the Nesterov-style update.
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0, delta=0.1, wd_ratio=0.1, nesterov=False):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay,
                        delta=delta, wd_ratio=wd_ratio, nesterov=nesterov)
        super(AdamP, self).__init__(params, defaults)

    def _channel_view(self, x):
        # reshape rather than view: view raises on non-contiguous tensors
        # (e.g. weights stored in channels_last memory format).
        return x.reshape(x.size(0), -1)

    def _layer_view(self, x):
        # Same reasoning as _channel_view: reshape copies only when it has to.
        return x.reshape(1, -1)

    def _cosine_similarity(self, x, y, eps, view_func):
        """Absolute cosine similarity between ``x`` and ``y`` per row of the chosen view."""
        x = view_func(x)
        y = view_func(y)

        return F.cosine_similarity(x, y, dim=1, eps=eps).abs_()

    def _projection(self, p, grad, perturb, delta, wd_ratio, eps):
        """Remove the weight-aligned component of ``perturb`` when the test passes.

        Returns the (possibly modified in place) update and the weight-decay
        multiplier to use: ``wd_ratio`` if a projection happened, otherwise 1.
        """
        wd = 1
        expand_size = [-1] + [1] * (len(p.shape) - 1)
        for view_func in [self._channel_view, self._layer_view]:

            cosine_sim = self._cosine_similarity(grad, p.data, eps, view_func)

            if cosine_sim.max() < delta / math.sqrt(view_func(p.data).size(1)):
                p_n = p.data / view_func(p.data).norm(dim=1).view(expand_size).add_(eps)
                perturb -= p_n * view_func(p_n * perturb).sum(dim=1).view(expand_size)
                wd = wd_ratio

                # The first view that passes the test wins.
                return perturb, wd

        return perturb, wd

    def step(self, closure=None):
        """Perform one optimization step.

        Args:
            closure: Optional callable that re-evaluates the model and returns the loss.

        Returns:
            The closure's return value, or None when no closure is given.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue

                grad = p.grad.data
                beta1, beta2 = group['betas']
                nesterov = group['nesterov']

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p.data)
                    state['exp_avg_sq'] = torch.zeros_like(p.data)

                # Adam
                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']

                state['step'] += 1
                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']

                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)

                denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])
                step_size = group['lr'] / bias_correction1

                if nesterov:
                    perturb = (beta1 * exp_avg + (1 - beta1) * grad) / denom
                else:
                    perturb = exp_avg / denom

                # Projection (only for parameters with more than one dimension)
                wd_ratio = 1
                if len(p.shape) > 1:
                    perturb, wd_ratio = self._projection(p, grad, perturb, group['delta'], group['wd_ratio'], group['eps'])

                # Weight decay
                if group['weight_decay'] > 0:
                    p.data.mul_(1 - group['lr'] * group['weight_decay'] * wd_ratio)

                # Step
                p.data.add_(perturb, alpha=-step_size)

        return loss

class SGDP(Optimizer):
    """SGD with momentum that projects the update for multi-dimensional parameters.

    For parameters with more than one dimension, when the gradient is nearly
    orthogonal to the weights (absolute cosine similarity below
    ``delta / sqrt(dim)``, checked per output channel and then for the whole
    tensor), the component of the update along the weights is removed and the
    weight decay is scaled by ``wd_ratio``.

    Args:
        params: Iterable of parameters or parameter-group dicts.
        lr: Learning rate (required).
        momentum: Momentum factor.
        dampening: Dampening applied to the gradient when updating the momentum buffer.
        weight_decay: Decoupled weight-decay coefficient.
        nesterov: Whether to use the Nesterov-style update.
        eps: Term added to denominators for numerical stability.
        delta: Threshold factor of the cosine-similarity test.
        wd_ratio: Weight-decay multiplier used when the projection is applied.
    """

    def __init__(self, params, lr=required, momentum=0, dampening=0,
                 weight_decay=0, nesterov=False, eps=1e-8, delta=0.1, wd_ratio=0.1):
        defaults = dict(lr=lr, momentum=momentum, dampening=dampening, weight_decay=weight_decay,
                        nesterov=nesterov, eps=eps, delta=delta, wd_ratio=wd_ratio)
        super(SGDP, self).__init__(params, defaults)

    def _channel_view(self, x):
        # reshape rather than view: view raises on non-contiguous tensors
        # (e.g. weights stored in channels_last memory format).
        return x.reshape(x.size(0), -1)

    def _layer_view(self, x):
        # Same reasoning as _channel_view: reshape copies only when it has to.
        return x.reshape(1, -1)

    def _cosine_similarity(self, x, y, eps, view_func):
        """Absolute cosine similarity between ``x`` and ``y`` per row of the chosen view."""
        x = view_func(x)
        y = view_func(y)

        return F.cosine_similarity(x, y, dim=1, eps=eps).abs_()

    def _projection(self, p, grad, perturb, delta, wd_ratio, eps):
        """Remove the weight-aligned component of ``perturb`` when the test passes.

        Returns the (possibly modified in place) update and the weight-decay
        multiplier to use: ``wd_ratio`` if a projection happened, otherwise 1.
        """
        wd = 1
        expand_size = [-1] + [1] * (len(p.shape) - 1)
        for view_func in [self._channel_view, self._layer_view]:

            cosine_sim = self._cosine_similarity(grad, p.data, eps, view_func)

            if cosine_sim.max() < delta / math.sqrt(view_func(p.data).size(1)):
                p_n = p.data / view_func(p.data).norm(dim=1).view(expand_size).add_(eps)
                perturb -= p_n * view_func(p_n * perturb).sum(dim=1).view(expand_size)
                wd = wd_ratio

                # The first view that passes the test wins.
                return perturb, wd

        return perturb, wd

    def step(self, closure=None):
        """Perform one optimization step.

        Args:
            closure: Optional callable that re-evaluates the model and returns the loss.

        Returns:
            The closure's return value, or None when no closure is given.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['momentum'] = torch.zeros_like(p.data)

                # SGD
                buf = state['momentum']
                buf.mul_(momentum).add_(grad, alpha=1 - dampening)
                if nesterov:
                    d_p = grad + momentum * buf
                else:
                    d_p = buf

                # Projection (only for parameters with more than one dimension)
                wd_ratio = 1
                if len(p.shape) > 1:
                    d_p, wd_ratio = self._projection(p, grad, d_p, group['delta'], group['wd_ratio'], group['eps'])

                # Weight decay
                if group['weight_decay'] > 0:
                    p.data.mul_(1 - group['lr'] * group['weight_decay'] * wd_ratio / (1-momentum))

                # Step
                p.data.add_(d_p, alpha=-group['lr'])

        return loss

class SAM(torch.optim.Optimizer):
    """Two-step sharpness-aware wrapper around another optimizer.

    ``first_step`` moves every parameter along its gradient by a step of overall
    norm ``rho``; ``second_step`` undoes that move and lets the wrapped optimizer
    update the weights with the gradients computed at the perturbed point.

    Args:
        params: Iterable of parameters or parameter-group dicts.
        base_optimizer: Optimizer class instantiated on the shared parameter groups.
        rho: Non-negative size of the perturbation.
        **kwargs: Extra options stored as defaults and forwarded to ``base_optimizer``.
    """

    def __init__(self, params, base_optimizer, rho=0.05, **kwargs):
        assert rho >= 0.0, f"Invalid rho, should be non-negative: {rho}"

        super(SAM, self).__init__(params, dict(rho=rho, **kwargs))

        # Both optimizers operate on the very same param_groups list.
        self.base_optimizer = base_optimizer(self.param_groups, **kwargs)
        self.param_groups = self.base_optimizer.param_groups

    @torch.no_grad()
    def first_step(self, zero_grad=False):
        """Perturb the weights to "w + e(w)" and remember each perturbation."""
        total_norm = self._grad_norm()
        for group in self.param_groups:
            step_scale = group["rho"] / (total_norm + 1e-12)

            for param in group["params"]:
                if param.grad is None:
                    continue
                perturbation = param.grad * step_scale.to(param)
                param.add_(perturbation)  # climb to the local maximum "w + e(w)"
                self.state[param]["e_w"] = perturbation

        if zero_grad:
            self.zero_grad()

    @torch.no_grad()
    def second_step(self, zero_grad=False):
        """Restore the weights and apply the wrapped optimizer's update."""
        for group in self.param_groups:
            for param in group["params"]:
                if param.grad is None:
                    continue
                param.sub_(self.state[param]["e_w"])  # get back to "w" from "w + e(w)"

        self.base_optimizer.step()  # do the actual "sharpness-aware" update

        if zero_grad:
            self.zero_grad()

    def step(self, closure=None):
        """Not supported: use ``first_step`` followed by ``second_step``."""
        raise NotImplementedError("SAM doesn't work like the other optimizers, you should first call `first_step` and the `second_step`; see the documentation for more info.")

    def _grad_norm(self):
        """L2 norm over the gradients of all parameters that have one."""
        # put everything on the same device, in case of model parallelism
        shared_device = self.param_groups[0]["params"][0].device
        per_param_norms = [
            param.grad.norm(p=2).to(shared_device)
            for group in self.param_groups
            for param in group["params"]
            if param.grad is not None
        ]
        return torch.norm(torch.stack(per_param_norms), p=2)

In [14]:
from torch.optim.lr_scheduler import _LRScheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau


class GradualWarmupScheduler(_LRScheduler):
    """Gradually warm up (increase) the learning rate of an optimizer.

    Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        multiplier: Must be >= 1.0. If greater than 1.0, the learning rate ramps
            linearly from base_lr to base_lr * multiplier. If equal to 1.0, it
            ramps linearly from 0 to base_lr.
        total_epoch: Value of ``last_epoch`` at which the target learning rate
            is reached.
        after_scheduler: Optional scheduler (e.g. ReduceLROnPlateau) that takes
            over once the warm-up is finished.

    Raises:
        ValueError: If ``multiplier`` is below 1.0.
    """

    def __init__(self, optimizer, multiplier, total_epoch, after_scheduler=None):
        self.multiplier = multiplier
        if self.multiplier < 1.:
            raise ValueError('multiplier should be greater thant or equal to 1.')
        self.total_epoch = total_epoch
        self.after_scheduler = after_scheduler
        # Becomes True once the hand-over to after_scheduler has been prepared in get_lr.
        self.finished = False
        super(GradualWarmupScheduler, self).__init__(optimizer)

    def get_lr(self):
        """Return the learning rates for the current ``last_epoch``."""
        if self.last_epoch > self.total_epoch:
            # Warm-up is over: defer to after_scheduler when there is one.
            if self.after_scheduler:
                if not self.finished:
                    # Done once: the follow-up schedule starts from the warmed-up rate.
                    self.after_scheduler.base_lrs = [base_lr * self.multiplier for base_lr in self.base_lrs]
                    self.finished = True
                return self.after_scheduler.get_last_lr()
            return [base_lr * self.multiplier for base_lr in self.base_lrs]

        if self.multiplier == 1.0:
            # Linear ramp from 0 to base_lr.
            return [base_lr * (float(self.last_epoch) / self.total_epoch) for base_lr in self.base_lrs]
        else:
            # Linear ramp from base_lr to base_lr * multiplier.
            return [base_lr * ((self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.) for base_lr in self.base_lrs]

    def step_ReduceLROnPlateau(self, metrics, epoch=None):
        """Step variant used when ``after_scheduler`` is a ReduceLROnPlateau."""
        if epoch is None:
            epoch = self.last_epoch + 1
        self.last_epoch = epoch if epoch != 0 else 1  # ReduceLROnPlateau is called at the end of epoch, whereas others are called at beginning
        if self.last_epoch <= self.total_epoch:
            # Still warming up: write the ramped learning rates straight into the optimizer.
            warmup_lr = [base_lr * ((self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.) for base_lr in self.base_lrs]
            for param_group, lr in zip(self.optimizer.param_groups, warmup_lr):
                param_group['lr'] = lr
        else:
            # NOTE(review): `epoch` was already replaced above when it was None, so this
            # `is None` branch is never taken as the code stands.
            if epoch is None:
                self.after_scheduler.step(metrics, None)
            else:
                self.after_scheduler.step(metrics, epoch - self.total_epoch)

    def step(self, epoch=None, metrics=None):
        """Advance the schedule; ``metrics`` is only used with ReduceLROnPlateau."""
        if type(self.after_scheduler) != ReduceLROnPlateau:
            if self.finished and self.after_scheduler:
                # After the hand-over, drive after_scheduler with an epoch counted from the end of warm-up.
                if epoch is None:
                    self.after_scheduler.step(None)
                else:
                    self.after_scheduler.step(epoch - self.total_epoch)
                self._last_lr = self.after_scheduler.get_last_lr()
            else:
                return super(GradualWarmupScheduler, self).step(epoch)
        else:
            self.step_ReduceLROnPlateau(metrics, epoch)

class GradualWarmupSchedulerV2(GradualWarmupScheduler):
    """Variant of GradualWarmupScheduler whose post-warm-up branch calls
    ``after_scheduler.get_lr()`` instead of ``after_scheduler.get_last_lr()``.
    """
    def __init__(self, optimizer, multiplier, total_epoch, after_scheduler=None):
        # No extra state: the arguments are forwarded unchanged to the parent constructor.
        super(GradualWarmupSchedulerV2, self).__init__(optimizer, multiplier, total_epoch, after_scheduler)
    def get_lr(self):
        """Return the learning rates for the current ``last_epoch``."""
        if self.last_epoch > self.total_epoch:
            # Warm-up is over: defer to after_scheduler when there is one.
            if self.after_scheduler:
                if not self.finished:
                    # Done once: the follow-up schedule starts from the warmed-up rate.
                    self.after_scheduler.base_lrs = [base_lr * self.multiplier for base_lr in self.base_lrs]
                    self.finished = True
                return self.after_scheduler.get_lr()
            return [base_lr * self.multiplier for base_lr in self.base_lrs]
        if self.multiplier == 1.0:
            # Linear ramp from 0 to base_lr.
            return [base_lr * (float(self.last_epoch) / self.total_epoch) for base_lr in self.base_lrs]
        else:
            # Linear ramp from base_lr to base_lr * multiplier.
            return [base_lr * ((self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.) for base_lr in self.base_lrs]

class Lookahead(optim.Optimizer):
    """Lookahead wrapper: slow weights periodically interpolate towards fast weights.

    The wrapped ``base_optimizer`` updates the (fast) parameters on every step.
    Every ``k`` steps each parameter's slow buffer moves a fraction ``alpha``
    towards the fast weights and the fast weights are reset to the slow buffer.
    The slow buffer of a parameter is created, as a copy of the current fast
    weights, the first time that parameter is synchronized.

    Args:
        base_optimizer: Already constructed optimizer to wrap; its param_groups
            and defaults are shared with (and extended by) this wrapper.
        alpha: Slow-weight interpolation rate, in [0, 1].
        k: Number of fast steps between synchronizations, >= 1.

    Raises:
        ValueError: If ``alpha`` or ``k`` is out of range.
    """

    def __init__(self, base_optimizer, alpha=0.5, k=6):
        if not 0.0 <= alpha <= 1.0:
            raise ValueError(f'Invalid slow update rate: {alpha}')
        if not 1 <= k:
            raise ValueError(f'Invalid lookahead steps: {k}')
        defaults = dict(lookahead_alpha=alpha, lookahead_k=k, lookahead_step=0)
        self.base_optimizer = base_optimizer
        self.param_groups = self.base_optimizer.param_groups
        self.defaults = base_optimizer.defaults
        self.defaults.update(defaults)
        self.state = defaultdict(dict)
        # manually add our defaults to the param groups
        for name, default in defaults.items():
            for group in self.param_groups:
                group.setdefault(name, default)

    def update_slow(self, group):
        """Synchronize slow and fast weights for every parameter of ``group`` that has a gradient."""
        for fast_p in group["params"]:
            if fast_p.grad is None:
                continue
            param_state = self.state[fast_p]
            if 'slow_buffer' not in param_state:
                param_state['slow_buffer'] = torch.empty_like(fast_p.data)
                param_state['slow_buffer'].copy_(fast_p.data)
            slow = param_state['slow_buffer']
            # slow += alpha * (fast - slow). Uses the keyword `alpha` form; the positional
            # add_(alpha, tensor) overload is deprecated.
            slow.add_(fast_p.data - slow, alpha=group['lookahead_alpha'])
            fast_p.data.copy_(slow)

    def sync_lookahead(self):
        """Force a slow/fast synchronization of all parameter groups."""
        for group in self.param_groups:
            self.update_slow(group)

    def step(self, closure=None):
        """Run one step of the wrapped optimizer and synchronize every ``k`` steps.

        Returns:
            Whatever the wrapped optimizer's ``step`` returns.
        """
        #assert id(self.param_groups) == id(self.base_optimizer.param_groups)
        loss = self.base_optimizer.step(closure)
        for group in self.param_groups:
            group['lookahead_step'] += 1
            if group['lookahead_step'] % group['lookahead_k'] == 0:
                self.update_slow(group)
        return loss

    def state_dict(self):
        """Return the wrapped optimizer's state plus the slow buffers under ``'slow_state'``."""
        fast_state_dict = self.base_optimizer.state_dict()
        slow_state = {
            (id(k) if isinstance(k, torch.Tensor) else k): v
            for k, v in self.state.items()
        }
        fast_state = fast_state_dict['state']
        param_groups = fast_state_dict['param_groups']
        return {
            'state': fast_state,
            'slow_state': slow_state,
            'param_groups': param_groups,
        }

    def load_state_dict(self, state_dict):
        """Restore both the wrapped optimizer's state and the slow buffers.

        A ``state_dict`` saved without Lookahead (no ``'slow_state'`` entry) is
        accepted; the Lookahead-specific group defaults are then re-applied.
        """
        fast_state_dict = {
            'state': state_dict['state'],
            'param_groups': state_dict['param_groups'],
        }
        self.base_optimizer.load_state_dict(fast_state_dict)

        # We want to restore the slow state, but share param_groups reference
        # with base_optimizer. This is a bit redundant but least code
        slow_state_new = False
        if 'slow_state' not in state_dict:
            print('Loading state_dict from optimizer without Lookahead applied.')
            state_dict['slow_state'] = defaultdict(dict)
            slow_state_new = True
        slow_state_dict = {
            'state': state_dict['slow_state'],
            'param_groups': state_dict['param_groups'],  # this is pointless but saves code
        }
        super(Lookahead, self).load_state_dict(slow_state_dict)
        self.param_groups = self.base_optimizer.param_groups  # make both ref same container
        if slow_state_new:
            # reapply defaults to catch missing lookahead specific ones
            for name, default in self.defaults.items():
                for group in self.param_groups:
                    group.setdefault(name, default)

In [15]:

def train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device):
    """Train ``model`` for one pass over ``train_loader`` using AMP.

    For every batch the gradients are zeroed, the forward pass and loss run under
    ``autocast``, the scaled loss is back-propagated, and then ``scaler.step``,
    ``scaler.update`` and ``scheduler.step`` are called in that order, so the
    scheduler advances once per batch.

    Args:
        train_loader: iterable yielding ``(images, labels)`` batches.
        model: network to optimise; switched to train mode here.
        criterion: callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: optimizer stepped through a fresh ``GradScaler``.
        epoch: accepted for interface compatibility; not used.
        scheduler: learning-rate scheduler stepped after every batch.
        device: target device for images and labels.

    Returns:
        ``losses.avg`` of the loss meter, which is updated with each batch's loss
        and batch size.
    """
    torch.cuda.empty_cache()
    gc.collect()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    model.train()
    end = time.time()
    scaler = GradScaler()

    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc='Train')
    for step, (images, labels) in pbar:
        optimizer.zero_grad()
        data_time.update(time.time() - end)

        images = images.to(device)
        labels = labels.to(device)

        with autocast():
            outputs = model(images)
            loss = criterion(outputs, labels)

        # Record the loss before the backward pass.
        losses.update(loss.item(), labels.size(0))

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        batch_time.update(time.time() - end)
        end = time.time()

        # Cache release and garbage collection run after every batch.
        torch.cuda.empty_cache()
        gc.collect()

        if torch.cuda.is_available():
            mem = torch.cuda.memory_reserved() / 1E9
        else:
            mem = 0
        current_lr = optimizer.param_groups[0]['lr']
        pbar.set_postfix(train_loss=f'{losses.avg:0.4f}',
                         lr=f'{current_lr:0.8f}',
                         gpu_mem=f'{mem:0.2f} GB')

    return losses.avg

def _run_validation(val_dataloader, model, criterion, device, activation=None, flip=False):
    """Shared evaluation loop behind the ``valid_fn*`` variants.

    Args:
        val_dataloader: iterable yielding ``(images, labels)`` batches.
        model: network to evaluate; switched to eval mode here.
        criterion: callable ``criterion(outputs, labels)`` applied to the raw
            model outputs (never to the activated ones).
        device: target device for images and labels.
        activation: optional callable applied to the raw outputs before they are
            collected; ``None`` collects the raw outputs.
        flip: when true, images are flipped along dimension 3 before the forward
            pass (presumably the width axis of NCHW batches - TODO confirm).

    Returns:
        ``(losses.avg, predictions, valid_labels)`` where the last two are the
        per-batch arrays concatenated with ``np.concatenate``.
    """
    losses = AverageMeter()
    model.eval()
    preds = []
    valid_labels = []
    pbar = tqdm(enumerate(val_dataloader), total=len(val_dataloader), desc='Val')
    for step, (images, labels) in pbar:
        images = images.to(device)
        labels = labels.to(device)
        if flip:
            images = torch.flip(images, [3])

        # Nothing here needs gradients, so the loss and the activation are
        # computed inside the no_grad block as well.
        with torch.no_grad():
            outputs = model(images)
            loss = criterion(outputs, labels)
            scores = outputs if activation is None else activation(outputs)

        valid_labels.append(labels.cpu().numpy())
        losses.update(loss.item(), labels.size(0))
        preds.append(scores.to('cpu').numpy())

        mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0
        pbar.set_postfix(eval_loss=f'{losses.avg:0.4f}',
                         gpu_mem=f'{mem:0.2f} GB')

    predictions = np.concatenate(preds)
    valid_labels = np.concatenate(valid_labels)
    return losses.avg, predictions, valid_labels


def valid_fn_no_sigmoid(val_dataloader, model, criterion, device):
    """Validate and return the raw model outputs (no activation applied).

    Returns ``(mean_loss, predictions, labels)``.
    """
    return _run_validation(val_dataloader, model, criterion, device)


def valid_fn(val_dataloader, model, criterion, device):
    """Validate and return ``torch.sigmoid`` of the model outputs.

    Returns ``(mean_loss, predictions, labels)``.
    """
    return _run_validation(val_dataloader, model, criterion, device,
                           activation=torch.sigmoid)


def valid_fn_two(val_dataloader, model, criterion, device):
    """Validate and return softmax probabilities of the model outputs.

    The softmax is taken over dimension 1, which is what the previous implicit
    ``F.softmax(outputs)`` call resolved to for 2-D ``(batch, classes)`` outputs,
    without triggering the implicit-dim deprecation warning.

    Returns ``(mean_loss, predictions, labels)``.
    """
    return _run_validation(val_dataloader, model, criterion, device,
                           activation=lambda logits: F.softmax(logits, dim=1))


def valid_fn_flip(val_dataloader, model, criterion, device):
    """Validate on images flipped along dimension 3 and return sigmoid outputs.

    Returns ``(mean_loss, predictions, labels)``.
    """
    return _run_validation(val_dataloader, model, criterion, device,
                           activation=torch.sigmoid, flip=True)

In [16]:
from exhaustive_weighted_random_sampler import ExhaustiveWeightedRandomSampler
def pfbeta(labels, predictions, beta=1):
    """Probabilistic F-beta score for binary labels and soft predictions.

    Each prediction is clipped to ``[0, 1]``. Predictions on truthy labels are
    summed into the "true positive" mass, predictions on falsy labels into the
    "false positive" mass; precision and recall are derived from those sums.

    Args:
        labels: indexable sequence of binary labels (truthy = positive).
        predictions: indexable sequence of scores, same length as ``labels``.
        beta: weight of recall relative to precision (1 gives F1).

    Returns:
        The F-beta value, or ``0`` when precision or recall is not positive.
        Degenerate inputs (no positive labels, or a total predicted mass of
        zero, including empty input) also return ``0`` instead of dividing by zero.
    """
    y_true_count = 0
    ctp = 0
    cfp = 0

    for idx in range(len(labels)):
        prediction = min(max(predictions[idx], 0), 1)
        if (labels[idx]):
            y_true_count += 1
            ctp += prediction
        else:
            cfp += prediction

    # Either denominator being zero means the score is undefined; report 0,
    # matching what the non-positive precision/recall branch returns.
    if y_true_count == 0 or ctp + cfp == 0:
        return 0

    beta_squared = beta * beta
    c_precision = ctp / (ctp + cfp)
    c_recall = ctp / y_true_count
    if (c_precision > 0 and c_recall > 0):
        result = (1 + beta_squared) * (c_precision * c_recall) / (beta_squared * c_precision + c_recall)
        return result
    else:
        return 0
    
def dfs_freeze(module):
    """Turn off ``requires_grad`` on every parameter yielded by ``module.parameters()``."""
    for weight in module.parameters():
        weight.requires_grad = False
        
def dfs_unfreeze(module):
    """Turn on ``requires_grad`` on every parameter yielded by ``module.parameters()``."""
    for weight in module.parameters():
        weight.requires_grad = True
    
def set_seed(seed = 42):
    """Seed NumPy, ``random`` and PyTorch (CPU and CUDA) with ``seed``.

    Also forces cuDNN into deterministic mode with benchmarking disabled, stores
    the seed in the ``PYTHONHASHSEED`` environment variable, and prints a
    confirmation line.
    """
    # Same seed for every random number generator used in the notebook.
    for seeder in (np.random.seed, random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)

    # cuDNN: deterministic algorithms only, no auto-tuner.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

    os.environ['PYTHONHASHSEED'] = str(seed)
    print('> SEEDING DONE')

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of a scalar ``x``.

    Uses the two-branch formulation so that very negative inputs do not make
    ``math.exp`` overflow: for ``x < 0`` the equivalent ``e**x / (1 + e**x)`` is
    evaluated instead.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # exp(x) underflows harmlessly towards 0.0 here, whereas exp(-x) would raise
    # OverflowError once -x exceeds roughly 709.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

# Per-fold setup cell. It builds the model, data loaders, optimizer, scheduler
# and loss for fold 0. The epoch loop that follows this block is commented out,
# so running the cell only creates these objects.
# NOTE(review): `fold`, `valid_df` and `valid_loader` bound here are read as
# globals by `objective` in a later cell.
set_seed(1)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
gc.collect()
torch.cuda.empty_cache()
for fold in [0]:
    LOGGER.info(f"Fold: {fold}")
    model = Model(model_name=CFG.model_name).to(device)
    # model = ModelVIT().to(CFG.device)
    # Training rows come from df1 (df with the cancer==1 rows appended once more),
    # validation rows from the original df.
    train_df = df1[df1['fold']!=fold].reset_index(drop=True)
    valid_df = df[df['fold']==fold].reset_index(drop=True)
    # print(len(valid_df))
    LOGGER.info(f"Len train df: {len(train_df)}")
    # Per-sample weights: 1 / class count, with the positive count multiplied by 32.
    cancer_labels = train_df['cancer'].values.tolist()
    class_zero =len(train_df[train_df['cancer']==0])
    class_one = len(train_df[train_df['cancer']==1])
    class_sample_count = np.array([class_zero, class_one*32])
    weight = 1. / class_sample_count
    samples_weight = np.array([weight[t] for t in cancer_labels])
    samples_weight = torch.from_numpy(samples_weight)
    samples_weight = samples_weight.double()
#     print(samples_weight)
    # NOTE(review): `sampler` is constructed but never passed to a DataLoader;
    # train_loader below uses shuffle=True instead. Confirm this is intended.
    sampler = ExhaustiveWeightedRandomSampler(samples_weight, len(samples_weight))
    
    train_dataset = BreastDataset(train_df, transforms=data_transforms['train'])

    train_loader = DataLoader(train_dataset, batch_size = CFG.train_bs,
                                  num_workers=1, shuffle=True, pin_memory=True, drop_last=True)
    
    valid_dataset = BreastDataset(valid_df, transforms=data_transforms['valid'])

    valid_loader = DataLoader(valid_dataset, batch_size = CFG.valid_bs, 
                                  num_workers=1, shuffle=False, pin_memory=True, drop_last=False)
    
    LEN_DL_TRAIN = len(train_loader)
    best_f1 = 0
    best_metric = 0
    total_epoch = 1
    # checkpoint = torch.load("swa_tf_efficientnetv2_b2_fold_3_model_0.5325_0.428.pth")
    # model.load_state_dict(checkpoint['state_dict'])
    # base_optimizer = AdamP
    # optimizer = SAM(model.parameters(),
    #                 base_optimizer,
    #                 rho=0.05,
    #                 lr=1e-4,
    #                 weight_decay=0.0,
    #                 nesterov=True)
    optimizer = torch.optim.AdamW(model.parameters(), lr = 1e-4, weight_decay = 5e-4)
    # optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)  
    # optimizer.load_state_dict(checkpoint['optimizer'])
    # NOTE(review): with total_epoch = 1 the warmup length (1*LEN_DL_TRAIN) equals
    # the total number of training steps, so the whole schedule is warmup.
    scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps = 1*LEN_DL_TRAIN, num_training_steps =total_epoch*LEN_DL_TRAIN)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, total_epoch)
    # scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, total_epoch-1)
    # scheduler = GradualWarmupSchedulerV2(optimizer, multiplier=10, total_epoch=1, after_scheduler=scheduler_cosine)
    # swa_model = AveragedModel(model)
    # swa_scheduler = SWALR(optimizer, swa_lr=1e-4, anneal_epochs=0)
    # scheduler.load_state_dict(checkpoint['scheduler'])
    criterion = nn.CrossEntropyLoss().to(device)
    # criterion1 = nn.BCEWithLogitsLoss().to(device)
    # criterion = nn.BCEWithLogitsLoss().to(CFG.device)
    LOGGER.info(f"Train bs: {CFG.train_bs}")
    # LOGGER.info(f"Model: {model}")
    
    LOGGER.info(f"optimizer: {optimizer}")
    LOGGER.info(f"total_epoch :{total_epoch}")
    # criterion = FocalLoss().to(device)
    # for epoch in range(1, total_epoch+1):
    #     # if epoch >=7:
    #     #     swa_model.update_parameters(model)
    #     #     swa_scheduler.step()
    #     # else:
    #     # scheduler.step(epoch-1)
    #     LOGGER.info(f"Epoch: {epoch}/{total_epoch}")
    #     # loss_train = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device)
    #     # state = {'epoch': epoch, 'state_dict': model.state_dict(),'optimizer': optimizer.state_dict(), 'scheduler':scheduler.state_dict()}
    #     # path = f'{CFG.model_name}_fold_{fold}_model_epoch_{epoch}.pth'
    #     # torch.save(state, path)
    #     loss_valid, valid_preds, valid_labels = valid_fn_two(valid_loader, model, criterion, device)
    #     # print(valid_preds)
    #     valid_preds = valid_preds[:, 1]
    #     valid_df['prediction_id'] = valid_df['patient_id'].astype(str) + '_' + valid_df['laterality'].astype(str)
    #     valid_preds = np.array(valid_preds).flatten()
        
    #     valid_df['raw_pred'] = valid_preds
        # LOGGER.info(f"Valid loss:{loss_valid:.4f}")
    #     LOGGER.info(f"Train loss:{loss_train:.4f}, Valid loss:{loss_valid:.4f}")
    #     # print(valid_df.head())
    #     grp_df = valid_df.groupby('prediction_id')['raw_pred', 'cancer'].mean()
    #     grp_df['cancer'] = grp_df['cancer'].astype(np.int)
    #     valid_labels_mean = grp_df['cancer'].values.tolist()
    #     valid_preds_mean = grp_df['raw_pred'].values.tolist()
    #     # print(valid_labels[:5], valid_preds_mean[:5])
    #     val_metric_mean = pfbeta(valid_labels_mean, valid_preds_mean)
    #     LOGGER.info(f"Val metric mean prob: {val_metric_mean:.4f}")
    #     best_metric_mean_at_epoch = 0
    #     best_threshold_mean = 0
    #     best_auc = 0
    #     best_cf = None
    #     for i in np.arange(0.001, 0.599, 0.001):
    #         valid_argmax = (valid_preds_mean>i).astype(np.int32)
    # #             print(valid_argmax)
    #         val_metric = pfbeta(valid_labels_mean, valid_argmax)
    #         val_acc = accuracy_score(valid_labels_mean, valid_argmax)
    #         val_f1 = f1_score(valid_labels_mean, valid_argmax)
    #         val_auc = roc_auc_score(valid_labels_mean, valid_argmax)
    #         cf = confusion_matrix(valid_labels_mean, valid_argmax)
    #         if val_metric> best_metric_mean_at_epoch:
    #             best_metric_mean_at_epoch = val_metric
    #             best_threshold_mean = i
    #             best_auc = val_auc
    #             best_cf = cf
    #         # print(f"Threshold: {i:.4f}, val_acc: {val_acc:.4f}, val_f1: {val_f1:.4f}, val_auc: {val_auc:.4f}, val_metric: {val_metric:.4f}")
    #     LOGGER.info(f"Best metric at epoch {epoch}: {best_metric_mean_at_epoch:.4f} {best_threshold_mean:.4f} {best_auc:.4f}")
    #     LOGGER.info(f"Cf: {best_cf}")
    # #         print(f"Train loss: {loss_train:.4f}, eval loss: {loss_valid.avg:.4f}") 
    # #         print(f"Accuracy score: {val_acc:.4f}, f1 score: {val_f1:.4f}")
    # #         print(f"Comp metric: {val_metric:.4f}")
    #     if best_metric_mean_at_epoch > best_metric:

    #         LOGGER.info(f"Model improve: {best_metric:.4f} -> {best_metric_mean_at_epoch:.4f}")
    #         best_metric = best_metric_mean_at_epoch
    #     state = {'epoch': epoch, 'state_dict': model.state_dict(),'optimizer': optimizer.state_dict(), 'scheduler':scheduler.state_dict()}
    #     path = f'{CFG.model_name}_fold_{fold}_model_epoch_{epoch}_{best_metric_mean_at_epoch:.4f}_{best_threshold_mean:.3f}.pth'
    #     torch.save(state, path)
        

Fold: 0


> SEEDING DONE


Len train df: 44652
Train bs: 12
optimizer: AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    eps: 1e-08
    foreach: None
    initial_lr: 0.0001
    lr: 0.0
    maximize: False
    weight_decay: 0.0005
)
total_epoch :1


In [17]:
# from sklearn import metrics
# def np_binary_cross_entropy_loss(probability, truth):
#     probability = probability.astype(np.float64)
#     probability = np.nan_to_num(probability, nan=1, posinf=1, neginf=0)

#     p = np.clip(probability, 1e-5, 1 - 1e-5)
#     y = truth

#     loss = -y * np.log(p) - (1 - y) * np.log(1 - p)
#     loss = loss.mean()
#     return loss

# def get_f1score(probability, truth, threshold = np.linspace(0.2, 0.5, 5000)):
#     f1score = []
#     precision=[]
#     recall=[]
#     for t in threshold:
#         predict = (probability > t).astype(np.float32)

#         tp = ((predict >= 0.5) & (truth >= 0.5)).sum()
#         fp = ((predict >= 0.5) & (truth < 0.5)).sum()
#         fn = ((predict < 0.5) & (truth >= 0.5)).sum()

#         r = tp / (tp + fn + 1e-3)
#         p = tp / (tp + fp + 1e-3)
#         f1 = 2 * r * p / (r + p + 1e-3)
#         f1score.append(f1)
#         precision.append(p)
#         recall.append(r)
#     f1score = np.array(f1score)
#     precision = np.array(precision)
#     recall = np.array(recall)
#     return f1score, precision, recall, threshold


# def compute_metric(cancer_p, cancer_t):

#     fpr, tpr, thresholds = metrics.roc_curve(cancer_t, cancer_p)
#     auc = metrics.auc(fpr, tpr)

#     f1score, precision, recall, threshold = get_f1score(cancer_p, cancer_t)
#     i = f1score.argmax()
#     f1score, precision, recall, threshold = f1score[i], precision[i], recall[i], threshold[i]

#     specificity = ((cancer_p < threshold ) & ((cancer_t <= 0.5))).sum() / (cancer_t <= 0.5).sum()
#     sensitivity = ((cancer_p >= threshold) & ((cancer_t >= 0.5))).sum() / (cancer_t >= 0.5).sum()

#     return {
#         'auc': auc,
#         'threshold': threshold,
#         'f1score': f1score,
#         'precision': precision,
#         'recall': recall,
#         'sensitivity': sensitivity,
#         'specificity': specificity,
#     }

# def compute_pfbeta(labels, predictions, beta=1):
#     y_true_count = 0
#     ctp = 0
#     cfp = 0

#     for idx in range(len(labels)):
#         prediction = min(max(predictions[idx], 0), 1)
#         if (labels[idx]):
#             y_true_count += 1
#             ctp += prediction
#             #cfp += 1 - prediction
#         else:
#             cfp += prediction

#     beta_squared = beta * beta
#     c_precision = ctp / (ctp + cfp+1e-8)
#     c_recall = ctp / y_true_count
#     if (c_precision > 0 and c_recall > 0):
#         result = (1 + beta_squared) * (c_precision * c_recall) / (beta_squared * c_precision + c_recall)
#         return result
#     else:
#         return 0

# def print_all_metric(valid_df):

# 	print(f'{"    ": <16}    \tauc      @th     f1      | 	prec    recall  | 	sens    spec ')
# 	#log.write(f'{"    ": <16}    \t0.77902	0.44898	0.28654 | 	0.32461	0.25726 | 	0.25726	0.98794\n')
# 	for site_id in [0,1,2]:

# 		#log.write(f'*** site_id [{site_id}] ***\n')
# 		#log.write(f'\n')

# 		if site_id>0:
# 			site_df = valid_df[valid_df.site_id == site_id].reset_index(drop=True)
# 		else:
# 			site_df = valid_df
# 		# ---

# 		gb = site_df
# 		m = compute_metric(gb.raw_pred, gb.cancer)
# 		text = f'{"single image": <16} [{site_id}]'
# 		text += f'\t{m["auc"]:0.5f}'
# 		text += f'\t{m["threshold"]:0.5f}'
# 		text += f'\t{m["f1score"]:0.5f} | '
# 		text += f'\t{m["precision"]:0.5f}'
# 		text += f'\t{m["recall"]:0.5f} | '
# 		text += f'\t{m["sensitivity"]:0.5f}'
# 		text += f'\t{m["specificity"]:0.5f}'
# 		#text += '\n'
# 		print(text)


# 		# ---

# 		gb = site_df[['patient_id', 'laterality', 'cancer', 'raw_pred']].groupby(['patient_id', 'laterality']).mean()
# 		m = compute_metric(gb.raw_pred, gb.cancer)
# 		text = f'{"grouby mean()": <16} [{site_id}]'
# 		text += f'\t{m["auc"]:0.5f}'
# 		text += f'\t{m["threshold"]:0.5f}'
# 		text += f'\t{m["f1score"]:0.5f} | '
# 		text += f'\t{m["precision"]:0.5f}'
# 		text += f'\t{m["recall"]:0.5f} | '
# 		text += f'\t{m["sensitivity"]:0.5f}'
# 		text += f'\t{m["specificity"]:0.5f}'
# 		#text += '\n'
# 		print(text)

# 		# ---
# 		gb = site_df[['patient_id', 'laterality', 'cancer', 'raw_pred']].groupby(['patient_id', 'laterality']).max()
# 		m = compute_metric(gb.raw_pred, gb.cancer)
# 		text = f'{"grouby max()": <16} [{site_id}]'
# 		text += f'\t{m["auc"]:0.5f}'
# 		text += f'\t{m["threshold"]:0.5f}'
# 		text += f'\t{m["f1score"]:0.5f} | '
# 		text += f'\t{m["precision"]:0.5f}'
# 		text += f'\t{m["recall"]:0.5f} | '
# 		text += f'\t{m["sensitivity"]:0.5f}'
# 		text += f'\t{m["specificity"]:0.5f}'
# 		#text += '\n'
# 		print(text)
# 		print(f'--------------\n')


# # valid_df.loc[:, 'cancer_t'] = valid_preds
# print_all_metric(valid_df)

In [18]:
import optuna
from optuna.samplers import TPESampler

In [19]:
def pfbeta_np(labels, preds, beta=1):
    """Vectorised probabilistic F-beta score.

    Args:
        labels: array-like of binary labels; entries equal to 1 count as
            positives, entries equal to 0 as negatives.
        preds: array-like of scores, same shape as ``labels``; clipped to ``[0, 1]``.
        beta: weight of recall relative to precision (1 gives F1).

    Returns:
        The F-beta value, or ``0.0`` when precision or recall is not positive.
        When there are no positive labels or the total predicted mass is zero,
        ``0.0`` is returned directly rather than evaluating a 0/0 division.
    """
    # Accept plain lists as well as arrays; boolean masks need real ndarrays.
    labels = np.asarray(labels)
    preds = np.asarray(preds).clip(0, 1)

    y_true_count = labels.sum()
    ctp = preds[labels==1].sum()
    cfp = preds[labels==0].sum()

    # Undefined precision/recall: short-circuit instead of producing NaN
    # together with a RuntimeWarning.
    if y_true_count == 0 or ctp + cfp == 0:
        return 0.0

    beta_squared = beta * beta
    c_precision = ctp / (ctp + cfp)
    c_recall = ctp / y_true_count
    if (c_precision > 0 and c_recall > 0):
        result = (1 + beta_squared) * (c_precision * c_recall) / (beta_squared * c_precision + c_recall)
        return result
    else:
        return 0.0

In [20]:

# Configuration for the checkpoint-blending search that `objective` runs.
set_seed(1)
# File the blended weights are written to on every trial.
out_file = 'swa_model_fold0_5.pth' 
# Checkpoints to blend. `objective` gives the first active entry weight a1 and
# matches the remaining ones by file name (epoch_9 -> a2, epoch_5 -> a3), so the
# names listed here must stay in sync with the names used inside `objective`.
iteration = [
    # 'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth',
    # 'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth',
    'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth',
    'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth',
    'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth',
    
    # 'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_10_0.4569_0.259.pth',
    # 'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_6_0.4520_0.128.pth',
    # 'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_7_0.4510_0.266.pth',
    
    # 'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_8_0.4403_0.415.pth',
#     'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_11_0.4387_0.436.pth'
]

# Loss used when validating each blended model.
criterion = nn.CrossEntropyLoss().to(CFG.device)
# Module-level value; `objective` assigns its own local `best_metric`.
best_metric = 0
torch.cuda.empty_cache()
def _blend_checkpoints(paths, a1, a2, a3):
    """Return the weighted sum of the ``'state_dict'`` tensors stored in ``paths``.

    The first path always receives weight ``a1``. Every later path is matched by
    file name: the epoch-9 checkpoint receives ``a2`` and the epoch-5 checkpoint
    ``a3``. A later path matching neither name is still loaded but contributes
    nothing. Returns ``None`` when ``paths`` is empty.
    """
    later_weights = {
        'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth': a2,
        'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth': a3,
    }
    blended = None
    for path in paths:
        # The identity map_location leaves storages where deserialization put them.
        weights = torch.load(path, map_location=lambda storage, loc: storage)['state_dict']
        if blended is None:
            print("none: ", path)
            # Scale the first checkpoint in place so the mapping object that was
            # loaded from disk is the one that gets saved again.
            blended = weights
            for k in list(blended.keys()):
                blended[k] = blended[k] * a1
        elif path in later_weights:
            print("noob", path)
            coeff = later_weights[path]
            for k in list(weights.keys()):
                blended[k] = blended[k] + coeff * weights[k]
    return blended


def _sweep_thresholds(labels, probs):
    """Find the decision threshold in ``[0.001, 0.599)`` maximising ``pfbeta_np``.

    Returns ``(best_metric, best_threshold, best_auc, best_cf)``; the AUC and the
    confusion matrix are those of the hard predictions at the best threshold.
    They stay at ``0`` / ``None`` when no threshold scores above 0.
    """
    best_metric = 0
    best_threshold = 0
    best_auc = 0
    best_cf = None
    for threshold in np.arange(0.001, 0.599, 0.001):
        hard_preds = (probs > threshold).astype(np.int32)
        metric = pfbeta_np(labels, hard_preds)
        if metric > best_metric:
            best_metric = metric
            best_threshold = threshold
            # Only needed for the winning threshold, so computed lazily.
            best_auc = roc_auc_score(labels, hard_preds)
            best_cf = confusion_matrix(labels, hard_preds)
    return best_metric, best_threshold, best_auc, best_cf


def objective(trial):
    """Optuna objective: score one weighted blend of the checkpoints in ``iteration``.

    Samples blend weights ``a1`` and ``a2`` (``a3 = 1 - a1 - a2``), writes the
    blended weights to ``out_file``, loads them into a fresh ``Model``, runs
    ``valid_fn_two`` on ``valid_loader`` and averages the class-1 probability per
    ``patient_id`` + ``laterality``. The best thresholded ``pfbeta_np`` score is
    returned; blends scoring above 0.5269 are also saved to disk.

    Reads the globals ``iteration``, ``out_file``, ``criterion``, ``valid_loader``,
    ``valid_df`` and ``fold``, and adds/overwrites the ``prediction_id`` and
    ``raw_pred`` columns of ``valid_df``.

    Args:
        trial: Optuna trial used to sample ``a1`` and ``a2``.

    Returns:
        Best thresholded probabilistic F1 found for this blend.
    """
    a1 = trial.suggest_uniform('a1', 0.001, 0.99)
    # The upper bound depends on a1 so that a3 = 1 - a1 - a2 stays positive.
    a2 = trial.suggest_uniform('a2', 0.0009, 1-a1-0.001)
    a3 = 1-a1-a2

    state_dict = _blend_checkpoints(iteration, a1, a2, a3)
    print(a1, a2, a3)
    print('')

    torch.save({'state_dict': state_dict}, out_file)

    model = Model(model_name=CFG.model_name).to(CFG.device)
    # Reload exactly the file that was just written (previously a hard-coded name
    # that only happened to equal out_file).
    checkpoint = torch.load(out_file)
    model.load_state_dict(checkpoint['state_dict'])

    loss_valid, valid_preds, _ = valid_fn_two(valid_loader, model, criterion, CFG.device)
    # Column 1 of the softmax output is used as the score for the cancer class.
    valid_preds = valid_preds[:, 1]
    valid_df['prediction_id'] = valid_df['patient_id'].astype(str) + '_' + valid_df['laterality'].astype(str)
    valid_preds = np.array(valid_preds).flatten()

    valid_df['raw_pred'] = valid_preds
    LOGGER.info(f"Valid loss:{loss_valid:.4f}")

    # List (not tuple) column selection: the tuple form was removed in pandas 2.0.
    grp_df = valid_df.groupby('prediction_id')[['raw_pred', 'cancer']].mean()
    # Built-in int replaces the np.int alias, which was removed in NumPy 1.24.
    grp_df['cancer'] = grp_df['cancer'].astype(int)
    valid_labels_mean = grp_df['cancer'].values
    valid_preds_mean = grp_df['raw_pred'].values

    val_metric_mean = pfbeta(valid_labels_mean, valid_preds_mean)
    LOGGER.info(f"Val metric mean prob: {val_metric_mean:.4f}")

    best_metric, best_threshold_mean, best_auc, best_cf = _sweep_thresholds(
        valid_labels_mean, valid_preds_mean)

    # NOTE(review): 0.5269 is presumably the best score reached so far - confirm.
    if best_metric>0.5269:
        state = {'state_dict': model.state_dict()}
        path = f'swa_{CFG.model_name}_fold_{fold}_model_{best_metric:.4f}_{best_threshold_mean:.4f}.pth'
        torch.save(state, path)

    LOGGER.info(f"Best metric at: {best_metric:.4f} {best_threshold_mean:.4f}  {best_auc:.4f}")
    LOGGER.info(f"Cf: {best_cf}")
    return best_metric

# Maximise the value returned by `objective` with a TPE sampler seeded for
# repeatable suggestions. Every trial runs a full validation pass, so 1000
# trials is a long run (the captured log shows several minutes per trial).
study = optuna.create_study(direction='maximize', sampler = TPESampler(seed=777))
study.optimize(func=objective, n_trials=1000)
# Last expression of the cell: displays the best {'a1', 'a2'} found.
study.best_params
# # 0.5563409550491111 0.4436590449508889 fold 0
# # 0.12634002523631388 0.8351954705276587 0.03846450423602743 0.5393 
# # 0.583301614081906 0.3673525472043472 0.04934583871374687 fold 2 0.50
# # 0.1689507073116359 0.47142151346976024 0.3596277792186039 fold 2 0.5055 0.5055 0.3670  0.7261

[32m[I 2023-02-24 11:39:17,949][0m A new study created in memory with name: no-name-07e7c83a-a934-4eff-85bc-5d66b29fd75f[0m


> SEEDING DONE
none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.15198443381740748 0.25672863401172846 0.591286932170864



Val: 100%|██████████| 229/229 [04:49<00:00,  1.26s/it, eval_loss=0.0838, gpu_mem=10.07 GB]
Valid loss:0.0838
Val metric mean prob: 0.2287
Best metric at: 0.4857 0.2910  0.7487
Cf: [[4607   59]
 [  49   51]]
[32m[I 2023-02-24 11:44:16,782][0m Trial 0 finished with value: 0.4857142857142857 and parameters: {'a1': 0.15198443381740748, 'a2': 0.25672863401172846}. Best is trial 0 with value: 0.4857142857142857.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.06235401415270182 0.43121246871095986 0.5064335171363383



Val: 100%|██████████| 229/229 [04:29<00:00,  1.18s/it, eval_loss=0.0838, gpu_mem=10.07 GB]
Valid loss:0.0838
Val metric mean prob: 0.2540
Best metric at: 0.4804 0.3600  0.7111
Cf: [[4630   36]
 [  57   43]]
[32m[I 2023-02-24 11:48:55,690][0m Trial 1 finished with value: 0.4804469273743017 and parameters: {'a1': 0.06235401415270182, 'a2': 0.43121246871095986}. Best is trial 0 with value: 0.4857142857142857.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.8270655972088143 0.15944838645109546 0.013486016340090196



Val: 100%|██████████| 229/229 [04:28<00:00,  1.17s/it, eval_loss=0.1087, gpu_mem=10.07 GB]
Valid loss:0.1087
Val metric mean prob: 0.1974
Best metric at: 0.5116 0.4940  0.7170
Cf: [[4638   28]
 [  56   44]]
[32m[I 2023-02-24 11:53:33,890][0m Trial 2 finished with value: 0.5116279069767442 and parameters: {'a1': 0.8270655972088143, 'a2': 0.15944838645109546}. Best is trial 2 with value: 0.5116279069767442.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7199921054915603 0.21462486621604113 0.06538302829239853



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.1068, gpu_mem=10.07 GB]
Valid loss:0.1068
Val metric mean prob: 0.1930
Best metric at: 0.5202 0.4740  0.7220
Cf: [[4638   28]
 [  55   45]]
[32m[I 2023-02-24 11:58:16,187][0m Trial 3 finished with value: 0.5202312138728323 and parameters: {'a1': 0.7199921054915603, 'a2': 0.21462486621604113}. Best is trial 3 with value: 0.5202312138728323.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.2672438107409991 0.4715927941741754 0.2611633950848255



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.0893, gpu_mem=10.07 GB]
Valid loss:0.0893
Val metric mean prob: 0.2304
Best metric at: 0.4854 0.3390  0.7440
Cf: [[4610   56]
 [  50   50]]
[32m[I 2023-02-24 12:02:56,402][0m Trial 4 finished with value: 0.4854368932038835 and parameters: {'a1': 0.2672438107409991, 'a2': 0.4715927941741754}. Best is trial 3 with value: 0.5202312138728323.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.09334615100410686 0.07299611171763565 0.8336577372782575



Val: 100%|██████████| 229/229 [04:31<00:00,  1.18s/it, eval_loss=0.0812, gpu_mem=10.07 GB]
Valid loss:0.0812
Val metric mean prob: 0.2208
Best metric at: 0.4804 0.2570  0.7391
Cf: [[4611   55]
 [  51   49]]
[32m[I 2023-02-24 12:07:37,032][0m Trial 5 finished with value: 0.4803921568627451 and parameters: {'a1': 0.09334615100410686, 'a2': 0.07299611171763565}. Best is trial 3 with value: 0.5202312138728323.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.5841280021678498 0.1430333685445009 0.27283862928764935



Val: 100%|██████████| 229/229 [04:47<00:00,  1.25s/it, eval_loss=0.1024, gpu_mem=10.07 GB]
Valid loss:0.1024
Val metric mean prob: 0.1842
Best metric at: 0.5085 0.4250  0.7216
Cf: [[4634   32]
 [  55   45]]
[32m[I 2023-02-24 12:12:33,761][0m Trial 6 finished with value: 0.5084745762711864 and parameters: {'a1': 0.5841280021678498, 'a2': 0.1430333685445009}. Best is trial 3 with value: 0.5202312138728323.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.9789985109384551 0.01286657109614868 0.008134917965396185



Val: 100%|██████████| 229/229 [04:48<00:00,  1.26s/it, eval_loss=0.1123, gpu_mem=10.07 GB]
Valid loss:0.1123
Val metric mean prob: 0.2041
Best metric at: 0.4884 0.5070  0.7068
Cf: [[4636   30]
 [  58   42]]
[32m[I 2023-02-24 12:17:32,027][0m Trial 7 finished with value: 0.4883720930232558 and parameters: {'a1': 0.9789985109384551, 'a2': 0.01286657109614868}. Best is trial 3 with value: 0.5202312138728323.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6752797077033628 0.1791797060334395 0.14554058626319774



Val: 100%|██████████| 229/229 [04:29<00:00,  1.18s/it, eval_loss=0.1056, gpu_mem=10.07 GB]
Valid loss:0.1056
Val metric mean prob: 0.1889
Best metric at: 0.5172 0.4530  0.7219
Cf: [[4637   29]
 [  55   45]]
[32m[I 2023-02-24 12:22:11,250][0m Trial 8 finished with value: 0.5172413793103449 and parameters: {'a1': 0.6752797077033628, 'a2': 0.1791797060334395}. Best is trial 3 with value: 0.5202312138728323.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.2669025972514883 0.27382629391412533 0.45927110883438643



Val: 100%|██████████| 229/229 [04:31<00:00,  1.19s/it, eval_loss=0.0879, gpu_mem=10.07 GB]
Valid loss:0.0879
Val metric mean prob: 0.2151
Best metric at: 0.4898 0.3390  0.7349
Cf: [[4618   48]
 [  52   48]]
[32m[I 2023-02-24 12:26:52,279][0m Trial 9 finished with value: 0.4897959183673469 and parameters: {'a1': 0.2669025972514883, 'a2': 0.27382629391412533}. Best is trial 3 with value: 0.5202312138728323.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.4561712336624775 0.35094144785237463 0.1928873184851479



Val: 100%|██████████| 229/229 [04:34<00:00,  1.20s/it, eval_loss=0.0976, gpu_mem=10.07 GB]
Valid loss:0.0976
Val metric mean prob: 0.2026
Best metric at: 0.5081 0.4090  0.7309
Cf: [[4628   38]
 [  53   47]]
[32m[I 2023-02-24 12:31:36,056][0m Trial 10 finished with value: 0.508108108108108 and parameters: {'a1': 0.4561712336624775, 'a2': 0.35094144785237463}. Best is trial 3 with value: 0.5202312138728323.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6876988626090789 0.1763789844093658 0.13592215298155533



Val: 100%|██████████| 229/229 [04:31<00:00,  1.19s/it, eval_loss=0.1059, gpu_mem=10.07 GB]
Valid loss:0.1059
Val metric mean prob: 0.1893
Best metric at: 0.5198 0.4550  0.7267
Cf: [[4635   31]
 [  54   46]]
[32m[I 2023-02-24 12:36:17,163][0m Trial 11 finished with value: 0.5197740112994351 and parameters: {'a1': 0.6876988626090789, 'a2': 0.1763789844093658}. Best is trial 3 with value: 0.5202312138728323.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7283056933387498 0.10900841903423822 0.16268588762701194



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.1065, gpu_mem=10.07 GB]
Valid loss:0.1065
Val metric mean prob: 0.1880
Best metric at: 0.5169 0.4520  0.7266
Cf: [[4634   32]
 [  54   46]]
[32m[I 2023-02-24 12:40:57,251][0m Trial 12 finished with value: 0.5168539325842696 and parameters: {'a1': 0.7283056933387498, 'a2': 0.10900841903423822}. Best is trial 3 with value: 0.5202312138728323.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.4638432854542873 0.2315173954009313 0.3046393191447814



Val: 100%|██████████| 229/229 [04:37<00:00,  1.21s/it, eval_loss=0.0974, gpu_mem=10.07 GB]
Valid loss:0.0974
Val metric mean prob: 0.1934
Best metric at: 0.5081 0.4000  0.7309
Cf: [[4628   38]
 [  53   47]]
[32m[I 2023-02-24 12:45:44,604][0m Trial 13 finished with value: 0.508108108108108 and parameters: {'a1': 0.4638432854542873, 'a2': 0.2315173954009313}. Best is trial 3 with value: 0.5202312138728323.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.8819066755985823 0.009230384682945465 0.10886293971847227



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.1089, gpu_mem=10.07 GB]
Valid loss:0.1089
Val metric mean prob: 0.1970
Best metric at: 0.4859 0.4850  0.7114
Cf: [[4632   34]
 [  57   43]]
[32m[I 2023-02-24 12:50:24,587][0m Trial 14 finished with value: 0.48587570621468923 and parameters: {'a1': 0.8819066755985823, 'a2': 0.009230384682945465}. Best is trial 3 with value: 0.5202312138728323.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6196615168494861 0.20957100013457192 0.17076748301594197



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1041, gpu_mem=10.07 GB]
Valid loss:0.1041
Val metric mean prob: 0.1890
Best metric at: 0.5087 0.4520  0.7169
Cf: [[4637   29]
 [  56   44]]
[32m[I 2023-02-24 12:55:06,530][0m Trial 15 finished with value: 0.508670520231214 and parameters: {'a1': 0.6196615168494861, 'a2': 0.20957100013457192}. Best is trial 3 with value: 0.5202312138728323.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7464410803082124 0.12082818993370192 0.13273072975808564



Val: 100%|██████████| 229/229 [04:29<00:00,  1.18s/it, eval_loss=0.1070, gpu_mem=10.07 GB]
Valid loss:0.1070
Val metric mean prob: 0.1899
Best metric at: 0.5169 0.4590  0.7266
Cf: [[4634   32]
 [  54   46]]
[32m[I 2023-02-24 12:59:45,737][0m Trial 16 finished with value: 0.5168539325842696 and parameters: {'a1': 0.7464410803082124, 'a2': 0.12082818993370192}. Best is trial 3 with value: 0.5202312138728323.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.5551085513925993 0.28858937027296483 0.15630207833443588



Val: 100%|██████████| 229/229 [04:31<00:00,  1.18s/it, eval_loss=0.1019, gpu_mem=10.07 GB]
Valid loss:0.1019
Val metric mean prob: 0.1942
Best metric at: 0.5222 0.4320  0.7315
Cf: [[4633   33]
 [  53   47]]
[32m[I 2023-02-24 13:04:26,332][0m Trial 17 finished with value: 0.5222222222222221 and parameters: {'a1': 0.5551085513925993, 'a2': 0.28858937027296483}. Best is trial 17 with value: 0.5222222222222221.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.5282118020373872 0.30105242091330753 0.17073577704930532



Val: 100%|██████████| 229/229 [04:31<00:00,  1.19s/it, eval_loss=0.1008, gpu_mem=10.07 GB]
Valid loss:0.1008
Val metric mean prob: 0.1957
Best metric at: 0.5222 0.4320  0.7315
Cf: [[4633   33]
 [  53   47]]
[32m[I 2023-02-24 13:09:07,474][0m Trial 18 finished with value: 0.5222222222222221 and parameters: {'a1': 0.5282118020373872, 'a2': 0.30105242091330753}. Best is trial 17 with value: 0.5222222222222221.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.5309916569158131 0.31150839445855893 0.157499948625628



Val: 100%|██████████| 229/229 [04:29<00:00,  1.18s/it, eval_loss=0.1009, gpu_mem=10.07 GB]
Valid loss:0.1009
Val metric mean prob: 0.1963
Best metric at: 0.5193 0.4310  0.7314
Cf: [[4632   34]
 [  53   47]]
[32m[I 2023-02-24 13:13:47,112][0m Trial 19 finished with value: 0.5193370165745855 and parameters: {'a1': 0.5309916569158131, 'a2': 0.31150839445855893}. Best is trial 17 with value: 0.5222222222222221.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.39602955584156474 0.5921852237048515 0.011785220453583767



Val: 100%|██████████| 229/229 [04:33<00:00,  1.20s/it, eval_loss=0.0952, gpu_mem=10.07 GB]
Valid loss:0.0952
Val metric mean prob: 0.2253
Best metric at: 0.4970 0.4560  0.7071
Cf: [[4639   27]
 [  58   42]]
[32m[I 2023-02-24 13:18:30,677][0m Trial 20 finished with value: 0.4970414201183433 and parameters: {'a1': 0.39602955584156474, 'a2': 0.5921852237048515}. Best is trial 17 with value: 0.5222222222222221.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.5748382553337678 0.2225159394968799 0.20264580516935235



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.1025, gpu_mem=10.07 GB]
Valid loss:0.1025
Val metric mean prob: 0.1895
Best metric at: 0.5111 0.4280  0.7264
Cf: [[4632   34]
 [  54   46]]
[32m[I 2023-02-24 13:23:10,991][0m Trial 21 finished with value: 0.5111111111111112 and parameters: {'a1': 0.5748382553337678, 'a2': 0.2225159394968799}. Best is trial 17 with value: 0.5222222222222221.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6239622382426943 0.19665564397567842 0.17938211778162733



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1042, gpu_mem=10.07 GB]
Valid loss:0.1042
Val metric mean prob: 0.1883
Best metric at: 0.5087 0.4510  0.7169
Cf: [[4637   29]
 [  56   44]]
[32m[I 2023-02-24 13:27:53,025][0m Trial 22 finished with value: 0.508670520231214 and parameters: {'a1': 0.6239622382426943, 'a2': 0.19665564397567842}. Best is trial 17 with value: 0.5222222222222221.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.4868127805898678 0.29264739789746264 0.2205398215126695



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.0988, gpu_mem=10.07 GB]
Valid loss:0.0988
Val metric mean prob: 0.1967
Best metric at: 0.5109 0.4170  0.7310
Cf: [[4629   37]
 [  53   47]]
[32m[I 2023-02-24 13:32:32,729][0m Trial 23 finished with value: 0.5108695652173914 and parameters: {'a1': 0.4868127805898678, 'a2': 0.29264739789746264}. Best is trial 17 with value: 0.5222222222222221.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7840012236450524 0.08209382449652852 0.13390495185841905



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1075, gpu_mem=10.07 GB]
Valid loss:0.1075
Val metric mean prob: 0.1910
Best metric at: 0.4974 0.4500  0.7305
Cf: [[4624   42]
 [  53   47]]
[32m[I 2023-02-24 13:37:15,284][0m Trial 24 finished with value: 0.4973544973544973 and parameters: {'a1': 0.7840012236450524, 'a2': 0.08209382449652852}. Best is trial 17 with value: 0.5222222222222221.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6471627140262406 0.24676138042650175 0.10607590554725765



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.1051, gpu_mem=10.07 GB]
Valid loss:0.1051
Val metric mean prob: 0.1919
Best metric at: 0.5172 0.4550  0.7219
Cf: [[4637   29]
 [  55   45]]
[32m[I 2023-02-24 13:41:55,211][0m Trial 25 finished with value: 0.5172413793103449 and parameters: {'a1': 0.6471627140262406, 'a2': 0.24676138042650175}. Best is trial 17 with value: 0.5222222222222221.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.5418845541150757 0.2170452344460431 0.24107021143888116



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1011, gpu_mem=10.07 GB]
Valid loss:0.1011
Val metric mean prob: 0.1895
Best metric at: 0.5165 0.4220  0.7312
Cf: [[4631   35]
 [  53   47]]
[32m[I 2023-02-24 13:46:37,831][0m Trial 26 finished with value: 0.5164835164835165 and parameters: {'a1': 0.5418845541150757, 'a2': 0.2170452344460431}. Best is trial 17 with value: 0.5222222222222221.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.40100975728992605 0.3454531766082324 0.25353706610184157



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.0948, gpu_mem=10.07 GB]
Valid loss:0.0948
Val metric mean prob: 0.2063
Best metric at: 0.5029 0.4150  0.7167
Cf: [[4635   31]
 [  56   44]]
[32m[I 2023-02-24 13:51:17,932][0m Trial 27 finished with value: 0.5028571428571429 and parameters: {'a1': 0.40100975728992605, 'a2': 0.3454531766082324}. Best is trial 17 with value: 0.5222222222222221.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7055907819654187 0.1984373196832644 0.09597189835131692



Val: 100%|██████████| 229/229 [04:31<00:00,  1.19s/it, eval_loss=0.1064, gpu_mem=10.07 GB]
Valid loss:0.1064
Val metric mean prob: 0.1913
Best metric at: 0.5227 0.4660  0.7268
Cf: [[4636   30]
 [  54   46]]
[32m[I 2023-02-24 13:55:58,996][0m Trial 28 finished with value: 0.5227272727272727 and parameters: {'a1': 0.7055907819654187, 'a2': 0.1984373196832644}. Best is trial 28 with value: 0.5227272727272727.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.5827614361189276 0.2543995363721063 0.16283902750896606



Val: 100%|██████████| 229/229 [04:31<00:00,  1.19s/it, eval_loss=0.1029, gpu_mem=10.07 GB]
Valid loss:0.1029
Val metric mean prob: 0.1916
Best metric at: 0.5111 0.4330  0.7264
Cf: [[4632   34]
 [  54   46]]
[32m[I 2023-02-24 14:00:40,498][0m Trial 29 finished with value: 0.5111111111111112 and parameters: {'a1': 0.5827614361189276, 'a2': 0.2543995363721063}. Best is trial 28 with value: 0.5227272727272727.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6693802093814523 0.1926514737306175 0.13796831688793024



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.1055, gpu_mem=10.07 GB]
Valid loss:0.1055
Val metric mean prob: 0.1894
Best metric at: 0.5172 0.4520  0.7219
Cf: [[4637   29]
 [  55   45]]
[32m[I 2023-02-24 14:05:20,495][0m Trial 30 finished with value: 0.5172413793103449 and parameters: {'a1': 0.6693802093814523, 'a2': 0.1926514737306175}. Best is trial 28 with value: 0.5227272727272727.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7255632059461911 0.15415804951523257 0.12027874453857637



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1067, gpu_mem=10.07 GB]
Valid loss:0.1067
Val metric mean prob: 0.1901
Best metric at: 0.5198 0.4630  0.7267
Cf: [[4635   31]
 [  54   46]]
[32m[I 2023-02-24 14:10:02,656][0m Trial 31 finished with value: 0.5197740112994351 and parameters: {'a1': 0.7255632059461911, 'a2': 0.15415804951523257}. Best is trial 28 with value: 0.5227272727272727.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7908258666671129 0.13466510906504978 0.07450902426783737



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1079, gpu_mem=10.07 GB]
Valid loss:0.1079
Val metric mean prob: 0.1937
Best metric at: 0.5000 0.4930  0.7072
Cf: [[4640   26]
 [  58   42]]
[32m[I 2023-02-24 14:14:44,361][0m Trial 32 finished with value: 0.5 and parameters: {'a1': 0.7908258666671129, 'a2': 0.13466510906504978}. Best is trial 28 with value: 0.5227272727272727.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.8430048962701027 0.05450574697557872 0.10248935675431861



Val: 100%|██████████| 229/229 [04:31<00:00,  1.19s/it, eval_loss=0.1084, gpu_mem=10.07 GB]
Valid loss:0.1084
Val metric mean prob: 0.1950
Best metric at: 0.4943 0.4860  0.7117
Cf: [[4635   31]
 [  57   43]]
[32m[I 2023-02-24 14:19:25,899][0m Trial 33 finished with value: 0.49425287356321834 and parameters: {'a1': 0.8430048962701027, 'a2': 0.05450574697557872}. Best is trial 28 with value: 0.5227272727272727.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6236456327508684 0.23326717607670286 0.14308719117242877



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1043, gpu_mem=10.07 GB]
Valid loss:0.1043
Val metric mean prob: 0.1906
Best metric at: 0.5114 0.4490  0.7217
Cf: [[4635   31]
 [  55   45]]
[32m[I 2023-02-24 14:24:08,192][0m Trial 34 finished with value: 0.5113636363636364 and parameters: {'a1': 0.6236456327508684, 'a2': 0.23326717607670286}. Best is trial 28 with value: 0.5227272727272727.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.0014064481955573527 0.39690969715410673 0.6016838546503359



Val: 100%|██████████| 229/229 [04:33<00:00,  1.20s/it, eval_loss=0.0833, gpu_mem=10.07 GB]
Valid loss:0.0833
Val metric mean prob: 0.2582
Best metric at: 0.4780 0.2860  0.7390
Cf: [[4610   56]
 [  51   49]]
[32m[I 2023-02-24 14:28:51,565][0m Trial 35 finished with value: 0.47804878048780486 and parameters: {'a1': 0.0014064481955573527, 'a2': 0.39690969715410673}. Best is trial 28 with value: 0.5227272727272727.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.5379071130026845 0.2699131557086035 0.19217973128871202



Val: 100%|██████████| 229/229 [04:33<00:00,  1.19s/it, eval_loss=0.1011, gpu_mem=10.07 GB]
Valid loss:0.1011
Val metric mean prob: 0.1932
Best metric at: 0.5222 0.4300  0.7315
Cf: [[4633   33]
 [  53   47]]
[32m[I 2023-02-24 14:33:34,536][0m Trial 36 finished with value: 0.5222222222222221 and parameters: {'a1': 0.5379071130026845, 'a2': 0.2699131557086035}. Best is trial 28 with value: 0.5227272727272727.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.512759847300014 0.2613484800396575 0.2258916726603285



Val: 100%|██████████| 229/229 [04:33<00:00,  1.19s/it, eval_loss=0.0999, gpu_mem=10.07 GB]
Valid loss:0.0999
Val metric mean prob: 0.1933
Best metric at: 0.5137 0.4190  0.7311
Cf: [[4630   36]
 [  53   47]]
[32m[I 2023-02-24 14:38:17,223][0m Trial 37 finished with value: 0.5136612021857923 and parameters: {'a1': 0.512759847300014, 'a2': 0.2613484800396575}. Best is trial 28 with value: 0.5227272727272727.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.5450842011418318 0.2777881871626844 0.17712761169548386



Val: 100%|██████████| 229/229 [04:31<00:00,  1.19s/it, eval_loss=0.1015, gpu_mem=10.07 GB]
Valid loss:0.1015
Val metric mean prob: 0.1936
Best metric at: 0.5222 0.4310  0.7315
Cf: [[4633   33]
 [  53   47]]
[32m[I 2023-02-24 14:42:58,631][0m Trial 38 finished with value: 0.5222222222222221 and parameters: {'a1': 0.5450842011418318, 'a2': 0.2777881871626844}. Best is trial 28 with value: 0.5227272727272727.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.3993034133535204 0.33070532726146706 0.26999125938501256



Val: 100%|██████████| 229/229 [04:31<00:00,  1.18s/it, eval_loss=0.0946, gpu_mem=10.07 GB]
Valid loss:0.0946
Val metric mean prob: 0.2054
Best metric at: 0.5000 0.4140  0.7166
Cf: [[4634   32]
 [  56   44]]
[32m[I 2023-02-24 14:47:39,179][0m Trial 39 finished with value: 0.5 and parameters: {'a1': 0.3993034133535204, 'a2': 0.33070532726146706}. Best is trial 28 with value: 0.5227272727272727.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.5947227949367027 0.2373574675755724 0.16791973748772493



Val: 100%|██████████| 229/229 [04:52<00:00,  1.28s/it, eval_loss=0.1033, gpu_mem=10.07 GB]
Valid loss:0.1033
Val metric mean prob: 0.1905
Best metric at: 0.5085 0.4360  0.7216
Cf: [[4634   32]
 [  55   45]]
[32m[I 2023-02-24 14:52:41,637][0m Trial 40 finished with value: 0.5084745762711864 and parameters: {'a1': 0.5947227949367027, 'a2': 0.2373574675755724}. Best is trial 28 with value: 0.5227272727272727.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.5460562017603949 0.278730942386057 0.17521285585354807



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.1015, gpu_mem=10.07 GB]
Valid loss:0.1015
Val metric mean prob: 0.1937
Best metric at: 0.5222 0.4310  0.7315
Cf: [[4633   33]
 [  53   47]]
[32m[I 2023-02-24 14:57:21,849][0m Trial 41 finished with value: 0.5222222222222221 and parameters: {'a1': 0.5460562017603949, 'a2': 0.278730942386057}. Best is trial 28 with value: 0.5227272727272727.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.49650830311430344 0.29431224471087947 0.20917945217481715



Val: 100%|██████████| 229/229 [04:33<00:00,  1.19s/it, eval_loss=0.0993, gpu_mem=10.07 GB]
Valid loss:0.0993
Val metric mean prob: 0.1963
Best metric at: 0.5109 0.4200  0.7310
Cf: [[4629   37]
 [  53   47]]
[32m[I 2023-02-24 15:02:05,042][0m Trial 42 finished with value: 0.5108695652173914 and parameters: {'a1': 0.49650830311430344, 'a2': 0.29431224471087947}. Best is trial 28 with value: 0.5227272727272727.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6505148403805285 0.2598395285854341 0.08964563103403739



Val: 100%|██████████| 229/229 [04:31<00:00,  1.18s/it, eval_loss=0.1052, gpu_mem=10.07 GB]
Valid loss:0.1052
Val metric mean prob: 0.1928
Best metric at: 0.5257 0.4560  0.7269
Cf: [[4637   29]
 [  54   46]]
[32m[I 2023-02-24 15:06:45,580][0m Trial 43 finished with value: 0.5257142857142858 and parameters: {'a1': 0.6505148403805285, 'a2': 0.2598395285854341}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6693480876169364 0.24766131233481972 0.08299060004824385



Val: 100%|██████████| 229/229 [04:31<00:00,  1.19s/it, eval_loss=0.1057, gpu_mem=10.07 GB]
Valid loss:0.1057
Val metric mean prob: 0.1926
Best metric at: 0.5257 0.4610  0.7269
Cf: [[4637   29]
 [  54   46]]
[32m[I 2023-02-24 15:11:27,190][0m Trial 44 finished with value: 0.5257142857142858 and parameters: {'a1': 0.6693480876169364, 'a2': 0.24766131233481972}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6877619981693374 0.2459043487537935 0.06633365307686914



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.1061, gpu_mem=10.07 GB]
Valid loss:0.1061
Val metric mean prob: 0.1932
Best metric at: 0.5202 0.4670  0.7220
Cf: [[4638   28]
 [  55   45]]
[32m[I 2023-02-24 15:16:07,348][0m Trial 45 finished with value: 0.5202312138728323 and parameters: {'a1': 0.6877619981693374, 'a2': 0.2459043487537935}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6405789673570168 0.258377662519918 0.10104337012306519



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1049, gpu_mem=10.07 GB]
Valid loss:0.1049
Val metric mean prob: 0.1925
Best metric at: 0.5172 0.4560  0.7219
Cf: [[4637   29]
 [  55   45]]
[32m[I 2023-02-24 15:20:49,004][0m Trial 46 finished with value: 0.5172413793103449 and parameters: {'a1': 0.6405789673570168, 'a2': 0.258377662519918}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6922359632939046 0.22213533091865906 0.08562870578743637



Val: 100%|██████████| 229/229 [04:39<00:00,  1.22s/it, eval_loss=0.1062, gpu_mem=10.07 GB]
Valid loss:0.1062
Val metric mean prob: 0.1920
Best metric at: 0.5257 0.4650  0.7269
Cf: [[4637   29]
 [  54   46]]
[32m[I 2023-02-24 15:25:38,381][0m Trial 47 finished with value: 0.5257142857142858 and parameters: {'a1': 0.6922359632939046, 'a2': 0.22213533091865906}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6975086905386345 0.22281556832545193 0.07967574113591355



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1063, gpu_mem=10.07 GB]
Valid loss:0.1063
Val metric mean prob: 0.1923
Best metric at: 0.5202 0.4690  0.7220
Cf: [[4638   28]
 [  55   45]]
[32m[I 2023-02-24 15:30:20,124][0m Trial 48 finished with value: 0.5202312138728323 and parameters: {'a1': 0.6975086905386345, 'a2': 0.22281556832545193}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7437491950800323 0.20227839262559313 0.05397241229437452



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1072, gpu_mem=10.07 GB]
Valid loss:0.1072
Val metric mean prob: 0.1936
Best metric at: 0.5140 0.4650  0.7265
Cf: [[4633   33]
 [  54   46]]
[32m[I 2023-02-24 15:35:02,059][0m Trial 49 finished with value: 0.5139664804469275 and parameters: {'a1': 0.7437491950800323, 'a2': 0.20227839262559313}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6671165496236314 0.17538287890649332 0.15750057146987526



Val: 100%|██████████| 229/229 [04:33<00:00,  1.19s/it, eval_loss=0.1054, gpu_mem=10.07 GB]
Valid loss:0.1054
Val metric mean prob: 0.1883
Best metric at: 0.5146 0.4590  0.7171
Cf: [[4639   27]
 [  56   44]]
[32m[I 2023-02-24 15:39:44,753][0m Trial 50 finished with value: 0.5146198830409356 and parameters: {'a1': 0.6671165496236314, 'a2': 0.17538287890649332}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.5923270732503344 0.23049043533977773 0.17718249140988787



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.1032, gpu_mem=10.07 GB]
Valid loss:0.1032
Val metric mean prob: 0.1900
Best metric at: 0.5085 0.4350  0.7216
Cf: [[4634   32]
 [  55   45]]
[32m[I 2023-02-24 15:44:25,316][0m Trial 51 finished with value: 0.5084745762711864 and parameters: {'a1': 0.5923270732503344, 'a2': 0.23049043533977773}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7177615561706523 0.20493773436490337 0.07730070946444434



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.1067, gpu_mem=10.07 GB]
Valid loss:0.1067
Val metric mean prob: 0.1923
Best metric at: 0.5198 0.4670  0.7267
Cf: [[4635   31]
 [  54   46]]
[32m[I 2023-02-24 15:49:05,903][0m Trial 52 finished with value: 0.5197740112994351 and parameters: {'a1': 0.7177615561706523, 'a2': 0.20493773436490337}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.647734051578119 0.24504483825496334 0.10722111016691763



Val: 100%|██████████| 229/229 [04:31<00:00,  1.19s/it, eval_loss=0.1051, gpu_mem=10.07 GB]
Valid loss:0.1051
Val metric mean prob: 0.1918
Best metric at: 0.5172 0.4550  0.7219
Cf: [[4637   29]
 [  55   45]]
[32m[I 2023-02-24 15:53:46,878][0m Trial 53 finished with value: 0.5172413793103449 and parameters: {'a1': 0.647734051578119, 'a2': 0.24504483825496334}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.760007630113453 0.21686166611251861 0.023130703774028427



Val: 100%|██████████| 229/229 [04:31<00:00,  1.19s/it, eval_loss=0.1076, gpu_mem=10.07 GB]
Valid loss:0.1076
Val metric mean prob: 0.1953
Best metric at: 0.5140 0.4700  0.7265
Cf: [[4633   33]
 [  54   46]]
[32m[I 2023-02-24 15:58:28,026][0m Trial 54 finished with value: 0.5139664804469275 and parameters: {'a1': 0.760007630113453, 'a2': 0.21686166611251861}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7000889392292254 0.18824626171778647 0.1116647990529881



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1063, gpu_mem=10.07 GB]
Valid loss:0.1063
Val metric mean prob: 0.1905
Best metric at: 0.5202 0.4650  0.7220
Cf: [[4638   28]
 [  55   45]]
[32m[I 2023-02-24 16:03:09,943][0m Trial 55 finished with value: 0.5202312138728323 and parameters: {'a1': 0.7000889392292254, 'a2': 0.18824626171778647}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6598846198914557 0.16616486633796812 0.17395051377057613



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1052, gpu_mem=10.07 GB]
Valid loss:0.1052
Val metric mean prob: 0.1875
Best metric at: 0.5146 0.4560  0.7171
Cf: [[4639   27]
 [  56   44]]
[32m[I 2023-02-24 16:07:51,837][0m Trial 56 finished with value: 0.5146198830409356 and parameters: {'a1': 0.6598846198914557, 'a2': 0.16616486633796812}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.5944421083737955 0.267153294604393 0.13840459702181146



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1034, gpu_mem=10.07 GB]
Valid loss:0.1034
Val metric mean prob: 0.1925
Best metric at: 0.5114 0.4490  0.7217
Cf: [[4635   31]
 [  55   45]]
[32m[I 2023-02-24 16:12:34,361][0m Trial 57 finished with value: 0.5113636363636364 and parameters: {'a1': 0.5944421083737955, 'a2': 0.267153294604393}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.614396391232961 0.24875109814359703 0.136852510623442



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.1040, gpu_mem=10.07 GB]
Valid loss:0.1040
Val metric mean prob: 0.1914
Best metric at: 0.5114 0.4500  0.7217
Cf: [[4635   31]
 [  55   45]]
[32m[I 2023-02-24 16:17:14,226][0m Trial 58 finished with value: 0.5113636363636364 and parameters: {'a1': 0.614396391232961, 'a2': 0.24875109814359703}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.45952459614987257 0.30765833086474537 0.232817072985382



Val: 100%|██████████| 229/229 [04:33<00:00,  1.19s/it, eval_loss=0.0976, gpu_mem=10.07 GB]
Valid loss:0.0976
Val metric mean prob: 0.1992
Best metric at: 0.5081 0.4050  0.7309
Cf: [[4628   38]
 [  53   47]]
[32m[I 2023-02-24 16:21:57,102][0m Trial 59 finished with value: 0.508108108108108 and parameters: {'a1': 0.45952459614987257, 'a2': 0.30765833086474537}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7934416829474679 0.19506322322002773 0.01149509383250441



Val: 100%|██████████| 229/229 [04:42<00:00,  1.23s/it, eval_loss=0.1081, gpu_mem=10.07 GB]
Valid loss:0.1081
Val metric mean prob: 0.1964
Best metric at: 0.5057 0.4840  0.7168
Cf: [[4636   30]
 [  56   44]]
[32m[I 2023-02-24 16:26:49,666][0m Trial 60 finished with value: 0.5057471264367815 and parameters: {'a1': 0.7934416829474679, 'a2': 0.19506322322002773}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.5538512749613784 0.2795407273746234 0.1666079976639982



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1018, gpu_mem=10.07 GB]
Valid loss:0.1018
Val metric mean prob: 0.1936
Best metric at: 0.5222 0.4310  0.7315
Cf: [[4633   33]
 [  53   47]]
[32m[I 2023-02-24 16:31:31,462][0m Trial 61 finished with value: 0.5222222222222221 and parameters: {'a1': 0.5538512749613784, 'a2': 0.2795407273746234}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.5777791516019061 0.2566571718498706 0.16556367654822335



Val: 100%|██████████| 229/229 [04:31<00:00,  1.18s/it, eval_loss=0.1027, gpu_mem=10.07 GB]
Valid loss:0.1027
Val metric mean prob: 0.1918
Best metric at: 0.5193 0.4310  0.7314
Cf: [[4632   34]
 [  53   47]]
[32m[I 2023-02-24 16:36:12,492][0m Trial 62 finished with value: 0.5193370165745855 and parameters: {'a1': 0.5777791516019061, 'a2': 0.2566571718498706}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.5269106292378893 0.2953356824802656 0.17775368828184507



Val: 100%|██████████| 229/229 [04:29<00:00,  1.18s/it, eval_loss=0.1007, gpu_mem=10.07 GB]
Valid loss:0.1007
Val metric mean prob: 0.1953
Best metric at: 0.5193 0.4290  0.7314
Cf: [[4632   34]
 [  53   47]]
[32m[I 2023-02-24 16:40:52,045][0m Trial 63 finished with value: 0.5193370165745855 and parameters: {'a1': 0.5269106292378893, 'a2': 0.2953356824802656}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6237691260536115 0.26843229000469254 0.107798583941696



Val: 100%|██████████| 229/229 [04:35<00:00,  1.20s/it, eval_loss=0.1044, gpu_mem=10.07 GB]
Valid loss:0.1044
Val metric mean prob: 0.1928
Best metric at: 0.5114 0.4540  0.7217
Cf: [[4635   31]
 [  55   45]]
[32m[I 2023-02-24 16:45:41,216][0m Trial 64 finished with value: 0.5113636363636364 and parameters: {'a1': 0.6237691260536115, 'a2': 0.26843229000469254}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7103513015770109 0.23945120568087938 0.050197492742109745



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1066, gpu_mem=10.07 GB]
Valid loss:0.1066
Val metric mean prob: 0.1938
Best metric at: 0.5233 0.4740  0.7221
Cf: [[4639   27]
 [  55   45]]
[32m[I 2023-02-24 16:50:23,910][0m Trial 65 finished with value: 0.5232558139534884 and parameters: {'a1': 0.7103513015770109, 'a2': 0.23945120568087938}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6951566150602201 0.20605079130927284 0.09879259363050708



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1062, gpu_mem=10.07 GB]
Valid loss:0.1062
Val metric mean prob: 0.1913
Best metric at: 0.5257 0.4650  0.7269
Cf: [[4637   29]
 [  54   46]]
[32m[I 2023-02-24 16:55:05,856][0m Trial 66 finished with value: 0.5257142857142858 and parameters: {'a1': 0.6951566150602201, 'a2': 0.20605079130927284}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6936792793128596 0.20663391497307698 0.09968680571406346



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1062, gpu_mem=10.07 GB]
Valid loss:0.1062
Val metric mean prob: 0.1912
Best metric at: 0.5257 0.4640  0.7269
Cf: [[4637   29]
 [  54   46]]
[32m[I 2023-02-24 16:59:48,272][0m Trial 67 finished with value: 0.5257142857142858 and parameters: {'a1': 0.6936792793128596, 'a2': 0.20663391497307698}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7115773841828898 0.20985256511032346 0.07857005070678669



Val: 100%|██████████| 229/229 [04:35<00:00,  1.20s/it, eval_loss=0.1066, gpu_mem=10.07 GB]
Valid loss:0.1066
Val metric mean prob: 0.1923
Best metric at: 0.5172 0.4700  0.7219
Cf: [[4637   29]
 [  55   45]]
[32m[I 2023-02-24 17:04:33,205][0m Trial 68 finished with value: 0.5172413793103449 and parameters: {'a1': 0.7115773841828898, 'a2': 0.20985256511032346}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7392915716302465 0.18653772861868845 0.07417069975106505



Val: 100%|██████████| 229/229 [04:37<00:00,  1.21s/it, eval_loss=0.1071, gpu_mem=10.07 GB]
Valid loss:0.1071
Val metric mean prob: 0.1926
Best metric at: 0.5169 0.4650  0.7266
Cf: [[4634   32]
 [  54   46]]
[32m[I 2023-02-24 17:09:20,325][0m Trial 69 finished with value: 0.5168539325842696 and parameters: {'a1': 0.7392915716302465, 'a2': 0.18653772861868845}. Best is trial 43 with value: 0.5257142857142858.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6786585254981048 0.22280913149668852 0.09853234300520669



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1059, gpu_mem=10.07 GB]
Valid loss:0.1059
Val metric mean prob: 0.1915
Best metric at: 0.5287 0.4630  0.7270
Cf: [[4638   28]
 [  54   46]]
[32m[I 2023-02-24 17:14:05,247][0m Trial 70 finished with value: 0.5287356321839081 and parameters: {'a1': 0.6786585254981048, 'a2': 0.22280913149668852}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6705983639667862 0.22385102102901 0.10555061500420382



Val: 100%|██████████| 229/229 [04:31<00:00,  1.18s/it, eval_loss=0.1057, gpu_mem=10.07 GB]
Valid loss:0.1057
Val metric mean prob: 0.1913
Best metric at: 0.5257 0.4560  0.7269
Cf: [[4637   29]
 [  54   46]]
[32m[I 2023-02-24 17:18:46,094][0m Trial 71 finished with value: 0.5257142857142858 and parameters: {'a1': 0.6705983639667862, 'a2': 0.22385102102901}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6734655243014309 0.22113143663022528 0.1054030390683438



Val: 100%|██████████| 229/229 [04:33<00:00,  1.19s/it, eval_loss=0.1057, gpu_mem=10.07 GB]
Valid loss:0.1057
Val metric mean prob: 0.1912
Best metric at: 0.5257 0.4570  0.7269
Cf: [[4637   29]
 [  54   46]]
[32m[I 2023-02-24 17:23:29,262][0m Trial 72 finished with value: 0.5257142857142858 and parameters: {'a1': 0.6734655243014309, 'a2': 0.22113143663022528}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6860347518671207 0.22612909316086924 0.08783615497201006



Val: 100%|██████████| 229/229 [04:38<00:00,  1.21s/it, eval_loss=0.1061, gpu_mem=10.07 GB]
Valid loss:0.1061
Val metric mean prob: 0.1920
Best metric at: 0.5257 0.4620  0.7269
Cf: [[4637   29]
 [  54   46]]
[32m[I 2023-02-24 17:28:16,982][0m Trial 73 finished with value: 0.5257142857142858 and parameters: {'a1': 0.6860347518671207, 'a2': 0.22612909316086924}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7632382441318543 0.21338728641101648 0.02337446945712926



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.1076, gpu_mem=10.07 GB]
Valid loss:0.1076
Val metric mean prob: 0.1953
Best metric at: 0.5140 0.4700  0.7265
Cf: [[4633   33]
 [  54   46]]
[32m[I 2023-02-24 17:32:56,927][0m Trial 74 finished with value: 0.5139664804469275 and parameters: {'a1': 0.7632382441318543, 'a2': 0.21338728641101648}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6695965909361217 0.2211438645474582 0.10925954451642012



Val: 100%|██████████| 229/229 [04:31<00:00,  1.18s/it, eval_loss=0.1056, gpu_mem=10.07 GB]
Valid loss:0.1056
Val metric mean prob: 0.1911
Best metric at: 0.5257 0.4560  0.7269
Cf: [[4637   29]
 [  54   46]]
[32m[I 2023-02-24 17:37:39,087][0m Trial 75 finished with value: 0.5257142857142858 and parameters: {'a1': 0.6695965909361217, 'a2': 0.2211438645474582}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6471893751129021 0.2315300063824695 0.1212806185046284



Val: 100%|██████████| 229/229 [04:31<00:00,  1.18s/it, eval_loss=0.1050, gpu_mem=10.07 GB]
Valid loss:0.1050
Val metric mean prob: 0.1910
Best metric at: 0.5202 0.4570  0.7220
Cf: [[4638   28]
 [  55   45]]
[32m[I 2023-02-24 17:42:19,887][0m Trial 76 finished with value: 0.5202312138728323 and parameters: {'a1': 0.6471893751129021, 'a2': 0.2315300063824695}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7303381570149872 0.20543183755511837 0.06423000542989446



Val: 100%|██████████| 229/229 [04:32<00:00,  1.19s/it, eval_loss=0.1070, gpu_mem=10.07 GB]
Valid loss:0.1070
Val metric mean prob: 0.1930
Best metric at: 0.5169 0.4660  0.7266
Cf: [[4634   32]
 [  54   46]]
[32m[I 2023-02-24 17:47:01,673][0m Trial 77 finished with value: 0.5168539325842696 and parameters: {'a1': 0.7303381570149872, 'a2': 0.20543183755511837}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.8165988243392046 0.17568207630538432 0.007719099355411069



Val: 100%|██████████| 229/229 [04:33<00:00,  1.19s/it, eval_loss=0.1085, gpu_mem=10.07 GB]
Valid loss:0.1085
Val metric mean prob: 0.1972
Best metric at: 0.5116 0.4930  0.7170
Cf: [[4638   28]
 [  56   44]]
[32m[I 2023-02-24 17:51:44,420][0m Trial 78 finished with value: 0.5116279069767442 and parameters: {'a1': 0.8165988243392046, 'a2': 0.17568207630538432}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7542348914221061 0.2129262811914434 0.0328388273864505



Val: 100%|██████████| 229/229 [04:35<00:00,  1.20s/it, eval_loss=0.1074, gpu_mem=10.07 GB]
Valid loss:0.1074
Val metric mean prob: 0.1948
Best metric at: 0.5137 0.4660  0.7311
Cf: [[4630   36]
 [  53   47]]
[32m[I 2023-02-24 17:56:29,116][0m Trial 79 finished with value: 0.5136612021857923 and parameters: {'a1': 0.7542348914221061, 'a2': 0.2129262811914434}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6830380634256324 0.238054124167146 0.07890781240722156



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.1060, gpu_mem=10.07 GB]
Valid loss:0.1060
Val metric mean prob: 0.1925
Best metric at: 0.5257 0.4630  0.7269
Cf: [[4637   29]
 [  54   46]]
[32m[I 2023-02-24 18:01:09,645][0m Trial 80 finished with value: 0.5257142857142858 and parameters: {'a1': 0.6830380634256324, 'a2': 0.238054124167146}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6833108468963346 0.22123699111803405 0.09545216198563136



Val: 100%|██████████| 229/229 [04:34<00:00,  1.20s/it, eval_loss=0.1060, gpu_mem=10.07 GB]
Valid loss:0.1060
Val metric mean prob: 0.1916
Best metric at: 0.5287 0.4640  0.7270
Cf: [[4638   28]
 [  54   46]]
[32m[I 2023-02-24 18:05:56,494][0m Trial 81 finished with value: 0.5287356321839081 and parameters: {'a1': 0.6833108468963346, 'a2': 0.22123699111803405}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.641441144874789 0.22315810154567028 0.13540075357954073



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.1048, gpu_mem=10.07 GB]
Valid loss:0.1048
Val metric mean prob: 0.1903
Best metric at: 0.5116 0.4580  0.7170
Cf: [[4638   28]
 [  56   44]]
[32m[I 2023-02-24 18:10:37,000][0m Trial 82 finished with value: 0.5116279069767442 and parameters: {'a1': 0.641441144874789, 'a2': 0.22315810154567028}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.6131105903689924 0.2006865095373842 0.1862029000936234



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.1038, gpu_mem=10.07 GB]
Valid loss:0.1038
Val metric mean prob: 0.1883
Best metric at: 0.5111 0.4330  0.7264
Cf: [[4632   34]
 [  54   46]]
[32m[I 2023-02-24 18:15:16,936][0m Trial 83 finished with value: 0.5111111111111112 and parameters: {'a1': 0.6131105903689924, 'a2': 0.2006865095373842}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.67403053499327 0.22792082840567637 0.09804863660105359



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.1058, gpu_mem=10.07 GB]
Valid loss:0.1058
Val metric mean prob: 0.1916
Best metric at: 0.5287 0.4620  0.7270
Cf: [[4638   28]
 [  54   46]]
[32m[I 2023-02-24 18:19:59,895][0m Trial 84 finished with value: 0.5287356321839081 and parameters: {'a1': 0.67403053499327, 'a2': 0.22792082840567637}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7174705750469376 0.22753387083788884 0.054995554115173595



Val: 100%|██████████| 229/229 [04:31<00:00,  1.18s/it, eval_loss=0.1068, gpu_mem=10.07 GB]
Valid loss:0.1068
Val metric mean prob: 0.1935
Best metric at: 0.5202 0.4750  0.7220
Cf: [[4638   28]
 [  55   45]]
[32m[I 2023-02-24 18:24:40,773][0m Trial 85 finished with value: 0.5202312138728323 and parameters: {'a1': 0.7174705750469376, 'a2': 0.22753387083788884}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7767451641304511 0.20863445211527876 0.014620383754270105



Val: 100%|██████████| 229/229 [04:34<00:00,  1.20s/it, eval_loss=0.1078, gpu_mem=10.07 GB]
Valid loss:0.1078
Val metric mean prob: 0.1959
Best metric at: 0.5085 0.4790  0.7216
Cf: [[4634   32]
 [  55   45]]
[32m[I 2023-02-24 18:29:25,681][0m Trial 86 finished with value: 0.5084745762711864 and parameters: {'a1': 0.7767451641304511, 'a2': 0.20863445211527876}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.658738691333155 0.24011797557725215 0.10114333308959286



Val: 100%|██████████| 229/229 [04:29<00:00,  1.18s/it, eval_loss=0.1054, gpu_mem=10.07 GB]
Valid loss:0.1054
Val metric mean prob: 0.1918
Best metric at: 0.5227 0.4550  0.7268
Cf: [[4636   30]
 [  54   46]]
[32m[I 2023-02-24 18:34:04,905][0m Trial 87 finished with value: 0.5227272727272727 and parameters: {'a1': 0.658738691333155, 'a2': 0.24011797557725215}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7358701384694011 0.21655081307434443 0.04757904845625449



Val: 100%|██████████| 229/229 [04:30<00:00,  1.18s/it, eval_loss=0.1071, gpu_mem=10.07 GB]
Valid loss:0.1071
Val metric mean prob: 0.1939
Best metric at: 0.5172 0.4790  0.7219
Cf: [[4637   29]
 [  55   45]]
[32m[I 2023-02-24 18:38:45,235][0m Trial 88 finished with value: 0.5172413793103449 and parameters: {'a1': 0.7358701384694011, 'a2': 0.21655081307434443}. Best is trial 70 with value: 0.5287356321839081.[0m


none:  fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth
noob fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth
0.7047040731686101 0.2335462007025167 0.06174972612887325



Val:  70%|███████   | 161/229 [03:29<01:23,  1.23s/it, eval_loss=0.1046, gpu_mem=10.07 GB]

In [None]:
def pfbeta_np(labels, preds, beta=1):
    """Probabilistic F-beta score computed with NumPy.

    ``preds`` is clipped to [0, 1]; the clipped values are summed over the
    rows where ``labels == 1`` (soft true positives) and where ``labels == 0``
    (soft false positives), and precision/recall are built from those sums.

    Args:
        labels: array-like of 0/1 ground-truth labels.
        preds: array-like of scores, same length as ``labels``.
        beta: weight of recall relative to precision (default 1).

    Returns:
        The F-beta value, or 0.0 when precision or recall is not positive
        (for example no positive labels, or no prediction mass on them).
    """
    # Accept lists / Series as well as ndarrays.
    labels = np.asarray(labels)
    preds = np.clip(np.asarray(preds), 0, 1)

    y_true_count = labels.sum()
    ctp = preds[labels == 1].sum()
    # Without true-positive mass the score is 0; returning here also avoids the
    # 0/0 division (NaN + RuntimeWarning) the precision/recall ratios would hit.
    if not (ctp > 0 and y_true_count > 0):
        return 0.0

    cfp = preds[labels == 0].sum()
    beta_squared = beta * beta
    c_precision = ctp / (ctp + cfp)
    c_recall = ctp / y_true_count
    if c_precision > 0 and c_recall > 0:
        return (1 + beta_squared) * (c_precision * c_recall) / (beta_squared * c_precision + c_recall)
    return 0.0

: 

In [None]:
def pfbeta_np(labels, preds, beta=1):
    """Probabilistic F-beta score computed with NumPy.

    ``preds`` is clipped to [0, 1]; the clipped values are summed over the
    rows where ``labels == 1`` (soft true positives) and where ``labels == 0``
    (soft false positives), and precision/recall are built from those sums.

    Args:
        labels: array-like of 0/1 ground-truth labels.
        preds: array-like of scores, same length as ``labels``.
        beta: weight of recall relative to precision (default 1).

    Returns:
        The F-beta value, or 0.0 when precision or recall is not positive
        (for example no positive labels, or no prediction mass on them).
    """
    # Accept lists / Series as well as ndarrays.
    labels = np.asarray(labels)
    preds = np.clip(np.asarray(preds), 0, 1)

    y_true_count = labels.sum()
    ctp = preds[labels == 1].sum()
    # Without true-positive mass the score is 0; returning here also avoids the
    # 0/0 division (NaN + RuntimeWarning) the precision/recall ratios would hit.
    if not (ctp > 0 and y_true_count > 0):
        return 0.0

    cfp = preds[labels == 0].sum()
    beta_squared = beta * beta
    c_precision = ctp / (ctp + cfp)
    c_recall = ctp / y_true_count
    if c_precision > 0 and c_recall > 0:
        return (1 + beta_squared) * (c_precision * c_recall) / (beta_squared * c_precision + c_recall)
    return 0.0
# Setup for the fold-2 checkpoint-blending search run by `objective` below.
# NOTE(review): `valid_dataset` (and the `valid_df` / `fold` globals that `objective`
# reads) are not created in this cell -- the lines that would build them are
# commented out and say fold=3, while every checkpoint listed here is a fold-2
# file. Presumably they come from an earlier cell; confirm they hold the
# fold-2 validation split before running.
# fold=3
# valid_df = df[df['fold']==fold].reset_index(drop=True)
# valid_dataset = BreastDataset(valid_df, transforms=data_transforms['valid'])

# Fixed-order loader (no shuffling, no dropped batch): `objective` writes the
# predictions back onto `valid_df` row by row, so the order must be preserved.
valid_loader = DataLoader(valid_dataset, batch_size = CFG.valid_bs, 
                                num_workers=1, shuffle=False, drop_last=False)
set_seed(1)
# Path the blended state_dict is written to on every trial.
out_file = 'swa_model_fold2_5.pth' 
# Checkpoints to blend. `objective` scales the first entry by a1 and matches the
# remaining entries by exact file name to a2..a6, so renaming or reordering an
# entry here requires the matching change there.
iteration = [
    'fold2/tf_efficientnetv2_b2_fold_2_model_epoch_3_0.4670_0.406.pth',
    'fold2/tf_efficientnetv2_b2_fold_2_model_epoch_4_0.4746_0.314.pth',
    'fold2/tf_efficientnetv2_b2_fold_2_model_epoch_9_0.4681_0.319.pth',
    
    'fold2/tf_efficientnetv2_b2_fold_2_model_epoch_4_0.4585_0.236.pth',
    'fold2/tf_efficientnetv2_b2_fold_2_model_epoch_7_0.4557_0.241.pth',
    'fold2/tf_efficientnetv2_b2_fold_2_model_epoch_10_0.4550_0.245.pth',
]

# Loss handed to `valid_fn_two` inside `objective`.
criterion = nn.CrossEntropyLoss().to(CFG.device)
# `objective` assigns its own local `best_metric`, so it never reads this value.
best_metric = 0
torch.cuda.empty_cache()
def objective(trial):
    """Optuna objective: blend the six checkpoints in ``iteration`` and score the blend.

    Samples five weights from ``trial`` (the sixth is the remainder, so the six
    sum to 1), forms the weighted sum of the checkpoints' ``state_dict``
    tensors, writes it to ``out_file``, loads it into a fresh ``Model`` and runs
    ``valid_fn_two`` on ``valid_loader``. Column 1 of the returned predictions is
    averaged per ``patient_id``/``laterality`` and thresholds from 0.001 up to
    (excluding) 0.599 are swept for the best binarised ``pfbeta_np`` score.

    Reads the notebook globals ``iteration``, ``out_file``, ``valid_loader``,
    ``valid_df``, ``criterion``, ``CFG``, ``LOGGER`` and ``fold``, and overwrites the
    ``prediction_id`` and ``raw_pred`` columns of ``valid_df`` in place.

    Args:
        trial: the Optuna trial used to sample ``a1`` ... ``a5``.

    Returns:
        The best thresholded ``pfbeta_np`` value (0 if no threshold scores above 0).

    Raises:
        optuna.TrialPruned: when the weights drawn so far leave no valid range
            for the next one.
        ValueError: when ``iteration`` does not hold one checkpoint per weight.
    """
    import optuna  # function-scope import: only needed for the prune guard below

    def _draw(name, low, remaining, log=False):
        # Sample one weight from [low, remaining - 0.001]; the 0.001 reserve
        # leaves room for the weights that are still to be drawn.
        high = remaining - 0.001
        if high < low:
            # An inverted range makes the suggest call raise ValueError and
            # abort the whole study; pruning just skips this trial instead.
            raise optuna.TrialPruned(f"no room left for {name}: high={high} < low={low}")
        return trial.suggest_float(name, low, high, log=log)

    # Sequential sampling on the simplex: each weight is drawn from what is left.
    a1 = trial.suggest_float('a1', 0.001, 0.99)
    remaining = 1 - a1
    a2 = _draw('a2', 0.0009, remaining)
    remaining = remaining - a2
    a3 = _draw('a3', 0.0009, remaining)
    remaining = remaining - a3
    a4 = _draw('a4', 0.0009, remaining, log=True)
    remaining = remaining - a4
    a5 = _draw('a5', 0.00009, remaining, log=True)
    a6 = remaining - a5

    # Pair weights with checkpoints by position. Matching on literal file names
    # would silently skip any checkpoint whose name is edited in `iteration`.
    weights = (a1, a2, a3, a4, a5, a6)
    tags = ("none: ", "hehe", "hehe", "noob", "noobie", "noobie")
    if len(iteration) != len(weights):
        raise ValueError(
            f"expected {len(weights)} checkpoints in `iteration`, got {len(iteration)}"
        )

    state_dict = None
    for tag, ckpt_path, weight in zip(tags, iteration, weights):
        print(tag, ckpt_path)
        loaded = torch.load(ckpt_path, map_location="cpu")['state_dict']
        if state_dict is None:
            # Reuse the first checkpoint's mapping as the accumulator.
            state_dict = loaded
            for k in list(loaded.keys()):
                state_dict[k] = loaded[k] * weight
        else:
            for k in list(loaded.keys()):
                state_dict[k] = state_dict[k] + weight * loaded[k]
    print(a1, a2, a3, a4, a5, a6)
    print('')

    # Keep the blended weights on disk, as before, but load the model straight
    # from memory rather than re-reading a hard-coded file name.
    torch.save({'state_dict': state_dict}, out_file)

    model = Model(model_name=CFG.model_name).to(CFG.device)
    model.load_state_dict(state_dict)

    loss_valid, valid_preds, _ = valid_fn_two(valid_loader, model, criterion, CFG.device)
    valid_preds = np.array(valid_preds[:, 1]).flatten()

    # One prediction per patient/side: average the image-level scores.
    valid_df['prediction_id'] = valid_df['patient_id'].astype(str) + '_' + valid_df['laterality'].astype(str)
    valid_df['raw_pred'] = valid_preds
    LOGGER.info(f"Valid loss:{loss_valid:.4f}")
    # List selection: tuple-style ['a', 'b'] on a groupby was removed in pandas 2.0.
    grp_df = valid_df.groupby('prediction_id')[['raw_pred', 'cancer']].mean()
    # Built-in int: the `np.int` alias was removed in NumPy 1.24.
    grp_df['cancer'] = grp_df['cancer'].astype(int)
    valid_labels_mean = grp_df['cancer'].values
    valid_preds_mean = grp_df['raw_pred'].values
    val_metric_mean = pfbeta(valid_labels_mean, valid_preds_mean)
    LOGGER.info(f"Val metric mean prob: {val_metric_mean:.4f}")

    best_metric = 0
    best_threshold_mean = 0
    best_auc = 0
    best_cf = None
    for threshold in np.arange(0.001, 0.599, 0.001):
        valid_argmax = (valid_preds_mean > threshold).astype(np.int32)
        val_metric = pfbeta_np(valid_labels_mean, valid_argmax)
        if val_metric > best_metric:
            best_metric = val_metric
            best_threshold_mean = threshold
            # Only the best threshold's values are reported, so compute them only
            # on improvement. Note the AUC is taken on the binarised predictions.
            best_auc = roc_auc_score(valid_labels_mean, valid_argmax)
            best_cf = confusion_matrix(valid_labels_mean, valid_argmax)

    if best_metric>0.52:
        state = {'state_dict': model.state_dict()}
        # NOTE(review): `fold` is a notebook global not set in this cell -- confirm it
        # matches the fold of the checkpoints in `iteration`, or the file is mislabelled.
        path = f'swa_{CFG.model_name}_fold_{fold}_model_{best_metric:.4f}_{best_threshold_mean:.3f}.pth'
        torch.save(state, path)

    LOGGER.info(f"Best metric at: {best_metric:.4f} {best_threshold_mean:.4f}  {best_auc:.4f}")
    LOGGER.info(f"Cf: {best_cf}")
    torch.cuda.empty_cache()
    return best_metric

# Maximise the value returned by `objective`; the TPE sampler is seeded so the
# sequence of sampled weights is repeatable.
study = optuna.create_study(direction='maximize', sampler = TPESampler(seed=777))
# 200 trials; each one blends the checkpoints and runs one validation pass.
study.optimize(func=objective, n_trials=200)
# Last expression of the cell: displays the sampled parameters of the best trial.
study.best_params

: 

In [None]:

# Setup for the fold-0 checkpoint-blending search run by the `objective` defined next.
set_seed(1)
# Path for the blended fold-0 weights.
out_file = 'swa_model_fold0_5.pth' 
# Checkpoints to blend. The `objective` below scales the first entry by a1 and
# matches later entries by exact file name, so edits here must be mirrored there.
# The commented-out entries are candidates currently excluded from the blend.
iteration = [
    'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_3_0.4945_0.488.pth',
    'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_5_0.4757_0.230.pth',
    'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_9_0.4713_0.430.pth',
    
    'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_10_0.4569_0.259.pth',
    'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_6_0.4520_0.128.pth',
    'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_7_0.4510_0.266.pth',
    # 'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_8_0.4403_0.415.pth',
#     'fold0/tf_efficientnetv2_b2_fold_0_model_epoch_11_0.4387_0.436.pth'
]

# Cross-entropy loss moved to the configured device.
criterion = nn.CrossEntropyLoss().to(CFG.device)
# Module-level placeholder, reset to 0 before the search starts.
best_metric = 0
# Release cached GPU memory before the search starts.
torch.cuda.empty_cache()
def objective(trial):
    """Optuna objective: score one weighted blend of the checkpoints in ``iteration``.

    Six blend weights that sum to 1 are drawn, the checkpoint state dicts are
    combined with those weights, the blend is written to ``out_file`` and loaded
    into a fresh ``Model``, and the model is run over ``valid_loader``.
    Predictions are averaged per ``patient_id`` + ``laterality`` and thresholds
    from 0.001 up to about 0.599 (step 0.001) are swept for the best
    ``pfbeta_np`` value.

    Relies on notebook globals: ``iteration``, ``out_file``, ``criterion``,
    ``valid_loader``, ``valid_df`` (columns ``prediction_id`` and ``raw_pred``
    are written to it), ``fold``, ``CFG``, ``LOGGER``, ``Model``,
    ``valid_fn_two``, ``pfbeta`` and ``pfbeta_np``.

    Args:
        trial: Optuna trial used to sample the weights ``a1`` .. ``a5``.

    Returns:
        The best thresholded ``pfbeta_np`` score found for this blend.

    Raises:
        ValueError: if ``iteration`` does not hold exactly one checkpoint per weight.
    """
    # Values that were pinned by hand in earlier experiments, kept for reference:
    #   a1 = 0.036839841333967636, a2 = 0.6490629183820655, a3 = 0.3140972402839668
    #   a2 = 0.47142151346976024,  a3 = 0.3596277792186039
    #
    # Each weight is sampled from what is left of the unit budget, and the last
    # one takes the remainder so the weights sum to 1.
    # NOTE(review): when the remaining budget drops below a lower bound the
    # suggest call receives low > high — confirm how the installed Optuna
    # version handles that.
    a1 = trial.suggest_uniform('a1', 0.001, 0.99)
    a2 = trial.suggest_uniform('a2', 0.0009, 1-a1-0.001)
    a3 = trial.suggest_uniform('a3', 0.00009, 1-a1-a2-0.001)
    a4 = trial.suggest_loguniform('a4', 0.000009, 1-a1-a2-a3-0.001)
    a5 = trial.suggest_loguniform('a5', 0.0000009, 1-a1-a2-a3-a4-0.001)
    a6 = 1-a1-a2-a3-a4-a5
    weights = (a1, a2, a3, a4, a5, a6)
    # Progress tags printed next to each checkpoint path (unchanged from the
    # original per-checkpoint branches).
    tags = ("none: ", "hehe", "noob", "noob", "noob", "noob")

    # Weights are paired with checkpoints by position; refuse to run on a list of
    # the wrong length instead of silently building a blend that does not sum to 1.
    if len(iteration) != len(weights):
        raise ValueError(
            f"expected {len(weights)} checkpoints in `iteration`, got {len(iteration)}"
        )

    state_dict = None
    for ckpt_path, weight, tag in zip(iteration, weights, tags):
        # Always load onto CPU storage, whatever device the checkpoint was saved from.
        ckpt = torch.load(ckpt_path, map_location=lambda storage, loc: storage)
        print(tag, ckpt_path)
        tensors = ckpt['state_dict']
        if state_dict is None:
            # Reuse the first checkpoint's dict as the accumulator.
            state_dict = tensors
            for k in list(tensors.keys()):
                state_dict[k] = tensors[k]*weight
        else:
            for k in list(tensors.keys()):
                state_dict[k] = state_dict[k] + weight*tensors[k]
    print(*weights)
    print('')

    torch.save({'state_dict': state_dict}, out_file)

    model = Model(model_name=CFG.model_name).to(CFG.device)
    # Read back the file that was just written (previously a hard-coded copy of
    # the same name).
    checkpoint = torch.load(out_file)
    model.load_state_dict(checkpoint['state_dict'])

    loss_valid, valid_preds, _ = valid_fn_two(valid_loader, model, criterion, CFG.device)
    # Column 1 of the model output — presumably the positive-class score; confirm
    # against valid_fn_two.
    valid_preds = valid_preds[:, 1]
    valid_df['prediction_id'] = valid_df['patient_id'].astype(str) + '_' + valid_df['laterality'].astype(str)
    valid_preds = np.array(valid_preds).flatten()

    valid_df['raw_pred'] = valid_preds
    LOGGER.info(f"Valid loss:{loss_valid:.4f}")
    # A list selects the two columns; tuple-style selection is gone in pandas 2.0.
    grp_df = valid_df.groupby('prediction_id')[['raw_pred', 'cancer']].mean()
    # `np.int` was only an alias of the builtin and no longer exists in NumPy >= 1.24.
    grp_df['cancer'] = grp_df['cancer'].astype(int)
    valid_labels_mean = grp_df['cancer'].values
    valid_preds_mean = grp_df['raw_pred'].values
    val_metric_mean = pfbeta(valid_labels_mean, valid_preds_mean)
    LOGGER.info(f"Val metric mean prob: {val_metric_mean:.4f}")

    best_metric = 0
    best_threshold_mean = 0
    best_auc = 0
    best_cf = None
    for threshold in np.arange(0.001, 0.599, 0.001):
        valid_argmax = (valid_preds_mean>threshold).astype(np.int32)
        val_metric = pfbeta_np(valid_labels_mean, valid_argmax)
        if val_metric > best_metric:
            best_metric = val_metric
            best_threshold_mean = threshold
            # Only needed for the winning threshold. Note the AUC is taken on the
            # thresholded 0/1 predictions, not on the averaged scores.
            best_auc = roc_auc_score(valid_labels_mean, valid_argmax)
            best_cf = confusion_matrix(valid_labels_mean, valid_argmax)
    if best_metric>0.52:
        # Keep blends that clear the score bar; `fold` comes from the notebook state.
        state = {'state_dict': model.state_dict()}
        path = f'swa_{CFG.model_name}_fold_{fold}_model_{best_metric:.4f}_{best_threshold_mean:.4f}.pth'
        torch.save(state, path)

    LOGGER.info(f"Best metric at: {best_metric:.4f} {best_threshold_mean:.4f}  {best_auc:.4f}")
    LOGGER.info(f"Cf: {best_cf}")
    return best_metric

# Search the blend weights with a seeded TPE sampler, maximising the value
# returned by objective(); `optuna` and `TPESampler` are imported outside this view.
study = optuna.create_study(direction='maximize', sampler = TPESampler(seed=777))
# 100 trials; each trial calls objective() once.
study.optimize(func=objective, n_trials=100)
study.best_params
# Notes from earlier runs — presumably blend weights followed by fold / score
# (the columns are not labelled; verify before reusing):
# # 0.5563409550491111 0.4436590449508889 fold 0
# # 0.12634002523631388 0.8351954705276587 0.03846450423602743 0.5393 
# # 0.583301614081906 0.3673525472043472 0.04934583871374687 fold 2 0.50
# # 0.1689507073116359 0.47142151346976024 0.3596277792186039 fold 2 0.5055 0.5055 0.3670  0.7261

: 

In [None]:
# Fold-1 blending setup: globals read by the objective() defined further down.
# set_seed is a notebook helper defined outside this cell.
set_seed(1)
# File the blended state dict is written to on every trial.
out_file = 'swa_model_fold1_5.pth' 
# Checkpoints to blend. The first entry is scaled by a1; the following ones are
# paired with a2 .. a7 inside objective().
iteration = [
    'fold1/tf_efficientnetv2_b2_fold_1_model_epoch_7_0.4578_0.382.pth',
    'fold1/tf_efficientnetv2_b2_fold_1_model_epoch_8_0.4569_0.264.pth',
    'fold1/tf_efficientnetv2_b2_fold_1_model_epoch_6_0.4530_0.274.pth',
    'fold1/tf_efficientnetv2_b2_fold_1_model_epoch_5_0.4444_0.409.pth',
    'fold1/tf_efficientnetv2_b2_fold_1_model_epoch_6_0.4432_0.319.pth',
    'fold1/tf_efficientnetv2_b2_fold_1_model_epoch_7_0.4430_0.474.pth',
    'fold1/tf_efficientnetv2_b2_fold_1_model_epoch_6_0.4403_0.422.pth',
    # 'fold1/tf_efficientnetv2_b2_fold_1_model_epoch_6_0.4403_0.422.pth',
    # 'fold1/tf_efficientnetv2_b2_fold_1_model_epoch_5_0.4393_0.278.pth',
]
def pfbeta_np(labels, preds, beta=1):
    """Probabilistic F-beta score for binary labels.

    Predictions are clipped to [0, 1]; the prediction mass on positive samples
    counts as true positives and the mass on negative samples as false positives.

    Args:
        labels: array-like of 0/1 ground-truth values.
        preds: array-like of scores, same length as ``labels``.
        beta: weight of recall relative to precision (1 gives F1).

    Returns:
        The F-beta value, or 0.0 when precision or recall is zero or undefined
        (no positive labels, or no prediction mass at all).
    """
    # Accept plain sequences as well as arrays.
    labels = np.asarray(labels)
    preds = np.asarray(preds).clip(0, 1)

    y_true_count = labels.sum()
    ctp = preds[labels == 1].sum()
    cfp = preds[labels == 0].sum()

    # Explicit guards: previously these cases went through a 0/0 division and
    # only reached the 0.0 branch because NaN compares as False.
    if y_true_count == 0 or (ctp + cfp) == 0:
        return 0.0

    beta_squared = beta * beta
    c_precision = ctp / (ctp + cfp)
    c_recall = ctp / y_true_count
    if c_precision > 0 and c_recall > 0:
        return (1 + beta_squared) * (c_precision * c_recall) / (beta_squared * c_precision + c_recall)
    return 0.0
# Loss passed to valid_fn_two during validation.
criterion = nn.CrossEntropyLoss().to(CFG.device)
# Module-level value; objective() assigns its own local `best_metric`, so this
# global is not updated by the trials.
best_metric = 0
torch.cuda.empty_cache()
def objective(trial):
    """Optuna objective: score one weighted blend of the checkpoints in ``iteration``.

    Seven blend weights that sum to 1 are drawn, the checkpoint state dicts are
    combined with those weights, the blend is written to ``out_file`` and loaded
    into a fresh ``Model``, and the model is run over ``valid_loader``.
    Predictions are averaged per ``patient_id`` + ``laterality`` and thresholds
    from 0.001 up to about 0.599 (step 0.001) are swept for the best
    ``pfbeta_np`` value.

    Relies on notebook globals: ``iteration``, ``out_file``, ``criterion``,
    ``valid_loader``, ``valid_df`` (columns ``prediction_id`` and ``raw_pred``
    are written to it), ``fold``, ``CFG``, ``LOGGER``, ``Model``,
    ``valid_fn_two``, ``pfbeta`` and ``pfbeta_np``.

    Args:
        trial: Optuna trial used to sample the weights ``a1`` .. ``a6``.

    Returns:
        The best thresholded ``pfbeta_np`` score found for this blend.

    Raises:
        ValueError: if ``iteration`` does not hold exactly one checkpoint per weight.
    """
    # Values that were pinned by hand in earlier experiments, kept for reference:
    #   a1 = 0.036839841333967636, a2 = 0.6490629183820655, a3 = 0.3140972402839668
    #   a2 = 0.47142151346976024,  a3 = 0.3596277792186039
    #
    # Each weight is sampled from what is left of the unit budget, and the last
    # one takes the remainder so the weights sum to 1. The upper bounds of a5 and
    # a6 now subtract the weights drawn before them; previously they reused a4's
    # bound, so the total could exceed 1 and a7 could become negative.
    # NOTE(review): when the remaining budget drops below a lower bound the
    # suggest call receives low > high — confirm how the installed Optuna
    # version handles that.
    a1 = trial.suggest_uniform('a1', 0.001, 0.99)
    a2 = trial.suggest_uniform('a2', 0.0009, 1-a1-0.001)
    a3 = trial.suggest_uniform('a3', 0.0009, 1-a1-a2-0.001)
    a4 = trial.suggest_loguniform('a4', 0.0009, 1-a1-a2-a3-0.001)
    a5 = trial.suggest_loguniform('a5', 0.0009, 1-a1-a2-a3-a4-0.001)
    a6 = trial.suggest_loguniform('a6', 0.0009, 1-a1-a2-a3-a4-a5-0.001)
    a7 = 1-a1-a2-a3-a4-a5-a6
    weights = (a1, a2, a3, a4, a5, a6, a7)
    # Progress tags printed next to each checkpoint path (unchanged from the
    # original per-checkpoint branches).
    tags = ("none: ", "hehe", "noob", "noob", "noob", "hehe", "hehe")

    # Weights are paired with checkpoints by position; refuse to run on a list of
    # the wrong length instead of silently building a blend that does not sum to 1.
    if len(iteration) != len(weights):
        raise ValueError(
            f"expected {len(weights)} checkpoints in `iteration`, got {len(iteration)}"
        )

    state_dict = None
    for ckpt_path, weight, tag in zip(iteration, weights, tags):
        # Always load onto CPU storage, whatever device the checkpoint was saved from.
        ckpt = torch.load(ckpt_path, map_location=lambda storage, loc: storage)
        print(tag, ckpt_path)
        tensors = ckpt['state_dict']
        if state_dict is None:
            # Reuse the first checkpoint's dict as the accumulator.
            state_dict = tensors
            for k in list(tensors.keys()):
                state_dict[k] = tensors[k]*weight
        else:
            for k in list(tensors.keys()):
                state_dict[k] = state_dict[k] + weight*tensors[k]
    print(*weights)
    print('')

    torch.save({'state_dict': state_dict}, out_file)

    model = Model(model_name=CFG.model_name).to(CFG.device)
    # Read back the file that was just written (previously a hard-coded copy of
    # the same name).
    checkpoint = torch.load(out_file)
    model.load_state_dict(checkpoint['state_dict'])

    loss_valid, valid_preds, _ = valid_fn_two(valid_loader, model, criterion, CFG.device)
    # Column 1 of the model output — presumably the positive-class score; confirm
    # against valid_fn_two.
    valid_preds = valid_preds[:, 1]
    valid_df['prediction_id'] = valid_df['patient_id'].astype(str) + '_' + valid_df['laterality'].astype(str)
    valid_preds = np.array(valid_preds).flatten()

    valid_df['raw_pred'] = valid_preds
    LOGGER.info(f"Valid loss:{loss_valid:.4f}")
    # A list selects the two columns; tuple-style selection is gone in pandas 2.0.
    grp_df = valid_df.groupby('prediction_id')[['raw_pred', 'cancer']].mean()
    # `np.int` was only an alias of the builtin and no longer exists in NumPy >= 1.24.
    grp_df['cancer'] = grp_df['cancer'].astype(int)
    valid_labels_mean = grp_df['cancer'].values
    valid_preds_mean = grp_df['raw_pred'].values
    val_metric_mean = pfbeta(valid_labels_mean, valid_preds_mean)
    LOGGER.info(f"Val metric mean prob: {val_metric_mean:.4f}")

    best_metric = 0
    best_threshold_mean = 0
    best_auc = 0
    best_cf = None
    for threshold in np.arange(0.001, 0.599, 0.001):
        valid_argmax = (valid_preds_mean>threshold).astype(np.int32)
        val_metric = pfbeta_np(valid_labels_mean, valid_argmax)
        if val_metric > best_metric:
            best_metric = val_metric
            best_threshold_mean = threshold
            # Only needed for the winning threshold. Note the AUC is taken on the
            # thresholded 0/1 predictions, not on the averaged scores.
            best_auc = roc_auc_score(valid_labels_mean, valid_argmax)
            best_cf = confusion_matrix(valid_labels_mean, valid_argmax)
    if best_metric>0.505:
        # Keep blends that clear the score bar; `fold` comes from the notebook state.
        state = {'state_dict': model.state_dict()}
        path = f'swa_{CFG.model_name}_fold_{fold}_model_{best_metric:.4f}_{best_threshold_mean:.4f}.pth'
        torch.save(state, path)

    LOGGER.info(f"Best metric at: {best_metric:.4f} {best_threshold_mean:.4f}  {best_auc:.4f}")
    LOGGER.info(f"Cf: {best_cf}")
    return best_metric

# Search the blend weights with a seeded TPE sampler, maximising the value
# returned by objective(); `optuna` and `TPESampler` are imported outside this view.
study = optuna.create_study(direction='maximize', sampler = TPESampler(seed=1))
# 150 trials; each trial calls objective() once.
study.optimize(func=objective, n_trials=150)
# Last expression of the cell: the parameters of the best trial.
study.best_params

: 

In [None]:
def pfbeta_np(labels, preds, beta=1):
    """Probabilistic F-beta score for binary labels.

    Predictions are clipped to [0, 1]; the prediction mass on positive samples
    counts as true positives and the mass on negative samples as false positives.

    Args:
        labels: array-like of 0/1 ground-truth values.
        preds: array-like of scores, same length as ``labels``.
        beta: weight of recall relative to precision (1 gives F1).

    Returns:
        The F-beta value, or 0.0 when precision or recall is zero or undefined
        (no positive labels, or no prediction mass at all).
    """
    # Accept plain sequences as well as arrays.
    labels = np.asarray(labels)
    preds = np.asarray(preds).clip(0, 1)

    y_true_count = labels.sum()
    ctp = preds[labels == 1].sum()
    cfp = preds[labels == 0].sum()

    # Explicit guards: previously these cases went through a 0/0 division and
    # only reached the 0.0 branch because NaN compares as False.
    if y_true_count == 0 or (ctp + cfp) == 0:
        return 0.0

    beta_squared = beta * beta
    c_precision = ctp / (ctp + cfp)
    c_recall = ctp / y_true_count
    if c_precision > 0 and c_recall > 0:
        return (1 + beta_squared) * (c_precision * c_recall) / (beta_squared * c_precision + c_recall)
    return 0.0
# Fold-3 blending setup: globals read by the objective() defined right below.
# NOTE(review): the three lines that would select fold 3 are commented out, so
# `valid_dataset` (and the `valid_df` / `fold` used by objective()) come from
# whatever an earlier cell left in the kernel — confirm they refer to fold 3.
# fold=3
# valid_df = df[df['fold']==fold].reset_index(drop=True)
# valid_dataset = BreastDataset(valid_df, transforms=data_transforms['valid'])

# Validation loader: fixed order, no dropped batch.
valid_loader = DataLoader(valid_dataset, batch_size = CFG.valid_bs, 
                                num_workers=1, shuffle=False, drop_last=False)
# set_seed is a notebook helper defined outside this cell.
set_seed(1)
# File the blended state dict is written to on every trial.
out_file = 'swa_model_fold3_5.pth' 
# Checkpoints to blend. The first entry is scaled by a1; the following ones are
# paired with a2 .. a6 inside objective().
iteration = [
    # 'fold1/tf_efficientnetv2_b2_fold_1_model_epoch_8_0.4625_0.367.pth',
    # 'fold1/tf_efficientnetv2_b2_fold_1_model_epoch_10_0.4766_0.251.pth',
    # 'fold1/tf_efficientnetv2_b2_fold_1_model_epoch_12_0.4824_0.297.pth',
    # 'fold1/tf_efficientnetv2_b2_fold_1_model_epoch_13_0.4771_0.241.pth',
    # 'fold1/tf_efficientnetv2_b2_fold_1_model_epoch_15_0.4878_0.242.pth'
    'foldsensemble/tf_efficientnetv2_b2_fold_3_model_epoch_6_0.4648_0.444.pth',
    'fold3/tf_efficientnetv2_b2_fold_3_model_epoch_7_0.4545_0.354.pth',
    'fold3/tf_efficientnetv2_b2_fold_3_model_epoch_2_0.4528_0.320.pth',
    'foldsensemble/tf_efficientnetv2_b2_fold_3_model_epoch_8_0.4471_0.371.pth',
    'fold3/tf_efficientnetv2_b2_fold_3_model_epoch_4_0.4379_0.392.pth',
    'fold3/tf_efficientnetv2_b2_fold_3_model_epoch_3_0.4317_0.364.pth',
    # 'fold3/tf_efficientnetv2_b2_fold_3_model_epoch_3_0.4224_0.288.pth',
    # 'foldsensemble/tf_efficientnetv2_b2_fold_3_model_epoch_7_0.4192_0.270.pth',
    # 'foldsensemble/tf_efficientnetv2_b2_fold_3_model_epoch_4_0.4103_0.343.pth',
    # 'fold3/tf_efficientnetv2_b2_fold_3_model_epoch_2_0.4528_0.320.pth',
    # 'fold3/tf_efficientnetv2_b2_fold_3_model_epoch_3_0.4317_0.364.pth',
    # 'fold3/tf_efficientnetv2_b2_fold_3_model_epoch_4_0.4311_0.361.pth',
    # 'fold3/tf_efficientnetv2_b2_fold_3_model_epoch_6_0.4304_0.332.pth'
]

# Loss passed to valid_fn_two during validation.
criterion = nn.CrossEntropyLoss().to(CFG.device)
# Module-level value; objective() assigns its own local `best_metric`, so this
# global is not updated by the trials.
best_metric = 0
torch.cuda.empty_cache()
def objective(trial):
    """Optuna objective: score one weighted blend of the checkpoints in ``iteration``.

    Six blend weights that sum to 1 are drawn, the checkpoint state dicts are
    combined with those weights, the blend is written to ``out_file`` and loaded
    into a fresh ``Model``, and the model is run over ``valid_loader``.
    Predictions are averaged per ``patient_id`` + ``laterality`` and thresholds
    from 0.001 up to about 0.599 (step 0.001) are swept for the best
    ``pfbeta_np`` value.

    Relies on notebook globals: ``iteration``, ``out_file``, ``criterion``,
    ``valid_loader``, ``valid_df`` (columns ``prediction_id`` and ``raw_pred``
    are written to it), ``fold``, ``CFG``, ``LOGGER``, ``Model``,
    ``valid_fn_two``, ``pfbeta`` and ``pfbeta_np``.

    Args:
        trial: Optuna trial used to sample the weights ``a1`` .. ``a5``.

    Returns:
        The best thresholded ``pfbeta_np`` score found for this blend.

    Raises:
        ValueError: if ``iteration`` does not hold exactly one checkpoint per weight.
    """
    # Values that were pinned by hand in earlier experiments, kept for reference:
    #   a2 = 0.12003546043452194, a3 = 0.8649578775769542
    #   a1 = 0.020317850755860567, a2 = 0.1293785181217534, a3 = 0.850303631122386
    #   a1 = a2 = a3 = a4 = a5 = 0.2
    #   a1 = 0.4700450486328235, a2 = 0.23862687145742947, a3 = 0.2913280799097471
    #
    # Each weight is sampled from what is left of the unit budget, and the last
    # one takes the remainder so the weights sum to 1.
    # NOTE(review): when the remaining budget drops below a lower bound the
    # suggest call receives low > high — confirm how the installed Optuna
    # version handles that.
    a1 = trial.suggest_uniform('a1', 0.001, 0.99)
    a2 = trial.suggest_uniform('a2', 0.0009, 1-a1-0.001)
    a3 = trial.suggest_loguniform('a3', 0.00009, 1-a1-a2-0.001)
    a4 = trial.suggest_loguniform('a4', 0.000009, 1-a1-a2-a3-0.001)
    a5 = trial.suggest_loguniform('a5', 0.0000009, 1-a1-a2-a3-a4-0.001)
    a6 = 1-a1-a2-a3-a4-a5
    weights = (a1, a2, a3, a4, a5, a6)
    # Progress tags printed next to each checkpoint path (unchanged from the
    # original per-checkpoint branches).
    tags = ("none: ", "hehe", "hehe", "noob", "noobie", "noobie")

    # Weights are paired with checkpoints by position; refuse to run on a list of
    # the wrong length instead of silently building a blend that does not sum to 1.
    if len(iteration) != len(weights):
        raise ValueError(
            f"expected {len(weights)} checkpoints in `iteration`, got {len(iteration)}"
        )

    state_dict = None
    for ckpt_path, weight, tag in zip(iteration, weights, tags):
        # Always load onto CPU storage, whatever device the checkpoint was saved from.
        ckpt = torch.load(ckpt_path, map_location=lambda storage, loc: storage)
        print(tag, ckpt_path)
        tensors = ckpt['state_dict']
        if state_dict is None:
            # Reuse the first checkpoint's dict as the accumulator.
            state_dict = tensors
            for k in list(tensors.keys()):
                state_dict[k] = tensors[k]*weight
        else:
            for k in list(tensors.keys()):
                state_dict[k] = state_dict[k] + weight*tensors[k]
    print(*weights)
    print('')

    torch.save({'state_dict': state_dict}, out_file)

    model = Model(model_name=CFG.model_name).to(CFG.device)
    # Read back the file that was just written (previously a hard-coded copy of
    # the same name).
    checkpoint = torch.load(out_file)
    model.load_state_dict(checkpoint['state_dict'])

    loss_valid, valid_preds, _ = valid_fn_two(valid_loader, model, criterion, CFG.device)
    # Column 1 of the model output — presumably the positive-class score; confirm
    # against valid_fn_two.
    valid_preds = valid_preds[:, 1]
    valid_df['prediction_id'] = valid_df['patient_id'].astype(str) + '_' + valid_df['laterality'].astype(str)
    valid_preds = np.array(valid_preds).flatten()

    valid_df['raw_pred'] = valid_preds
    LOGGER.info(f"Valid loss:{loss_valid:.4f}")
    # A list selects the two columns; tuple-style selection is gone in pandas 2.0.
    grp_df = valid_df.groupby('prediction_id')[['raw_pred', 'cancer']].mean()
    # `np.int` was only an alias of the builtin and no longer exists in NumPy >= 1.24.
    grp_df['cancer'] = grp_df['cancer'].astype(int)
    valid_labels_mean = grp_df['cancer'].values
    valid_preds_mean = grp_df['raw_pred'].values
    val_metric_mean = pfbeta(valid_labels_mean, valid_preds_mean)
    LOGGER.info(f"Val metric mean prob: {val_metric_mean:.4f}")

    best_metric = 0
    best_threshold_mean = 0
    best_auc = 0
    best_cf = None
    for threshold in np.arange(0.001, 0.599, 0.001):
        valid_argmax = (valid_preds_mean>threshold).astype(np.int32)
        val_metric = pfbeta_np(valid_labels_mean, valid_argmax)
        if val_metric > best_metric:
            best_metric = val_metric
            best_threshold_mean = threshold
            # Only needed for the winning threshold. Note the AUC is taken on the
            # thresholded 0/1 predictions, not on the averaged scores.
            best_auc = roc_auc_score(valid_labels_mean, valid_argmax)
            best_cf = confusion_matrix(valid_labels_mean, valid_argmax)
    if best_metric>0.51:
        # Keep blends that clear the score bar; `fold` comes from the notebook state.
        state = {'state_dict': model.state_dict()}
        path = f'swa_{CFG.model_name}_fold_{fold}_model_{best_metric:.4f}_{best_threshold_mean:.3f}.pth'
        torch.save(state, path)

    LOGGER.info(f"Best metric at: {best_metric:.4f} {best_threshold_mean:.4f}  {best_auc:.4f}")
    LOGGER.info(f"Cf: {best_cf}")
    torch.cuda.empty_cache()
    return best_metric

# Search the blend weights with a seeded TPE sampler, maximising the value
# returned by objective(); `optuna` and `TPESampler` are imported outside this view.
study = optuna.create_study(direction='maximize', sampler = TPESampler(seed=666))
# 200 trials; each trial calls objective() once.
study.optimize(func=objective, n_trials=200)
# Last expression of the cell: the parameters of the best trial.
study.best_params

: 

: 

: 

: 

: 