In [None]:
import numpy as np
import pandas as pd
import random
from glob import glob
import os, shutil
from tqdm import tqdm
tqdm.pandas()
import time
import copy
import joblib
from collections import defaultdict
import gc
from IPython import display as ipd
import math
# visualization
import cv2
from glob import glob
# Sklearn
from sklearn.model_selection import StratifiedKFold, KFold, StratifiedGroupKFold
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score, confusion_matrix, roc_curve
import timm
# PyTorch 
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler
import torch.nn.functional as F
from torch.optim.swa_utils import AveragedModel, SWALR
from transformers import get_cosine_schedule_with_warmup
from collections import defaultdict
# import matplotlib.pyplot as plt
# Albumentations for augmentations
import albumentations as A
import albumentations
import albumentations as albu
from albumentations.pytorch import ToTensorV2
from datetime import datetime
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Central configuration namespace; attributes are read as CFG.<name> by later cells.
class CFG:
    # Seed value kept with the config (the cells below call set_seed(1) directly).
    seed = 1
    # timm architecture identifier passed to the model constructor.
    model_name = "tf_efficientnetv2_b2"
    train_bs = 16
    # Validation batch size is four times the training batch size.
    valid_bs = train_bs*4
    # NOTE(review): the transforms cell defines its own module-level `image_size`
    # rather than reading this attribute — confirm the two are meant to stay in sync.
    image_size = 1024
    epochs = 25
    # Use the GPU when one is visible to PyTorch, otherwise fall back to CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Show which device was selected.
print(CFG.device)

In [None]:
# Load the table with the fold assignment per image. Later cells read the columns
# `fold`, `patient_id`, `image_id`, `laterality` and `cancer` from this frame.
df = pd.read_csv("/kaggle/input/10folds/train_10folds.csv")
# Preview the first rows as the cell output.
df.head()

In [None]:
def init_logger(log_file='train1.log'):
    """Return this module's logger, writing bare messages to the console and to a file.

    The function is safe to call repeatedly (for example when the notebook cell
    is re-run): handlers attached by a previous call are removed and closed
    first, so each message is emitted exactly once per destination instead of
    once per call.

    Args:
        log_file: Path of the log file that receives a copy of every message.

    Returns:
        The ``logging.Logger`` named after this module, set to INFO level with
        one stream handler and one file handler attached.
    """
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)

    # getLogger returns the same object for the same name, so handlers from an
    # earlier call are still attached; drop them to avoid duplicated log lines
    # and leaked file descriptors.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    for handler in (StreamHandler(), FileHandler(filename=log_file)):
        handler.setFormatter(Formatter("%(message)s"))
        logger.addHandler(handler)
    return logger

# Notebook-wide logger (default log file name comes from init_logger).
LOGGER = init_logger()
# Stamp the log with the wall-clock time at which this cell ran.
now = datetime.now()
datetime_now = now.strftime("%m/%d/%Y, %H:%M:%S")
LOGGER.info(f"Date :{datetime_now}")

In [None]:
from albumentations import DualTransform
image_size = 1024
def isotropically_resize_image(img, size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC):
    """Resize ``img`` so that its longer side equals ``size``, keeping the aspect ratio.

    Args:
        img: Array whose first two dimensions are (height, width).
        size: Target length of the longer side.
        interpolation_down: OpenCV interpolation flag used when the scale factor is <= 1.
        interpolation_up: OpenCV interpolation flag used when the scale factor is > 1.

    Returns:
        ``img`` itself when the longer side already equals ``size``; otherwise the
        result of ``cv2.resize``. The shorter side is truncated to an integer.
    """
    height, width = img.shape[:2]
    if max(width, height) == size:
        # Already at the requested size: hand back the input untouched.
        return img

    if width > height:
        scale = size / width
        new_width, new_height = size, height * scale
    else:
        scale = size / height
        new_width, new_height = width * scale, size

    interpolation = interpolation_up if scale > 1 else interpolation_down
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(img, (int(new_width), int(new_height)), interpolation=interpolation)


class IsotropicResize(DualTransform):
    """Albumentations transform that resizes the longer image side to ``max_side``.

    The aspect ratio is preserved (see ``isotropically_resize_image``). Masks are
    always resized with nearest-neighbour interpolation.

    Args:
        max_side: Target length of the longer side.
        interpolation_down: OpenCV flag used when the image is shrunk.
        interpolation_up: OpenCV flag used when the image is enlarged.
        always_apply: Forwarded to ``DualTransform``.
        p: Probability of applying the transform, forwarded to ``DualTransform``.
    """

    def __init__(self, max_side, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC,
                 always_apply=False, p=1):
        super(IsotropicResize, self).__init__(always_apply, p)
        self.max_side = max_side
        self.interpolation_down = interpolation_down
        self.interpolation_up = interpolation_up

    def apply(self, img, interpolation_down=None, interpolation_up=None, **params):
        """Resize an image.

        When an interpolation flag is not passed explicitly, the value configured
        on the instance is used. Previously the keyword defaults of this method
        were always used, so the constructor arguments had no effect.
        """
        # Compare against None explicitly: cv2.INTER_NEAREST is 0 and therefore falsy.
        if interpolation_down is None:
            interpolation_down = self.interpolation_down
        if interpolation_up is None:
            interpolation_up = self.interpolation_up
        return isotropically_resize_image(img, size=self.max_side, interpolation_down=interpolation_down,
                                          interpolation_up=interpolation_up)

    def apply_to_mask(self, img, **params):
        """Resize a mask with nearest-neighbour interpolation in both directions."""
        return self.apply(img, interpolation_down=cv2.INTER_NEAREST, interpolation_up=cv2.INTER_NEAREST, **params)

    def get_transform_init_args_names(self):
        """Names of the constructor arguments albumentations uses for serialisation."""
        return ("max_side", "interpolation_down", "interpolation_up")
    
# Albumentations pipelines keyed by split name.
#   "train": random vertical/horizontal flips, colour jitter, shift/scale/rotate
#            and cutout, followed by conversion to a tensor.
#   "valid": conversion to a tensor only.
# No resize, padding or normalisation step is active in either pipeline; all of
# them are commented out below, together with earlier augmentation experiments.
data_transforms = {
    "train": A.Compose([
        # A.Resize(image_size, image_size),
        # IsotropicResize(max_side = image_size),
        # A.PadIfNeeded(min_height=image_size, min_width=image_size, border_mode=cv2.BORDER_CONSTANT),
        # A.RandomBrightnessContrast(),
        A.VerticalFlip(p=0.5),   
        A.ColorJitter(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.05, rotate_limit=10, p=0.5),
        A.HorizontalFlip(p=0.5),
        # Up to 5 holes, each at most 10% of image_size per side.
        A.Cutout(max_h_size=int(image_size*0.1), max_w_size=int(image_size*0.1), num_holes=5, p=0.5), 
        # A.OneOf([
        #         A.OpticalDistortion(),
        #         A.IAAPiecewiseAffine(),
        #     ], p=0.1),
        # A.OneOf([
        #     A.GaussNoise(),
        #     A.MotionBlur(blur_limit=(3, 5)),
        # ], p=0.1),
        # A.ColorJitter(),
        # A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.05, rotate_limit=10, p=0.5),
        # A.HorizontalFlip(p=0.5),
        # A.Cutout(max_h_size=102, max_w_size=102, num_holes=5, p=0.5),
        # A.CLAHE(p=1.0),
        # albumentations.HorizontalFlip(p=0.5),
        # # albumentations.VerticalFlip(p=0.5),
        # albumentations.RandomBrightness(limit=0.2, p=0.75),
        # albumentations.RandomContrast(limit=0.2, p=0.75),

        # albumentations.OneOf([
        #     albumentations.OpticalDistortion(distort_limit=1.),
        #     albumentations.GridDistortion(num_steps=5, distort_limit=1.),
        # ], p=0.75),

        # albumentations.HueSaturationValue(hue_shift_limit=40, sat_shift_limit=40, val_shift_limit=0, p=0.75),
        # albumentations.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.3, rotate_limit=30, border_mode=0, p=0.75),
        # A.Cutout(always_apply=False, p=0.5, num_holes=1, max_h_size=409, max_w_size=409),
        # A.OneOf([ 
        # A.OpticalDistortion(distort_limit=1.0), 
        # A.GridDistortion(num_steps=5, distort_limit=1.),
        # A.ElasticTransform(alpha=3), ], p=0.2),
        # A.OneOf([
        #     # A.GaussNoise(var_limit=[10, 50]),
        #     A.GaussianBlur(),
        #     A.MotionBlur(),
        #     A.MedianBlur(), ], p=0.2),
        # A.OneOf([
        #     A.GridDistortion(num_steps=5, distort_limit=0.05, p=1.0),
        #     A.OpticalDistortion(distort_limit=0.05, shift_limit=0.05, p=1.0),
        #     A.ElasticTransform(alpha=1, sigma=50, alpha_affine=50, p=1.0)
        # ], p=0.25),
        # A.CoarseDropout(max_holes=8, max_height=image_size//20, max_width=image_size//20,
        #                  min_holes=5, fill_value=0, mask_fill_value=0, p=0.5),
        # A.Normalize(mean=0, std=1),
        ToTensorV2(),], p=1.0),
    
    "valid": A.Compose([
        # IsotropicResize(max_side =image_size),
        # A.PadIfNeeded(min_height=image_size, min_width=image_size, border_mode=cv2.BORDER_CONSTANT),
        # A.Normalize(mean=0, std=1),
        # A.Resize(image_size, image_size),
        ToTensorV2(),
        ], p=1.0)
}

LOGGER.info(f"train transform{data_transforms['train']}")


In [None]:
def pad(array, target_shape):
    """Zero-pad ``array`` at the end of every axis until it has ``target_shape``.

    Args:
        array: NumPy array to pad.
        target_shape: Desired size per axis; indexed once for each axis of ``array``.

    Returns:
        A new array; the original data sits at the start of every axis and the
        added trailing entries are filled by NumPy's "constant" mode.
    """
    trailing_widths = []
    for axis, current_size in enumerate(array.shape):
        # Nothing is added before the data, only after it.
        trailing_widths.append((0, target_shape[axis] - current_size))
    return np.pad(array, trailing_widths, "constant")

def load_img2(img_path):
    """Read an image file and return it with channels in RGB order.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The decoded image after conversion from OpenCV's BGR order to RGB.

    Raises:
        FileNotFoundError: If OpenCV could not read the file (missing path or
            undecodable content).
    """
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising, which
        # would otherwise surface as an opaque cvtColor error that hides the path.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
class BreastDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs, one per row of a dataframe.

    Each row must provide ``patient_id``, ``image_id`` and ``cancer``. The image
    file ``<patient_id>_<image_id>.png`` is taken from ``PRIMARY_DIR`` when it
    exists there and from ``FALLBACK_DIR`` otherwise.

    Args:
        df: Dataframe with one row per image.
        transforms: Optional callable invoked as ``transforms(image=img)`` that
            returns a mapping with the transformed image under ``'image'``.
            When omitted, the loaded image is only moved to channel-first layout.
        image_loader: Optional callable ``path -> image array``. Defaults to
            ``load_img2``; mainly useful for tests or alternative readers.
    """

    # Directory searched first, and the one used when the file is not found there.
    PRIMARY_DIR = "/kaggle/input/27300next/output"
    FALLBACK_DIR = "/kaggle/input/1024bicubic/output"

    def __init__(self, df, transforms=None, image_loader=None):
        self.df = df
        self.transforms = transforms
        self.image_loader = image_loader

    def _image_path(self, row):
        """Return the path of the image file belonging to ``row``."""
        file_name = f"{row.patient_id}_{row.image_id}.png"
        primary = f"{self.PRIMARY_DIR}/{file_name}"
        if os.path.exists(primary):
            return primary
        return f"{self.FALLBACK_DIR}/{file_name}"

    def __getitem__(self, index):
        """Return ``(float image tensor, long label tensor)`` for row ``index``."""
        row = self.df.iloc[index]
        # Resolve the default loader lazily so the module-level function is only
        # required when no custom loader was supplied.
        loader = self.image_loader if self.image_loader is not None else load_img2
        img = loader(self._image_path(row))
        label = row['cancer']
        if self.transforms is not None:
            img = self.transforms(image=img)['image']
        else:
            # Without a transform pipeline, still hand the model a channel-first array.
            img = np.ascontiguousarray(np.transpose(img, (2, 0, 1)))
        # as_tensor avoids re-copying (and the associated warning) when the
        # transform already produced a tensor.
        return torch.as_tensor(img).float(), torch.tensor(label).long()

    def __len__(self):
        """Number of rows, i.e. images, in the dataset."""
        return len(self.df)
    
# Smoke check: build a dataset from the rows of fold 0 with the training
# augmentations, fetch the first sample and print its shape, label and the
# largest pixel value.
fold0 = df[df['fold']==0]
train_dataset = BreastDataset(fold0, transforms = data_transforms['train'])
image, label = train_dataset[0]
print(image.shape, label)
print(image.max())

In [None]:

class ModelOld(nn.Module):
    """timm backbone followed by dropout and a two-class linear head.

    The backbone's own ``classifier`` is replaced by an identity so that it
    returns features, which then pass through ``Dropout(0.5)`` and ``fc``.

    Args:
        model_name: timm architecture name used to build the backbone. The
            backbone must expose a ``classifier`` attribute with ``in_features``.
    """

    def __init__(self, model_name):
        super().__init__()
        # Build the architecture that was asked for; previously the argument was
        # ignored and the global CFG.model_name was always used.
        self.backbone = timm.create_model(model_name, pretrained=False, drop_rate=0.3, drop_path_rate=0.2)
        # Read the feature width before the original classifier is replaced.
        self.fc = nn.Linear(self.backbone.classifier.in_features, 2)
        self.dropout = nn.Dropout(0.5)
        self.backbone.classifier = nn.Identity()

    def forward(self, x):
        """Return the two-class logits for the batch ``x``."""
        x = self.backbone(x)
        x = self.fc(self.dropout(x))
        return x


In [None]:
class AverageMeter(object):
    """Running weighted average that also remembers the latest value.

    Attributes are ``val`` (last value seen), ``sum`` (weighted total),
    ``count`` (total weight) and ``avg`` (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration given in seconds as ``'<minutes>m <seconds>s'``."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Describe the elapsed time and the estimated time still remaining.

    Args:
        since: Start timestamp as returned by ``time.time()``.
        percent: Fraction of the work completed so far; the elapsed time is
            divided by it to estimate the total duration.

    Returns:
        A string of the form ``'<elapsed> (remain <remaining>)'``.
    """
    elapsed = time.time() - since
    remaining = elapsed / percent - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [None]:

def valid_fn_two(val_dataloader, model, criterion, device):
    """Run one evaluation pass and collect the loss, class probabilities and labels.

    NOTE: the notebook defines ``valid_fn_two`` again in two later cells with a
    two-value return; after running all cells those later definitions are the
    ones in effect.

    Args:
        val_dataloader: Iterable yielding ``(images, labels)`` batches.
        model: Module mapping an image batch to logits; put into eval mode here.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(average loss, predictions, labels)`` where predictions are the
        softmax of the logits over dimension 1, concatenated over all batches,
        and labels are the concatenated ground-truth values.
    """
    losses = AverageMeter()
    model.eval()
    preds = []
    valid_labels = []
    pbar = tqdm(enumerate(val_dataloader), total=len(val_dataloader), desc='Val')
    for step, (images, labels) in pbar:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
        batch_size = labels.size(0)
        with torch.no_grad():
            outputs = model(images)
            loss = criterion(outputs, labels)
        valid_labels.append(labels.cpu().numpy())
        # Weight the batch loss by its size so the average is per sample.
        losses.update(loss.item(), batch_size)
        # Name the class dimension explicitly; relying on the implicit choice is deprecated.
        preds.append(F.softmax(outputs, dim=1).to('cpu').numpy())
        mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0
        pbar.set_postfix(eval_loss=f'{losses.avg:0.4f}',
                        gpu_mem=f'{mem:0.2f} GB')
    predictions = np.concatenate(preds)
    valid_labels = np.concatenate(valid_labels)
    return losses.avg, predictions, valid_labels


In [None]:

def pfbeta(labels, predictions, beta=1):
    """Probabilistic F-beta score computed with a plain Python loop.

    Predictions are clipped to ``[0, 1]``; the clipped value of each sample is
    added to the true-positive mass when its label is truthy and to the
    false-positive mass otherwise.

    Args:
        labels: Indexable sequence of ground-truth values (truthy = positive).
        predictions: Indexable sequence of scores, one per label.
        beta: Weight of recall relative to precision.

    Returns:
        The F-beta value, or 0 when it is undefined or zero (no positive labels,
        no predicted mass, or no true-positive mass).
    """
    y_true_count = 0
    ctp = 0
    cfp = 0

    for idx, label in enumerate(labels):
        prediction = min(max(predictions[idx], 0), 1)
        if label:
            y_true_count += 1
            ctp += prediction
        else:
            cfp += prediction

    # Precision or recall would divide by zero here; report the score as 0
    # instead of raising ZeroDivisionError (or producing NaN with NumPy scalars).
    if y_true_count == 0 or (ctp + cfp) == 0:
        return 0

    beta_squared = beta * beta
    c_precision = ctp / (ctp + cfp)
    c_recall = ctp / y_true_count
    if (c_precision > 0 and c_recall > 0):
        result = (1 + beta_squared) * (c_precision * c_recall) / (beta_squared * c_precision + c_recall)
        return result
    else:
        return 0

def pfbeta_np(labels, preds, beta=1):
    """Vectorised probabilistic F-beta score.

    Args:
        labels: Array-like of ground-truth values; entries equal to 1 count as
            positive and entries equal to 0 as negative.
        preds: Array-like of scores, one per label; clipped to ``[0, 1]``.
        beta: Weight of recall relative to precision.

    Returns:
        The F-beta value as a float, or ``0.0`` when it is undefined or zero
        (no positive labels, no predicted mass, or no true-positive mass).
    """
    # Accept plain sequences as well as arrays.
    labels = np.asarray(labels)
    preds = np.clip(np.asarray(preds), 0, 1)
    y_true_count = labels.sum()
    ctp = preds[labels == 1].sum()
    cfp = preds[labels == 0].sum()

    # Guard the two divisions below: 0/0 on NumPy scalars yields NaN plus a
    # RuntimeWarning rather than a clean zero.
    if y_true_count == 0 or (ctp + cfp) == 0:
        return 0.0

    beta_squared = beta * beta
    c_precision = ctp / (ctp + cfp)
    c_recall = ctp / y_true_count
    if (c_precision > 0 and c_recall > 0):
        result = (1 + beta_squared) * (c_precision * c_recall) / (beta_squared * c_precision + c_recall)
        return float(result)
    else:
        return 0.0
    
def dfs_freeze(module):
    """Turn off gradient tracking for every parameter of ``module``."""
    for weight in module.parameters():
        weight.requires_grad_(False)
        
def dfs_unfreeze(module):
    """Turn gradient tracking back on for every parameter of ``module``."""
    for weight in module.parameters():
        weight.requires_grad_(True)
    
def set_seed(seed = 42):
    '''Seed Python, NumPy and PyTorch and switch cuDNN to deterministic mode.

    Also stores the seed in the PYTHONHASHSEED environment variable and prints
    a confirmation line once everything is set.'''
    # Export the hash seed as a string, as environment values must be.
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Random number generators: stdlib, NumPy, then PyTorch (CPU and CUDA).
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # cuDNN: force deterministic kernels and disable the auto-tuner.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    print('> SEEDING DONE')

def sigmoid(x):
    """Logistic function ``1 / (1 + e^-x)`` for a scalar ``x``.

    Evaluated in a form that never exponentiates a large positive number, so
    strongly negative inputs return a value near 0 instead of raising
    OverflowError.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For negative x use the algebraically equal e^x / (1 + e^x); e^x <= 1 here.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)
def valid_fn_two(val_dataloader, model, criterion, device):
    """Run one evaluation pass and return the average loss and class probabilities.

    The forward pass and the loss run without gradients and, on CUDA devices,
    under mixed precision. The logits are cast back to float32 before the
    softmax so the returned probabilities are not quantised to half precision.

    Args:
        val_dataloader: Iterable yielding ``(images, labels)`` batches.
        model: Module mapping an image batch to logits; put into eval mode here.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device (``torch.device`` or string) the batches are moved to.

    Returns:
        ``(average loss, predictions)`` where predictions are the softmax of the
        logits over dimension 1, concatenated over all batches.
    """
    losses = AverageMeter()
    model.eval()
    preds = []
    # Mixed precision only applies to CUDA; asking for it on CPU just emits a warning.
    use_amp = torch.device(device).type == 'cuda'
    pbar = tqdm(enumerate(val_dataloader), total=len(val_dataloader), desc='Val')
    for step, (images, labels) in pbar:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
        batch_size = labels.size(0)
        with torch.no_grad():
            with autocast(enabled=use_amp):
                outputs = model(images)
                loss = criterion(outputs, labels)
        # Weight the batch loss by its size so the average is per sample.
        losses.update(loss.item(), batch_size)
        # Name the class dimension explicitly and leave half precision first.
        preds.append(F.softmax(outputs.float(), dim=1).to('cpu').numpy())
        mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0
        pbar.set_postfix(eval_loss=f'{losses.avg:0.4f}',
                        gpu_mem=f'{mem:0.2f} GB')
    predictions = np.concatenate(preds)
    return losses.avg, predictions
# Fix the random state and release cached memory before building the loaders.
set_seed(1)
gc.collect()
torch.cuda.empty_cache()
# Prepare the validation data for the fold(s) listed here. `valid_df` and
# `valid_loader` are left at module level and read by objective() in a later cell.
for fold in [6]:
    # NOTE(review): the message says "5 fold" although the CSV loaded earlier is
    # train_10folds.csv — confirm which is intended.
    LOGGER.info("5 fold")
    LOGGER.info(f"Fold: {fold}")
    LOGGER.info(f"Model name: {CFG.model_name}")
    # Rows of the selected fold, re-indexed from 0 so positions match dataset indices.
    valid_df = df[df['fold']==fold].reset_index(drop=True)
    LOGGER.info(f"Len valid df: {len(valid_df)}")
    
    valid_dataset = BreastDataset(valid_df, transforms=data_transforms['valid'])

    # shuffle=False and drop_last=False keep the predictions aligned with valid_df rows.
    valid_loader = DataLoader(valid_dataset, batch_size = CFG.valid_bs, 
                                  num_workers=1, shuffle=False, pin_memory=True, drop_last=False)
    # model = Model(model_name=CFG.model_name).to(device)
    # Bookkeeping values for the per-epoch evaluation loop that is commented out below.
    best_f1 = 0
    best_metric = 0
    total_epoch = 30
    warmup = 1
#     model = ModelOld(model_name=CFG.model_name).to(CFG.device)
#     checkpoint = torch.load("/kaggle/input/10folds/tf_efficientnetv2_b2_fold_0_model_epoch_2_0.5510_0.151.pth")
#     model.load_state_dict(checkpoint['state_dict'])
#     criterion = nn.CrossEntropyLoss().to(CFG.device)
#     LOGGER.info(f"Train bs: {CFG.train_bs}")
#     # LOGGER.info(f"Model: {model}")
#     LOGGER.info(f"{model.__class__.__name__}")
#     LOGGER.info(f"optimizer: {optimizer}")
#     LOGGER.info(f"total_epoch :{total_epoch}")
#     LOGGER.info(f"Warmup: {warmup}")
#     for epoch in range(1, total_epoch+1):
#         LOGGER.info(f"Epoch: {epoch}/{total_epoch}")
#         # loss_train = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, CFG.device)
#         loss_valid, valid_preds = valid_fn_two(valid_loader, model, criterion, CFG.device)
#         valid_preds = valid_preds[:, 1]
#         valid_df['prediction_id'] = valid_df['patient_id'].astype(str) + '_' + valid_df['laterality'].astype(str)
#         valid_preds = np.array(valid_preds).flatten()
        
#         valid_df['raw_pred'] = valid_preds
#         LOGGER.info(f"Valid loss:{loss_valid:.4f}")
#         # LOGGER.info(f"Train loss:{loss_train:.4f}, Valid loss:{loss_valid:.4f}")
#         # print(valid_df.head())
#         grp_df = valid_df.groupby('prediction_id')['raw_pred', 'cancer'].mean()
#         grp_df['cancer'] = grp_df['cancer'].astype(np.int)
#         valid_labels_mean = grp_df['cancer'].values
#         valid_preds_mean = grp_df['raw_pred'].values
#         # print(valid_labels[:5], valid_preds_mean[:5])
#         val_metric_mean = pfbeta(valid_labels_mean, valid_preds_mean)
#         LOGGER.info(f"Val metric mean prob: {val_metric_mean:.4f}")
#         best_metric_mean_at_epoch = 0
#         best_metric1 = 0
#         best_threshold_mean = 0
#         best_auc = 0
#         best_cf = None
#         for i in np.arange(0.001, 0.599, 0.001):
#             valid_argmax = (valid_preds_mean>i).astype(np.int32)
#     #             print(valid_argmax)
#             # val_metric = pfbeta(valid_labels_mean, valid_argmax)
#             val_metric1 = pfbeta_np(valid_labels_mean, valid_argmax)
#             val_acc = accuracy_score(valid_labels_mean, valid_argmax)
#             val_f1 = f1_score(valid_labels_mean, valid_argmax)
#             val_auc = roc_auc_score(valid_labels_mean, valid_argmax)
#             cf = confusion_matrix(valid_labels_mean, valid_argmax)
#             if val_metric1> best_metric1:
#                 best_metric1 = val_metric1
#                 # best_metric_mean_at_epoch = val_metric
#                 best_threshold_mean = i
#                 best_auc = val_auc
#                 best_cf = cf
#         LOGGER.info(f"Best metric at epoch {epoch}: {best_metric1:.4f} {best_threshold_mean:.4f}  {best_auc:.4f}")
#         LOGGER.info(f"Cf: {best_cf}")
#         if best_metric1> best_metric:

#             LOGGER.info(f"Model improve: {best_metric:.4f} -> {best_metric1:.4f}")
#             best_metric = best_metric1
#         state = {'epoch': epoch, 'state_dict': model.state_dict()}
#         # state = {'epoch': epoch, 'state_dict': model.state_dict(),'optimizer': optimizer.state_dict(), 'scheduler':scheduler.state_dict()}
#         path = f'foldonefive/{CFG.model_name}_fold_{fold}_model_epoch_{epoch}_{best_metric1:.4f}_{best_threshold_mean:.3f}.pth'
#         torch.save(state, path)

        

In [None]:
import optuna
from optuna.samplers import TPESampler

def valid_fn_two(val_dataloader, model, criterion, device):
    """Run one evaluation pass and return the average loss and class probabilities.

    The forward pass and the loss run without gradients and, on CUDA devices,
    under mixed precision. The logits are cast back to float32 before the
    softmax so the returned probabilities are not quantised to half precision.

    Args:
        val_dataloader: Iterable yielding ``(images, labels)`` batches.
        model: Module mapping an image batch to logits; put into eval mode here.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device (``torch.device`` or string) the batches are moved to.

    Returns:
        ``(average loss, predictions)`` where predictions are the softmax of the
        logits over dimension 1, concatenated over all batches.
    """
    losses = AverageMeter()
    model.eval()
    preds = []
    # Mixed precision only applies to CUDA; asking for it on CPU just emits a warning.
    use_amp = torch.device(device).type == 'cuda'
    pbar = tqdm(enumerate(val_dataloader), total=len(val_dataloader), desc='Val')
    for step, (images, labels) in pbar:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
        batch_size = labels.size(0)
        with torch.no_grad():
            with autocast(enabled=use_amp):
                outputs = model(images)
                loss = criterion(outputs, labels)
        # Weight the batch loss by its size so the average is per sample.
        losses.update(loss.item(), batch_size)
        # Name the class dimension explicitly and leave half precision first.
        preds.append(F.softmax(outputs.float(), dim=1).to('cpu').numpy())
        mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0
        pbar.set_postfix(eval_loss=f'{losses.avg:0.4f}',
                        gpu_mem=f'{mem:0.2f} GB')
    predictions = np.concatenate(preds)
    return losses.avg, predictions

In [None]:

# set_seed(1)
# out_file = 'swa_model_fold0_10.pth' 
# iteration = [
#     '/kaggle/input/10folds/tf_efficientnetv2_b2_fold_0_model_epoch_2_0.5510_0.151.pth',
#     '/kaggle/input/10folds/tf_efficientnetv2_b2_fold_0_model_epoch_13_0.5750_0.437.pth',
#     '/kaggle/input/10folds/tf_efficientnetv2_b2_fold_0_model_epoch_11_0.5610_0.440.pth'
# ]

# criterion = nn.CrossEntropyLoss().to(CFG.device)
# best_metric = 0
# torch.cuda.empty_cache()
# def objective(trial):
#     a1 = 0.036839841333967636 
#     a2 = 0.6490629183820655
#     a3 = 0.3140972402839668
# #     a2 = 0.47142151346976024 
# #     a3 = 0.3596277792186039
# #     a1 = trial.suggest_uniform('a1', 0.01, 0.79)
# #     a2 = 1-a1
# #     a1 = trial.suggest_uniform('a1', 0.01, 0.99)
# #     a2 = trial.suggest_uniform('a2', 0.01, 1-a1)
# #     a3 = 1-a1-a2
#     state_dict = None
#     for i in iteration:
#         f = i
#         print(f)
#         f = torch.load(f, map_location=lambda storage, loc: storage)
#         if state_dict is None:
#             print("none: ", i)
#             state_dict = f['state_dict']
#             key = list(f['state_dict'].keys())
#             for k in key:
#                 state_dict[k] = f['state_dict'][k]*a1
#         elif i=='/kaggle/input/10folds/tf_efficientnetv2_b2_fold_0_model_epoch_13_0.5750_0.437.pth': 
#             print("hehe", i)
#             key = list(f['state_dict'].keys())
#             for k in key:
#                 state_dict[k] = state_dict[k] + a2*f['state_dict'][k]
#         elif i=='/kaggle/input/10folds/tf_efficientnetv2_b2_fold_0_model_epoch_11_0.5610_0.440.pth':
#             print("noob", i)
#             key = list(f['state_dict'].keys())
#             for k in key:
#                 state_dict[k] = state_dict[k] + a3*f['state_dict'][k]
#     print(a1, a2, a3)
#     # for k in key:
#     #     state_dict[k] = state_dict[k] / len(iteration)
#     print('')

#     # print(out_file)
#     torch.save({'state_dict': state_dict}, out_file)

#     model = ModelOld(model_name=CFG.model_name).to(CFG.device)
#     checkpoint = torch.load("swa_model_fold0_10.pth")
#     model.load_state_dict(checkpoint['state_dict'])
# #     model = nn.DataParallel(model)

#     loss_valid, valid_preds = valid_fn_two(valid_loader, model, criterion, CFG.device)
#     valid_preds = valid_preds[:, 1]
#     valid_df['prediction_id'] = valid_df['patient_id'].astype(str) + '_' + valid_df['laterality'].astype(str)
#     valid_preds = np.array(valid_preds).flatten()
    
#     valid_df['raw_pred'] = valid_preds
#     LOGGER.info(f"Valid loss:{loss_valid:.4f}")
#     grp_df = valid_df.groupby('prediction_id')['raw_pred', 'cancer'].mean()
#     grp_df['cancer'] = grp_df['cancer'].astype(np.int)
#     valid_labels_mean = grp_df['cancer'].values
#     valid_preds_mean = grp_df['raw_pred'].values
#     # print(valid_labels[:5], valid_preds_mean[:5])
#     val_metric_mean = pfbeta(valid_labels_mean, valid_preds_mean)
#     LOGGER.info(f"Val metric mean prob: {val_metric_mean:.4f}")
#     best_metric_mean_at_epoch = 0
#     best_metric = 0
    
#     best_threshold_mean = 0
#     best_auc = 0
#     best_cf = None
#     for i in np.arange(0.001, 0.599, 0.001):
#         valid_argmax = (valid_preds_mean>i).astype(np.int32)
#         val_metric = pfbeta_np(valid_labels_mean, valid_argmax)
#         val_acc = accuracy_score(valid_labels_mean, valid_argmax)
#         val_f1 = f1_score(valid_labels_mean, valid_argmax)
#         val_auc = roc_auc_score(valid_labels_mean, valid_argmax)
#         cf = confusion_matrix(valid_labels_mean, valid_argmax)
#         if val_metric> best_metric:
#             best_metric = val_metric
#             # best_metric_mean_at_epoch = val_metric
#             best_threshold_mean = i
#             best_auc = val_auc
#             best_cf = cf
#     state = {'state_dict': model.state_dict()}
#     path = f'swa_{CFG.model_name}_fold_{fold}_model_{best_metric:.4f}_{best_threshold_mean:.4f}.pth'
#     torch.save(state, path)
    
#     LOGGER.info(f"Best metric at: {best_metric:.4f} {best_threshold_mean:.4f}  {best_auc:.4f}")
#     LOGGER.info(f"Cf: {best_cf}")
#     return best_metric

# study = optuna.create_study(direction='maximize', sampler = TPESampler(seed=1))
# study.optimize(func=objective, n_trials=1)
# study.best_params
# # # 0.5563409550491111 0.4436590449508889 fold 0
# # # 0.12634002523631388 0.8351954705276587 0.03846450423602743 0.5393 
# # # 0.583301614081906 0.3673525472043472 0.04934583871374687 fold 2 0.50
# # # 0.1689507073116359 0.47142151346976024 0.3596277792186039 fold 2 0.5055 0.5055 0.3670  0.7261

In [None]:
# set_seed(1)
# out_file = 'swa_model_fold2_10.pth' 
# iteration = [
#     '/kaggle/input/10folds/tf_efficientnetv2_b2_fold_2_model_epoch_5_0.4598_0.286.pth',
#     '/kaggle/input/10folds/tf_efficientnetv2_b2_fold_2_model_epoch_3_0.4615_0.250.pth',
#     '/kaggle/input/10folds/tf_efficientnetv2_b2_fold_2_model_epoch_7_0.4848_0.266.pth'
# ]

# criterion = nn.CrossEntropyLoss().to(CFG.device)
# best_metric = 0
# torch.cuda.empty_cache()
# def objective(trial):
# #     a1 = 0.015006661988523864 
# #     a2 = 0.12003546043452194 
# #     a3 = 0.8649578775769542
#     a1 = 0.020317850755860567 
#     a2 = 0.1293785181217534 
#     a3 = 0.850303631122386
# #     a1 = 0.12634002523631388
# #     a2 = 0.8351954705276587
# #     a3 = 0.03846450423602743
# #     a1 = trial.suggest_uniform('a1', 0.01, 0.99)
# #     a2 = trial.suggest_uniform('a2', 0.01, 1-a1)
# #     a3 = 1-a1-a2
#     state_dict = None
#     for i in iteration:
#         f = i
#         print(f)
#         f = torch.load(f, map_location=lambda storage, loc: storage)
#         if state_dict is None:
#             print("none: ", i)
#             state_dict = f['state_dict']
#             key = list(f['state_dict'].keys())
#             for k in key:
#                 state_dict[k] = f['state_dict'][k]*a1
#         elif i=='/kaggle/input/10folds/tf_efficientnetv2_b2_fold_2_model_epoch_3_0.4615_0.250.pth': 
#             print("hehe", i)
#             key = list(f['state_dict'].keys())
#             for k in key:
#                 state_dict[k] = state_dict[k] + a2*f['state_dict'][k]
#         elif i=='/kaggle/input/10folds/tf_efficientnetv2_b2_fold_2_model_epoch_7_0.4848_0.266.pth':
#             print("noob", i)
#             key = list(f['state_dict'].keys())
#             for k in key:
#                 state_dict[k] = state_dict[k] + a3*f['state_dict'][k]
#     print(a1, a2, a3)
#     # for k in key:
#     #     state_dict[k] = state_dict[k] / len(iteration)
#     print('')

#     # print(out_file)
#     torch.save({'state_dict': state_dict}, out_file)

#     model = ModelOld(model_name=CFG.model_name).to(CFG.device)
#     checkpoint = torch.load("swa_model_fold2_10.pth")
#     model.load_state_dict(checkpoint['state_dict'])
# #     model = nn.DataParallel(model)

#     loss_valid, valid_preds = valid_fn_two(valid_loader, model, criterion, CFG.device)
#     valid_preds = valid_preds[:, 1]
#     valid_df['prediction_id'] = valid_df['patient_id'].astype(str) + '_' + valid_df['laterality'].astype(str)
#     valid_preds = np.array(valid_preds).flatten()
    
#     valid_df['raw_pred'] = valid_preds
#     LOGGER.info(f"Valid loss:{loss_valid:.4f}")
#     grp_df = valid_df.groupby('prediction_id')['raw_pred', 'cancer'].mean()
#     grp_df['cancer'] = grp_df['cancer'].astype(np.int)
#     valid_labels_mean = grp_df['cancer'].values
#     valid_preds_mean = grp_df['raw_pred'].values
#     # print(valid_labels[:5], valid_preds_mean[:5])
#     val_metric_mean = pfbeta(valid_labels_mean, valid_preds_mean)
#     LOGGER.info(f"Val metric mean prob: {val_metric_mean:.4f}")
#     best_metric_mean_at_epoch = 0
#     best_metric = 0
    
#     best_threshold_mean = 0
#     best_auc = 0
#     best_cf = None
#     for i in np.arange(0.001, 0.599, 0.001):
#         valid_argmax = (valid_preds_mean>i).astype(np.int32)
#         val_metric = pfbeta_np(valid_labels_mean, valid_argmax)
#         val_acc = accuracy_score(valid_labels_mean, valid_argmax)
#         val_f1 = f1_score(valid_labels_mean, valid_argmax)
#         val_auc = roc_auc_score(valid_labels_mean, valid_argmax)
#         cf = confusion_matrix(valid_labels_mean, valid_argmax)
#         if val_metric> best_metric:
#             best_metric = val_metric
#             # best_metric_mean_at_epoch = val_metric
#             best_threshold_mean = i
#             best_auc = val_auc
#             best_cf = cf
#     state = {'state_dict': model.state_dict()}
#     path = f'swa_{CFG.model_name}_fold_{fold}_model_{best_metric:.4f}_{best_threshold_mean:.3f}.pth'
#     torch.save(state, path)
    
#     LOGGER.info(f"Best metric at: {best_metric:.4f} {best_threshold_mean:.4f}  {best_auc:.4f}")
#     LOGGER.info(f"Cf: {best_cf}")
#     return best_metric

# study = optuna.create_study(direction='maximize', sampler = TPESampler(seed=666))
# study.optimize(func=objective, n_trials=1)
# study.best_params

In [None]:
# set_seed(1)
# out_file = 'swa_model_fold1_10.pth' 
# iteration = [
#     '/kaggle/input/10folds/tf_efficientnetv2_b2_fold_1_model_epoch_3_0.5055_0.360.pth',
#     '/kaggle/input/10folds/tf_efficientnetv2_b2_fold_1_model_epoch_5_0.4865_0.324.pth',
# #     '/kaggle/input/10folds/tf_efficientnetv2_b2_fold_2_model_epoch_7_0.4848_0.266.pth'
# ]

# criterion = nn.CrossEntropyLoss().to(CFG.device)
# best_metric = 0
# torch.cuda.empty_cache()
# def objective(trial):
#     a1 = 0.696428379420678 
#     a2 = 0.30357162057932197
# #     a1 = 0.1689507073116359 
# #     a2 = 0.47142151346976024 
# #     a3 = 0.3596277792186039
# #     a1 = trial.suggest_uniform('a1', 0.01, 0.99)
# #     a2 = 1-a1
# #     a2 = trial.suggest_uniform('a2', 0.1, 1-a1)
# #     a3 = 1-a1-a2
#     state_dict = None
#     for i in iteration:
#         f = i
#         print(f)
#         f = torch.load(f, map_location=lambda storage, loc: storage)
#         if state_dict is None:
#             print("none: ", i)
#             state_dict = f['state_dict']
#             key = list(f['state_dict'].keys())
#             for k in key:
#                 state_dict[k] = f['state_dict'][k]*a1
#         elif i=='/kaggle/input/10folds/tf_efficientnetv2_b2_fold_1_model_epoch_5_0.4865_0.324.pth': 
#             print("hehe", i)
#             key = list(f['state_dict'].keys())
#             for k in key:
#                 state_dict[k] = state_dict[k] + a2*f['state_dict'][k]
# #         elif i=='/kaggle/input/10folds/tf_efficientnetv2_b2_fold_2_model_epoch_7_0.4848_0.266.pth':
# #             print("noob", i)
# #             key = list(f['state_dict'].keys())
# #             for k in key:
# #                 state_dict[k] = state_dict[k] + a3*f['state_dict'][k]
#     print(a1, a2)
#     # for k in key:
#     #     state_dict[k] = state_dict[k] / len(iteration)
#     print('')

#     # print(out_file)
#     torch.save({'state_dict': state_dict}, out_file)

#     model = ModelOld(model_name=CFG.model_name).to(CFG.device)
#     checkpoint = torch.load("/kaggle/input/10folds/tf_efficientnetv2_b2_fold_1_model_epoch_3_0.5055_0.360.pth")
#     model.load_state_dict(checkpoint['state_dict'])
# #     model = nn.DataParallel(model)

#     loss_valid, valid_preds = valid_fn_two(valid_loader, model, criterion, CFG.device)
#     valid_preds = valid_preds[:, 1]
#     valid_df['prediction_id'] = valid_df['patient_id'].astype(str) + '_' + valid_df['laterality'].astype(str)
#     valid_preds = np.array(valid_preds).flatten()
    
#     valid_df['raw_pred'] = valid_preds
#     LOGGER.info(f"Valid loss:{loss_valid:.4f}")
#     grp_df = valid_df.groupby('prediction_id')['raw_pred', 'cancer'].mean()
#     grp_df['cancer'] = grp_df['cancer'].astype(np.int)
#     valid_labels_mean = grp_df['cancer'].values
#     valid_preds_mean = grp_df['raw_pred'].values
#     # print(valid_labels[:5], valid_preds_mean[:5])
#     val_metric_mean = pfbeta(valid_labels_mean, valid_preds_mean)
#     LOGGER.info(f"Val metric mean prob: {val_metric_mean:.4f}")
#     best_metric_mean_at_epoch = 0
#     best_metric = 0
    
#     best_threshold_mean = 0
#     best_auc = 0
#     best_cf = None
#     for i in np.arange(0.001, 0.599, 0.001):
#         valid_argmax = (valid_preds_mean>i).astype(np.int32)
#         val_metric = pfbeta_np(valid_labels_mean, valid_argmax)
#         val_acc = accuracy_score(valid_labels_mean, valid_argmax)
#         val_f1 = f1_score(valid_labels_mean, valid_argmax)
#         val_auc = roc_auc_score(valid_labels_mean, valid_argmax)
#         cf = confusion_matrix(valid_labels_mean, valid_argmax)
#         if val_metric> best_metric:
#             best_metric = val_metric
#             # best_metric_mean_at_epoch = val_metric
#             best_threshold_mean = i
#             best_auc = val_auc
#             best_cf = cf
#     state = {'state_dict': model.state_dict()}
#     path = f'swa_{CFG.model_name}_fold_{fold}_model_{best_metric:.4f}_{best_threshold_mean:.4f}.pth'
#     torch.save(state, path)
    
#     LOGGER.info(f"Best metric at: {best_metric:.4f} {best_threshold_mean:.4f}  {best_auc:.4f}")
#     LOGGER.info(f"Cf: {best_cf}")
#     return best_metric

# study = optuna.create_study(direction='maximize', sampler = TPESampler(seed=666))
# study.optimize(func=objective, n_trials=40)
# study.best_params
# # 0.696428379420678 0.30357162057932197 fold 1
# # 0.5563409550491111 0.4436590449508889 fold 0
# # 0.12634002523631388 0.8351954705276587 0.03846450423602743 0.5393 
# # 0.583301614081906 0.3673525472043472 0.04934583871374687 fold 2 0.50
# # 0.1689507073116359 0.47142151346976024 0.3596277792186039 fold 2 0.5055 0.5055 0.3670  0.7261

In [None]:
# Inputs for the checkpoint-blending objective defined right below.
set_seed(1)
# File the blended state dict is written to by objective().
out_file = 'swa_model_fold6_10.pth' 
# Fold-6 checkpoints to blend. objective() scales the first entry by a1 and
# matches the second and third by their exact path strings (weights a2 and a3),
# so the order and the spelling of these paths matter.
iteration = [
    '/kaggle/input/10folds/tf_efficientnetv2_b2_fold_6_model_epoch_6_0.5128_0.307.pth',
    '/kaggle/input/10folds/tf_efficientnetv2_b2_fold_6_model_epoch_7_0.5385_0.423.pth',
    '/kaggle/input/10folds/tf_efficientnetv2_b2_fold_6_model_epoch_9_0.5135_0.338.pth'
]

# Loss passed to valid_fn_two when the blended model is evaluated.
criterion = nn.CrossEntropyLoss().to(CFG.device)
best_metric = 0
torch.cuda.empty_cache()
def objective(trial):
    """Optuna objective: evaluate a weighted average of the checkpoints in ``iteration``.

    Three weights ``a1``, ``a2``, ``a3`` (summing to 1) are sampled, the
    checkpoint state dicts listed in the module-level ``iteration`` are blended
    with those weights (in list order), the blended weights are written to
    ``out_file`` and loaded into a fresh ``ModelOld``. The model is scored on
    ``valid_loader``; per-image predictions are averaged per
    ``patient_id``/``laterality`` and a threshold sweep picks the cut-off with
    the highest ``pfbeta_np``. The evaluated model is also saved under a name
    that encodes that score and threshold.

    Relies on notebook globals: ``iteration``, ``out_file``, ``criterion``,
    ``valid_loader``, ``valid_df``, ``fold``, ``CFG``, ``LOGGER``.

    Args:
        trial: the Optuna trial used to sample ``a1`` and ``a2``.

    Returns:
        The best ``pfbeta_np`` value found over the threshold sweep.

    Raises:
        ValueError: if ``iteration`` does not contain exactly three checkpoints.
    """
    # suggest_float replaces the deprecated suggest_uniform; parameter names
    # are unchanged so study.best_params keeps the same keys.
    a1 = trial.suggest_float('a1', 0.01, 0.99)
    a2 = trial.suggest_float('a2', 0.01, 1 - a1)
    a3 = 1 - a1 - a2
    weights = (a1, a2, a3)

    # Pair weights with checkpoints by position rather than by hard-coded path,
    # so editing `iteration` cannot silently drop a checkpoint from the blend.
    if len(iteration) != len(weights):
        raise ValueError(
            f"objective expects {len(weights)} checkpoints in `iteration`, got {len(iteration)}"
        )

    state_dict = None
    for weight, ckpt_path in zip(weights, iteration):
        print(ckpt_path)
        ckpt_state = torch.load(ckpt_path, map_location='cpu')['state_dict']
        if state_dict is None:
            # Reuse the first checkpoint's container (keeps any metadata
            # attached to it) and scale its tensors by the first weight.
            state_dict = ckpt_state
            for k in list(state_dict.keys()):
                state_dict[k] = state_dict[k] * weight
        else:
            for k, tensor in ckpt_state.items():
                state_dict[k] = state_dict[k] + weight * tensor
    print(a1, a2, a3)
    print('')

    # Keep the blended weights on disk as an artifact of the latest trial.
    torch.save({'state_dict': state_dict}, out_file)

    model = ModelOld(model_name=CFG.model_name).to(CFG.device)
    # Load the in-memory blend directly instead of re-reading a hard-coded file name.
    model.load_state_dict(state_dict)

    loss_valid, valid_preds = valid_fn_two(valid_loader, model, criterion, CFG.device)
    # NOTE(review): column 1 is presumably the positive-class score — confirm
    # against valid_fn_two.
    valid_preds = np.array(valid_preds[:, 1]).flatten()

    valid_df['prediction_id'] = valid_df['patient_id'].astype(str) + '_' + valid_df['laterality'].astype(str)
    valid_df['raw_pred'] = valid_preds
    LOGGER.info(f"Valid loss:{loss_valid:.4f}")

    # Average per prediction_id. A list selector is required here: tuple-style
    # ['raw_pred', 'cancer'] indexing on a groupby was removed in pandas 2.0.
    grp_df = valid_df.groupby('prediction_id')[['raw_pred', 'cancer']].mean()
    # np.int was removed in NumPy 1.24; the builtin int is the same dtype.
    grp_df['cancer'] = grp_df['cancer'].astype(int)
    valid_labels_mean = grp_df['cancer'].values
    valid_preds_mean = grp_df['raw_pred'].values

    val_metric_mean = pfbeta(valid_labels_mean, valid_preds_mean)
    LOGGER.info(f"Val metric mean prob: {val_metric_mean:.4f}")

    best_metric = 0
    best_threshold_mean = 0
    best_auc = 0
    best_cf = None
    for threshold in np.arange(0.001, 0.599, 0.001):
        valid_argmax = (valid_preds_mean > threshold).astype(np.int32)
        val_metric = pfbeta_np(valid_labels_mean, valid_argmax)
        if val_metric > best_metric:
            best_metric = val_metric
            best_threshold_mean = threshold
            # Only compute the reporting metrics for a new best threshold.
            # Note the AUC is taken on the thresholded (0/1) predictions.
            best_auc = roc_auc_score(valid_labels_mean, valid_argmax)
            best_cf = confusion_matrix(valid_labels_mean, valid_argmax)

    # Persist the evaluated model, tagged with its best score and threshold.
    state = {'state_dict': model.state_dict()}
    path = f'swa_{CFG.model_name}_fold_{fold}_model_{best_metric:.4f}_{best_threshold_mean:.3f}.pth'
    torch.save(state, path)

    LOGGER.info(f"Best metric at: {best_metric:.4f} {best_threshold_mean:.4f}  {best_auc:.4f}")
    LOGGER.info(f"Cf: {best_cf}")
    return best_metric

# Search the blend weights with a seeded TPE sampler, maximising the value
# returned by `objective`, then show the best weights found.
study = optuna.create_study(
    sampler=TPESampler(seed=666),
    direction='maximize',
)
study.optimize(objective, n_trials=40)
study.best_params