In [2]:
!git clone https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer.git
!pip install adamp
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
sys.path.append('./Ranger-Deep-Learning-Optimizer/ranger')

fatal: destination path 'Ranger-Deep-Learning-Optimizer' already exists and is not an empty directory.
Collecting adamp
  Using cached adamp-0.3.0-py3-none-any.whl
Installing collected packages: adamp
Successfully installed adamp-0.3.0


In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from tqdm.notebook import tqdm
from pprint import pprint
import cv2, glob, time, random, os, ast, random
import warnings
warnings.filterwarnings("ignore")

import timm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingWarmRestarts, CosineAnnealingLR, OneCycleLR
from torch.optim import Adam, AdamW, SGD

import albumentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

from adamp import AdamP
from ranger import Ranger  # this is from ranger.py
from ranger913A import RangerVA  # this is from ranger913A.py
from rangerqh import RangerQH  # this is from rangerqh.py

from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold

In [4]:
# get the list of pretrained models
model_names = timm.list_models()
pprint(model_names)

['adv_inception_v3',
 'cspdarknet53',
 'cspdarknet53_iabn',
 'cspresnet50',
 'cspresnet50d',
 'cspresnet50w',
 'cspresnext50',
 'cspresnext50_iabn',
 'darknet53',
 'densenet121',
 'densenet121d',
 'densenet161',
 'densenet169',
 'densenet201',
 'densenet264',
 'densenet264d_iabn',
 'densenetblur121d',
 'dla34',
 'dla46_c',
 'dla46x_c',
 'dla60',
 'dla60_res2net',
 'dla60_res2next',
 'dla60x',
 'dla60x_c',
 'dla102',
 'dla102x',
 'dla102x2',
 'dla169',
 'dpn68',
 'dpn68b',
 'dpn92',
 'dpn98',
 'dpn107',
 'dpn131',
 'eca_vovnet39b',
 'ecaresnet18',
 'ecaresnet50',
 'ecaresnet50d',
 'ecaresnet50d_pruned',
 'ecaresnet101d',
 'ecaresnet101d_pruned',
 'ecaresnetlight',
 'ecaresnext26tn_32x4d',
 'efficientnet_b0',
 'efficientnet_b1',
 'efficientnet_b1_pruned',
 'efficientnet_b2',
 'efficientnet_b2_pruned',
 'efficientnet_b2a',
 'efficientnet_b3',
 'efficientnet_b3_pruned',
 'efficientnet_b3a',
 'efficientnet_b4',
 'efficientnet_b5',
 'efficientnet_b6',
 'efficientnet_b7',
 'efficientnet_b8',


<a id = "cont"></a>
## CFG

In [5]:
BATCH_SIZE = 8 # 8 for bigger architectures
VAL_BATCH_SIZE = 16
EPOCHS = 13 # train upto 10 epochs
IMG_SIZE = 640 # 384 for bigger architectures
if BATCH_SIZE == 8:
    ITER_FREQ = 400
else:
    ITER_FREQ = 200
NUM_WORKERS = 8
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]
SEED = 1111
N_FOLDS = 5
TR_FOLDS = [0,1,2,3,4]
START_FOLD = 0

target_cols=['ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal',
                 'NGT - Abnormal', 'NGT - Borderline', 'NGT - Incompletely Imaged', 'NGT - Normal', 
                 'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal',
                 'Swan Ganz Catheter Present']

MODEL_PATH = '../input/stage2-8th-cv-9449/Stage2_resnet200d_320_fold_0_epoch_8.pth'
MODEL_ARCH = 'resnet200d_320' # tf_efficientnet_b4_ns, tf_efficientnet_b5_ns, resnext50_32x4d, seresnet152d
TEACHER_MODEL_PATH = '../input/stage1-resnet200d-320/stage1_resnet200d_320_fold_4_epoch_4.pth'
WEIGHTS = [0.5, 1]

LR = 5e-4
MIN_LR = 1e-6 # SAM, CosineAnnealingWarmRestarts
WEIGHT_DECAY = 1e-6
MOMENTUM = 0.9
T_0 = EPOCHS # SAM, CosineAnnealingWarmRestarts
MAX_NORM = 1000
T_MAX = 5
ITERS_TO_ACCUMULATE = 1

BASE_OPTIMIZER = SGD #for SAM, Ranger
OPTIMIZER = 'Ranger' # Ranger, AdamW, AdamP, SGD, SAM

SCHEDULER = 'CosineAnnealingLR' # ReduceLROnPlateau, CosineAnnealingLR, CosineAnnealingWarmRestarts, OneCycleLR
SCHEDULER_UPDATE = 'epoch' # batch

CRITERION = 'BCE' # CrossEntropyLoss, TaylorSmoothedLoss, LabelSmoothedLoss
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
class AverageMeter(object):
    def __init__(self):
        self.reset()
        
    def reset(self):
        self.val = 0
        self.sum = 0
        self.avg = 0
        self.count = 0
        
    def update(self, val, n=1):
        self.val = val
        self.sum += val*n
        self.count += n
        self.avg = self.sum / self.count

def seed_torch(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_torch(SEED)

def macro_multilabel_auc(label, pred):
    aucs = []
    for i in range(len(target_cols)):
        aucs.append(roc_auc_score(label[:, i], pred[:, i]))
    print(np.round(aucs, 4))
    return np.mean(aucs)

In [7]:
TRAIN_DIR = '../input/ranzcr-clip-catheter-line-classification/train/'
train_df = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/train.csv')
folds = pd.read_csv('../input/ranzcr-folds/train_folds.csv')
train_annotations = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/train_annotations.csv')

In [8]:
COLOR_MAP = {'ETT - Abnormal': (255, 0, 0),
             'ETT - Borderline': (0, 255, 0),
             'ETT - Normal': (0, 0, 255),
             'NGT - Abnormal': (255, 255, 0),
             'NGT - Borderline': (255, 0, 255),
             'NGT - Incompletely Imaged': (0, 255, 255),
             'NGT - Normal': (128, 0, 0),
             'CVC - Abnormal': (0, 128, 0),
             'CVC - Borderline': (0, 0, 128),
             'CVC - Normal': (128, 128, 0),
             'Swan Ganz Catheter Present': (128, 0, 128),
            }


class RanzcrDataset(Dataset):
    def __init__(self, df, df_annotations, use_annot=False, annot_size=50, transform=None):
        self.df = df
        self.df_annotations = df_annotations
        self.use_annot = use_annot
        self.annot_size = annot_size
        self.image_id = df['StudyInstanceUID'].values
        self.labels = df[target_cols].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        image_id = self.image_id[idx]
        file_path = f'{TRAIN_DIR}{image_id}.jpg'
        image = cv2.imread(file_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        labels = torch.tensor(self.labels[idx]).float()
        if self.use_annot:
            image_annot = image.copy()
            query_string = f"StudyInstanceUID == '{image_id}'"
            df = self.df_annotations.query(query_string)
            for i, row in df.iterrows():
                label = row["label"]
                data = np.array(ast.literal_eval(row["data"]))
                for d in data:
                    image_annot[d[1]-self.annot_size//2:d[1]+self.annot_size//2,
                                d[0]-self.annot_size//2:d[0]+self.annot_size//2,
                                :] = COLOR_MAP[label]
            if self.transform:
                augmented = self.transform(image=image, image_annot=image_annot)
                image = augmented['image']
                image_annot = augmented['image_annot']
            return image, image_annot, labels
        else:
            if self.transform:
                augmented = self.transform(image=image)
                image = augmented['image']
            return image, labels

In [9]:
def get_transform(*, train=True):
    
    if train:
        return A.Compose([
            A.RandomResizedCrop(IMG_SIZE, IMG_SIZE, scale=(0.85, 1.0)),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.2, brightness_limit=(-0.2, 0.2), contrast_limit=(-0.2, 0.2)),
            A.HueSaturationValue(p=0.2, hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2),
            A.ShiftScaleRotate(p=0.2, shift_limit=0.0625, scale_limit=0.2, rotate_limit=20),
            A.CoarseDropout(p=0.2),
            A.Cutout(p=0.2, max_h_size=16, max_w_size=16, fill_value=(0., 0., 0.), num_holes=16),
            A.Normalize(mean=MEAN, std=STD),
            ToTensorV2(),
        ], additional_targets={'image_annot': 'image'})
    else:
        return A.Compose([
#             A.CenterCrop(IMG_SIZE, IMG_SIZE),
            A.Resize(IMG_SIZE, IMG_SIZE),
            A.Normalize(mean=MEAN, std=STD, max_pixel_value=255.0, p=1.0),
            ToTensorV2(),
        ])

## Model

In [10]:
class SeResnet152D(nn.Module): 
    def __init__(self, model_arch, n_classes, pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_arch, pretrained=pretrained)
        n_features = self.model.fc.in_features
        self.model.global_pool = nn.Identity()
        self.model.fc = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(n_features, n_classes)

    def forward(self, x):
        bs = x.size(0)
        features = self.model(x)
        pooled_features = self.pooling(features).view(bs, -1)
        output = self.fc(pooled_features)
        return output

class CustomResNet200D(nn.Module):
    def __init__(self, model_arch, n_classes, pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_arch, pretrained=False)
        n_features = self.model.fc.in_features
        self.model.fc = nn.Linear(n_features, n_classes)
        if pretrained:
            pretrained_path = '../input/startingpointschestx/resnet200d_320_chestx.pth'
            checkpoint = torch.load(pretrained_path)['model']
            for key in list(checkpoint.keys()):
                if 'model.' in key:
                    checkpoint[key.replace('model.', '')] = checkpoint[key]
                    del checkpoint[key]
            self.model.load_state_dict(checkpoint) 
            print(f'load {model_arch} pretrained model')
        n_features = self.model.fc.in_features
        self.model.global_pool = nn.Identity()
        self.model.fc = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(n_features, n_classes)

    def forward(self, x):
        bs = x.size(0)
        features = self.model(x)
        pooled_features = self.pooling(features).view(bs, -1)
        output = self.fc(pooled_features)
        return features, pooled_features, output

In [11]:
class CustomLoss(nn.Module):
    def __init__(self, weights=[1, 1]):
        super(CustomLoss, self).__init__()
        self.weights = weights
        
    def forward(self, teacher_features, features, y_pred, labels):
        consistency_loss = nn.MSELoss()(teacher_features.view(-1), features.view(-1))
        cls_loss = nn.BCEWithLogitsLoss()(y_pred, labels)
        loss = self.weights[0] * consistency_loss + self.weights[1] * cls_loss
        return loss

[Back to CFG(Click here)](#cont)

In [12]:
def GetCriterion(criterion_name, criterion=None):

    if criterion_name == 'CrossEntropyLoss':
        criterion = nn.CrossEntropyLoss()
    elif criterion_name == 'LabelSmoothingLoss':
        criterion = LabelSmoothingLoss()
#     elif criterion_name == 'FocalLoss':
#         criterion = FocalLoss()
#     elif criterion_name == 'FocalCosineLoss':
#         criterion = FocalCosineLoss()
    elif criterion_name == 'TaylorCrossEntropyLoss':
        criterion = TaylorCrossEntropyLoss()
    elif criterion_name == 'TaylorSmoothedLoss':
        criterion = TaylorSmoothedLoss()
    elif criterion_name == 'CutMix':
        criterion = CutMixCriterion(criterion)
    elif criterion_name == 'SnapMix':
        criterion = SnapMixLoss()
    elif criterion_name == 'CustomLoss':
        criterion = CustomLoss(WEIGHTS)
    elif criterion_name == 'BCE':
        criterion = nn.BCEWithLogitsLoss()
    return criterion
    
    
def GetScheduler(scheduler_name, optimizer, batches=None):
    #['ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts', 'OneCycleLR', 'GradualWarmupSchedulerV2']
    if scheduler_name == 'OneCycleLR':
        return torch.optim.lr_scheduler.OneCycleLR(optimizer,max_lr = 1e-2,epochs = EPOCHS,
                                                   steps_per_epoch = batches+1,pct_start = 0.1)
    if scheduler_name == 'CosineAnnealingWarmRestarts':
        return torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0 = T_0, T_mult=1,
                                                                    eta_min=MIN_LR, last_epoch=-1)
    elif scheduler_name == 'CosineAnnealingLR':
        return torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=T_MAX, eta_min=0, last_epoch=-1)
    elif scheduler_name == 'ReduceLROnPlateau':
        return torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=1, threshold=0.0001,
                                                          cooldown=0, min_lr=MIN_LR)
#     elif scheduler_name == 'GradualWarmupSchedulerV2':
#         return GradualWarmupSchedulerV2(optimizer=optimizer)
    
def GetOptimizer(optimizer_name,parameters):
    #['Adam','Ranger']
    if optimizer_name == 'Adam':
#         if CFG.scheduler_name == 'GradualWarmupSchedulerV2':
#             return torch.optim.Adam(parameters, lr=CFG.LR_START, weight_decay=CFG.weight_decay, amsgrad=False)
#         else:
        return torch.optim.Adam(parameters, lr=LR, weight_decay=WEIGHT_DECAY, amsgrad=False)
    elif optimizer_name == 'AdamW':
#         if CFG.scheduler_name == 'GradualWarmupSchedulerV2':
#             return torch.optim.AdamW(parameters, lr=CFG.LR_START, weight_decay=CFG.weight_decay, amsgrad=False)
#         else:
        return torch.optim.Adam(parameters, lr=LR, weight_decay=WEIGHT_DECAY, amsgrad=False)
    elif optimizer_name == 'AdamP':
#         if CFG.scheduler_name == 'GradualWarmupSchedulerV2':
#             return AdamP(parameters, lr=CFG.LR_START, weight_decay=CFG.weight_decay)
#         else:
        return AdamP(parameters, lr=LR, weight_decay=WEIGHT_DECAY)
    elif optimizer_name == 'Ranger':
        return Ranger(parameters, lr = LR, alpha = 0.5, k = 6, N_sma_threshhold = 5, 
                      betas = (0.95,0.999), weight_decay=WEIGHT_DECAY)
    elif optimizer_name == 'SAM':
        return SAM(parameters, BASE_OPTIMIZER, lr=0.1, momentum=0.9,weight_decay=0.0005)
    
    elif optimizer_name == 'AdamP':
        return AdamP(parameters, lr=LR, weight_decay=WEIGHT_DECAY)

# Train and validation functions

In [13]:
def train_fn(model, dataloader, teacher_model, device, epoch, optimizer, criterion, scheduler):
    
    data_time = AverageMeter()
    batch_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()
    
    model.train()
    # https://pytorch.org/docs/stable/notes/amp_examples.html#gradient-accumulation
    scaler = GradScaler()
    start_time = time.time()
    loader = tqdm(dataloader, total=len(dataloader))
    for step, (images, images_annot, labels) in enumerate(loader):
        
        with torch.no_grad():
            teacher_features, _, _ = teacher_model(images_annot.to(device))

        images = images.to(device).float()
        labels = labels.to(device)
        data_time.update(time.time() - start_time)

        with autocast():
            features, _, output = model(images)
            loss = criterion(teacher_features, features, output, labels)
#             output = model(images)
#             loss = criterion(output, labels)
            losses.update(loss.item(), BATCH_SIZE)
        
            scaler.scale(loss).backward()
            grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm = MAX_NORM)

            if (step+1) % ITERS_TO_ACCUMULATE == 0:
                scaler.step(optimizer)
                # Update the scale for next iteration.
                scaler.update()
                optimizer.zero_grad()
        
        if scheduler is not None and SCHEDULER_UPDATE == 'batch':
            scheduler.step()

        batch_time.update(time.time() - start_time)
        start_time = time.time()
        
        if step % ITER_FREQ == 0:
            
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Batch Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s),\t'
                  'Data Time {data_time.val:.3f}s ({data_time.avg:.3f}s)\t'
                  'Loss: {loss.val:.4f} ({loss.avg:.4f})'.format((epoch+1),
                                                                  step, len(dataloader),
                                                                  batch_time=batch_time,
                                                                  data_time=data_time,
                                                                  loss=losses))
                                                                             #accuracy=accuracies))
        # To check the loss real-time while iterating over data.   'Accuracy {accuracy.val:.4f} ({accuracy.avg:.4f})'
        loader.set_description(f'Training Epoch {epoch+1}/{EPOCHS}')
        loader.set_postfix(loss=losses.avg) #accuracy=accuracies.avg)
#         del images, labels
    if scheduler is not None and SCHEDULER_UPDATE == 'epoch':
        scheduler.step()
        
    return losses.avg#, accuracies.avg

In [14]:
def valid_fn(epoch, model, criterion, val_loader, device, scheduler):
    
    model.eval()
    losses = AverageMeter()
    accuracies = AverageMeter()
    PREDS = []
    TARGETS = []
    loader = tqdm(val_loader, total=len(val_loader))
    with torch.no_grad():  # without torch.no_grad() will make the CUDA run OOM.
        for step, (images, labels) in enumerate(loader):

            images = images.to(device)
            labels = labels.to(device)

#             output = model(images)
            _, _, output = model(images)
            loss = criterion(output, labels)
            losses.update(loss.item(), BATCH_SIZE)
            PREDS += [output.sigmoid()]
            TARGETS += [labels.detach().cpu()]
#             accuracy = (F.softmax(output).argmax(dim=1) == labels).float().mean()
#             accuracies.update(accuracy.item(), VAL_BATCH_SIZE)
            loader.set_description(f'Validating Epoch {epoch+1}/{EPOCHS}')
            loader.set_postfix(loss=losses.avg)#, accuracy=accuracies.avg)
#             del images, labels
    PREDS = torch.cat(PREDS).cpu().numpy()
    TARGETS = torch.cat(TARGETS).cpu().numpy()
    roc_auc = macro_multilabel_auc(TARGETS, PREDS)
    if scheduler is not None:
        scheduler.step()
        
    return losses.avg, roc_auc# accuracies.avg

[Back to CFG(Click here)](#cont)

# Main

In [15]:
def engine(device, folds, fold, model_path=None):
    
    trn_idx = folds[folds['kfold'] != fold].index
    val_idx = folds[folds['kfold'] == fold].index

    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)
    train_folds = train_folds[train_folds['StudyInstanceUID'].isin(train_annotations['StudyInstanceUID'].unique())].reset_index(drop=True)

    train_data = RanzcrDataset(train_folds, train_annotations, use_annot=True, transform=get_transform())
    val_data = RanzcrDataset(valid_folds, train_annotations, use_annot=False, transform=get_transform(train=False))            
    
    train_loader = DataLoader(train_data,
                              batch_size=BATCH_SIZE, 
                              shuffle=True, 
                              num_workers=NUM_WORKERS,
                              pin_memory=True, # enables faster data transfer to CUDA-enabled GPUs.
                              drop_last=True)
    val_loader = DataLoader(val_data, 
                            batch_size=VAL_BATCH_SIZE,
                            num_workers=NUM_WORKERS,
                            shuffle=False, 
                            pin_memory=True,
                            drop_last=False)
    
    teacher_model = CustomResNet200D(MODEL_ARCH, 11, pretrained=False)
    teacher_model.load_state_dict(torch.load(TEACHER_MODEL_PATH))
    for param in teacher_model.parameters():
        param.requires_grad = False
    teacher_model.eval()
    teacher_model.to(device)
    model = CustomResNet200D(MODEL_ARCH, 11, pretrained=True)
    if model_path is not None:
        model.load_state_dict(torch.load(model_path))
        START_EPOCH = int((model_path.split('_')[-1]).split('.')[0])
    else:
        model = CustomResNet200D(MODEL_ARCH, 11, pretrained=True)
        START_EPOCH = 0
    model.to(device)
    
    params = filter(lambda p: p.requires_grad, model.parameters())    
    optimizer = GetOptimizer(OPTIMIZER, params)
    
    train_criterion = CustomLoss(weights=WEIGHTS).to(device)        
    val_criterion = GetCriterion(CRITERION).to(device)
    
    scheduler = GetScheduler(SCHEDULER, optimizer, BATCH_SIZE)
    
    loss = []
    accuracy = []
    for epoch in range(START_EPOCH, EPOCHS):
        
        epoch_start = time.time()        
        avg_loss = train_fn(model, train_loader, teacher_model, device, epoch, optimizer, train_criterion, scheduler)

        torch.cuda.empty_cache()
        avg_val_loss, roc_auc_score = valid_fn(epoch, model, val_criterion, val_loader, device, scheduler)
        epoch_end = time.time() - epoch_start
        
        print(f'Validation ROC AUC Score after epoch {epoch+1}: {roc_auc_score:.4f}')
        loss.append(avg_loss)
#         accuracy.append(avg_accuracy)
        
        content = f'Fold {fold} Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f} roc_auc_score: {roc_auc_score:.4f} time: {epoch_end:.0f}s'
        with open(f'Stage2_{MODEL_ARCH}_{OPTIMIZER}_{CRITERION}.txt', 'a') as appender:
            appender.write(content + '\n')                                         # avg_train_accuracy: {avg_accuracy:.4f}
        

        torch.save(model.state_dict(), f'Stage2_{MODEL_ARCH}_fold_{fold}_epoch_{(epoch+1)}.pth')
#         torch.save(model, f'GPU_{MODEL_ARCH}_fold_{fold}_epoch_{(epoch+1)}')
        torch.cuda.empty_cache()
    
    return loss#{'loss':loss, 'accuracy':accuracy}

In [16]:
if __name__ == '__main__':
    
    if MODEL_PATH is not None:
        START_FOLD = int(MODEL_PATH.split('_')[-3])
    
    for fold in range(START_FOLD, N_FOLDS):
        if fold == 1:
            break
        print(f'===== Fold {fold} Starting =====')
        fold_start = time.time()
        logs = engine(DEVICE, folds, fold, MODEL_PATH)
        print(f'Time taken in fold {fold}: {time.time()-fold_start}')

===== Fold 0 Starting =====
load resnet200d_320 pretrained model
Ranger optimizer loaded. 
Gradient Centralization usage = True
GC applied to both conv and fc layers


  0%|          | 0/900 [00:00<?, ?it/s]

Epoch: [9][0/900]	Batch Time 13.657s (13.657s),	Data Time 10.906s (10.906s)	Loss: 1.8372 (1.8372)
Epoch: [9][400/900]	Batch Time 1.748s (1.749s),	Data Time 0.385s (0.412s)	Loss: 1.8969 (2.5320)
Epoch: [9][800/900]	Batch Time 1.686s (1.734s),	Data Time 0.384s (0.399s)	Loss: 2.0083 (2.4886)


  0%|          | 0/377 [00:00<?, ?it/s]

[0.9913 0.943  0.9851 0.9309 0.9373 0.98   0.9843 0.9016 0.8317 0.8939
 0.991 ]
Validation ROC AUC Score after epoch 9: 0.9427


  0%|          | 0/900 [00:00<?, ?it/s]

Epoch: [10][0/900]	Batch Time 11.887s (11.887s),	Data Time 7.220s (7.220s)	Loss: 3.3960 (3.3960)
Epoch: [10][400/900]	Batch Time 1.688s (1.744s),	Data Time 0.385s (0.404s)	Loss: 1.6818 (2.3861)
Epoch: [10][800/900]	Batch Time 1.688s (1.730s),	Data Time 0.385s (0.395s)	Loss: 3.7827 (2.4237)


  0%|          | 0/377 [00:00<?, ?it/s]

[0.9843 0.9519 0.9811 0.9161 0.937  0.9828 0.9839 0.9089 0.8284 0.8924
 0.9968]
Validation ROC AUC Score after epoch 10: 0.9421


  0%|          | 0/900 [00:00<?, ?it/s]

Epoch: [11][0/900]	Batch Time 12.587s (12.587s),	Data Time 8.366s (8.366s)	Loss: 1.5569 (1.5569)
Epoch: [11][400/900]	Batch Time 1.713s (1.748s),	Data Time 0.385s (0.407s)	Loss: 2.6164 (2.4866)
Epoch: [11][800/900]	Batch Time 1.718s (1.735s),	Data Time 0.384s (0.397s)	Loss: 8.1711 (2.4093)


  0%|          | 0/377 [00:00<?, ?it/s]

[0.982  0.9495 0.9746 0.9161 0.9269 0.9821 0.9808 0.91   0.8329 0.8901
 0.9967]
Validation ROC AUC Score after epoch 11: 0.9401


  0%|          | 0/900 [00:00<?, ?it/s]

Epoch: [12][0/900]	Batch Time 10.991s (10.991s),	Data Time 7.064s (7.064s)	Loss: 1.4934 (1.4934)
Epoch: [12][400/900]	Batch Time 1.833s (1.743s),	Data Time 0.384s (0.404s)	Loss: 5.3226 (2.4103)
Epoch: [12][800/900]	Batch Time 1.806s (1.731s),	Data Time 0.384s (0.395s)	Loss: 0.7345 (2.4364)


  0%|          | 0/377 [00:00<?, ?it/s]

[0.9689 0.9411 0.9845 0.9056 0.9259 0.98   0.9826 0.9103 0.8349 0.8995
 0.9991]
Validation ROC AUC Score after epoch 12: 0.9393


  0%|          | 0/900 [00:00<?, ?it/s]

Epoch: [13][0/900]	Batch Time 9.777s (9.777s),	Data Time 6.739s (6.739s)	Loss: 4.7465 (4.7465)
Epoch: [13][400/900]	Batch Time 1.819s (1.739s),	Data Time 0.384s (0.402s)	Loss: 0.9148 (2.4646)
Epoch: [13][800/900]	Batch Time 1.689s (1.729s),	Data Time 0.384s (0.394s)	Loss: 2.2120 (2.4352)


  0%|          | 0/377 [00:00<?, ?it/s]

[0.9827 0.9506 0.9862 0.8992 0.9287 0.9826 0.9853 0.9147 0.8326 0.8999
 0.6693]
Validation ROC AUC Score after epoch 13: 0.9120
Time taken in fold 0: 9770.604644298553
