In [None]:
# Install pretrainedmodels and torchtoolbox; both are imported in the next cell.
!pip install pretrainedmodels
!pip install torchtoolbox

In [None]:
import time
time1 = time.time()
import os
import gc
gc.enable()
import glob
import datetime

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from torch.nn.parameter import Parameter
from torch.utils.data import BatchSampler, DataLoader, Dataset, SequentialSampler

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from tqdm import tqdm
from pathlib import Path

import torchtoolbox
from torchtoolbox.tools import summary

import pretrainedmodels
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from catalyst.data.sampler import BalanceClassSampler

import sklearn
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GroupKFold, StratifiedKFold

import warnings
warnings.simplefilter("ignore")
warnings.filterwarnings("ignore",category=DeprecationWarning)

def seed_everything(seed=2020):
    """Seed every random number generator used by the notebook.

    Sets the ``PYTHONHASHSEED`` environment variable, seeds Python's
    ``random`` module, NumPy and PyTorch (CPU plus all CUDA devices), and
    configures cuDNN for deterministic execution.

    Args:
        seed: Integer seed applied to every generator.
    """
    import random  # local import: `random` is not among the notebook-level imports

    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # seeds every visible GPU, not only the current one
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune its algorithm choice per run, which
    # defeats the `deterministic` flag above, so it has to be off.
    torch.backends.cudnn.benchmark = False
    print("Seeding completed..")
seed_everything()
time2 = time.time()
print("Importing libraries done.! Total time taken {:.2f}sec.".format(time2-time1))

In [None]:
# Root directory of the input dataset (path is relative to the notebook's working directory).
DATA_PATH = '../input/melanoma-merged-external-data-512x512-jpeg'
# Fold table; later cells read its `fold`, `image_id`, `target` and `source` columns.
df_folds = pd.read_csv(f'{DATA_PATH}/folds.csv', low_memory=False)
# Directory of the JPEG images; DatasetRetriever reads `<image_id>.jpg` from here.
TRAIN_PATH = f'{DATA_PATH}/512x512-dataset-melanoma/512x512-dataset-melanoma/'

In [None]:
def get_train_transforms():
    """Build the albumentations pipeline applied to training images.

    The pipeline is, in order: an optional 400px random crop resized to
    512x512, optional 90-degree rotation, optional horizontal and vertical
    flips, a resize to 512x512, optional cutout of up to 8 holes of 64x64,
    and conversion to a tensor.

    Returns:
        An ``A.Compose`` object that is always applied (``p=1.0``).
    """
    augmentations = [
        A.RandomSizedCrop(min_max_height=(400, 400), height=512, width=512, p=0.5),
        A.RandomRotate90(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Resize(height=512, width=512, p=1),
        A.Cutout(num_holes=8, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(augmentations, p=1.0)

def get_valid_transforms():
    """Build the albumentations pipeline applied to validation images.

    No random augmentation is used: images are resized to 512x512 and
    converted to a tensor.

    Returns:
        An ``A.Compose`` object that is always applied (``p=1.0``).
    """
    steps = [
        A.Resize(height=512, width=512, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=1.0)

In [None]:
def onehot(size, target):
    """Return a float32 vector of length `size` with a 1 at index `target`.

    Args:
        size: Length of the output vector (number of classes).
        target: Index (or indices) to set to 1.

    Returns:
        A 1-D ``torch.float32`` tensor that is zero everywhere except `target`.
    """
    encoded = torch.zeros(size, dtype=torch.float32)
    encoded[target] = 1.0
    return encoded

class DatasetRetriever(Dataset):
    """Dataset that loads JPEG images by id and pairs them with one-hot labels.

    Args:
        image_ids: Indexable sequence of image ids; each image is read from
            ``{TRAIN_PATH}/{image_id}.jpg``.
        labels: Indexable sequence of integer class labels aligned with
            `image_ids`.
        transforms: Optional callable invoked as ``transforms(image=...)``
            that returns a mapping with an ``'image'`` entry.
    """

    def __init__(self, image_ids, labels, transforms=None):
        super().__init__()
        self.image_ids = image_ids
        self.labels = labels
        self.transforms = transforms

    def __getitem__(self, idx: int):
        """Return ``(image, target)`` for the sample at position `idx`.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        image_id = self.image_ids[idx]
        image_path = f'{TRAIN_PATH}/{image_id}.jpg'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than on `.astype` below.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        # NOTE(review): cv2.imread yields BGR channel order and no conversion is
        # applied here - confirm this matches what the pretrained weights and
        # any inference code expect.
        image = image.astype(np.float32) / 255.0

        label = self.labels[idx]

        if self.transforms:
            image = self.transforms(image=image)['image']

        target = onehot(2, label)
        return image, target

    def __len__(self) -> int:
        # len() instead of .shape[0] so plain lists work as well as arrays.
        return len(self.image_ids)

    def get_labels(self):
        """Return all labels as a list (passed to the class-balancing sampler)."""
        return list(self.labels)

### Metrics

In [None]:
from sklearn import metrics

class AverageMeter(object):
    """Tracks the latest value and the running weighted mean of a quantity.

    Attributes are `val` (last value), `sum` (weighted sum), `count`
    (total weight) and `avg` (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` with weight `n` (for example a batch mean and the batch size)."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [None]:
class RocAucMeter(object):
    """Accumulates labels and predictions and keeps the running ROC AUC.

    `update` expects one-hot targets and raw logits, both shaped
    ``(batch, classes)``; the softmax probability of class 1 is the score.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the accumulated data and reset the score to 0."""
        # The buffers start with one dummy negative and one dummy positive at
        # probability 0.5 - presumably so roc_auc_score always sees both classes.
        self.y_true = np.array([0, 1])
        self.y_pred = np.array([0.5, 0.5])
        self.score = 0

    def update(self, y_true, y_pred):
        """Append one batch and recompute the ROC AUC over everything seen so far."""
        labels = y_true.cpu().numpy().argmax(axis=1).clip(min=0, max=1).astype(int)
        class_probs = nn.functional.softmax(y_pred, dim=1).data.cpu().numpy()
        self.y_true = np.concatenate([self.y_true, labels])
        self.y_pred = np.concatenate([self.y_pred, class_probs[:, 1]])
        self.score = sklearn.metrics.roc_auc_score(self.y_true, self.y_pred)

    @property
    def avg(self):
        """Current score; named `avg` to mirror AverageMeter's interface."""
        return self.score

In [None]:
class APScoreMeter(RocAucMeter):
    """RocAucMeter variant whose running score is average precision.

    Construction, `reset` and `avg` are inherited; only the metric computed
    in `update` differs.
    """

    def update(self, y_true, y_pred):
        """Append one batch and recompute average precision over everything seen so far."""
        labels = y_true.cpu().numpy().argmax(axis=1).clip(min=0, max=1).astype(int)
        class_probs = nn.functional.softmax(y_pred, dim=1).data.cpu().numpy()
        self.y_true = np.concatenate([self.y_true, labels])
        self.y_pred = np.concatenate([self.y_pred, class_probs[:, 1]])
        self.score = sklearn.metrics.average_precision_score(self.y_true, self.y_pred)

### Loss and Label Smoothing

In [None]:
class FocalLoss(nn.Module):
    """Binary focal loss: ``alpha * (1 - p_t) ** gamma * BCE``, computed per element.

    Args:
        alpha: Constant multiplier applied to the loss.
        gamma: Focusing exponent; larger values down-weight easy examples more.
        logits: If True, `inputs` are raw logits; otherwise probabilities in [0, 1].
        reduce: If True, return the mean over all elements; otherwise return
            the unreduced loss with the same shape as `inputs`.
    """

    def __init__(self, alpha=1, gamma=2, logits=False, reduce=True):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.logits = logits
        self.reduce = reduce

    def forward(self, inputs, targets):
        """Return the focal loss of `inputs` against `targets` (same shape, float)."""
        # reduction='none' is essential: with the default mean reduction the
        # modulating factor would be applied to one batch-averaged scalar
        # instead of to each element, which is not focal loss.
        if self.logits:
            bce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        else:
            bce_loss = F.binary_cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-bce_loss)  # probability assigned to the true class
        focal = self.alpha * (1 - pt) ** self.gamma * bce_loss
        return torch.mean(focal) if self.reduce else focal

In [None]:
class LabelSmoothing(nn.Module):
    """Cross-entropy with label smoothing for one-hot targets.

    In training mode the loss is
    ``(1 - smoothing) * CE(x, target) + smoothing * mean_k(-log_softmax(x)_k)``.
    In eval mode plain cross-entropy is returned.

    Args:
        smoothing: Weight given to the uniform-distribution term.
    """

    def __init__(self, smoothing=0.1):
        super(LabelSmoothing, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing

    def forward(self, x, target):
        """Return the mean loss for logits `x` against `target`.

        Args:
            x: Logits with the class scores on the last dimension.
            target: One-hot (or probability) targets shaped like `x`. In eval
                mode, integer class indices are accepted as well.
        """
        if self.training:
            logprobs = F.log_softmax(x.float(), dim=-1)
            nll_loss = -(logprobs * target.float()).sum(-1)
            smooth_loss = -logprobs.mean(dim=-1)
            loss = self.confidence * nll_loss + self.smoothing * smooth_loss
            return loss.mean()

        if target.dim() == x.dim():
            # One-hot / probability targets: compute the cross-entropy by hand
            # so this does not depend on F.cross_entropy supporting
            # probability targets in the installed PyTorch version.
            logprobs = F.log_softmax(x.float(), dim=-1)
            return -(logprobs * target.float()).sum(-1).mean()
        # Integer class-index targets.
        return F.cross_entropy(x, target)

### Model

In [None]:
def gem(x, p=3, eps=1e-6):
    """Generalized-mean pooling over the last two dimensions of `x`.

    Computes ``(mean(clamp(x, min=eps) ** p)) ** (1 / p)`` with the mean taken
    over a window covering the full height and width.

    Args:
        x: Input tensor whose last two dimensions are pooled.
        p: Pooling exponent (number or tensor).
        eps: Lower clamp applied before the power.

    Returns:
        Tensor with the last two dimensions reduced to size 1.
    """
    full_window = (x.size(-2), x.size(-1))
    powered = x.clamp(min=eps).pow(p)
    pooled = F.avg_pool2d(powered, full_window)
    return pooled.pow(1. / p)

class GeM(nn.Module):
    """Generalized Mean Pooling layer with a learnable exponent `p`.

    Args:
        p: Initial value of the pooling exponent (stored as a Parameter).
        eps: Lower clamp applied to the input before the power.
    """

    def __init__(self, p=3, eps=1e-6):
        super(GeM, self).__init__()
        self.p = Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x):
        """Pool `x` over its last two dimensions using the current exponent."""
        return gem(x, p=self.p, eps=self.eps)

    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return f'{self.__class__.__name__}(p={exponent:.4f}, eps={self.eps})'
    
class Conv2d_ws(nn.Conv2d):
    """2-D convolution with weight standardization.

    On every forward pass each output filter's weights are shifted to zero
    mean and divided by their standard deviation (plus 1e-5) before the
    convolution is applied. The stored parameters are left untouched.

    The constructor takes the same arguments as ``nn.Conv2d`` and forwards
    all of them, so `groups`, `bias` and integer kernel sizes are honoured.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'):
        # Go through nn.Conv2d.__init__ (not its base class) so scalar
        # arguments are expanded to tuples and the caller's groups/bias are kept.
        super(Conv2d_ws, self).__init__(in_channels, out_channels, kernel_size, stride=stride,
                                        padding=padding, dilation=dilation, groups=groups,
                                        bias=bias, padding_mode=padding_mode)

    def forward(self, x):
        """Convolve `x` with the standardized version of ``self.weight``."""
        weight = self.weight
        # Per-output-filter mean over (in_channels, kH, kW).
        weight = weight - weight.mean(dim=(1, 2, 3), keepdim=True)
        std = weight.view(weight.size(0), -1).std(dim=1).view(-1, 1, 1, 1) + 1e-5
        weight = weight / std.expand_as(weight)
        # Note: F.conv2d zero-pads; `padding_mode` is stored but not used here.
        return F.conv2d(x, weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)

In [None]:
def convert_to_conv2d(model):
    """Recursively replace ``nn.Conv2d`` layers with weight-standardized ``Conv2d_ws``.

    The model is modified in place. Children named ``fc1`` or ``fc2`` are
    left untouched and not descended into. Each replacement copies the
    original layer's geometry (channels, kernel, stride, padding, dilation,
    groups, bias on/off, padding mode) and its existing parameters, so
    pretrained weights survive the conversion.

    Args:
        model: Module whose convolution layers should be converted.
    """
    for child_name, child in model.named_children():
        if child_name in ('fc1', 'fc2'):
            continue
        if not isinstance(child, nn.Conv2d):
            convert_to_conv2d(child)
            continue
        if isinstance(child, Conv2d_ws):
            # Already converted (Conv2d_ws subclasses nn.Conv2d); nothing to do.
            continue

        replacement = Conv2d_ws(in_channels=child.in_channels,
                                out_channels=child.out_channels,
                                kernel_size=child.kernel_size,
                                stride=child.stride,
                                padding=child.padding,
                                dilation=child.dilation,
                                groups=child.groups,
                                bias=child.bias is not None,
                                padding_mode=child.padding_mode)
        replacement.to(device=child.weight.device, dtype=child.weight.dtype)
        with torch.no_grad():
            # Carry the existing parameters over instead of leaving the new
            # layer randomly initialised. Copy only when the shapes line up.
            if replacement.weight.shape == child.weight.shape:
                replacement.weight.copy_(child.weight)
            if child.bias is not None and replacement.bias is not None:
                replacement.bias.copy_(child.bias)
        setattr(model, child_name, replacement)

In [None]:
class SEResNext50_32x4d(nn.Module):
    """SE-ResNeXt50 (32x4d) classifier with optional extras.

    Args:
        weight_standardization: Replace the backbone's convolutions with
            weight-standardized ones (via ``convert_to_conv2d``).
        generalized_mean_pooling: Use ``GeM`` pooling instead of adaptive
            average pooling.
        pretrained: Load backbone weights from the local checkpoint file.
        dropout_num: Number of dropout layers for multi-sample dropout;
            0 disables it.
        dropout_p: Dropout probability for multi-sample dropout.
        classes: Number of target classes.

    With ``dropout_num == 0`` the backbone's own ``last_linear`` produces the
    logits. With ``dropout_num > 0`` the backbone emits pooled features
    (``last_linear`` becomes an identity) and ``self.fc`` is applied to
    `dropout_num` independently dropped-out copies whose logits are averaged.
    NOTE: in that configuration no parameter name contains ``last_linear``,
    so code that selects the head by that name will not match ``fc``.
    """

    def __init__(self, weight_standardization=False, generalized_mean_pooling=False, pretrained=False, dropout_num=0, dropout_p=0.5, classes=2):
        super(SEResNext50_32x4d, self).__init__()
        self.model = pretrainedmodels.__dict__['se_resnext50_32x4d'](pretrained=None)
        if pretrained:
            # map_location keeps loading independent of the device the
            # checkpoint was saved from.
            self.model.load_state_dict(torch.load(
                '../input/pretrained-model-weights-pytorch/se_resnext50_32x4d-a260b3a4.pth',
                map_location='cpu'
            ))
        if weight_standardization:
            convert_to_conv2d(self.model)
        if generalized_mean_pooling:
            self.model.avg_pool = GeM()
        else:
            self.model.avg_pool = nn.AdaptiveAvgPool2d(1)

        feature_dim = self.model.last_linear.in_features
        if dropout_num > 0:
            # The multi-sample-dropout head needs pooled features, not logits:
            # feeding `classes`-wide logits into `self.fc` is a shape error.
            self.model.last_linear = nn.Identity()
        else:
            self.model.last_linear = nn.Linear(in_features=feature_dim, out_features=classes)
        # Always registered (even when unused) so state-dict keys do not
        # depend on `dropout_num`.
        self.fc = nn.Linear(in_features=feature_dim, out_features=classes)
        self.dropouts = nn.ModuleList([nn.Dropout(dropout_p) for _ in range(dropout_num)])

    def forward(self, inputs):
        """Return class logits of shape ``(batch, classes)`` for `inputs`."""
        features = self.model(inputs)
        if len(self.dropouts) == 0:
            return features

        features = features.view(features.size(0), -1)
        outputs = sum(self.fc(dropout(features)) for dropout in self.dropouts)
        # Average so the logit scale does not grow with the number of samples.
        return outputs / len(self.dropouts)

### Fitter / Trainer

In [None]:
def is_lastLinear(name):
    """Return True when the parameter name `name` contains ``last_linear``."""
    return name.find("last_linear") != -1

class Fitter(object):
    """Runs training and validation for one model and manages its checkpoints.

    The model is moved to `device` and wrapped in ``nn.DataParallel``. An AdamW
    optimizer is built with three parameter groups: weights with weight decay,
    biases without, and the ``last_linear`` head at 10x the base learning rate.
    The scheduler comes from the config and the loss is ``LabelSmoothing``.

    Args:
        model: The network to train.
        device: Torch device that the model and batches are moved to.
        config: Object exposing ``learning_rate``, ``n_epochs``, ``verbose``,
            ``verbose_step``, ``train_scheduler``, ``validation_scheduler``,
            ``Scheduler`` and ``scheduler_params``.
        folder: Directory name (created under ``./``) for the log and checkpoints.
    """

    def __init__(self, model, device, config, folder):
        self.config = config
        self.epoch = 0

        self.base_dir = f'./{folder}'
        os.makedirs(self.base_dir, exist_ok=True)
        self.log_path = f'{self.base_dir}/log.txt'

        self.best_score = 0
        self.best_loss = 10**5
        self.best_ap = 0

        self.model = model
        self.device = device
        self.model.to(self.device)
        self.model = nn.DataParallel(self.model)

        # differential learning rate and weight decay
        named_params = list(self.model.named_parameters())
        no_decay_layers = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']

        def _skips_decay(name):
            return any(layer in name for layer in no_decay_layers)

        optimizer_grouped_parameters = [
            {
                "params": [param for name, param in named_params
                           if not is_lastLinear(name) and not _skips_decay(name)],
                "lr": self.config.learning_rate,
                'weight_decay': 0.001
            },
            {
                "params": [param for name, param in named_params
                           if not is_lastLinear(name) and _skips_decay(name)],
                "lr": self.config.learning_rate,
                'weight_decay': 0.0
            },
            {
                # classifier head: 10x learning rate, optimizer-default weight decay
                "params": [param for name, param in named_params if is_lastLinear(name)],
                "lr": self.config.learning_rate * 10,
            },
        ]
        self.optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=self.config.learning_rate, weight_decay=0.0)
        self.scheduler = self.config.Scheduler(self.optimizer, **self.config.scheduler_params)

        #self.criterion = FocalLoss(logits=True).to(self.device)
        self.criterion = LabelSmoothing().to(self.device)

        if self.config.verbose:
            self.log(f'Fitter initialized. Device is {self.device}.')

    def fit(self, train_loader, validation_loader):
        """Train for ``config.n_epochs`` epochs, validating and checkpointing after each.

        A checkpoint is written whenever validation loss, ROC AUC or average
        precision improves; only the two most recent files per metric are kept.
        """
        for _ in range(self.config.n_epochs):
            if self.config.verbose:
                learning_rate_1 = self.optimizer.param_groups[0]['lr']
                learning_rate_2 = self.optimizer.param_groups[-1]['lr']
                timestamp = datetime.datetime.utcnow().isoformat()
                self.log(f"\n{timestamp}\nLR: {learning_rate_1}, {learning_rate_2}")

            time1 = time.time()
            summary_loss, roc_auc_scores, ap_scores = self.train_one_epoch(train_loader)
            if self.config.verbose:
                self.log(f"[TRAIN RESULT]: Epoch: {self.epoch}, Summary Loss: {summary_loss.avg:.3f}, Roc Auc: {roc_auc_scores.avg:.3f}, Average Precision: {ap_scores.avg:.3f}, Time: {(time.time()-time1):.2f}secs.")

            if self.config.train_scheduler:
                self.scheduler.step(metrics=summary_loss.avg)

            time2 = time.time()
            summary_loss, roc_auc_scores, ap_scores = self.validation(validation_loader)
            if self.config.verbose:
                self.log(f"[VALIDATION RESULT]: Epoch: {self.epoch}, Summary Loss: {summary_loss.avg:.3f}, Roc Auc: {roc_auc_scores.avg:.3f}, Average Precision: {ap_scores.avg:.3f}, Time: {(time.time()-time2):.2f}secs.")
            if self.config.validation_scheduler:
                self.scheduler.step(metrics=summary_loss.avg)

            if summary_loss.avg < self.best_loss:
                self.best_loss = summary_loss.avg
                # The on-disk name uses underscores (unlike the other two) and
                # is kept as is; the cleanup pattern is derived from the same
                # prefix so stale files are actually found and removed.
                self._save_best_checkpoint('best_loss_checkpoint')
            if roc_auc_scores.avg > self.best_score:
                self.best_score = roc_auc_scores.avg
                self._save_best_checkpoint('best-score-checkpoint')
            if ap_scores.avg > self.best_ap:
                self.best_ap = ap_scores.avg
                self._save_best_checkpoint('best-ap-checkpoint')

            self.epoch += 1

    def _save_best_checkpoint(self, prefix):
        """Save model weights under `prefix` for this epoch; keep only the two newest files."""
        self.save_model(f'{self.base_dir}/{prefix}-{str(self.epoch).zfill(3)}epoch.bin')
        pattern = f'{glob.escape(self.base_dir)}/{prefix}-*epoch.bin'
        for path in sorted(glob.glob(pattern))[:-2]:
            os.remove(path)

    def train_one_epoch(self, train_loader):
        """Run one optimisation pass over `train_loader`.

        Returns:
            ``(summary_loss, roc_auc_scores, ap_scores)`` meters for the epoch.
        """
        self.model.train()
        time1 = time.time()

        summary_loss = AverageMeter()
        roc_auc_scores = RocAucMeter()
        ap_scores = APScoreMeter()

        for step, (images, targets) in enumerate(train_loader):
            if self.config.verbose:
                if step % self.config.verbose_step == 0:
                    print(
                        f'Train Step {step}/{len(train_loader)}, ' + \
                        f'Summary Loss: {summary_loss.avg:.3f}, ROC AUC: {roc_auc_scores.avg:.3f}, AP: {ap_scores.avg:.3f} ' + \
                        f'Time: {(time.time() - time1):.2f}', end='\r'
                    )

            # .to() moves/casts the batch; torch.tensor(existing_tensor) would
            # make an extra copy and emit a warning.
            images = images.to(self.device, dtype=torch.float32)
            targets = targets.to(self.device, dtype=torch.float32)
            batch_size = images.shape[0]

            self.optimizer.zero_grad()
            outputs = self.model(images)
            loss = self.criterion(outputs, targets)
            loss.backward()
            self.optimizer.step()

            summary_loss.update(loss.detach().item(), batch_size)
            roc_auc_scores.update(targets, outputs)
            ap_scores.update(targets, outputs)

        return summary_loss, roc_auc_scores, ap_scores

    def validation(self, val_loader):
        """Evaluate the model on `val_loader` without gradient tracking.

        Returns:
            ``(summary_loss, roc_auc_scores, ap_scores)`` meters for the pass.
        """
        self.model.eval()
        time1 = time.time()

        summary_loss = AverageMeter()
        roc_auc_scores = RocAucMeter()
        ap_scores = APScoreMeter()

        with torch.no_grad():
            for step, (images, targets) in enumerate(val_loader):
                if self.config.verbose:
                    if step % self.config.verbose_step == 0:
                        print(
                            f'Val Step {step}/{len(val_loader)}, ' + \
                            f'summary Loss: {summary_loss.avg:.3f}, ROC AUC: {roc_auc_scores.avg:.3f}, AP: {ap_scores.avg:.3f} ' + \
                            f'Time: {(time.time() - time1):.2f}', end='\r'
                        )

                images = images.to(self.device, dtype=torch.float32)
                targets = targets.to(self.device, dtype=torch.float32)
                batch_size = images.shape[0]

                outputs = self.model(images)
                loss = self.criterion(outputs, targets)

                roc_auc_scores.update(targets, outputs)
                ap_scores.update(targets, outputs)
                summary_loss.update(loss.detach().item(), batch_size)

        return summary_loss, roc_auc_scores, ap_scores

    def save_model(self, path):
        """Write only the model's state dict to `path` (switches the model to eval mode)."""
        self.model.eval()
        torch.save(self.model.state_dict(), path)

    def save(self, path):
        """Write the full training state (model, optimizer, scheduler, best metrics, epoch)."""
        self.model.eval()
        torch.save({
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'scheduler_state_dict': self.scheduler.state_dict(),
            'best_score': self.best_score,
            'best_ap': self.best_ap,
            'best_loss': self.best_loss,
            'epoch': self.epoch,
        }, path)

    def load(self, path):
        """Restore a training state previously written by `save`."""
        # map_location places tensors on this fitter's device regardless of
        # the device they were saved from.
        checkpoint = torch.load(path, map_location=self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        self.best_score = checkpoint['best_score']
        self.best_ap = checkpoint['best_ap']
        self.best_loss = checkpoint['best_loss']
        self.epoch = checkpoint['epoch']

    def log(self, message):
        """Append `message` to the log file, echoing it to stdout when verbose."""
        if self.config.verbose:
            print(message)
        with open(self.log_path, 'a+') as logger:
            logger.write(f'{message}\n')

In [None]:
class TrainGlobalConfig:
    """Hyper-parameters and switches shared by the data loaders and the Fitter."""

    # DataLoader worker processes
    num_workers = 2

    batch_size = 16
    n_epochs = 15
    # base learning rate; the Fitter uses 10x this value for the classifier head
    learning_rate = 1e-4

    # print/log progress, and how often (in steps) to refresh the progress line
    verbose = True
    verbose_step = 1

    # Which loss drives the scheduler after each epoch. ReduceLROnPlateau keeps
    # a single "best" value, so only one of these should be enabled: stepping
    # it with both the training and the validation loss makes it compare two
    # different quantities against each other (and counts patience twice per
    # epoch). The validation loss is used here.
    train_scheduler = False
    validation_scheduler = True

    Scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau
    scheduler_params = dict(
        mode = 'min',
        factor = 0.8,
        patience = 1,
        verbose = False,
        threshold = 0.0001,
        threshold_mode = 'abs',
        cooldown = 0,
        min_lr = 1e-8,
        eps = 1e-08
    )

In [None]:
# Build the network once (backbone weights loaded from the local checkpoint) and
# snapshot the initial training state: model, optimizer, scheduler, best metrics
# and epoch counter. train_fold() reloads this file so each fold starts from it.
model = SEResNext50_32x4d(pretrained=True, generalized_mean_pooling=False, weight_standardization=False, dropout_num=0, dropout_p=0.5)
fitter = Fitter(model=model, device = torch.device("cuda"), config=TrainGlobalConfig, folder='base_state')
# Written inside the fitter's own output directory (./base_state).
BASE_STATE_PATH = f'{fitter.base_dir}/base_state.bin'
fitter.save(BASE_STATE_PATH)

In [None]:
def train_fold(fold_number):
    """Train and validate the global `model` on one cross-validation fold.

    Training uses every row of `df_folds` outside `fold_number`, sampled with
    class-balanced downsampling. Validation uses the rows of that fold whose
    `source` is ``'ISIC20'``. The fitter starts from the state stored at
    `BASE_STATE_PATH` and writes its outputs to ``./fold{fold_number}``.

    Args:
        fold_number: Value of the `fold` column held out for validation.
    """
    cfg = TrainGlobalConfig

    df_train = df_folds[df_folds['fold']!=fold_number]
    train_dataset = DatasetRetriever(
        image_ids=df_train.image_id.values,
        labels=df_train.target.values,
        transforms=get_train_transforms(),
    )

    df_val = df_folds[(df_folds['fold']==fold_number)&(df_folds['source']=='ISIC20')]
    validation_dataset = DatasetRetriever(
        image_ids=df_val.image_id.values,
        labels=df_val.target.values,
        transforms=get_valid_transforms(),
    )

    balanced_sampler = BalanceClassSampler(labels=train_dataset.get_labels(),
                                           mode='downsampling')
    train_loader = DataLoader(
        train_dataset,
        sampler=balanced_sampler,
        batch_size=cfg.batch_size,
        pin_memory=False,
        drop_last=True,
        num_workers=cfg.num_workers,
    )
    val_loader = DataLoader(
        validation_dataset,
        sampler=SequentialSampler(validation_dataset),
        batch_size=cfg.batch_size,
        pin_memory=False,
        shuffle=False,
        drop_last=False,
        num_workers=cfg.num_workers,
    )

    fold_fitter = Fitter(model=model, device=torch.device("cuda"), config=cfg, folder=f'fold{fold_number}')
    # Reset weights, optimizer and scheduler to the shared starting point.
    fold_fitter.load(BASE_STATE_PATH)
    fold_fitter.fit(train_loader=train_loader, validation_loader=val_loader)

In [None]:
# Train folds 0-4 one after another; train_fold() reloads the saved base state
# at the start of each fold.
for fold_number in range(5):
    train_fold(fold_number=fold_number)