In [None]:
# Install the notebook's third-party dependencies; stdout is redirected to /dev/null to keep the cell output short.
!pip install -U git+https://github.com/albumentations-team/albumentations > /dev/null
!pip install timm > /dev/null
!pip install pytorch_toolbelt > /dev/null
!pip install tensorboardX > /dev/null
!pip install catalyst==20.4.2 > /dev/null

# Pin torch/torchvision to the 1.6.0 / 0.7.0 CUDA 10.1 builds served from the PyTorch wheel index.
!pip install torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html > /dev/null

In [None]:
IMAGE_SIZE = "384x384"

import pandas as pd

# Pre-processed metadata tables, one per image source.
train_csv = pd.read_csv("../input/melanomaprocesseddf/train_c30_v1.csv")
train_2019_csv = pd.read_csv("../input/melanomaprocesseddf/train2019_c30_v1.csv")
malignant_csv = pd.read_csv("../input/melanomaprocesseddf/malignant_c30_v1.csv")
test_csv = pd.read_csv("../input/melanomaprocesseddf/test_c30_v1.csv")

# Image directories; the resolution part of each path comes from IMAGE_SIZE.
train_image_path = f"../input/jpeg-melanoma-{IMAGE_SIZE}/train"
test_image_path = f"../input/jpeg-melanoma-{IMAGE_SIZE}/test"
train_2019_path = f"../input/jpeg-isic2019-{IMAGE_SIZE}/train"
malignant_path = f"../input/malignant-v2-{IMAGE_SIZE}/jpeg384"


def _add_image_path(frame, image_dir):
    """Add an ``image_path`` column of the form ``<image_dir>/<image_name>.jpg`` to ``frame`` in place."""
    frame["image_path"] = frame["image_name"].apply(lambda name: f"{image_dir}/{name}.jpg")


_add_image_path(train_csv, train_image_path)
_add_image_path(test_csv, test_image_path)
_add_image_path(train_2019_csv, train_2019_path)
_add_image_path(malignant_csv, malignant_path)

# Persist the augmented tables to the working directory.
# The "malignat" spelling is kept as-is: later cells or scripts may read that exact file name.
train_csv.to_csv("train_csv.csv", index=False)
train_2019_csv.to_csv("train_2019_csv.csv", index=False)
malignant_csv.to_csv("malignat_csv.csv", index=False)
test_csv.to_csv("test_csv.csv", index=False)

In [None]:
%%writefile classifiers.py

import warnings
warnings.filterwarnings('ignore')

from functools import partial

import numpy as np
import torch
from timm.models import skresnext50_32x4d
from timm.models import dpn92, dpn131
from timm.models.dpn import dpn92, dpn131
from timm.models.efficientnet import tf_efficientnet_b4_ns, tf_efficientnet_b3_ns, \
    tf_efficientnet_b5_ns, tf_efficientnet_b2_ns, tf_efficientnet_b6_ns, tf_efficientnet_b7_ns, tf_efficientnet_b0_ns
#from timm.models.senet import seresnext50_32x4d
from timm.models.densenet import densenet201
from torch import nn
from torch.nn.modules.dropout import Dropout
from torch.nn.modules.linear import Linear
from torch.nn.modules.pooling import AdaptiveAvgPool2d, AdaptiveMaxPool2d

def _encoder_spec(features, factory, **factory_kwargs):
    """Build one registry entry.

    ``features`` is the channel count the classifier heads use as the input
    width of their linear layer; ``init_op`` is a zero-argument callable that
    invokes ``factory`` with ``pretrained=True`` plus any extra keyword
    arguments.
    """
    return {
        "features": features,
        "init_op": partial(factory, pretrained=True, **factory_kwargs),
    }


# Registry of supported backbones, keyed by the name passed to the classifier
# constructors.  Suffixes such as ``_03d`` / ``_04d`` select the same
# architecture with a different ``drop_path_rate``.
encoder_params = {
    "densenet201": _encoder_spec(1920, densenet201),
    "dpn92": _encoder_spec(2688, dpn92),
    "dpn131": _encoder_spec(2688, dpn131),
    "tf_efficientnet_b0_ns": _encoder_spec(1280, tf_efficientnet_b0_ns, drop_path_rate=0.2),
    "tf_efficientnet_b3_ns": _encoder_spec(1536, tf_efficientnet_b3_ns, drop_path_rate=0.2),
    "tf_efficientnet_b2_ns": _encoder_spec(1408, tf_efficientnet_b2_ns, drop_path_rate=0.2),
    "tf_efficientnet_b4_ns": _encoder_spec(1792, tf_efficientnet_b4_ns, drop_path_rate=0.5),
    "tf_efficientnet_b5_ns": _encoder_spec(2048, tf_efficientnet_b5_ns, drop_path_rate=0.2),
    "tf_efficientnet_b4_ns_03d": _encoder_spec(1792, tf_efficientnet_b4_ns, drop_path_rate=0.3),
    "tf_efficientnet_b5_ns_03d": _encoder_spec(2048, tf_efficientnet_b5_ns, drop_path_rate=0.3),
    "tf_efficientnet_b5_ns_04d": _encoder_spec(2048, tf_efficientnet_b5_ns, drop_path_rate=0.4),
    "tf_efficientnet_b6_ns": _encoder_spec(2304, tf_efficientnet_b6_ns, drop_path_rate=0.2),
    "tf_efficientnet_b7_ns": _encoder_spec(2560, tf_efficientnet_b7_ns, drop_path_rate=0.2),
    "tf_efficientnet_b6_ns_04d": _encoder_spec(2304, tf_efficientnet_b6_ns, drop_path_rate=0.4),
    # "se50" (seresnext50_32x4d, 2048 features) is disabled together with its import.
    "sk50": _encoder_spec(2048, skresnext50_32x4d),
}


class MelanomaClassifier(nn.Module):
    """Single-logit image classifier.

    The backbone named by ``encoder`` (a key of ``encoder_params``) produces a
    feature map that is global-average-pooled, passed through dropout and
    projected to one output by a linear layer.
    """

    def __init__(self, encoder, dropout_rate=0.0) -> None:
        super().__init__()
        spec = encoder_params[encoder]
        self.encoder = spec["init_op"]()
        self.avg_pool = AdaptiveAvgPool2d((1, 1))
        self.dropout = Dropout(dropout_rate)
        self.fc = Linear(spec["features"], 1)

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of raw (un-activated) outputs for image batch ``x``."""
        feature_map = self.encoder.forward_features(x)
        pooled = self.avg_pool(feature_map).flatten(1)
        return self.fc(self.dropout(pooled))

class MelanomaClassifierMeta(nn.Module):
    """Single-logit classifier that combines image features with a metadata vector.

    Pooled backbone features are concatenated with ``num_meta`` metadata
    values per sample before dropout and the final linear layer.

    Args:
        encoder: key of ``encoder_params`` selecting the backbone.
        num_meta: width of the metadata vector passed to ``forward``.
        dropout_rate: dropout probability applied to the concatenated vector.
    """

    def __init__(self, encoder, num_meta=30, dropout_rate=0.0) -> None:
        super().__init__()
        self.encoder = encoder_params[encoder]["init_op"]()
        self.avg_pool = AdaptiveAvgPool2d((1, 1))
        # Not used by forward(); kept so the module's attributes stay unchanged.
        self.max_pool = AdaptiveMaxPool2d((1, 1))
        self.dropout = Dropout(dropout_rate)
        self.fc = Linear(num_meta + encoder_params[encoder]["features"], 1)

    def forward(self, x, m):
        """Return a ``(batch, 1)`` tensor of raw outputs.

        Args:
            x: image batch fed to the backbone.
            m: ``(batch, num_meta)`` metadata tensor; any numeric dtype.
        """
        x = self.encoder.forward_features(x)
        x = self.avg_pool(x).flatten(1)
        # Metadata may arrive as an integer tensor (e.g. the output of
        # F.one_hot); cast it to the feature dtype so the concatenation and the
        # linear layer always see a single floating dtype.
        m = m.to(dtype=x.dtype)
        x = torch.cat((x, m), dim=1)
        x = self.dropout(x)
        x = self.fc(x)
        return x

class MelanomaClassifier2(nn.Module):
    """Single-logit classifier using concatenated average- and max-pooled features.

    The linear head therefore takes twice the backbone's feature width.
    """

    def __init__(self, encoder, dropout_rate=0.0) -> None:
        super().__init__()
        spec = encoder_params[encoder]
        self.encoder = spec["init_op"]()
        self.avg_pool = AdaptiveAvgPool2d((1, 1))
        self.max_pool = AdaptiveMaxPool2d((1, 1))
        self.dropout = Dropout(dropout_rate)
        self.fc = Linear(2 * spec["features"], 1)

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of raw outputs for image batch ``x``."""
        feature_map = self.encoder.forward_features(x)
        pooled = torch.cat(
            (
                self.avg_pool(feature_map).flatten(1),
                self.max_pool(feature_map).flatten(1),
            ),
            dim=1,
        )
        return self.fc(self.dropout(pooled))



In [None]:
%%writefile dataset.py

import warnings
warnings.filterwarnings('ignore')

import math
import os
import random
import sys
import traceback

import cv2
import numpy as np
import pandas as pd
import skimage.draw
from albumentations import ImageCompression, OneOf, GaussianBlur, Blur
from albumentations.augmentations.functional import image_compression, rot90
from albumentations.pytorch.functional import img_to_tensor
from scipy.ndimage import binary_erosion, binary_dilation
from skimage import measure
from torch.utils.data import Dataset

import torch
import torch.nn.functional as F
#import dlib


class MelanomaClassifierDataset(Dataset):
    """Labelled image dataset backed by a DataFrame.

    The frame must provide the columns ``image_path``, ``target`` and
    ``anatom_label``.  Each item is a dict with the image path, the normalised
    image tensor, the (optionally smoothed) label and a 7-way one-hot encoding
    of ``anatom_label``.
    """

    def __init__(
        self,
        df,
        fold=0,
        label_smoothing=0.01,
        normalize={"mean": [0.485, 0.456, 0.406],
                    "std": [0.229, 0.224, 0.225]},
        mode="train",
        transforms=None,
        target_transforms=None,
        data_root=None
    ):
        super().__init__()
        # Configuration is stored exactly as given.
        self.df = df
        self.fold = fold
        self.mode = mode
        self.label_smoothing = label_smoothing
        self.normalize = normalize
        self.transforms = transforms
        self.target_transforms = target_transforms
        self.data_root = data_root

        # Column arrays are extracted once so item access is positional.
        frame = self.df
        self.image_name = frame["image_path"].values
        self.label = frame["target"].values
        self.kmeans = frame["anatom_label"].values

    def __getitem__(self, index: int):
        path = self.image_name[index]
        target = self.label[index]
        if self.mode == "train":
            # Label smoothing: pull hard 0/1 targets away from the extremes.
            target = np.clip(target, self.label_smoothing, 1 - self.label_smoothing)

        bgr = cv2.imread(f"{path}", cv2.IMREAD_COLOR)
        pixels = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        if self.transforms:
            pixels = self.transforms(image=pixels)["image"]

        # Extra augmentation reserved for samples whose label exceeds 0.5.
        if target > 0.5 and self.target_transforms:
            pixels = self.target_transforms(image=pixels)["image"]

        return {
            "image_name": path,
            "image": img_to_tensor(pixels, self.normalize),
            "label": target,
            "meta" : F.one_hot(torch.tensor(self.kmeans[index]), 7),
        }

    def __len__(self):
        return len(self.image_name)

    def __get_labels__(self):
        """Return every label rounded to the nearest integer, as a list."""
        return [round(value) for value in self.label.tolist()]


class MelanomaClassifierDatasetTest(Dataset):
    """Unlabelled image dataset backed by a DataFrame (inference counterpart
    of the training dataset).

    The frame must provide the columns ``image_path`` and ``anatom_label``.
    Each item is a dict with the image path, the normalised image tensor and a
    7-way one-hot encoding of ``anatom_label``.

    Args:
        df: source DataFrame; its index does not need to be 0..n-1.
        normalize: dict passed to ``img_to_tensor`` for normalisation.
        transforms: optional callable invoked as ``transforms(image=...)``
            whose result is read from the ``"image"`` key.
        data_root: stored but not used by this class.
    """

    def __init__(
        self,
        df,
        normalize={"mean": [0.485, 0.456, 0.406],
                    "std": [0.229, 0.224, 0.225]},
        transforms=None,
        data_root=None
    ):
        super().__init__()
        self.df = df
        self.normalize = normalize
        self.transforms = transforms
        self.data_root = data_root

        # Use plain arrays so that __getitem__ indexes by position.  Keeping the
        # pandas Series here would make ``self.image_name[index]`` a label
        # lookup, which fails (or returns the wrong row) whenever the frame has
        # a filtered or shuffled index, and would disagree with ``self.kmeans``.
        self.image_name = self.df["image_path"].values
        self.kmeans = self.df["anatom_label"].values

    def __getitem__(self, index: int):
        image_name = self.image_name[index]

        image = cv2.imread(f"{image_name}", cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transforms:
            image = self.transforms(image=image)["image"]

        image = img_to_tensor(image, self.normalize)
        kmeans = F.one_hot(torch.tensor(self.kmeans[index]), 7)

        return {
            "image_name": image_name,
            "image": image,
            "meta" : kmeans
        }

    def __len__(self):
        return len(self.image_name)


In [None]:
%%writefile utils.py

import warnings
warnings.filterwarnings('ignore')

import cv2
import numpy as np
import sklearn

from timm.optim import AdamW
from torch import optim
from torch.optim import lr_scheduler
from torch.optim.rmsprop import RMSprop
from torch.optim.adamw import AdamW
from torch.optim.lr_scheduler import MultiStepLR, CyclicLR

from schedulers import ExponentialLRScheduler, PolyLR, LRStepScheduler

# Disable OpenCV's OpenCL usage and set its thread count to 0 for this process.
# NOTE(review): presumably done to avoid clashes with DataLoader worker processes — confirm.
cv2.ocl.setUseOpenCL(False)
cv2.setNumThreads(0)

class AverageMeter(object):
    """Running weighted mean: tracks the latest value, the weighted sum, the
    total weight and their ratio."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean."""
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
        self.val = val

class RocAucMeter(object):
    """Accumulates targets and predictions and keeps the ROC AUC over
    everything seen since the last ``reset``."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the history.

        The buffers start with one negative and one positive sample, both
        predicted at 0.5 — presumably so the score can be computed even before
        both classes have been observed.
        """
        self.y_true = [0,1]
        self.y_pred = [0.5, 0.5]
        self.score = 0

    def update(self, y_true, y_pred):
        """Append a batch and recompute the score over the whole history.

        Args:
            y_true: tensor of targets; values are rounded to the nearest
                integer, so smoothed labels are accepted.
            y_pred: tensor of predicted scores; NaNs are replaced by 0.5.
        """
        # Imported here because a bare ``import sklearn`` does not load the
        # ``metrics`` submodule; relying on ``sklearn.metrics`` being reachable
        # only works if some other module happened to import it first.
        from sklearn.metrics import roc_auc_score

        # Both tensors are flattened so (batch,) and (batch, 1) inputs can be mixed.
        self.y_true.extend(y_true.cpu().detach().numpy().round().reshape(-1).tolist())
        self.y_pred.extend(y_pred.cpu().detach().numpy().reshape(-1).tolist())
        targets = np.array(self.y_true)
        scores = np.array(self.y_pred)
        scores[np.isnan(scores)] = 0.5
        self.score = roc_auc_score(targets, scores)

    @property
    def avg(self):
        """The most recently computed ROC AUC (0 before the first update)."""
        return self.score

def create_optimizer(optimizer_config, model, master_params=None):
    """Creates optimizer and schedule from configuration

    Parameters
    ----------
    optimizer_config : dict
        Dictionary containing the configuration options for the optimizer.
        Keys read here: ``type``, ``learning_rate``, ``weight_decay`` (all but
        SGD variants also need nothing else; SGD variants additionally read
        ``momentum`` and ``nesterov``), ``schedule`` (a dict with ``type`` and,
        for every type except ``constant``, ``params``) and the optional
        ``classifier_lr``.
    model : Model
        The network model.
    master_params : iterable, optional
        Parameters to optimise instead of ``model.parameters()``.  Ignored
        when ``classifier_lr`` is set.

    Returns
    -------
    optimizer : Optimizer
        The optimizer
    scheduler : LRScheduler
        The learning rate scheduler

    Raises
    ------
    KeyError
        If the optimizer type or the schedule type is not recognised, or a
        required configuration key is missing.
    """
    if optimizer_config.get("classifier_lr", -1) != -1:
        # Two parameter groups: parameters whose name contains "encoder" keep
        # the base learning rate, everything else uses ``classifier_lr``.
        net_params = []
        classifier_params = []
        for name, param in model.named_parameters():
            if not param.requires_grad:
                continue
            if "encoder" in name:
                net_params.append(param)
            else:
                classifier_params.append(param)
        params = [
            {"params": net_params},
            {"params": classifier_params, "lr": optimizer_config["classifier_lr"]},
        ]
    elif master_params:
        params = master_params
    else:
        params = model.parameters()

    optimizer_type = optimizer_config["type"]
    if optimizer_type == "SGD":
        optimizer = optim.SGD(params,
                              lr=optimizer_config["learning_rate"],
                              momentum=optimizer_config["momentum"],
                              weight_decay=optimizer_config["weight_decay"],
                              nesterov=optimizer_config["nesterov"])
    elif optimizer_type == "FusedSGD":
        # NOTE(review): FusedSGD is not imported in this module, so this branch
        # raises NameError unless the name is provided at file level.
        optimizer = FusedSGD(params,
                             lr=optimizer_config["learning_rate"],
                             momentum=optimizer_config["momentum"],
                             weight_decay=optimizer_config["weight_decay"],
                             nesterov=optimizer_config["nesterov"])
    elif optimizer_type == "Adam":
        optimizer = optim.Adam(params,
                               lr=optimizer_config["learning_rate"],
                               weight_decay=optimizer_config["weight_decay"])
    elif optimizer_type == "FusedAdam":
        # NOTE(review): FusedAdam is not imported in this module either.
        optimizer = FusedAdam(params,
                              lr=optimizer_config["learning_rate"],
                              weight_decay=optimizer_config["weight_decay"])
    elif optimizer_type == "AdamW":
        # NOTE(review): AdamW is imported twice at module level (timm.optim,
        # then torch.optim.adamw); the later import is the one bound here.
        optimizer = AdamW(params,
                          lr=optimizer_config["learning_rate"],
                          weight_decay=optimizer_config["weight_decay"])
    elif optimizer_type == "RmsProp":
        optimizer = RMSprop(params,
                            lr=optimizer_config["learning_rate"],
                            weight_decay=optimizer_config["weight_decay"])
    else:
        raise KeyError("unrecognized optimizer {}".format(optimizer_type))

    schedule = optimizer_config["schedule"]
    schedule_type = schedule["type"]
    if schedule_type == "step":
        scheduler = LRStepScheduler(optimizer, **schedule["params"])
    elif schedule_type == "clr":
        scheduler = CyclicLR(optimizer, **schedule["params"])
    elif schedule_type == "multistep":
        scheduler = MultiStepLR(optimizer, **schedule["params"])
    elif schedule_type == "exponential":
        scheduler = ExponentialLRScheduler(optimizer, **schedule["params"])
    elif schedule_type == "poly":
        scheduler = PolyLR(optimizer, **schedule["params"])
    elif schedule_type == "constant":
        scheduler = lr_scheduler.LambdaLR(optimizer, lambda epoch: 1.0)
    elif schedule_type == "linear":
        def linear_lr(it):
            # Multiplier applied to the base lr: alpha * it + beta.
            return it * schedule["params"]["alpha"] + schedule["params"]["beta"]

        scheduler = lr_scheduler.LambdaLR(optimizer, linear_lr)
    else:
        # Without this branch ``scheduler`` would be unbound and the return
        # below would fail with an unrelated UnboundLocalError.
        raise KeyError("unrecognized schedule {}".format(schedule_type))

    return optimizer, scheduler



In [None]:
%%writefile schedulers.py

import warnings
warnings.filterwarnings('ignore')

from bisect import bisect_right

from torch.optim.lr_scheduler import _LRScheduler

class LRStepScheduler(_LRScheduler):
    """Piecewise-constant schedule described by ``(start_epoch, lr)`` pairs.

    ``steps`` is expected to be sorted by ``start_epoch``.  Before the first
    start epoch every parameter group keeps its base learning rate; from then
    on all groups share the learning rate of the latest step reached.
    """

    def __init__(self, optimizer, steps, last_epoch=-1):
        # Must be set before the base constructor, which already calls get_lr().
        self.lr_steps = steps
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        epoch = self.last_epoch
        boundaries = [start for start, _ in self.lr_steps]
        # Index of the last step whose start epoch is <= epoch (0 if none is).
        idx = max(bisect_right(boundaries, epoch) - 1, 0)
        start_epoch, step_lr = self.lr_steps[idx]
        if start_epoch <= epoch:
            return [step_lr for _ in self.base_lrs]
        return list(self.base_lrs)


class PolyLR(_LRScheduler):
    """Sets the learning rate of each parameter group according to poly learning rate policy

    ``lr = base_lr * (1 - t / max_iter) ** power`` where ``t`` is the current
    step count modulo ``max_iter``, so the schedule restarts from ``base_lr``
    every ``max_iter`` steps.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        max_iter (int): Length of one decay cycle, in scheduler steps.
        power (float): Exponent of the polynomial decay.
        last_epoch (int): The index of last epoch. Default: -1.
    """
    def __init__(self, optimizer, max_iter=90000, power=0.9, last_epoch=-1):
        self.max_iter = max_iter
        self.power = power
        super(PolyLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        # get_lr() must not modify self.last_epoch: the base class already
        # advances it in step().  Incrementing it here as well made the
        # schedule run twice as fast as configured and shifted the first value
        # off base_lr.
        position = self.last_epoch % self.max_iter
        decay = (1 - float(position) / self.max_iter) ** self.power
        return [base_lr * decay for base_lr in self.base_lrs]



class ExponentialLRScheduler(_LRScheduler):
    """Exponential decay: at epoch ``e`` each group uses ``base_lr * gamma ** e``.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        gamma (float): Multiplicative factor of learning rate decay.
        last_epoch (int): The index of last epoch. Default: -1.
    """

    def __init__(self, optimizer, gamma, last_epoch=-1):
        # Set before the base constructor runs, since it already calls get_lr().
        self.gamma = gamma
        super(ExponentialLRScheduler, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        epoch = self.last_epoch
        if epoch > 0:
            factor = self.gamma**epoch
            return [base_lr * factor for base_lr in self.base_lrs]
        # Epoch 0 (or earlier): the base learning rates apply unchanged.
        return self.base_lrs




In [None]:
%%writefile losses.py

import warnings
warnings.filterwarnings('ignore')

from typing import Any

from pytorch_toolbelt.losses import BinaryFocalLoss
from torch import nn
from torch.nn.modules.loss import BCEWithLogitsLoss

class WeightedLosses(nn.Module):
    """Weighted sum of several loss modules evaluated on the same arguments.

    ``losses`` and ``weights`` are paired positionally; surplus entries in the
    longer sequence are ignored.
    """

    def __init__(self, losses, weights):
        super().__init__()
        self.losses = losses
        self.weights = weights

    def forward(self, *input: Any, **kwargs: Any):
        total = 0
        for criterion, weight in zip(self.losses, self.weights):
            term = criterion.forward(*input, **kwargs)
            total += weight * term
        return total

class BinaryCrossentropy(BCEWithLogitsLoss):
    """Subclass of ``BCEWithLogitsLoss`` that adds no behaviour of its own."""


class FocalLoss(BinaryFocalLoss):
    """``BinaryFocalLoss`` with a default ``gamma`` of 3.

    All arguments are forwarded positionally to ``BinaryFocalLoss.__init__``
    in the order listed here, so this relies on the installed
    pytorch_toolbelt version accepting them in that order.
    """
    def __init__(self, alpha=None, gamma=3, ignore_index=None, reduction="mean", normalized=False,
                 reduced_threshold=None):
        super().__init__(alpha, gamma, ignore_index, reduction, normalized, reduced_threshold)

In [None]:
%%writefile albu.py

import random

import cv2
import numpy as np
import albumentations as A
from albumentations import DualTransform, ImageOnlyTransform
from albumentations.augmentations.functional import crop
from albumentations.augmentations import functional as F
from PIL import Image, ImageOps, ImageEnhance

def train_transforms(size=300):
    """Training augmentation pipeline.

    Applies geometric jitter, an occasional occlusion-style augmentation and a
    brightness change, then resizes so the longer side equals ``size`` and
    pads with a constant border to at least ``size`` x ``size``.
    """
    # Disabled experiments: HairRemove, MaskLandmarks, ShadeGrayCC,
    # ImageCompression, GaussianBlur, GaussNoise, RandomEraser (standalone), RandomAugMix.
    hole = size // 8

    flip_or_rotate = A.OneOf([
        A.VerticalFlip(),
        A.HorizontalFlip(),
        A.Flip(),
        A.Rotate(),
    ], p=0.5)

    occlusion = A.OneOf([
        A.RandomGridShuffle(p=0.1),
        A.Cutout(num_holes=8, max_h_size=hole, max_w_size=hole, fill_value=0, p=0.2),
        A.CoarseDropout(max_holes=4, max_height=hole, max_width=hole, p=0.2),
        A.GridDropout(p=0.2),
        RandomEraser(p=0.2),
        BitMask(size=size, p=0.1),
    ], p=0.11)

    # (interpolation_down, interpolation_up) combinations to pick from.
    interpolation_pairs = [
        (cv2.INTER_AREA, cv2.INTER_CUBIC),
        (cv2.INTER_AREA, cv2.INTER_LINEAR),
        (cv2.INTER_LINEAR, cv2.INTER_LINEAR),
    ]
    resize = A.OneOf([
        IsotropicResize(max_side=size, interpolation_down=down, interpolation_up=up)
        for down, up in interpolation_pairs
    ], p=1)

    return A.Compose([
        flip_or_rotate,
        A.Transpose(p=0.2),
        A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=15),
        A.RandomRotate90(p=0.1),
        occlusion,
        A.RandomBrightness(limit=(-0.2,0.2), p=0.1),
        resize,
        A.PadIfNeeded(min_height=size, min_width=size, border_mode=cv2.BORDER_CONSTANT),
    ])

def target_transforms(size=300):
    """Additional image degradations and occlusions.

    NOTE(review): presumably passed as the dataset's ``target_transforms`` —
    confirm against the training script.
    """
    hole = size // 8

    degradations = [
        A.ImageCompression(quality_lower=80, quality_upper=100, p=0.1),
        A.GaussianBlur(blur_limit=3, p=0.05),
        A.GaussNoise(p=0.05),
    ]
    occlusion = A.OneOf([
        A.RandomGridShuffle(p=0.1),
        A.Cutout(num_holes=8, max_h_size=hole, max_w_size=hole, fill_value=0, p=0.2),
        A.CoarseDropout(max_holes=4, max_height=hole, max_width=hole, p=0.2),
        A.GridDropout(p=0.2),
        RandomEraser(p=0.2),
        BitMask(size=size, p=0.1),
    ], p=0.33)

    return A.Compose(degradations + [occlusion])
    
def valid_transforms(size=300):
    """Deterministic validation preprocessing: resize the longer side to
    ``size``, then pad with a constant border to at least ``size`` x ``size``."""
    # ShadeGrayCC colour constancy is currently disabled here.
    resize = IsotropicResize(
        max_side=size,
        interpolation_down=cv2.INTER_AREA,
        interpolation_up=cv2.INTER_CUBIC,
    )
    pad = A.PadIfNeeded(min_height=size, min_width=size, border_mode=cv2.BORDER_CONSTANT)
    return A.Compose([resize, pad])

def test_transforms(size=300):
    """Randomised test-time pipeline: the geometric and brightness jitter of
    the training pipeline without the occlusion group, followed by the same
    resize-and-pad to ``size``."""
    # Disabled experiments: HairRemove, MaskLandmarks, ShadeGrayCC,
    # ImageCompression, GaussianBlur, GaussNoise.
    flip_or_rotate = A.OneOf([
        A.VerticalFlip(),
        A.HorizontalFlip(),
        A.Flip(),
        A.Rotate(),
    ], p=0.5)

    # (interpolation_down, interpolation_up) combinations to pick from.
    interpolation_pairs = [
        (cv2.INTER_AREA, cv2.INTER_CUBIC),
        (cv2.INTER_AREA, cv2.INTER_LINEAR),
        (cv2.INTER_LINEAR, cv2.INTER_LINEAR),
    ]
    resize = A.OneOf([
        IsotropicResize(max_side=size, interpolation_down=down, interpolation_up=up)
        for down, up in interpolation_pairs
    ], p=1)

    return A.Compose([
        flip_or_rotate,
        A.Transpose(p=0.2),
        A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=15),
        A.RandomRotate90(p=0.1),
        A.RandomBrightness(limit=(-0.2,0.2), p=0.1),
        resize,
        A.PadIfNeeded(min_height=size, min_width=size, border_mode=cv2.BORDER_CONSTANT),
    ])
    

def isotropically_resize_image(img, size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC):
    """Resize ``img`` so that its longer side equals ``size``, keeping the aspect ratio.

    ``interpolation_up`` is used when the image is enlarged, otherwise
    ``interpolation_down``.  The input is returned untouched when its longer
    side already equals ``size``.
    """
    height, width = img.shape[:2]
    if max(width, height) == size:
        return img

    if width > height:
        scale = size / width
        new_w, new_h = size, height * scale
    else:
        scale = size / height
        new_w, new_h = width * scale, size

    chosen = interpolation_down if scale <= 1 else interpolation_up
    # cv2.resize takes the target size as (width, height); fractions are truncated.
    return cv2.resize(img, (int(new_w), int(new_h)), interpolation=chosen)

class IsotropicResize(DualTransform):
    """Albumentations transform that resizes so the longer side equals ``max_side``.

    Args:
        max_side: target length of the longer image side.
        interpolation_down: cv2 interpolation flag used when shrinking.
        interpolation_up: cv2 interpolation flag used when enlarging.
        always_apply, p: forwarded to the albumentations base class.
    """

    def __init__(self, max_side, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC,
                 always_apply=False, p=1):
        super(IsotropicResize, self).__init__(always_apply, p)
        self.max_side = max_side
        self.interpolation_down = interpolation_down
        self.interpolation_up = interpolation_up

    def apply(self, img, interpolation_down=None, interpolation_up=None, **params):
        # The pipeline never passes these keywords (get_params is not
        # overridden), so fall back to the values configured on the instance.
        # Previously the hard-coded keyword defaults were used instead and the
        # constructor arguments had no effect.  Compare against None: valid
        # cv2 flags can be 0.
        if interpolation_down is None:
            interpolation_down = self.interpolation_down
        if interpolation_up is None:
            interpolation_up = self.interpolation_up
        return isotropically_resize_image(img, size=self.max_side, interpolation_down=interpolation_down,
                                          interpolation_up=interpolation_up)

    def apply_to_mask(self, img, **params):
        # Masks are resized with nearest-neighbour so label values are not blended.
        return self.apply(img, interpolation_down=cv2.INTER_NEAREST, interpolation_up=cv2.INTER_NEAREST, **params)

    def get_transform_init_args_names(self):
        return ("max_side", "interpolation_down", "interpolation_up")



class Resize4xAndBack(ImageOnlyTransform):
    """Degrade an image by shrinking it by a factor of 2 or 4 and scaling it
    back to the original size with a randomly chosen interpolation."""

    def __init__(self, always_apply=False, p=0.5):
        super(Resize4xAndBack, self).__init__(always_apply, p)

    def apply(self, img, **params):
        height, width = img.shape[:2]
        factor = random.choice([2, 4])
        shrunk = cv2.resize(img, (width // factor, height // factor), interpolation=cv2.INTER_AREA)
        upsampling = random.choice([cv2.INTER_CUBIC, cv2.INTER_LINEAR, cv2.INTER_NEAREST])
        return cv2.resize(shrunk, (width, height), interpolation=upsampling)


class RandomSizedCropNonEmptyMaskIfExists(DualTransform):
    """Random crop of random size that contains mask foreground when there is any.

    The crop height is a random fraction of the mask height drawn from
    ``min_max_height``; the crop width is the height times a ratio drawn from
    ``w2h_ratio``.  If the mask is empty the crop is placed uniformly at
    random; otherwise it is placed so that a randomly chosen non-zero mask
    pixel lies inside it.

    Args:
        min_max_height: ``(low, high)`` fractions of the mask height.
        w2h_ratio: ``(low, high)`` range for the width / height ratio.
        always_apply, p: forwarded to the albumentations base class.
    """

    def __init__(self, min_max_height, w2h_ratio=[0.7, 1.3], always_apply=False, p=0.5):
        super(RandomSizedCropNonEmptyMaskIfExists, self).__init__(always_apply, p)

        self.min_max_height = min_max_height
        self.w2h_ratio = w2h_ratio

    def apply(self, img, x_min=0, x_max=0, y_min=0, y_max=0, **params):
        return crop(img, x_min, y_min, x_max, y_max)

    @property
    def targets_as_params(self):
        return ["mask"]

    def get_params_dependent_on_targets(self, params):
        mask = params["mask"]
        mask_height, mask_width = mask.shape[:2]

        height_fraction = random.uniform(self.min_max_height[0], self.min_max_height[1])
        # Clamp so the crop is at least one pixel and never taller than the mask.
        crop_height = min(max(int(mask_height * height_fraction), 1), mask_height)
        w2h_ratio = random.uniform(*self.w2h_ratio)
        crop_width = max(min(int(crop_height * w2h_ratio), mask_width - 1), 1)

        if mask.sum() == 0:
            # random.randint is inclusive on both ends, so the largest valid
            # origin is exactly (extent - crop extent).
            x_min = random.randint(0, mask_width - crop_width)
            y_min = random.randint(0, mask_height - crop_height)
        else:
            flat = mask.sum(axis=-1) if mask.ndim == 3 else mask
            non_zero_yx = np.argwhere(flat)
            y, x = non_zero_yx[random.randrange(len(non_zero_yx))]
            x_min = x - random.randint(0, crop_width - 1)
            y_min = y - random.randint(0, crop_height - 1)
            x_min = int(np.clip(x_min, 0, mask_width - crop_width))
            y_min = int(np.clip(y_min, 0, mask_height - crop_height))

        # x spans the width and y spans the height (these were swapped before,
        # which distorted the requested aspect ratio).
        x_max = min(mask_width, x_min + crop_width)
        y_max = min(mask_height, y_min + crop_height)
        return {"x_min": x_min, "x_max": x_max, "y_min": y_min, "y_max": y_max}

    def get_transform_init_args_names(self):
        # Only names that exist as attributes may be listed here.
        return ("min_max_height", "w2h_ratio")


def prepare_bit_masks(mask):
    """Build six binary keep-masks with the shape and dtype of ``mask``.

    In order, the returned masks zero out: the top half, the bottom half, the
    left half, the right half, the top-left plus bottom-right quadrants, and
    the top-right plus bottom-left quadrants.  Every other element is 1.

    Args:
        mask: 2-D array; only its shape and dtype are used.

    Returns:
        list of six arrays shaped like ``mask``.
    """
    h, w = mask.shape
    # The row split must come from the height (it was derived from the width
    # before, which is only correct for square masks).
    mid_h, mid_w = h // 2, w // 2

    top, bottom = slice(None, mid_h), slice(mid_h, None)
    left, right = slice(None, mid_w), slice(mid_w, None)
    everything = slice(None)

    def _zeroed(*regions):
        keep = np.ones_like(mask)
        for rows, cols in regions:
            keep[rows, cols] = 0
        return keep

    return [
        _zeroed((top, everything)),
        _zeroed((bottom, everything)),
        _zeroed((everything, left)),
        _zeroed((everything, right)),
        _zeroed((top, left), (bottom, right)),
        _zeroed((top, right), (bottom, left)),
    ]

class BitMask(ImageOnlyTransform):
    """Black out half of the image (a half-plane or two diagonal quadrants).

    The six candidate masks are built once for ``size`` x ``size`` images.
    Images whose height/width differ from that are returned unchanged.

    Args:
        size: side length of the images this transform is meant for.
        always_apply, p: forwarded to the albumentations base class.
    """

    def __init__(self, size=300 ,always_apply=False, p=0.5):
        super(BitMask, self).__init__(always_apply, p)
        mask = np.zeros((size,size), dtype=np.uint8)
        self.masks = prepare_bit_masks(mask)

    def apply(self, img, **params):
        bitmap_msk = random.choice(self.masks)

        # Explicit compatibility checks replace the former bare ``except``,
        # which hid every error (and also swallowed KeyboardInterrupt).
        if img.shape[:2] != bitmap_msk.shape:
            return img
        if img.ndim == 3:
            bitmap_msk = bitmap_msk[..., np.newaxis]
        elif img.ndim != 2:
            return img

        # Multiply out of place so the caller's array is not modified.
        return img * bitmap_msk.astype(img.dtype)

class HairRemove(ImageOnlyTransform):
    """Best-effort hair removal: thin dark structures are detected with a
    black-hat morphological filter and painted over with ``cv2.inpaint``.

    If any processing step fails the input image is returned unchanged.
    """

    def __init__(self, always_apply=False, p=0.5):
        super(HairRemove, self).__init__(always_apply, p)

    def apply(self, img, **params):
        try:
            # NOTE(review): BGR2GRAY is used here while MaskLandmarks in this
            # file uses RGB2GRAY — confirm which channel order this receives.
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            threshold = np.random.randint(5, 10)
            # Kernel for the morphological filtering (shape code 1, 17x17).
            kernel = cv2.getStructuringElement(1, (17, 17))

            # Black-hat filtering of the grayscale image highlights the hair contours.
            blackhat = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, kernel)

            # Binarise the response to obtain the mask handed to the inpainting step.
            _, hair_mask = cv2.threshold(blackhat, threshold, 255, cv2.THRESH_BINARY)
            return cv2.inpaint(img, hair_mask, 1, cv2.INPAINT_TELEA)
        except Exception:
            # Deliberately best-effort, but ``Exception`` (rather than a bare
            # ``except``) lets KeyboardInterrupt / SystemExit propagate.
            return img

class ShadeGrayCC(ImageOnlyTransform):
    """Shades-of-gray style colour normalisation.

    Each channel is rescaled by the inverse of its normalised Minkowski mean
    of order ``power``.  An optional gamma lookup table is applied first.  The
    result is returned in the dtype of the input; if processing fails, or a
    channel mean is not strictly positive, the input is returned unchanged.

    Args:
        power: order of the Minkowski norm.
        gamma: optional gamma value for the lookup-table step.
        always_apply, p: forwarded to the albumentations base class.
    """

    def __init__(self, power=6, gamma=None, always_apply=False, p=0.5):
        super(ShadeGrayCC, self).__init__(always_apply, p)

        self.power = power
        self.gamma = gamma

    def apply(self, img, **params):
        try:
            img_dtype = img.dtype
            work = img

            if self.gamma is not None:
                look_up_table = np.ones((256,1), dtype='uint8') * 0
                for i in range(256):
                    look_up_table[i][0] = 255 * pow(i/255, 1/self.gamma)
                work = cv2.LUT(work, look_up_table)

            work = work.astype("float32")
            img_power = np.power(work, self.power)
            rgb_vec = np.power(np.mean(img_power, (0,1)), 1/self.power)
            # A zero (or NaN) channel mean would produce inf/NaN gains below.
            if not np.all(rgb_vec > 0):
                return img
            rgb_norm = np.sqrt(np.sum(np.power(rgb_vec, 2.0)))
            rgb_vec = rgb_vec/rgb_norm
            rgb_vec = 1/(rgb_vec*np.sqrt(3))
            work = np.multiply(work, rgb_vec)

            # Convert back to the input dtype.  The conversion result used to be
            # discarded, so a float image was returned; integer images are
            # clipped first so that gains above 1 saturate instead of wrapping.
            if np.issubdtype(img_dtype, np.integer):
                limits = np.iinfo(img_dtype)
                work = np.clip(work, limits.min, limits.max)
            return work.astype(img_dtype)
        except Exception:
            # Best-effort; ``Exception`` (not a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate.
            return img

class RandomEraser(ImageOnlyTransform):
    """Random erasing: overwrite a random rectangle of a 3-D image in place.

    The rectangle covers 2%-40% of the image area with an aspect ratio between
    0.3 and 1/0.3, and is filled either with per-pixel noise or with a single
    random value drawn from [0, 255).  If anything fails (for example the
    image is not 3-D) the image is returned unchanged.
    """

    def __init__(self, always_apply=False, p=0.5):
        super(RandomEraser, self).__init__(always_apply, p)

    def apply(self, img, **params):
        try:
            img_h, img_w, img_c = img.shape

            s_l, s_h = 0.02, 0.4        # erased area as a fraction of the image
            r_1, r_2 = 0.3, 1/0.3       # aspect-ratio range
            v_l, v_h = 0, 255           # fill-value range

            # Rejection sampling: redraw until the rectangle fits inside the image.
            while True:
                s = np.random.uniform(s_l, s_h) * img_h * img_w
                r = np.random.uniform(r_1, r_2)
                w = int(np.sqrt(s / r))
                h = int(np.sqrt(s * r))
                left = np.random.randint(0, img_w)
                top = np.random.randint(0, img_h)

                if left + w <= img_w and top + h <= img_h:
                    break

            if np.random.rand() > 0.5:
                c = np.random.uniform(v_l, v_h, (h, w, img_c))
            else:
                c = np.random.uniform(v_l, v_h)

            img[top:top + h, left:left + w, :] = c
        except Exception:
            # Best-effort augmentation; ``Exception`` (not a bare ``except``)
            # lets KeyboardInterrupt / SystemExit propagate.
            return img

        return img

class MaskLandmarks(ImageOnlyTransform):
    """Crop the image to the bounding box of its largest Otsu-thresholded region.

    The image is converted to grayscale, binarised with Otsu's method and
    inverted; among the contours found, the one with the largest bounding box
    defines the crop.  If no contour is found, or any step fails, the image is
    returned unchanged.
    """

    def __init__(self, always_apply=False, p=0.5):
        super(MaskLandmarks, self).__init__(always_apply, p)

    def apply(self, img, **params):
        try:
            img_gry = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

            # Otsu's thresholding, then inversion of the binary image.
            _, otsu = cv2.threshold(img_gry, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            _, thresh_gray = cv2.threshold(otsu, thresh=100, maxval=255,
                                           type=cv2.THRESH_BINARY_INV)

            # Depending on the OpenCV version findContours returns either
            # (contours, hierarchy) or (image, contours, hierarchy); the
            # contours are the second-to-last element in both cases.
            contours = cv2.findContours(thresh_gray, cv2.RETR_LIST,
                                        cv2.CHAIN_APPROX_SIMPLE)[-2]

            boxes = [cv2.boundingRect(cont) for cont in contours]
            if not boxes:
                # Nothing to crop to; slicing with an empty box would return
                # an empty image and break the rest of the pipeline.
                return img

            # Biggest bounding box; the first one wins on ties.
            x, y, w, h = max(boxes, key=lambda box: box[2] * box[3])
            if w * h == 0:
                return img

            return img[y:y+h, x:x+w]
        except Exception:
            # Best-effort; ``Exception`` (not a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate.
            return img


def int_parameter(level, maxval):
    """Scale ``maxval`` by ``level / 10`` and truncate the result to an int.

    Args:
      level: Strength of the operation, on a scale whose maximum is 10.
      maxval: Value the operation takes at the maximum level.
    Returns:
      ``int(level * maxval / 10)``.
    """
    scaled = level * maxval / 10
    return int(scaled)


def float_parameter(level, maxval):
    """Scale `maxval` by `level / 10` and return the result as a float.

    Args:
      level: Strength of the operation; a level of 10 yields the full `maxval`.
      maxval: Largest value the operation parameter can take.
    Returns:
      `float(level) * maxval / 10.`
    """
    strength = float(level)
    return strength * maxval / 10.


def sample_level(n):
    """Draw a level uniformly at random between 0.1 and `n` (NumPy global RNG)."""
    lowest_level = 0.1
    return np.random.uniform(lowest_level, n)


def autocontrast(pil_img, _):
    """Apply `ImageOps.autocontrast`; the level argument is ignored."""
    stretched = ImageOps.autocontrast(pil_img)
    return stretched


def equalize(pil_img, _):
    """Apply `ImageOps.equalize`; the level argument is ignored."""
    equalized = ImageOps.equalize(pil_img)
    return equalized


def posterize(pil_img, level):
    """Posterize `pil_img`, keeping `4 - k` bits where `k` grows with a sampled level.

    `k` is `int_parameter(sample_level(level), 4)`.
    """
    bits_removed = int_parameter(sample_level(level), 4)
    bits_kept = 4 - bits_removed
    return ImageOps.posterize(pil_img, bits_kept)


def rotate(pil_img, level):
    """Rotate `pil_img` by a sampled angle (scaled against 30 degrees) with a random sign.

    Uses bilinear resampling; the sign is flipped when a uniform draw exceeds 0.5.
    """
    magnitude = int_parameter(sample_level(level), 30)
    flip_sign = np.random.uniform() > 0.5
    angle = -magnitude if flip_sign else magnitude
    return pil_img.rotate(angle, resample=Image.BILINEAR)


def solarize(pil_img, level):
    """Solarize `pil_img` with threshold `256 - k`, where `k` grows with a sampled level.

    `k` is `int_parameter(sample_level(level), 256)`.
    """
    threshold_drop = int_parameter(sample_level(level), 256)
    threshold = 256 - threshold_drop
    return ImageOps.solarize(pil_img, threshold)


def shear_x(pil_img, level):
    """Apply an affine transform with a sampled, randomly signed x-shear coefficient.

    The coefficient is scaled against 0.3; output size is unchanged and
    resampling is bilinear.
    """
    magnitude = float_parameter(sample_level(level), 0.3)
    shear = -magnitude if np.random.uniform() > 0.5 else magnitude
    coefficients = (1, shear, 0, 0, 1, 0)
    return pil_img.transform(
        pil_img.size, Image.AFFINE, coefficients, resample=Image.BILINEAR
    )


def shear_y(pil_img, level):
    """Apply an affine transform with a sampled, randomly signed y-shear coefficient.

    The coefficient is scaled against 0.3; output size is unchanged and
    resampling is bilinear.
    """
    magnitude = float_parameter(sample_level(level), 0.3)
    shear = -magnitude if np.random.uniform() > 0.5 else magnitude
    coefficients = (1, 0, 0, shear, 1, 0)
    return pil_img.transform(
        pil_img.size, Image.AFFINE, coefficients, resample=Image.BILINEAR
    )


def translate_x(pil_img, level):
    """Shift `pil_img` horizontally by a sampled, randomly signed pixel offset.

    The offset is scaled against one third of `pil_img.size[0]`; output size is
    unchanged and resampling is bilinear.
    """
    max_shift = pil_img.size[0] / 3
    shift = int_parameter(sample_level(level), max_shift)
    if np.random.random() > 0.5:
        shift = -shift
    coefficients = (1, 0, shift, 0, 1, 0)
    return pil_img.transform(
        pil_img.size, Image.AFFINE, coefficients, resample=Image.BILINEAR
    )


def translate_y(pil_img, level):
    """Shift `pil_img` vertically by a sampled, randomly signed pixel offset.

    Args:
      pil_img: PIL image; `pil_img.size` is `(width, height)`.
      level: Upper bound handed to `sample_level` for the severity draw.
    Returns:
      The transformed image (same size, bilinear resampling).
    """
    # A vertical shift must be scaled by the image height (size[1]); the previous
    # code used the width (size[0]), which is only equivalent for square images.
    max_shift = pil_img.size[1] / 3
    level = int_parameter(sample_level(level), max_shift)
    if np.random.random() > 0.5:
        level = -level
    return pil_img.transform(pil_img.size,
                           Image.AFFINE, (1, 0, 0, 0, 1, level),
                           resample=Image.BILINEAR)


# operation that overlaps with ImageNet-C's test set
def color(pil_img, level):
    """Enhance colour by a sampled factor: `float_parameter(sampled, 1.8) + 0.1`."""
    factor = float_parameter(sample_level(level), 1.8) + 0.1
    enhancer = ImageEnhance.Color(pil_img)
    return enhancer.enhance(factor)


# operation that overlaps with ImageNet-C's test set
def contrast(pil_img, level):
    """Enhance contrast by a sampled factor: `float_parameter(sampled, 1.8) + 0.1`."""
    factor = float_parameter(sample_level(level), 1.8) + 0.1
    enhancer = ImageEnhance.Contrast(pil_img)
    return enhancer.enhance(factor)


# operation that overlaps with ImageNet-C's test set
def brightness(pil_img, level):
    """Enhance brightness by a sampled factor: `float_parameter(sampled, 1.8) + 0.1`."""
    factor = float_parameter(sample_level(level), 1.8) + 0.1
    enhancer = ImageEnhance.Brightness(pil_img)
    return enhancer.enhance(factor)


# operation that overlaps with ImageNet-C's test set
def sharpness(pil_img, level):
    """Enhance sharpness by a sampled factor: `float_parameter(sampled, 1.8) + 0.1`."""
    factor = float_parameter(sample_level(level), 1.8) + 0.1
    enhancer = ImageEnhance.Sharpness(pil_img)
    return enhancer.enhance(factor)


# Operation pool sampled by augment_and_mix: tone and geometric ops only.
augmentations = [
    autocontrast,
    equalize,
    posterize,
    rotate,
    solarize,
    shear_x,
    shear_y,
    translate_x,
    translate_y,
]

# Same pool extended with the four enhancement ops (the ones marked in this file
# as overlapping with ImageNet-C's test set).
augmentations_all = augmentations + [color, contrast, brightness, sharpness]

def normalize(image):
    """Shift pixel values down by 127; no per-channel statistics or scaling are applied."""
    shifted = image - 127
    return shifted

def apply_op(image, op, severity):
    """Run one PIL-based operation on a NumPy image and return the result as an array.

    The array is converted with `Image.fromarray`, passed to `op(pil_img, severity)`,
    and the returned image is converted back with `np.asarray`.
    """
    augmented = op(Image.fromarray(image), severity)
    return np.asarray(augmented)

def augment_and_mix(image, severity=3, width=3, depth=-1, alpha=1.):
    """Perform AugMix augmentations and compute the mixture.

    Args:
      image: Input image array of shape (h, w, c). Each operation goes through
        `apply_op`, so presumably this must be a dtype PIL accepts (e.g. uint8)
        -- verify against the caller.
      severity: Severity of the underlying augmentation operators (1 to 10).
      width: Number of augmentation chains that are mixed together.
      depth: Number of operations per chain. A value <= 0 enables stochastic
        depth: each chain independently draws its length from {1, 2, 3}.
      alpha: Concentration parameter of the Dirichlet (chain weights) and Beta
        (blend with the original image) distributions.
    Returns:
      mixed: Floating-point blend `(1 - m) * image + m * mix`; it is neither
        clipped nor cast back to the input dtype.
    """
    ws = np.float32(
      np.random.dirichlet([alpha] * width))
    m = np.float32(np.random.beta(alpha, alpha))

    mix = np.zeros_like(image).astype(np.float32)
    for i in range(width):
        image_aug = image.copy()
        # Use a per-chain variable: overwriting `depth` itself made the first
        # random draw stick, so every later chain silently reused its length.
        chain_depth = depth if depth > 0 else np.random.randint(1, 4)
        for _ in range(chain_depth):
            op = np.random.choice(augmentations)
            image_aug = apply_op(image_aug, op, severity)
        # Chain weights are convex (they sum to 1), so the mix stays in range.
        mix += ws[i] * image_aug

    mixed = (1 - m) * image + m * mix
    return mixed


class RandomAugMix(ImageOnlyTransform):
    """Image-only transform that applies `augment_and_mix` with stored settings.

    `severity`, `width`, `depth` and `alpha` are forwarded unchanged to
    `augment_and_mix`; `always_apply` and `p` go to the base transform.
    """

    def __init__(self, severity=3, width=3, depth=-1, alpha=1., always_apply=False, p=0.5):
        super().__init__(always_apply, p)
        self.severity, self.width = severity, width
        self.depth, self.alpha = depth, alpha

    def apply(self, image, **params):
        """Return the AugMix blend of `image`; extra params are ignored."""
        return augment_and_mix(image, self.severity, self.width, self.depth, self.alpha)

In [None]:
%%writefile train.py

import warnings
warnings.filterwarnings('ignore')

import argparse
import json
import os
from collections import defaultdict
from sklearn.model_selection import KFold
from catalyst.data.sampler import DistributedSampler, BalanceClassSampler
from torch import topk

import numpy as np
import pandas as pd

import classifiers
import losses
from losses import WeightedLosses
from dataset import *
from config import args
from utils import create_optimizer, AverageMeter, RocAucMeter
from albu import *


import torch
import torch.nn as nn
from torch.backends import cudnn
from torch.nn import DataParallel
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch.distributed as dist

# Let cuDNN benchmark convolution algorithms and keep the fastest one.
torch.backends.cudnn.benchmark = True

# Limit MKL / NumExpr / OpenMP to one thread each.
# NOTE(review): these are set after numpy and torch were imported above --
# confirm the libraries still honour them at this point.
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"
# Order GPUs by PCI bus id and expose only device 0 to this process.
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

#from apex import amp
#from apex.parallel import DistributedDataParallel, convert_syncbn_model


def train_epoch(args, model, train_loader, optimizer, scheduler, loss_functions, device, epoch):
    """Train `model` for one pass over `train_loader`.

    Args:
      args: Config object; reads `mixup`, `cutmix`, `alpha`, `beta` and `fp16`.
      model: Network to optimise (switched to train mode here).
      train_loader: Iterable of batches providing "image" and "label" entries.
      optimizer: Optimizer stepped once per batch.
      scheduler: Accepted for interface compatibility; not used in this function.
      loss_functions: Mapping whose "classifier_loss" entry is the criterion.
      device: Device the batch tensors are moved to.
      epoch: Epoch index, only used in the progress-bar text.
    Returns:
      Tuple `(scores.avg, losses.avg)` from the AUC and loss meters.
    """
    losses = AverageMeter()
    scores = RocAucMeter()
    criterion = loss_functions["classifier_loss"]

    model.train()
    scaler = torch.cuda.amp.GradScaler()

    t = tqdm(train_loader)
    for sample in t:
        imgs = sample["image"].to(device)
        labels = sample["label"].to(device)

        # Choose the batch-mixing augmentation, if any. The coin flip is drawn
        # only when both are enabled.
        mix_fn = None
        if args.mixup and args.cutmix:
            # NOTE(review): CutMix uses args.alpha here but args.beta when it is
            # the only one enabled -- confirm this asymmetry is intended.
            mix_fn = mixup_data if np.random.rand(1) > 0.5 else cutmix_data
            mix_strength = args.alpha
        elif args.mixup:
            mix_fn, mix_strength = mixup_data, args.alpha
        elif args.cutmix:
            mix_fn, mix_strength = cutmix_data, args.beta

        if mix_fn is not None:
            # The former torch.autograd.Variable wrappers were no-ops and are gone.
            imgs, targets_a, targets_b, lam = mix_fn(imgs, labels, mix_strength, True)

        optimizer.zero_grad()

        # Casts operations to mixed precision
        with torch.cuda.amp.autocast():
            outputs = model(imgs)
            if mix_fn is not None:
                loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
            else:
                loss = criterion(outputs, labels.view(-1, 1))

        bs = imgs.size(0)
        # The AUC meter always sees the original (unmixed) labels.
        scores.update(labels, torch.sigmoid(outputs))
        losses.update(loss.item(), bs)

        if args.fp16:
            # NVIDIA apex path: `amp` must be apex's module, whose import is
            # commented out in this file, so enabling fp16 requires restoring it.
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            # This branch previously never stepped the optimizer, so the model
            # was not updated at all when fp16 was enabled.
            optimizer.step()
        else:
            # Scale the loss and backpropagate the scaled gradients.
            scaler.scale(loss).backward()

            # Unscale the gradients, then step (or skip on inf/NaN gradients).
            scaler.step(optimizer)

            # Update the scale factor for the next iteration.
            scaler.update()

        t.set_description(f"Train E:{epoch} - Loss:{losses.avg:0.4f} - AUC:{scores.avg:0.4f} ")

    t.close()
    return scores.avg, losses.avg

def valid_epoch(args, model, valid_loader,loss_functions, device, epoch):
    """Evaluate `model` on `valid_loader` and return the AUC meter's average.

    Args:
      args: Accepted for interface compatibility; not used here.
      model: Network to evaluate (switched to eval mode here).
      valid_loader: Iterable of batches providing "image" and "label" entries.
      loss_functions: Accepted for interface compatibility; no loss is computed.
      device: Device the batch tensors are moved to.
      epoch: Epoch index, only used in the progress-bar text.
    Returns:
      `scores.avg` of the `RocAucMeter` after all batches.
    """
    scores = RocAucMeter()

    model.eval()
    with torch.no_grad():
        t = tqdm(valid_loader)
        for sample in t:
            # Only image and label are needed; the unused "meta" tensor is no
            # longer copied to the device on every batch.
            imgs = sample["image"].to(device)
            labels = sample["label"].to(device)

            outputs = model(imgs)
            scores.update(labels, torch.sigmoid(outputs))

            t.set_description(f"Valid E:{epoch} - AUC:{scores.avg:0.4f} ")

    t.close()
    return scores.avg

def test_epoch(args, model, test_loader, device):

    probs = []
    image_names = []

    model.eval()
    t = tqdm(test_loader)
    with torch.no_grad():
        for i, sample in enumerate(t):
            imgs = sample["image"].to(device)
            meta = sample["meta"].to(device)
            img_names = sample["image_name"]


            out = model(imgs)
            preds = torch.sigmoid(out).cpu().numpy().tolist()
            

            probs.extend(preds)
            image_names.extend(img_names)
    
    t.close()
    return probs, image_names


# NOTE(review): lrfn() below takes its schedule from args.LR_* and does not read
# these module-level lr_* values -- confirm they are unused before relying on
# (or removing) them.
lr_start   = 0.000005
lr_max     = 0.00000125 * args.batch_size  # grows linearly with the batch size
lr_min     = 0.000001
lr_ramp_ep = 5
lr_sus_ep  = 0
lr_decay   = 0.8

def lrfn(epoch):
    """Return the learning rate for `epoch` from the args.LR_* schedule.

    Three phases: a linear ramp from LR_START towards LR_MAX over
    LR_RAMPUP_EPOCHS, a plateau at LR_MAX for LR_SUSTAIN_EPOCHS, then an
    exponential decay (factor LR_EXP_DECAY per epoch) towards LR_MIN.
    """
    ramp_epochs = args.LR_RAMPUP_EPOCHS
    if epoch < ramp_epochs:
        # Linear warm-up.
        return (args.LR_MAX - args.LR_START) / args.LR_RAMPUP_EPOCHS * epoch + args.LR_START

    if epoch < ramp_epochs + args.LR_SUSTAIN_EPOCHS:
        # Plateau.
        return args.LR_MAX

    # Exponential decay, counted from the end of the plateau.
    epochs_into_decay = epoch - args.LR_RAMPUP_EPOCHS - args.LR_SUSTAIN_EPOCHS
    return (args.LR_MAX - args.LR_MIN) * args.LR_EXP_DECAY**(epochs_into_decay) + args.LR_MIN

def adjust_learning_rate(optimizer, epoch):
    """Set the learning rate of every parameter group to `lrfn(epoch)`."""
    scheduled_lr = lrfn(epoch)
    for group in optimizer.param_groups:
        group['lr'] = scheduled_lr


def loss_fn(output, target):
    """Binary cross-entropy with logits; `target` is reshaped to a column first."""
    criterion = nn.BCEWithLogitsLoss()
    column_target = target.view(-1, 1)
    return criterion(output, column_target)

def rand_bbox(size, lam):
    """Sample a random CutMix box covering about `1 - lam` of the image area.

    Args:
      size: 4-element shape; entries 2 and 3 are the two spatial extents. They
        are called W and H here and are returned in that (dim 2, dim 3) order.
      lam: Mixing coefficient in [0, 1]; each side is scaled by sqrt(1 - lam).
    Returns:
      `(bbx1, bby1, bbx2, bby2)`: lower/upper bounds along dim 2 (`bbx*`) and
      dim 3 (`bby*`), clipped to the image, so the box may end up smaller than
      requested.
    """
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    # `np.int` was only an alias of the builtin and was removed in NumPy 1.24.
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    # Box centre, drawn uniformly over the image.
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

def mixup_data(x, y, alpha=1.0, use_cuda=True):
    """Blend each sample with a randomly chosen partner from the same batch.

    Returns `(mixed_x, y_a, y_b, lam)`: the blended inputs, the original
    targets, the partners' targets and the mixing weight. `lam` is drawn from
    Beta(alpha, alpha), or fixed to 1 when `alpha` is not positive. With
    `use_cuda` the permutation index is moved to the GPU.
    """
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1

    partner = torch.randperm(x.size()[0])
    if use_cuda:
        partner = partner.cuda()

    blended = lam * x + (1 - lam) * x[partner, :]
    return blended, y, y[partner], lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Weight the loss against both target sets: `lam` for `y_a`, `1 - lam` for `y_b`.

    Both targets are reshaped to a column before being passed to `criterion`.
    """
    loss_a = criterion(pred, y_a.view(-1, 1))
    loss_b = criterion(pred, y_b.view(-1, 1))
    return lam * loss_a + (1 - lam) * loss_b


def cutmix_data(x, y, beta=1.0, use_cuda=True):
    """Paste a random box from a shuffled copy of the batch into `x`, in place.

    Returns `(x, target_a, target_b, lam)`: the same (now modified) input
    tensor, the original targets, the targets of the pasted samples, and `lam`
    recomputed as the fraction of each image left untouched. The initial `lam`
    is drawn from Beta(beta, beta); with `use_cuda` the permutation index is
    moved to the GPU.
    """
    lam = np.random.beta(beta, beta)

    shuffle = torch.randperm(x.size()[0])
    if use_cuda:
        shuffle = shuffle.cuda()

    lo2, lo3, hi2, hi3 = rand_bbox(x.size(), lam)
    x[:, :, lo2:hi2, lo3:hi3] = x[shuffle, :, lo2:hi2, lo3:hi3]

    # Adjust lambda to exactly match the pixel ratio of the (clipped) box.
    box_area = (hi2 - lo2) * (hi3 - lo3)
    lam = 1 - (box_area / (x.size()[-1] * x.size()[-2]))

    return x, y, y[shuffle], lam

def cutmix_criterion(criterion, pred, y_a, y_b, lam):
    """Weight the loss against both target sets: `lam` for `y_a`, `1 - lam` for `y_b`.

    Both targets are reshaped to a column before being passed to `criterion`.
    """
    loss_a = criterion(pred, y_a.view(-1, 1))
    loss_b = criterion(pred, y_b.view(-1, 1))
    return lam * loss_a + (1 - lam) * loss_b


def _build_train_folds(train_df, idxT):
    """Assemble the training rows for one fold from the sources enabled in `args`."""
    train_folds = train_df[train_df.kfold.isin(idxT)]

    # Optional filtering on the `difference` column of the 2020 data.
    if args.train_fn:
        train_folds = train_folds[train_folds.difference > args.train_fn]
        print("FN 2020 :", train_folds.shape)

    if args.train_fp:
        train_folds = train_folds[train_folds.difference < args.train_fp]
        print("FP 2020 :", train_folds.shape)

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    if args.malignant:
        malignant_df2 = pd.read_csv(args.malignant_csv)
        train_folds = pd.concat([train_folds, malignant_df2])
        print("Melignant :", train_folds.shape)

    if args.psudo_label:
        psudo_df = pd.read_csv(args.psudo_csv)
        train_folds = pd.concat([train_folds, psudo_df])
        print("Sudo :",train_folds.shape)

    if args.train_2019:
        train_2019_df = pd.read_csv(args.train_2019_csv)
        train_2019_folds = train_2019_df[train_2019_df.tfrecord != -1]
        if args.train_2019_fn:
            train_2019_folds = train_2019_folds[train_2019_folds.difference > args.train_2019_fn]
            # Report the 2019 frame that was just filtered (the old message
            # printed the shape of the unrelated 2020 frame).
            print("FN 2019 :", train_2019_folds.shape)

        if args.train_2019_fp:
            train_2019_folds = train_2019_folds[train_2019_folds.difference < args.train_2019_fp]
            print("FP 2019 :", train_2019_folds.shape)

        train_folds = pd.concat([train_folds, train_2019_folds])

    return train_folds


def main(fold, idxT, idxV):
    """Train one cross-validation fold and write its predictions to disk.

    Args:
      fold: Fold index, used in output file names and stored on `args.fold`.
      idxT: Values of the `kfold` column that form the training split.
      idxV: Values of the `kfold` column that form the validation split.

    Side effects (all under `args.save_path`):
      * `fold-{fold}.bin` -- weights of the epoch with the best validation AUC;
      * `test-pred-fold-{fold}-epoch-{epoch}.npy` -- test predictions per epoch;
      * `tta-pred-fold-{fold}.npy` -- mean of `args.TTA` test passes with the
        best weights;
      * `off-pred-fold-{fold}.csv` -- predictions on the validation split.
    It also sets `fold`, `save_path`, `cutmix` and `mixup` on `args`.
    """
    # Imported here so the script does not depend on `random` leaking in
    # through one of the star imports at the top of the file.
    import random

    # Setting seed
    seed = args.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    args.fold = fold
    args.save_path = os.path.join(args.output_dir, args.exp_name)
    os.makedirs(args.save_path, exist_ok=True)
    best_weights_path = os.path.join(args.save_path, f"fold-{args.fold}.bin")

    model = classifiers.__dict__[args.network](encoder=args.encoder)

    train_df = pd.read_csv(args.train_csv)
    test_df = pd.read_csv(args.test_csv)

    train_folds = _build_train_folds(train_df, idxT)
    valid_folds = train_df[train_df.kfold.isin(idxV)]

    train_dataset = MelanomaClassifierDataset(
        df=train_folds,
        mode="train",
        label_smoothing=args.label_smoothing,
        normalize=args.normalize,
        transforms=train_transforms(size=args.size),
        data_root=args.train_image_path
    )

    valid_dataset = MelanomaClassifierDataset(
        df=valid_folds,
        mode="valid",
        label_smoothing=args.label_smoothing,
        normalize=args.normalize,
        transforms=valid_transforms(size=args.size),
        data_root=args.train_image_path
    )

    # Test data is only ever read through the test-time-augmentation transforms.
    tta_dataset = MelanomaClassifierDatasetTest(
        df=test_df,
        normalize=args.normalize,
        transforms=test_transforms(size=args.size),
        data_root=args.test_image_path
    )

    # Weighted sum of the configured losses.
    criteria = []
    weights = []
    for loss_name, weight in args.losses.items():
        criteria.append(losses.__dict__[loss_name](reduction="mean"))
        weights.append(weight)

    loss_functions = {"classifier_loss": WeightedLosses(criteria, weights)}
    optimizer, scheduler = create_optimizer(args.optimizer, model)

    device = "cuda"
    model = model.cuda()

    if args.fp16:
        # NVIDIA apex path: `amp` must be apex's module, whose import is
        # commented out in this file, so enabling fp16 requires restoring it.
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level=args.opt_level,
                                          loss_scale='dynamic')

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=4
    )

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=args.batch_size * 2,
        drop_last=False,
        num_workers=4
    )

    tta_loader = torch.utils.data.DataLoader(
        tta_dataset,
        batch_size=args.batch_size * 2,
        drop_last=False,
        num_workers=4
    )

    best_auc = 0

    print("Training started ..... ")

    start_epoch = 0

    for epoch in range(start_epoch, args.epochs):

        # CutMix and MixUp are switched on together from the configured epoch.
        use_mixing = epoch >= args.cutmix_mixup_epoch
        args.cutmix = use_mixing
        args.mixup = use_mixing

        adjust_learning_rate(optimizer, epoch)

        train_epoch(
            args,
            model,
            train_loader,
            optimizer,
            scheduler,
            loss_functions,
            device,
            epoch
        )

        valid_auc = valid_epoch(
            args,
            model,
            valid_loader,
            loss_functions,
            device,
            epoch
        )
        print(f"Epoch : {epoch} - AUC : {valid_auc}")

        if valid_auc > best_auc:
            print(f"###***### Model Improved from {best_auc} to {valid_auc}")
            torch.save(model.state_dict(), best_weights_path)
            best_auc = valid_auc

        # Keep one set of test predictions per epoch, whether or not it improved.
        preds, img_names = test_epoch(
            args,
            model,
            tta_loader,
            device
        )
        np.save(os.path.join(args.save_path, f"test-pred-fold-{args.fold}-epoch-{epoch}.npy"), preds)

    # Restore the best checkpoint once; nothing below modifies the weights.
    # If no epoch ever beat the initial best_auc of 0, this file does not exist.
    model.load_state_dict(torch.load(best_weights_path))
    model.to(device)

    # Average several passes over the augmented test set.
    preds_list = []
    for _ in range(args.TTA):
        preds, img_names = test_epoch(
            args,
            model,
            tta_loader,
            device
        )
        preds_list.append(preds)
    final_preds = np.mean(preds_list, axis=0)
    np.save(os.path.join(args.save_path, f"tta-pred-fold-{args.fold}.npy"), final_preds)

    # Predictions on the validation split of this fold.
    preds, img_names = test_epoch(
        args,
        model,
        valid_loader,
        device
    )

    off_df = pd.DataFrame({
        "image_name": img_names,
        "prediction": preds
    })

    off_df.to_csv(os.path.join(args.save_path, f"off-pred-fold-{args.fold}.csv"), index=False)


if __name__ == "__main__":

    # Split the 15 ids 0..14 into args.folds shuffled train/validation groups;
    # main() matches them against the `kfold` column.
    fold_ids = np.arange(15)
    splitter = KFold(n_splits=args.folds, shuffle=True, random_state=args.seed)
    for fold, (idxT, idxV) in enumerate(splitter.split(fold_ids)):
        print("#"*20)
        print(f"#### FOLD {fold}")
        main(fold, idxT, idxV)


In [None]:
%%writefile config.py

class args:
    """Experiment settings, read as class attributes by train.py."""

    # --- experiment identity and output location ---
    exp_name = "Final_NS_D201_384_224_5"
    output_dir = "output"

    # --- model ---
    network = "MelanomaClassifier"
    encoder = "densenet201"

    # --- data ---
    train_image_path = "train"
    test_image_path = "test"
    train_csv = "train_csv.csv"
    train_2019_csv = "train_2019_csv.csv"
    test_csv = "test_csv.csv"
    malignant_csv = "malignat_csv.csv"
    psudo_csv = "drive/My Drive/SIIM-ISIC Melanoma Classification/input/psudo_label.csv"

    # Which extra sources train.py adds to the training split.
    malignant = True
    train_2019 = True
    psudo_label = False

    # Thresholds train.py applies to the `difference` column (rows are kept when
    # difference > *_fn and difference < *_fp).
    train_fn = -0.8
    train_fp = 0.8
    train_2019_fn = -0.7
    train_2019_fp = 0.7

    # --- preprocessing ---
    size = 224
    label_smoothing = 0.0
    normalize = {
        "mean": [0.485, 0.456, 0.406],
        "std": [0.229, 0.224, 0.225],
    }

    # --- training loop ---
    seed = 2020
    folds = 5
    epochs = 5
    batch_size = 32
    TTA = 10

    # Loss name -> weight.
    losses = {"BinaryCrossentropy": 1}

    # Settings handed to create_optimizer() by train.py.
    optimizer = {
        "type": "Adam",
        "momentum": 0.9,
        "weight_decay": 1e-5,
        "learning_rate": 0.256,
        "nesterov": True,
        "schedule": {
            "type": "poly",
            "mode": "step",
            "epoch": 5,
            "params": {"max_iter": 1500},
        },
    }

    # CUSTOM LEARNING SCHEDULE (read by lrfn() in train.py)
    LR_START = 0.00001
    LR_MAX = 0.00005
    LR_MIN = 0.00001
    LR_RAMPUP_EPOCHS = 5
    LR_SUSTAIN_EPOCHS = 0
    LR_EXP_DECAY = .8

    learning_rate = 0.00002

    # --- mixed precision ---
    fp16 = False
    opt_level = 'O3'

    # --- MixUp / CutMix ---
    # train.py overwrites `mixup` and `cutmix` every epoch: both are enabled from
    # epoch `cutmix_mixup_epoch` onwards.
    cutmix_mixup_epoch = 4
    mixup = False
    alpha = 1
    cutmix = False
    beta = 0.1

In [None]:
!python3 train.py