# Forked!
Original : https://www.kaggle.com/saife245/melanoma-detail-analysis-eda-ip-augmentation-model

    Another great EDA kernel: https://www.kaggle.com/parulpandey/melanoma-classification-eda-starter
    Also: https://www.kaggle.com/nroman/melanoma-pytorch-starter-efficientnet

For this kernel, I'm also studying PyTorch.
https://www.kaggle.com/zzy990106/pytorch-5-fold-efficientnet-baseline

There's also apparently a library of pre-trained models / architectures for PyTorch: timm

AutoAugment
https://arxiv.org/abs/1805.09501


    Pre-processed with external images: From Roman
    https://www.kaggle.com/nroman/melanoma-external-malignant-256

## TODO:
    1. Add metadata as features - age, sex, anatomy site
    2. EDA - RandomChoice -> random brightness, contrast, gamma, color jitter
    3. EDA - CLAHE (https://albumentations.readthedocs.io/en/latest/api/augmentations.html)
    4. Cutout - https://albumentations.readthedocs.io/en/latest/api/augmentations.html
    5. Saliency Maps - https://medium.com/datadriveninvestor/visualizing-neural-networks-using-saliency-maps-in-pytorch-289d8e244ab4


## History
    1. Baseline - efficientnet b1, 224x224 images, cross entropy loss with 1-2 weighting (0.86)
    2. Upsample to 256 x 256 images (0.87)
        2.1 Add
            a. Add Color Jitter
            b. Random Erasure
    3. Cross Entropy Weighting 10 - 1... Plateau patience = 2 (0.85)
    4. Label smoothing... Plateau patience = 3... Remove rotation augmentation (0.88)
    5. Add hair transformation... Resize to 240... switch to torch toolbox, label smoothing (0.87)
    6. Add metadata features, test time augmentation (0.897)
    7. Efficient Net (vanilla pretrained), TTA=5, hairs up to 10
    7. More metadata features, more test time augmentation 
    6. rollback to 256...patience to 3...epochs to 12... batch size to 128
    2. Switch to efficient_net_pytorch
    
    3. Switch to 256 MB images


## Other things:
    1. Oversample and equalize classes

In [None]:
# Notebook shell command: installs efficientnet_pytorch, timm and torchtoolbox
# (timm and torchtoolbox.transform are imported further down).
!pip install efficientnet_pytorch timm torchtoolbox

# Imports

In [None]:
import os
import gc
import json
import math
import cv2
import PIL
import re
import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
#from sklearn.metrics import cohen_kappa_score, accuracy_score
import scipy
from tqdm import tqdm
%matplotlib inline
#from keras.preprocessing import image
import glob
import tensorflow.keras.applications.densenet as dense
from kaggle_datasets import KaggleDatasets
import seaborn as sns
sns.set_style('whitegrid')

import missingno as msno

from plotly.offline import iplot
import cufflinks
cufflinks.go_offline()
cufflinks.set_config_file(world_readable=True, theme='pearl')

In [None]:
import numpy as np 
import pandas as pd 
import os
import cv2
import torch.nn.init as init
import torch
import torch.nn as nn
from PIL import Image, ImageFilter
from sklearn.model_selection import train_test_split, StratifiedKFold
from torch.utils.data import Dataset
# from torchvision import transforms
import torchtoolbox.transform as transforms
# from albumentations.augmentations.transforms import Cutout
# from torchtoolbox.transform import Cutout

from torch.optim import Adam, SGD, RMSprop
import time
from torch.autograd import Variable
import torch.functional as F
from tqdm import tqdm
from sklearn import metrics
import urllib
import pickle
import cv2
import torch.nn.functional as F
from torchvision import models
import seaborn as sns
import random
import timm
from sklearn.metrics import roc_auc_score
import sys
# sys.path.append('../input/autoaug')
# from auto_augment import AutoAugment, Cutout

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Image folders; MyDataset builds each file path as <folder> + image_name + '.jpg'.
train_path = '../input/melanoma-external-malignant-256/train/train/'
test_path = '../input/melanoma-external-malignant-256/test/test/'

# Training rows are read from train_concat.csv of the external-malignant dataset;
# the competition's own train.csv is kept below, commented out, for reference.
# train = pd.read_csv('/kaggle/input/siim-isic-melanoma-classification/train.csv')
train = pd.read_csv('/kaggle/input/melanoma-external-malignant-256/train_concat.csv')
test = pd.read_csv('/kaggle/input/siim-isic-melanoma-classification/test.csv')

# Quick sanity check of the loaded table sizes.
print('Train: ', train.shape)
print("Test:", test.shape)

## Metadata features
(From Roman)

In [None]:
# from Roman

# One-hot encoding of anatom_site_general_challenge feature.
# Train and test are encoded together so both tables get the same dummy columns.
n_train_rows = train.shape[0]
all_anatom = pd.concat([train['anatom_site_general_challenge'], test['anatom_site_general_challenge']], ignore_index=True)
dummies = pd.get_dummies(all_anatom, dummy_na=True, dtype=np.uint8, prefix='site')
train = pd.concat([train, dummies.iloc[:n_train_rows]], axis=1)
test = pd.concat([test, dummies.iloc[n_train_rows:].reset_index(drop=True)], axis=1)

# Sex features: male -> 1, female -> 0, missing/unknown -> -1
sex_codes = {'male': 1, 'female': 0}
train['sex'] = train['sex'].map(sex_codes).fillna(-1)
test['sex'] = test['sex'].map(sex_codes).fillna(-1)

# Age features.
# Both tables are divided by the SAME constant (the training maximum). Scaling
# each table by its own maximum would map the same age to different feature
# values in train and test whenever the two maxima differ.
age_scale = train['age_approx'].max()
train['age_approx'] = (train['age_approx'] / age_scale).fillna(0)
test['age_approx'] = (test['age_approx'] / age_scale).fillna(0)

# Shall we use patient ID?
# train['patient_id'] = train['patient_id'].fillna(0)

# 'site_' also matches the raw 'anatom_site_general_challenge' column, which is
# a string column and therefore has to be dropped from the feature list.
meta_features = ['sex', 'age_approx'] + [col for col in train.columns if 'site_' in col]
meta_features.remove('anatom_site_general_challenge')

# EfficientNet modeling

In [None]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, the ``PYTHONHASHSEED`` environment variable,
    NumPy and PyTorch (CPU and all CUDA devices), and configures cuDNN for
    deterministic behaviour.

    Args:
        seed: integer seed applied to every generator.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # benchmark mode lets cuDNN auto-tune and pick different convolution
    # algorithms from run to run, which defeats the purpose of seeding;
    # it has to be off for `deterministic = True` to give repeatable results.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    
# Fix all RNG seeds before any model or augmentation is created.
seed_everything(2020)

# Per-channel statistics handed to transforms.Normalize.
mean_images = [0.485, 0.456, 0.406]
std_images = [0.229, 0.224, 0.225]

# Training hyper-parameters.
num_classes = 2
bs = 64
lr = 1e-3
# IMG_SIZE = 224
IMG_SIZE = 256
n_epochs = 12
patience = 3

# positive weight: one entry per class, both set to 1
pos_weight = torch.ones(2).to(device)

# Backbone selection.
# MODEL_NAME = 'tf_efficientnet_b1_ns'
MODEL_NAME = 'tf_efficientnet_b1'
PRETRAINED = True

sample = pd.read_csv('../input/siim-isic-melanoma-classification/sample_submission.csv')

In [None]:
class MyDataset(Dataset):
    """Dataset yielding an image, its metadata vector and, for training rows, its label.

    Args:
        dataframe: table with an ``image_name`` column, the ``meta_features``
            columns and, unless ``test_phase`` is set, a ``target`` column.
        transform: optional callable applied to the decoded image.
        test_phase: when true, images are read from ``test_path`` and no
            label is returned; otherwise they are read from ``train_path``.
        meta_features: names of the metadata columns returned with each image.
            ``None`` yields an empty metadata vector.
    """

    def __init__(self, dataframe, transform=None, test_phase=False, meta_features = None):
        self.df = dataframe
        self.transform = transform
        self.test_phase = test_phase
        self.meta_features = meta_features

    def __len__(self):
        """Return the number of rows in the underlying dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(image, meta, label)`` for training data, ``(image, meta)`` in
            the test phase. ``meta`` is a float32 NumPy array.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        p = self.df.image_name.values[idx]
        meta_columns = [] if self.meta_features is None else self.meta_features
        meta = np.array(self.df.iloc[idx][meta_columns].values, dtype=np.float32)

        folder = test_path if self.test_phase else train_path
        p_path = folder + p + '.jpg'

        # cv2.imread signals a missing/corrupt file by returning None instead of
        # raising; fail here with the offending path rather than deep inside a
        # transform.
        image = cv2.imread(p_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image file: {p_path}")
        # NOTE(review): cv2.imread yields BGR channel order and the RGB conversion
        # is left disabled, as in the original - confirm this matches the
        # normalisation statistics and the hair augmentation before changing it.

        if self.transform:
            image = self.transform(image)

        if self.test_phase:
            return image, meta

        label = self.df.target.values[idx]
        return image, meta, label

    
class AverageMeter:
    """Track the latest value and the weighted running mean of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the latest value, the running sum, the count and the mean."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

## Roman's work

In [None]:
class AdvancedHairAugmentation:
    """
    Impose images of hairs onto the target image

    Args:
        hairs (int): maximum number of hairs to impose
        hairs_folder (str): path to the folder with hairs images
    """

    def __init__(self, hairs: int = 10, hairs_folder: str = ""):
        self.hairs = hairs
        self.hairs_folder = hairs_folder
        # (folder, file names) of the last directory listing; filled on first
        # use so the folder is not re-listed for every single image.
        self._hair_cache = None

    def _hair_image_names(self):
        """Return the sorted PNG file names found in ``hairs_folder`` (cached)."""
        if self._hair_cache is None or self._hair_cache[0] != self.hairs_folder:
            # Sorted so that a seeded run picks the same files regardless of
            # the order the filesystem lists them in.
            names = sorted(
                name for name in os.listdir(self.hairs_folder)
                if name.lower().endswith('.png')
            )
            self._hair_cache = (self.hairs_folder, names)
        return self._hair_cache[1]

    def __call__(self, img):
        """
        Args:
            img (numpy.ndarray): image array of shape (height, width, channels),
                as returned by ``cv2.imread``. It is modified in place.

        Returns:
            numpy.ndarray: the same array with hairs drawn on it.

        Raises:
            FileNotFoundError: if hairs are requested but ``hairs_folder``
                contains no PNG files.
        """
        n_hairs = random.randint(0, self.hairs)

        if not n_hairs:
            return img

        hair_images = self._hair_image_names()
        if not hair_images:
            raise FileNotFoundError(
                f'No PNG hair images found in "{self.hairs_folder}"'
            )

        height, width, _ = img.shape  # target image height and width

        for _hair_idx in range(n_hairs):
            hair = cv2.imread(os.path.join(self.hairs_folder, random.choice(hair_images)))
            hair = cv2.flip(hair, random.choice([-1, 0, 1]))
            hair = cv2.rotate(hair, random.choice([0, 1, 2]))

            h_height, h_width, _ = hair.shape  # hair image height and width
            if h_height > height or h_width > width:
                # The hair patch does not fit into the target image; skip it
                # instead of letting randint fail on an empty range.
                continue

            # Random top-left corner such that the patch stays inside the image
            roi_ho = random.randint(0, height - h_height)
            roi_wo = random.randint(0, width - h_width)
            roi = img[roi_ho:roi_ho + h_height, roi_wo:roi_wo + h_width]

            # Creating a mask and inverse mask
            img2gray = cv2.cvtColor(hair, cv2.COLOR_BGR2GRAY)
            _ret, mask = cv2.threshold(img2gray, 10, 255, cv2.THRESH_BINARY)
            mask_inv = cv2.bitwise_not(mask)

            # Now black-out the area of hair in ROI
            img_bg = cv2.bitwise_and(roi, roi, mask=mask_inv)

            # Take only region of hair from hair image.
            hair_fg = cv2.bitwise_and(hair, hair, mask=mask)

            # Put hair in ROI and modify the target image
            dst = cv2.add(img_bg, hair_fg)

            img[roi_ho:roi_ho + h_height, roi_wo:roi_wo + h_width] = dst

        return img

    def __repr__(self):
        return f'{self.__class__.__name__}(hairs={self.hairs}, hairs_folder="{self.hairs_folder}")'

In [None]:
# Training-time pipeline: image-space augmentations first, then conversion to a
# tensor, random erasing on the tensor and finally channel normalisation.
_train_steps = [
    AdvancedHairAugmentation(hairs_folder='/kaggle/input/melanoma-hairs/'),
    transforms.RandomResizedCrop(size=IMG_SIZE, scale=(0.8, 1.1)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    # transforms.RandomRotation(45),
    transforms.ColorJitter(brightness=0.25, saturation=0.5, contrast=0.25),
    # transforms.Cutout(scale=(0.05, 0.007), value=(0, 0)),
    transforms.ToTensor(),
    transforms.RandomErasing(scale=(0.02, 0.25)),
    transforms.Normalize(mean=mean_images, std=std_images),
]
train_transform = transforms.Compose(_train_steps)

# Deterministic pipeline: tensor conversion and normalisation only.
_test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=mean_images, std=std_images),
]
test_transform = transforms.Compose(_test_steps)

## Add a wrapper in the efficient net model
(From Roman as well)

In [None]:
class Net(nn.Module):
    """Image backbone combined with a small MLP over the metadata features.

    The backbone's ``classifier`` is replaced by a 500-unit linear layer, the
    metadata goes through a two-layer MLP ending in 250 units, and the
    concatenation of both is mapped to ``num_classes`` raw logits.

    Args:
        arch: backbone module; its ``classifier`` attribute is overwritten.
        n_meta_features: number of metadata values per sample.
        num_classes: number of output logits.
    """

    def __init__(self, arch, n_meta_features: int, num_classes: int):
        super(Net, self).__init__()
        self.arch = arch
        # Take the feature width from the backbone's existing head so that
        # backbones of other sizes work too; 1280 (the previously hard-coded
        # value) is the fallback when the head does not expose `in_features`.
        in_features = getattr(getattr(arch, 'classifier', None), 'in_features', 1280)
        self.arch.classifier = nn.Linear(in_features=in_features, out_features=500, bias=True)
        self.meta = nn.Sequential(nn.Linear(n_meta_features, 500),
                                  nn.BatchNorm1d(500),
                                  nn.ReLU(),
                                  nn.Dropout(p=0.2),
                                  nn.Linear(500, 250),
                                  nn.BatchNorm1d(250),
                                  nn.ReLU(),
                                  nn.Dropout(p=0.2))
        self.output = nn.Linear(500 + 250, num_classes)

    def forward(self, inputs):
        """Return raw logits for a ``(images, meta)`` tuple.

        No softmax/sigmoid is applied here; callers apply it when they need
        probabilities.
        """
        x, meta = inputs
        cnn_features = self.arch(x)
        meta_features = self.meta(meta)
        features = torch.cat((cnn_features, meta_features), dim=1)

        output = self.output(features)
        return output
        
#     def forward(self, inputs):
#         """
#         No sigmoid in forward because we are going to use BCEWithLogitsLoss
#         Which applies sigmoid for us when calculating a loss
#         """
#         x, meta = inputs
#         cnn_features = self.arch(x)
#         meta_features = self.meta(meta)
#         features = torch.cat((cnn_features, meta_features), dim=1)
#         output = self.ouput(features)
#         return output

In [None]:
def train_model(model, train_loader, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Uses the module-level ``optimizer``, ``criterion`` and ``device``.
    ``epoch`` is accepted but not used inside the function.

    Returns:
        The mean of the per-batch loss values (each batch weighted equally).
    """
    model.train()

    meter = AverageMeter()  # sample-weighted running loss, shown in the progress bar
    n_batches = len(train_loader)
    epoch_loss = 0.

    optimizer.zero_grad()

    progress = tqdm(train_loader, total=n_batches, position=0, leave=True)
    for images, meta, labels in progress:
        images = images.to(device)
        meta = meta.to(device)
        labels = labels.to(device).long()

        logits = model((images, meta))
        batch_loss = criterion(logits, labels)
        batch_loss.backward()

        # One optimizer step per batch; gradients are cleared right after it.
        optimizer.step()
        optimizer.zero_grad()

        loss_value = batch_loss.item()
        epoch_loss += loss_value / n_batches
        meter.update(loss_value, images.size(0))

        progress.set_postfix(loss=meter.avg)

    return epoch_loss


def test_model(model, val_loader):
    """Evaluate ``model`` on ``val_loader`` without gradient tracking.

    Uses the module-level ``criterion`` and ``device``.

    Returns:
        ``(avg_val_loss, auc)``: the mean of the per-batch losses and the
        ROC AUC of the class-1 softmax probabilities against the labels.
    """
    model.eval()

    meter = AverageMeter()  # sample-weighted running loss, shown in the progress bar
    n_batches = len(val_loader)
    mean_loss = 0.

    batch_probs = []
    batch_labels = []

    with torch.no_grad():
        progress = tqdm(val_loader, total=n_batches, position=0, leave=True)
        for images, meta, labels in progress:
            images = images.to(device)
            meta = meta.to(device)
            labels = labels.to(device).long()

            logits = model((images, meta))
            loss_value = criterion(logits, labels).item()

            mean_loss += loss_value / n_batches
            meter.update(loss_value, images.size(0))
            progress.set_postfix(loss=meter.avg)

            # Probability assigned to class 1
            positive_prob = torch.softmax(logits, 1)[:, 1]
            batch_probs.append(positive_prob.detach().cpu().numpy())
            batch_labels.append(labels.detach().cpu().numpy())

        all_probs = np.concatenate(batch_probs)
        all_labels = np.concatenate(batch_labels)
        auc = roc_auc_score(all_labels, all_probs)

    return mean_loss, auc

In [None]:
# 5-fold stratified splitter; shuffle with a fixed random_state so the same
# folds are produced each time kf.split is called on the same data.
kf = StratifiedKFold(5, shuffle=True, random_state=0)
#     train_df = train.loc[trn_ind][:10].append(train.loc[trn_ind][-10:])
#     val_df = train.loc[val_ind][:10].append(train.loc[val_ind][-10:])

# Label Smoothing Implementation
https://medium.com/towards-artificial-intelligence/how-to-use-label-smoothing-for-regularization-aa349f7f1dbb

In [None]:
import torch.nn.functional as F

def linear_combination(x, y, epsilon):
    """Blend ``x`` and ``y``: weight ``epsilon`` on ``x``, ``1 - epsilon`` on ``y``."""
    weight_y = 1 - epsilon
    return epsilon * x + weight_y * y


def reduce_loss(loss, reduction='mean'):
    """Reduce ``loss`` by mean or sum; any other ``reduction`` returns it unchanged."""
    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    return loss


class LabelSmoothingCrossEntropy(nn.Module):
    """Cross entropy whose target is blended with a uniform distribution.

    Args:
        epsilon: weight of the uniform term; ``1 - epsilon`` goes to the
            ordinary negative log-likelihood of the true class.
        reduction: reduction forwarded to ``reduce_loss`` and ``F.nll_loss``.
    """

    def __init__(self, epsilon:float=0.1, reduction='mean'):
        super().__init__()
        self.reduction = reduction
        self.epsilon = epsilon

    def forward(self, preds, target):
        """Return the smoothed loss for logits ``preds`` and class indices ``target``."""
        num_classes = preds.size()[-1]
        log_probs = F.log_softmax(preds, dim=-1)

        # Uniform term: negative log-probabilities summed over the classes,
        # reduced, then divided by the number of classes.
        summed = reduce_loss(-log_probs.sum(dim=-1), self.reduction)
        uniform_term = summed / num_classes

        # Standard term: negative log-probability of the labelled class.
        target_term = F.nll_loss(log_probs, target, reduction=self.reduction)

        return linear_combination(uniform_term, target_term, self.epsilon)

In [None]:
fold = 0
list_results = []      # one dict of metrics per (fold, epoch)
list_predictions = []

# Train one model per fold; the checkpoint with the lowest validation loss is
# written to "<fold>weight.pt".
for fold, (trn_ind, val_ind) in enumerate(kf.split(train.image_name, train.target), 1):
    print('fold:', fold)

    # kf.split yields positional indices, hence iloc. reset_index returns new
    # frames instead of mutating a slice of `train` in place.
    train_df = train.iloc[trn_ind].reset_index(drop=True)
    val_df = train.iloc[val_ind].reset_index(drop=True)

    trainset = MyDataset(train_df, transform=train_transform, meta_features=meta_features)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=bs, shuffle=True, num_workers=4)

    valset = MyDataset(val_df, transform=test_transform, meta_features=meta_features)
    val_loader = torch.utils.data.DataLoader(valset, batch_size=bs, shuffle=False, num_workers=4)

    model = timm.create_model(MODEL_NAME, pretrained=PRETRAINED, num_classes=num_classes)
    model = Net(model, len(meta_features), num_classes)
    model.to(device)

    # train_model / test_model read `optimizer` and `criterion` as globals.
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=0.001)
#     criterion = nn.CrossEntropyLoss(weight=pos_weight)
    criterion = LabelSmoothingCrossEntropy()
#     criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=2, mode='min',verbose=True)

    best_loss = np.inf
    es = 0  # consecutive epochs without a validation-loss improvement

    for epoch in range(n_epochs):
        avg_loss = train_model(model, train_loader, epoch)
        avg_val_loss, auc = test_model(model, val_loader)

        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            torch.save(model.state_dict(), str(fold) + 'weight.pt')
            es = 0
        else:
            es += 1

        # Record and report the epoch BEFORE deciding to stop, so the epoch
        # that triggers early stopping is not missing from the metrics.
        dict_result = {"fold" : fold, "epoch" : epoch, "train loss" : avg_loss, "val loss": avg_val_loss, "auc": auc}
        list_results.append(dict_result)
        print("epoch={},train_loss={},val_loss={},best_val_loss={},val_auc={}".format(epoch, avg_loss, avg_val_loss, best_loss, auc))

        # Early stopping: more than `patience` epochs in a row without improvement.
        if es > patience:
            break

        scheduler.step(avg_val_loss)

# Predictions from all validation sets

In [None]:
# store indexes for testing later

# One frame per fold with columns row_id / set / fold, listing the training
# rows first and the validation rows after them.
indexes_to_store = []
for fold, (trn_ind, val_ind) in enumerate(kf.split(train.image_name, train.target), 1):
    # Build the "set" column explicitly. The previous `.loc[:len(trn_ind)]`
    # assignment used an end-inclusive label slice and therefore tagged the
    # first validation row of every fold as 'train'.
    df_indexes = pd.DataFrame({
        "row_id": np.hstack((trn_ind, val_ind)),
        "set": ["train"] * len(trn_ind) + ["val"] * len(val_ind),
    })
    df_indexes["fold"] = fold
    indexes_to_store.append(df_indexes)

df_indexes = pd.concat(indexes_to_store)
df_indexes.to_csv("indexes.csv", index=False)

In [None]:
# Out-of-fold predictions: each fold's saved model scores its own validation rows.
val_preds = []

for fold in range(1, 6):
    # get indexes
    val_ind = df_indexes.loc[(df_indexes["fold"] == fold) & (df_indexes["set"] == "val"), "row_id"]
    val_df = train.loc[val_ind].reset_index(drop=True)
    valset = MyDataset(val_df, transform=test_transform, meta_features=meta_features)
    val_loader = torch.utils.data.DataLoader(valset, batch_size=bs, shuffle=False, num_workers=4)

    # predict
    model = timm.create_model(MODEL_NAME, pretrained=PRETRAINED, num_classes=num_classes)
    model = Net(model, len(meta_features), num_classes)
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
    model.load_state_dict(torch.load("./{}weight.pt".format(fold), map_location=device))
    model.to(device)
    # Inference mode: without this, Dropout stays active and BatchNorm uses
    # (and updates) batch statistics, so the predictions are noisy.
    model.eval()

    batch_df = []
    with torch.no_grad():
        for i, (imgs, meta, labels) in enumerate(tqdm(val_loader, position=0, leave=True)):
            imgs_valid, meta_valid, labels_valid = imgs.to(device), meta.to(device), labels.to(device).long()

            pred = model((imgs_valid, meta_valid))

            # accumulate predictions
            df_val_preds_batch = pd.DataFrame(torch.softmax(pred,1).cpu().detach().numpy(), columns = ["val0", "val1"])
            df_val_preds_batch["actual"] = np.array(labels_valid.cpu())
            df_val_preds_batch["fold"] = fold
            batch_df.append(df_val_preds_batch)

    # add indexes to predictions (the loader is not shuffled, so the order matches val_ind)
    df_val_preds = pd.concat(batch_df)
    df_val_preds["index"] = val_ind.values
    val_preds.append(df_val_preds)

pd.concat(val_preds).to_csv("val_predictions.csv", index=False)

# Submission

In [None]:
# Rebuild the network once per fold and load that fold's best checkpoint.
all_models = []
for i in range(1, 6):
    model = timm.create_model(MODEL_NAME, pretrained=PRETRAINED, num_classes=num_classes)
    model = Net(model, len(meta_features), num_classes)
    model.to(device)
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
    model.load_state_dict(torch.load("./{}weight.pt".format(i), map_location=device))
    # These models are only used for prediction: switch Dropout/BatchNorm to
    # inference behaviour.
    model.eval()
    all_models.append(model)

In [None]:
# The test set is read through train_transform (random augmentations), so every
# pass over test_loader sees a differently augmented copy of each image.
# shuffle=False keeps batches in the row order of `test`.
testset      = MyDataset(test, transform=train_transform, test_phase=True, meta_features=meta_features)
test_loader  = torch.utils.data.DataLoader(testset, batch_size=bs, shuffle=False, num_workers=4)

In [None]:
TTA = 5  # number of augmented passes over the test set per model
list_pred = []
preds = torch.zeros((len(test)), dtype=torch.float32, device=device)  # Predictions for the test set

with torch.no_grad():
    for model in all_models:
        # Inference mode: Dropout off, BatchNorm uses its running statistics.
        model.eval()
        for _ in range(TTA):
            for i, (imgs, meta) in enumerate(tqdm(test_loader, position=0, leave=True)):
                imgs_test, meta_test = imgs.to(device), meta.to(device)

                pred = model((imgs_test, meta_test))

                # accumulate the class-1 probability for this batch's rows
                pred = torch.softmax(pred,1)[:,1]
                preds[i*bs: (i+1)*bs] += pred

    # Average exactly once over every (model, TTA pass) pair. Dividing by TTA
    # inside the model loop rescaled the running total after each model, so
    # earlier models were divided repeatedly and the last one dominated.
    preds /= TTA * len(all_models)

In [None]:
# test_pred = np.zeros((len(sample),))

# for i, (imgs, meta) in enumerate(tqdm(test_loader, position=0, leave=True)):
    
#     list_pred = []
    
#     for model in all_models:
#         imgs_test, meta_test = imgs.to(device), meta.to(device)

#         pred = model((imgs_test, meta_test))

#         # accumulate predictions
#         pred = torch.softmax(pred,1).cpu().detach().numpy()[:,1]
#         list_pred.append(pred)
        
#     test_pred[i*bs: (i+1)*bs] = np.array(list_pred).sum(axis=0)

In [None]:
# test_pred = np.zeros((len(sample),))

# with torch.no_grad():
#     for i, data in enumerate(tqdm(test_loader, position=0, leave=True)):
#         images, _ = data
#         images = images.to(device)
        
#         pred = np.zeros((len(val_ind),))
        
#         pred = (model1(images) + model2(images) + model3(images) + model4(images) + model5(images)) \
#              + (model1(images) + model2(images) + model3(images) + model4(images) + model5(images)) \
#              + (model1(images) + model2(images) + model3(images) + model4(images) + model5(images)) \
#              + (model1(images) + model2(images) + model3(images) + model4(images) + model5(images)) 
        
#         pred = torch.softmax(pred,1).cpu().detach().numpy()[:,1]
    
#         test_pred[i*bs: (i+1)*bs] = pred

In [None]:
# Preview the first ten test predictions.
print(preds[:10])

# Copy the predictions to host memory and store them as the target column.
test_probabilities = preds.detach().cpu().numpy()
sample["target"] = test_probabilities

sample.to_csv('submission.csv', index=False)

In [None]:
# Persist the per-fold, per-epoch metrics collected in list_results during training.
pd.DataFrame(list_results).to_csv("metrics.csv", index=False)