In [None]:
# NOTE(review): leftover scratch cell -- its only effect is to display the literal 1.
1

In [None]:
%%capture
%%bash
# Fetch and run the PyTorch/XLA environment setup script (nightly build, with the
# libomp5 and libopenblas-dev apt packages), then install the extra Python
# packages imported by this notebook. All output is swallowed by %%capture.
# NOTE(review): none of the pip packages are version-pinned, while the notebook
# relies on pl.TrainResult / pl.EvalResult, which presumably exist only in
# specific pytorch-lightning releases -- confirm and pin.
curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
python pytorch-xla-env-setup.py --version nightly --apt-packages libomp5 libopenblas-dev
pip install efficientnet_pytorch torchtoolbox pytorch-lightning

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import cv2
import random
import os

from tqdm.notebook import tqdm
import multiprocessing as mp

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import _LRScheduler

import pytorch_lightning as pl 
from pytorch_lightning import Trainer
from pytorch_lightning.core.lightning import LightningModule
from pytorch_lightning.metrics import AUROC
from pytorch_lightning.metrics.functional import accuracy, auroc
# from pytorch_lightning.logging import TensorBoardLogger
from pytorch_lightning import loggers as pl_loggers
from pytorch_lightning.callbacks.early_stopping import EarlyStopping


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# import torch_xla.core.xla_model as xm
# device = xm.xla_device()

import albumentations as A
from albumentations.pytorch import ToTensorV2

from efficientnet_pytorch import EfficientNet

import warnings
warnings.simplefilter('ignore')
%matplotlib inline

import pytorch_lightning as pl
pl.__version__

In [None]:
# Notebook-wide configuration.
# EfficientNet variant handed to EfficientNet.from_pretrained() by Model.
BASE = "efficientnet-b0"
# Epoch count for the fine-tuning Trainer; also sizes the one-cycle schedule.
EPOCHS = 10
# Passed to Trainer(accumulate_grad_batches=...) and used to scale the scheduler length.
GRAD_ACCUMULATE = 1
# Batch size used for the train/validation/test DataLoaders.
BS = 16
# Probability given as p= to the stochastic training augmentations.
p = 0.3
# Default lr_range of get_optimizer: [lr of the first backbone block, lr of the fc head].
LR_RANGE = [1e-7, 2e-4]

In [None]:
warnings.simplefilter('ignore')


def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and configures cuDNN for deterministic behaviour.

    Args:
        seed (int): seed value applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes convolution algorithms and may pick different
    # ones from run to run, which would undo the determinism requested above.
    torch.backends.cudnn.benchmark = False


seed_everything(42)

## Data

In [None]:
# Training-time augmentation pipeline. Every stochastic stage fires with the
# notebook-level probability `p`; normalisation and tensor conversion always run.
train_tfms = A.Compose([
    A.Cutout(p=p),
    A.RandomRotate90(p=p),
    A.Flip(p=p),
    # Colour jitter: one of brightness/contrast or hue/saturation/value.
    A.OneOf([
        A.RandomBrightnessContrast(brightness_limit=0.2,
                                   contrast_limit=0.2,
                                   ),
        A.HueSaturationValue(
            hue_shift_limit=20,
            sat_shift_limit=50,
            val_shift_limit=50)
    ], p=p),
    # Additive noise.
    A.OneOf([
        A.IAAAdditiveGaussianNoise(),
        A.GaussNoise(),
    ], p=p),
    # Light blur.
    A.OneOf([
        A.MedianBlur(blur_limit=3, p=0.1),
        A.Blur(blur_limit=3, p=0.1),
    ], p=p),
    A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=45, p=p),
    # Geometric distortion.
    A.OneOf([
        A.OpticalDistortion(p=0.3),
        A.GridDistortion(p=0.1),
        A.IAAPiecewiseAffine(p=0.3),
    ], p=p), 
    # NOTE(review): these look like the usual ImageNet channel statistics -- confirm
    # they match what the pretrained backbone expects.
    A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
    ToTensorV2()
])

# Deterministic pipeline (normalisation + tensor conversion only); used for the
# validation split and the test set.
test_tfms = A.Compose([
    A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
    ToTensorV2()
])

In [None]:
class MelanomaDataset(Dataset):
    """Dataset that loads ``<imfolder>/<image_name>.jpg`` for each DataFrame row.

    Images are read with OpenCV, converted from BGR to RGB and, when a
    transform is configured, passed through ``transforms(image=...)['image']``.
    """

    def __init__(self, df: pd.DataFrame, imfolder: str, train: bool = True, transforms = None, meta_features = None):
        """
        Class initialization
        Args:
            df (pd.DataFrame): DataFrame with data description; must contain an
                ``image_name`` column and, when ``train`` is True, a ``target`` column
            imfolder (str): folder with images
            train (bool): flag of whether a training dataset is being initialized or testing one
            transforms: image transformation method to be applied
            meta_features (list): list of features with meta information, such as sex and age
                (stored, but not used by ``__getitem__``)

        """
        self.df = df
        self.imfolder = imfolder
        self.transforms = transforms
        self.train = train
        self.meta_features = meta_features

    def __getitem__(self, index):
        """Return ``(image, target)`` when ``train`` is True, otherwise ``image``.

        Raises:
            FileNotFoundError: if the image file is missing or cannot be decoded.
        """
        # Fetch the row once; it is needed for both the file name and the target.
        row = self.df.iloc[index]
        im_path = os.path.join(self.imfolder, row['image_name'] + '.jpg')

        image = cv2.imread(im_path)
        # cv2.imread signals failure by returning None instead of raising, which
        # would otherwise surface as an opaque error inside cvtColor.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {im_path}")
        x = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transforms:
            x = self.transforms(image=x)['image']

        if self.train:
            return x, row['target']
        return x

    def __len__(self):
        """Number of rows in the backing DataFrame."""
        return len(self.df)

In [None]:
# Load the train/test metadata tables.
train_df = pd.read_csv('/kaggle/input/jpeg-melanoma-256x256/train.csv')
test_df = pd.read_csv('/kaggle/input/jpeg-melanoma-256x256/test.csv')
# Drop rows whose tfrecord id is -1 and take an explicit copy, so the dtype cast
# below writes to an independent frame rather than to a slice of the raw table.
train_df = train_df.loc[train_df.tfrecord != -1].copy()
# The loss operates on float targets.
train_df['target'] = train_df['target'].astype(np.float32)

In [None]:
# Hold out three randomly drawn tfrecord ids for validation; every other id
# (in its original order) goes to training.
idx = train_df.tfrecord.unique()
val_idx = np.random.choice(idx, size=3, replace=False)
train_idx = idx[~np.isin(idx, val_idx)]

## Loss

In [None]:
# Inverse class frequencies: a1 weights targets equal to 1 (1 / mean target),
# a2 weights targets equal to 0 (1 / (1 - mean target)).
a1, a2 = (
    1 / share
    for share in (train_df["target"].mean(), 1 - train_df["target"].mean())
)

class WeightedFocalLoss(nn.Module):
    """Class-weighted focal loss for binary classification on raw logits.

    For every element the loss is ``at * (1 - pt) ** gamma * BCE`` where ``BCE``
    is the binary cross-entropy with logits, ``pt = exp(-BCE)`` and ``at`` is
    ``a1`` for targets equal to 1 and ``a2`` for targets equal to 0. The mean
    over all elements is returned.

    Args:
        a1: weight applied to positive targets.
        a2: weight applied to negative targets.
        gamma: focusing exponent applied to ``(1 - pt)``.
    """
    def __init__(self, a1=1, a2=1, gamma=1.1):
        super().__init__()
        # Keep plain Python floats so NumPy scalars (e.g. a pandas mean) cannot
        # influence dtype promotion or device placement in forward().
        self.a1, self.a2 = float(a1), float(a2)
        self.gamma = gamma

    def forward(self, inputs, targets):
        """Return the mean weighted focal loss.

        Args:
            inputs: logits; flattened to 1-D.
            targets: binary targets with the same number of elements as
                ``inputs``; flattened and cast to the dtype of ``inputs``.
        """
        # reshape(-1) instead of squeeze() keeps a batch dimension even when the
        # batch holds a single sample.
        inputs = inputs.reshape(-1)
        targets = targets.reshape(-1).to(inputs.dtype)

        BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        at = self.a1 * targets + self.a2 * (1 - targets)
        pt = torch.exp(-BCE_loss)
        F_loss = at * (1 - pt) ** self.gamma * BCE_loss

        return F_loss.mean()

## Model

In [None]:
class Model(LightningModule):
    """Pretrained EfficientNet feature extractor followed by a one-logit linear head.

    Args:
        loss_params: positional arguments forwarded to ``WeightedFocalLoss``.
        base: EfficientNet variant name given to ``EfficientNet.from_pretrained``.
        freeze: when True, all backbone parameters start with ``requires_grad=False``.
    """

    def __init__(self, loss_params, base=BASE, freeze=True):
        super().__init__()

        # Backbone
        self.base = EfficientNet.from_pretrained(base)

        if freeze:
            self._set_trainable(self.base, False)

        # Own classification head, sized from the backbone's original `_fc` layer.
        self.fc = nn.Linear(self.base._fc.in_features, 1)
        self.loss = WeightedFocalLoss(*loss_params)
        self.auroc = AUROC()

    @staticmethod
    def _set_trainable(module, flag):
        """Set ``requires_grad`` to ``flag`` on every parameter of ``module``."""
        for param in module.parameters():
            param.requires_grad = flag

    def unfreeze(self):
        """Make the backbone trainable, except for its `_fc` layer (forward() never calls it)."""
        self._set_trainable(self.base, True)
        self._set_trainable(self.base._fc, False)

    def forward(self, x):
        """Return one logit per sample: backbone features -> global average pool -> ``fc``."""
        features = self.base.extract_features(x)
        pooled = F.adaptive_avg_pool2d(features, 1)
        return self.fc(pooled.view(x.shape[0], -1))

    def configure_optimizers(self):
        """Adam (lr=1e-3) over the parameters that currently require gradients."""
        trainable = [param for param in self.parameters() if param.requires_grad]
        return torch.optim.Adam(trainable, lr=1e-3)

    def _batch_loss(self, batch):
        """Run the forward pass on an ``(x, y)`` batch and return its loss."""
        x, y = batch
        return self.loss(self.forward(x), y)

    def validation_step(self, batch, batch_idx):
        """Log ``val_loss`` (progress bar) and use it as the checkpointing metric."""
        loss = self._batch_loss(batch)
        result = pl.EvalResult(checkpoint_on=loss)
        result.log("val_loss", loss, prog_bar=True)
        return result

    def training_step(self, batch, batch_idx):
        """Minimise the batch loss and log it as ``train_loss``."""
        loss = self._batch_loss(batch)
        result = pl.TrainResult(loss)
        result.log("train_loss", loss)
        return result


model = Model(loss_params=(a1, a2))

In [None]:
class MelanomaData():
    """Builds the train/validation datasets from tfrecord ids and serves their DataLoaders.

    Args:
        train_df: metadata table with ``tfrecord`` (and the dataset's) columns.
        train_idx: tfrecord ids whose rows form the training set.
        val_idx: tfrecord ids whose rows form the validation set.
        train_tfms: transform applied to training images.
        test_tfms: transform applied to validation images.
        batch_size: batch size of both loaders.
    """

    def __init__(self, train_df, train_idx, val_idx, train_tfms, test_tfms, batch_size=BS):
        image_dir = '/kaggle/input/jpeg-melanoma-256x256/train/'

        def rows_for(tfrecord_ids):
            # Rows belonging to the given tfrecord ids, re-indexed from zero.
            selected = train_df.tfrecord.isin(tfrecord_ids)
            return train_df.loc[selected].reset_index(drop=True)

        self.train_ds = MelanomaDataset(
            rows_for(train_idx), image_dir, train=True, transforms=train_tfms
        )
        # Validation rows also carry targets, hence train=True here as well.
        self.valid_ds = MelanomaDataset(
            rows_for(val_idx), image_dir, train=True, transforms=test_tfms
        )
        self.batch_size = batch_size

    def train_dataloader(self):
        """Shuffled training loader; a trailing partial batch is dropped."""
        return DataLoader(self.train_ds, batch_size=self.batch_size, shuffle=True, drop_last=True)

    def val_dataloader(self):
        """Unshuffled validation loader; a trailing partial batch is dropped as well."""
        return DataLoader(self.valid_ds, batch_size=self.batch_size, drop_last=True)


data = MelanomaData(train_df, train_idx, val_idx, train_tfms, test_tfms, batch_size=BS)

In [None]:
# model.hparams.lr = 2e-3
# early_stopping = EarlyStopping('val_loss')
# Stage 1: fit `model` (created with the default freeze=True, so the backbone is
# frozen) for a single epoch, running validation twice per epoch
# (val_check_interval=0.5).
# NOTE(review): this Trainer requests 8 TPU cores whereas the later fine-tuning
# Trainer uses gpus=1 -- confirm which accelerator is intended.
trainer = Trainer(tpu_cores=8, max_epochs=1, val_check_interval=0.5, accumulate_grad_batches=GRAD_ACCUMULATE)
trainer.fit(model, data.train_dataloader(), data.val_dataloader())

In [None]:
# Run the stage-1 model over the validation loader and keep one target tensor
# and one logit tensor per sample (stacked into tensors in the next cell).
model = model.to(device)
model.eval()
ys, y_preds = [], []
with torch.no_grad():
    for x, y in tqdm(data.val_dataloader()):
        x = x.to(device)
        y = y.to(device)
        y_pred = model(x)
        # Iterating a batch tensor yields its per-sample slices.
        ys += list(y)
        y_preds += list(y_pred)

In [None]:
# Turn the per-sample lists into tensors. The guards make the cell safe to
# re-run: once stacked, the names already hold tensors, and torch.stack expects
# a sequence of tensors rather than a single tensor.
if not torch.is_tensor(y_preds):
    y_preds = torch.stack(y_preds)
if not torch.is_tensor(ys):
    ys = torch.stack(ys)
# auc = auroc(y_preds.squeeze(), ys.squeeze())

In [None]:
# `ys` was already stacked into a tensor by the previous cell, so just slice it.
ys[:10]

In [None]:
# Snapshot the stage-1 parameters, in `model.parameters()` order, as detached
# copies so the snapshot carries no autograd link to the live parameters.
weights = [param.detach().clone() for param in model.parameters()]

In [None]:
# Re-enable gradients on the stage-1 model's backbone (Model.unfreeze keeps
# base._fc frozen).
# NOTE(review): fine-tuning below runs on a separate UnfrozenModel instance, so
# this call looks redundant -- confirm before removing.
model.unfreeze()

In [None]:
def get_optimizer(model, lr_range=LR_RANGE):
    """Build an Adam optimizer with geometrically spaced per-block learning rates.

    Trainable backbone parameters are grouped into blocks in order of first
    appearance in ``model.base.named_parameters()``: each ``_blocks.<i>`` entry
    is its own block, any other parameter belongs to the block named after its
    top-level attribute. The ``fc`` head is appended as the final block. Block
    ``i`` receives ``lr_range[0] * mul ** i``, so the first block trains at
    ``lr_range[0]`` and the head at ``lr_range[1]``.

    Args:
        model: module exposing a ``base`` backbone and an ``fc`` head.
        lr_range: ``[lowest_lr, highest_lr]``.

    Returns:
        torch.optim.Adam with one parameter group per parameter.
    """
    blocks = []
    for name, param in model.base.named_parameters():
        if not param.requires_grad:
            continue
        if name.startswith("_blocks."):
            # "_blocks.<i>.<rest>" -> "_blocks.<i>"
            key = ".".join(name.split(".", maxsplit=2)[:2])
        else:
            key = name.split(".", maxsplit=1)[0]
        if key not in blocks:
            blocks.append(key)

    # The trailing dot keeps e.g. "base._blocks.1." from matching "base._blocks.10.".
    prefixes = ["base." + block + "." for block in blocks] + ["fc."]

    if len(prefixes) == 1:
        # Only the head is trainable: there is nothing to interpolate across, and
        # the general formula would divide by zero. Use the head's learning rate.
        lrs = [lr_range[1]]
    else:
        mul = (lr_range[1] / lr_range[0]) ** (1 / (len(prefixes) - 1))
        lrs = [lr_range[0] * mul ** i for i in range(len(prefixes))]

    param_list = []
    for lr, prefix in zip(lrs, prefixes):
        param_list.extend(
            {'params': param, 'lr': lr}
            for name, param in model.named_parameters()
            if name.startswith(prefix)
        )

    return torch.optim.Adam(param_list)

In [None]:
class OneCycleScheduler(_LRScheduler):
    """One-cycle schedule for the learning rate and Adam's first beta.

    Each parameter group's learning rate at construction time is taken as its
    peak. The schedule starts at ``peak / div_factor`` and then:

    1. steps ``1..cutoff1``: lr rises linearly to the peak while beta1 falls from
       ``max_beta`` to ``min_beta``;
    2. steps ``cutoff1+1..cutoff2``: lr falls back to ``peak / div_factor`` while
       beta1 rises back to ``max_beta``;
    3. steps ``cutoff2+1..n_rounds``: lr decays linearly towards 1% of
       ``peak / div_factor``.

    Calls to ``step()`` beyond ``n_rounds`` leave the optimizer untouched.

    Args:
        optimizer: optimizer whose param groups carry the peak learning rates.
        n_rounds (int): total number of scheduler steps; must be at least 1.
        max_beta: beta1 at the start and end of the cycle.
        min_beta: beta1 at the learning-rate peak.
        div_factor: ratio between peak and starting learning rate.

    Raises:
        ValueError: if ``n_rounds`` is smaller than 1.
    """
    def __init__(self, optimizer, n_rounds, max_beta=0.95, min_beta=0.85, div_factor=10.0):
        if n_rounds < 1:
            raise ValueError(f"n_rounds must be at least 1, got {n_rounds}")

        # The base-class __init__ is not invoked; all state used by step() is set here.
        self.optimizer = optimizer
        self.n_rounds = n_rounds
        self.max_lr = [grp['lr'] for grp in optimizer.param_groups]
        self.min_lr = [lr / div_factor for lr in self.max_lr]
        # initialise lrs
        for grp, lr in zip(self.optimizer.param_groups, self.min_lr):
            grp['lr'] = lr
            grp['betas'] = (max_beta, 0.999)

        self.min_beta = min_beta
        self.max_beta = max_beta

        # At least one warm-up step, otherwise short schedules divide by zero.
        self.cutoff1 = max(1, int(n_rounds * 0.3))
        if n_rounds < 20:
            # Short schedules skip the final decay phase.
            self.cutoff2 = n_rounds
        else:
            self.cutoff2 = int(n_rounds * 0.95)

        self.k = 0

        # max(1, ...) only matters for n_rounds == 1, where phase 2 is never entered.
        anneal_steps = max(1, self.cutoff2 - self.cutoff1)

        gaps = [hi - lo for hi, lo in zip(self.max_lr, self.min_lr)]
        gaps2 = [lo - lo / 100 for lo in self.min_lr]
        # movement of learning rate and momentum
        self.step_up_lr = [gap / self.cutoff1 for gap in gaps]
        self.step_down_lr1 = [gap / anneal_steps for gap in gaps]
        # The epsilon keeps the division defined when there is no decay phase.
        self.step_down_lr2 = [gap / (n_rounds - self.cutoff2 + 1e-8) for gap in gaps2]
        self.step_down_beta = (max_beta - min_beta) / self.cutoff1
        self.step_up_beta = (max_beta - min_beta) / anneal_steps

    def step(self):
        """Advance the schedule by one step, updating lr and beta1 of every param group."""
        self.k += 1
        if self.k > self.n_rounds:
            # Schedule exhausted: hold the final values. Continuing to subtract
            # would push the learning rate below zero.
            return
        if self.k <= self.cutoff1:
            for grp, d_lr in zip(self.optimizer.param_groups, self.step_up_lr):
                grp['lr'] += d_lr
                grp['betas'] = (grp['betas'][0] - self.step_down_beta, 0.999)
        elif self.k <= self.cutoff2:
            for grp, d_lr in zip(self.optimizer.param_groups, self.step_down_lr1):
                grp['lr'] -= d_lr
                grp['betas'] = (grp['betas'][0] + self.step_up_beta, 0.999)
        else:
            for grp, d_lr in zip(self.optimizer.param_groups, self.step_down_lr2):
                grp['lr'] -= d_lr

In [None]:
# Number of training batches per epoch. Read it from `data`, which owns the
# loaders; `Model` itself defines no train_dataloader().
steps_per_epoch = len(data.train_dataloader())

In [None]:
class UnfrozenModel(Model):
    """``Model`` variant for fine-tuning: backbone trainable, per-block lrs, one-cycle schedule."""

    def __init__(self, loss_params, base=BASE, freeze=False):
        super().__init__(loss_params, base, freeze)
        # The backbone's own `_fc` layer stays frozen regardless of `freeze`.
        for param in self.base._fc.parameters():
            param.requires_grad = False

    def configure_optimizers(self):
        """Return the block-wise Adam optimizer and a per-step one-cycle scheduler."""
        optimizer = get_optimizer(self)
        # Optimizer steps over the whole run, given gradient accumulation.
        total_steps = EPOCHS * steps_per_epoch // GRAD_ACCUMULATE
        lr_schedule = {
            'scheduler': OneCycleScheduler(optimizer, total_steps),
            "interval": "step",
        }
        return [optimizer], [lr_schedule]


model2 = UnfrozenModel(loss_params=(a1, a2))

In [None]:
# Copy the stage-1 snapshot into the fine-tuning model, parameter by parameter.
# Loop names are chosen so the notebook-level augmentation probability `p` is
# not overwritten, and values are copied rather than aliased to the snapshot.
if len(weights) != len(list(model2.parameters())):
    raise ValueError("model2 and the saved weights have different numbers of parameters")
with torch.no_grad():
    for target_param, saved in zip(model2.parameters(), weights):
        target_param.copy_(saved)

In [None]:
# early_stopping = EarlyStopping('val_loss')
# Stage 2: fine-tune the unfrozen model for EPOCHS epochs.
trainer = Trainer(gpus=1, max_epochs=EPOCHS, accumulate_grad_batches=GRAD_ACCUMULATE)
# model2 defines no dataloader hooks of its own, so the loaders are passed
# explicitly, exactly as for the stage-1 fit.
trainer.fit(model2, data.train_dataloader(), data.val_dataloader())

In [None]:
# Score the fine-tuned model on the validation split.
ys = []
y_preds = []
model2 = model2.to(device)
model2.eval()
with torch.no_grad():
    # Use the loader owned by `data`, as in the stage-1 evaluation; model2 has
    # no val_dataloader() of its own.
    for x, y in tqdm(data.val_dataloader()):
        x, y = x.to(device), y.to(device)
        y_pred = model2(x)
        # extend() iterates over the batch, storing one tensor per sample.
        ys.extend(y)
        y_preds.extend(y_pred)

y_preds = torch.stack(y_preds)
ys = torch.stack(ys)
auc = auroc(y_preds.squeeze(), ys.squeeze())
# This runs after fine-tuning, so the score is not an "initial" one.
print(f"Validation AUC after fine-tuning is {auc:.4f}")

In [None]:
# Test split: no targets (train=False), deterministic transforms, original row order.
test = MelanomaDataset(
    df=test_df,
    imfolder='/kaggle/input/jpeg-melanoma-256x256/test/',
    train=False,
    transforms=test_tfms,
)
test_loader = DataLoader(test, batch_size=BS, shuffle=False, num_workers=4)

In [None]:
# Predict on the test set and write the submission file.
y_preds = []
model2 = model2.to(device)
model2.eval()
with torch.no_grad():
    for x in tqdm(test_loader):
        x = x.to(device)
        y_pred = model2(x)
        # extend() iterates over the batch, storing one logit tensor per sample.
        y_preds.extend(y_pred)

# Stacking the per-sample logits gives an (N, 1) tensor; flatten it so the
# column assignment receives exactly one probability per row.
probs = torch.sigmoid(torch.stack(y_preds)).reshape(-1).cpu().numpy()
test_df['target'] = probs
test_df[['image_name', 'target']].to_csv('submission.csv', index=False)