In [1]:
# Install into the running kernel's environment (%pip, not !pip) and pin every
# package so a fresh run resolves the same versions this notebook was run with.
%pip install -q wtfml==0.0.2
%pip install -q pretrainedmodels==0.7.4

Collecting wtfml==0.0.2
  Downloading wtfml-0.0.2-py3-none-any.whl (8.1 kB)
Installing collected packages: wtfml
Successfully installed wtfml-0.0.2
You should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.[0m
Collecting pretrainedmodels
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[K     |████████████████████████████████| 58 kB 1.6 MB/s 
Building wheels for collected packages: pretrainedmodels
  Building wheel for pretrainedmodels (setup.py) ... [?25l- \ done
[?25h  Created wheel for pretrainedmodels: filename=pretrainedmodels-0.7.4-py3-none-any.whl size=60962 sha256=b8fc3ba908434900423ed3582365b5ee3e50f7cc77921dd931783d487c9777e9
  Stored in directory: /root/.cache/pip/wheels/ed/27/e8/9543d42de2740d3544db96aefef63bda3f2c1761b3334f4873
Successfully built pretrainedmodels
Installing collected packages: pretrainedmodels
Successfully installed pretrainedmodels-0.7.4
You should consider upgrading via the '/opt/conda

In [2]:
# Pinned to the version this notebook was originally run with.
%pip install -q efficientnet_pytorch==0.6.3

Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.6.3.tar.gz (16 kB)
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l- \ done
[?25h  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.6.3-py3-none-any.whl size=12419 sha256=9950e1a37afd950f6d7f1a7a571c6723417bd673c1774b79009ae0f2476d7c68
  Stored in directory: /root/.cache/pip/wheels/90/6b/0c/f0ad36d00310e65390b0d4c9218ae6250ac579c92540c9097a
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.6.3
You should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.[0m


In [3]:
import os
import torch
import albumentations

import numpy as np
import pandas as pd

import torch.nn as nn
from sklearn import metrics
from sklearn import model_selection
from torch.nn import functional as F

from wtfml.utils import EarlyStopping
from wtfml.engine import Engine
from wtfml.data_loaders.image import ClassificationLoader
import efficientnet_pytorch
import pretrainedmodels

In [4]:
class SEResnext50_32x4d(nn.Module):
    """SE-ResNeXt50 (32x4d) backbone followed by a single-logit linear head.

    The backbone is always constructed with ``pretrained=None`` (nothing is
    downloaded); when ``pretrained`` is not ``None`` its weights are then read
    from a local checkpoint file.

    Args:
        pretrained: Any value other than ``None`` triggers loading of the local
            checkpoint. The value itself is not inspected.
    """

    def __init__(self, pretrained='imagenet'):
        super(SEResnext50_32x4d, self).__init__()

        self.base_model = pretrainedmodels.__dict__[
            "se_resnext50_32x4d"
        ](pretrained=None)
        if pretrained is not None:
            # map_location="cpu" makes loading independent of the device the
            # checkpoint was saved from; the caller moves the model afterwards.
            self.base_model.load_state_dict(
                torch.load(
                    "../input/pretrained-model-weights-pytorch/se_resnext50_32x4d-a260b3a4.pth",
                    map_location="cpu",
                )
            )

        # 2048 must match the channel count produced by the backbone's
        # feature extractor after global pooling.
        self.l0 = nn.Linear(2048, 1)

    def forward(self, image, targets=None):
        """Return ``(logits, loss)`` for a batch of images.

        Args:
            image: Batch tensor; its first dimension is the batch size.
            targets: Optional binary labels, one per image. When omitted
                (pure inference) no loss is computed.

        Returns:
            Tuple ``(out, loss)`` where ``out`` has shape ``(batch, 1)`` and
            ``loss`` is the BCE-with-logits loss, or ``None`` if ``targets``
            is ``None``.
        """
        batch_size = image.shape[0]

        x = self.base_model.features(image)
        # Global average pool, then flatten to (batch, channels).
        x = F.adaptive_avg_pool2d(x, 1).reshape(batch_size, -1)

        out = self.l0(x)
        if targets is None:
            return out, None

        # Targets are reshaped to a column and cast to the activations' dtype
        # so they line up with the (batch, 1) logits.
        loss = nn.BCEWithLogitsLoss()(out, targets.view(-1, 1).type_as(x))
        return out, loss

In [5]:
class EfficientNet(nn.Module):
    """EfficientNet-B4 backbone whose classifier is replaced by a single-logit head.

    Args:
        pretrained: When True (the default, matching the previous behaviour)
            the backbone is created with ``from_pretrained``. When False it is
            created with ``from_name`` so no pretrained weights are loaded,
            which is sufficient when a saved checkpoint is loaded right after
            construction.
    """

    def __init__(self, pretrained=True):
        super(EfficientNet, self).__init__()
        if pretrained:
            self.base_model = efficientnet_pytorch.EfficientNet.from_pretrained(
                'efficientnet-b4'
            )
        else:
            self.base_model = efficientnet_pytorch.EfficientNet.from_name(
                'efficientnet-b4'
            )
        # Size the new head from the one it replaces rather than hard-coding
        # the feature width.
        self.base_model._fc = nn.Linear(
            in_features=self.base_model._fc.in_features,
            out_features=1,
            bias=True
        )

    def forward(self, image, targets=None):
        """Return ``(logits, loss)`` for a batch of images.

        Args:
            image: Input batch passed straight to the backbone.
            targets: Optional binary labels, one per image. When omitted
                (pure inference) no loss is computed.

        Returns:
            Tuple ``(out, loss)``; ``loss`` is the BCE-with-logits loss, or
            ``None`` if ``targets`` is ``None``.
        """
        out = self.base_model(image)
        if targets is None:
            return out, None

        # Targets become a column in the logits' dtype to match ``out``.
        loss = nn.BCEWithLogitsLoss()(out, targets.view(-1, 1).type_as(out))
        return out, loss

In [6]:
# create folds
# A fixed seed makes the shuffle -- and therefore the fold assignment written
# to train_folds.csv -- identical on every Restart & Run All.
FOLD_SEED = 42

df = pd.read_csv("../input/siim-isic-melanoma-classification/train.csv")
df["kfold"] = -1    # sentinel: no fold assigned yet
df = df.sample(frac=1, random_state=FOLD_SEED).reset_index(drop=True)
y = df.target.values
kf = model_selection.StratifiedKFold(n_splits=5)

# The index was reset above, so the positional indices yielded by split()
# coincide with the index labels that .loc expects.
for f, (t_, v_) in enumerate(kf.split(X=df, y=y)):
    df.loc[v_, 'kfold'] = f

# Every row must have landed in exactly one validation fold.
assert (df["kfold"] >= 0).all(), "some rows were not assigned a fold"
# NOTE(review): folds are stratified on `target` only; if several rows can
# belong to the same subject, a grouped split may be needed -- confirm.

df.to_csv("train_folds.csv", index=False)

In [7]:
def train(fold, epochs=50, train_bs=32, valid_bs=16, device="cuda",
          folds_path="train_folds.csv"):
    """Train an EfficientNet on all folds except ``fold``, validating on ``fold``.

    After each epoch the validation ROC AUC is printed, fed to the
    learning-rate scheduler, and passed to ``EarlyStopping`` together with the
    checkpoint path ``model_fold_{fold}.bin``. Training stops early once
    ``EarlyStopping`` sets ``early_stop``.

    Args:
        fold: Value of the ``kfold`` column held out for validation.
        epochs: Maximum number of epochs.
        train_bs: Batch size of the training loader.
        valid_bs: Batch size of the validation loader.
        device: Device string the model is moved to and that is handed to
            ``Engine``.
        folds_path: CSV containing the ``image_name``, ``target`` and ``kfold``
            columns. Defaults to the relative path the fold-creation cell
            writes to, so both cells agree regardless of the working
            directory.

    Returns:
        None.
    """
    training_data_path = "../input/siic-isic-224x224-images/train/"
    df = pd.read_csv(folds_path)

    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    # Alternative backbone: SEResnext50_32x4d(pretrained="imagenet")
    model = EfficientNet()
    model.to(device)

    # Channel statistics used for normalisation (same values for train/valid).
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    # Training pipeline: normalisation is listed first, followed by the
    # random geometric augmentations.
    train_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
            albumentations.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=15),
            albumentations.Flip(p=0.5)
        ]
    )

    # Validation only normalises; no random augmentation.
    valid_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
        ]
    )

    train_images = [
        os.path.join(training_data_path, name + ".png")
        for name in df_train.image_name.values.tolist()
    ]
    train_targets = df_train.target.values

    valid_images = [
        os.path.join(training_data_path, name + ".png")
        for name in df_valid.image_name.values.tolist()
    ]
    valid_targets = df_valid.target.values

    train_dataset = ClassificationLoader(
        image_paths=train_images,
        targets=train_targets,
        resize=None,
        augmentations=train_aug,
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=train_bs, shuffle=True, num_workers=4
    )

    valid_dataset = ClassificationLoader(
        image_paths=valid_images,
        targets=valid_targets,
        resize=None,
        augmentations=valid_aug,
    )

    # shuffle=False keeps predictions aligned with ``valid_targets``.
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=valid_bs, shuffle=False, num_workers=4
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    # mode="max" because the monitored quantity (AUC) should increase.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=3,
        threshold=0.001,
        mode="max"
    )

    es = EarlyStopping(patience=5, mode="max")

    for epoch in range(epochs):
        # The returned losses are not used; AUC drives scheduling and stopping.
        Engine.train(train_loader, model, optimizer, device=device)
        predictions, _valid_loss = Engine.evaluate(
            valid_loader, model, device=device
        )
        # Stack the per-batch outputs and flatten to one score per sample.
        predictions = np.vstack((predictions)).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        print(f"Epoch = {epoch}, AUC = {auc}")
        scheduler.step(auc)

        es(auc, model, model_path=f"model_fold_{fold}.bin")
        if es.early_stop:
            print("Early stopping")
            break

In [8]:
def predict(fold, device="cuda", batch_size=16):
    """Score the test images with the checkpoint saved for ``fold``.

    Args:
        fold: Fold index; selects the checkpoint ``model_fold_{fold}.bin``.
        device: Device string used for loading the checkpoint, placing the
            model, and running ``Engine.predict``.
        batch_size: Batch size of the test loader.

    Returns:
        1-D numpy array with one raw model output per row of ``test.csv``,
        in file order.
    """
    test_data_path = "../input/siic-isic-224x224-images/test/"
    df = pd.read_csv("../input/siim-isic-melanoma-classification/test.csv")
    model_path = f"model_fold_{fold}.bin"

    # Same normalisation as used for validation during training.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
        ]
    )

    images = [
        os.path.join(test_data_path, name + ".png")
        for name in df.image_name.values.tolist()
    ]
    # Placeholder labels: the loader requires targets, the test set has none.
    targets = np.zeros(len(images))

    test_dataset = ClassificationLoader(
        image_paths=images,
        targets=targets,
        resize=None,
        augmentations=aug,
    )

    # shuffle=False keeps predictions in the same order as ``test.csv``.
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=batch_size, shuffle=False, num_workers=4
    )

    # Alternative backbone: SEResnext50_32x4d(pretrained=None)
    model = EfficientNet()
    # map_location lets a checkpoint saved on one device be restored on
    # whichever device was requested here.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)

    predictions = Engine.predict(test_loader, model, device=device)
    # Stack the per-batch outputs and flatten to one score per image.
    predictions = np.vstack((predictions)).ravel()

    return predictions

In [9]:
# Fit one model per cross-validation fold, in fold order 0..4.
for fold_index in range(5):
    train(fold_index)

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth" to /root/.cache/torch/checkpoints/efficientnet-b4-6ed6700e.pth


HBox(children=(FloatProgress(value=0.0, max=77999237.0), HTML(value='')))


Loaded pretrained weights for efficientnet-b4


100%|██████████| 829/829 [05:41<00:00,  2.42it/s, loss=0.1]
100%|██████████| 415/415 [00:33<00:00, 12.51it/s, loss=0.0755]
  0%|          | 0/829 [00:00<?, ?it/s]

Epoch = 0, AUC = 0.8569935382041696
Validation score improved (-inf --> 0.8569935382041696). Saving model!


100%|██████████| 829/829 [05:40<00:00,  2.43it/s, loss=0.0654]
100%|██████████| 415/415 [00:31<00:00, 13.10it/s, loss=0.0738]
  0%|          | 0/829 [00:00<?, ?it/s]

Epoch = 1, AUC = 0.8646423820797765
Validation score improved (0.8569935382041696 --> 0.8646423820797765). Saving model!


100%|██████████| 829/829 [05:40<00:00,  2.43it/s, loss=0.0568]
100%|██████████| 415/415 [00:32<00:00, 12.94it/s, loss=0.0715]
  0%|          | 0/829 [00:00<?, ?it/s]

Epoch = 2, AUC = 0.8826030492953215
Validation score improved (0.8646423820797765 --> 0.8826030492953215). Saving model!


100%|██████████| 829/829 [05:39<00:00,  2.44it/s, loss=0.0473]
100%|██████████| 415/415 [00:32<00:00, 12.80it/s, loss=0.0795]
  0%|          | 0/829 [00:00<?, ?it/s]

Epoch = 3, AUC = 0.8453620430882683
EarlyStopping counter: 1 out of 5


100%|██████████| 829/829 [05:40<00:00,  2.44it/s, loss=0.0402]
100%|██████████| 415/415 [00:32<00:00, 12.76it/s, loss=0.0833]
  0%|          | 0/829 [00:00<?, ?it/s]

Epoch = 4, AUC = 0.8884227361720064
Validation score improved (0.8826030492953215 --> 0.8884227361720064). Saving model!


100%|██████████| 829/829 [05:39<00:00,  2.44it/s, loss=0.03]
100%|██████████| 415/415 [00:31<00:00, 13.23it/s, loss=0.0955]
  0%|          | 0/829 [00:00<?, ?it/s]

Epoch = 5, AUC = 0.8342223062610219
EarlyStopping counter: 1 out of 5


100%|██████████| 829/829 [05:39<00:00,  2.44it/s, loss=0.0234]
100%|██████████| 415/415 [00:31<00:00, 13.12it/s, loss=0.0966]
  0%|          | 0/829 [00:00<?, ?it/s]

Epoch = 6, AUC = 0.8406650620508356
EarlyStopping counter: 2 out of 5


100%|██████████| 415/415 [00:31<00:00, 13.30it/s, loss=0.0736]
  0%|          | 0/829 [00:00<?, ?it/s]

Epoch = 1, AUC = 0.8523874105350151
EarlyStopping counter: 1 out of 5


100%|██████████| 829/829 [05:39<00:00,  2.44it/s, loss=0.0485]
100%|██████████| 415/415 [00:32<00:00, 12.80it/s, loss=0.0806]
  0%|          | 0/829 [00:00<?, ?it/s]

Epoch = 3, AUC = 0.87248425257336
EarlyStopping counter: 1 out of 5


100%|██████████| 829/829 [05:40<00:00,  2.44it/s, loss=0.102]
100%|██████████| 829/829 [05:39<00:00,  2.44it/s, loss=0.0379]
100%|██████████| 829/829 [05:38<00:00,  2.45it/s, loss=0.0265]
100%|██████████| 415/415 [00:32<00:00, 12.77it/s, loss=0.0831]
  0%|          | 0/829 [00:00<?, ?it/s]

Epoch = 6, AUC = 0.9121935395752238
Validation score improved (0.907912155453643 --> 0.9121935395752238). Saving model!


100%|██████████| 829/829 [05:39<00:00,  2.44it/s, loss=0.104]
100%|██████████| 829/829 [05:38<00:00,  2.45it/s, loss=0.0398]
100%|██████████| 415/415 [00:31<00:00, 12.99it/s, loss=0.0678]
  0%|          | 0/829 [00:00<?, ?it/s]

Epoch = 0, AUC = 0.8830722477003976
Validation score improved (-inf --> 0.8830722477003976). Saving model!


100%|██████████| 829/829 [05:40<00:00,  2.44it/s, loss=0.0642]
100%|██████████| 829/829 [05:39<00:00,  2.44it/s, loss=0.0227]
100%|██████████| 415/415 [00:32<00:00, 12.86it/s, loss=0.1]
  0%|          | 0/829 [00:00<?, ?it/s]

Epoch = 6, AUC = 0.8693402991190331
EarlyStopping counter: 4 out of 5


 18%|█▊        | 150/829 [01:02<04:36,  2.45it/s, loss=0.0211]

In [10]:
# Test-set inference with each fold's saved model; p1..p5 correspond to
# folds 0..4 in that order.
p1, p2, p3, p4, p5 = [predict(fold_index) for fold_index in range(5)]

Loaded pretrained weights for efficientnet-b4


100%|██████████| 687/687 [00:53<00:00, 12.85it/s]


Loaded pretrained weights for efficientnet-b4


100%|██████████| 687/687 [00:51<00:00, 13.31it/s]


Loaded pretrained weights for efficientnet-b4


100%|██████████| 687/687 [00:51<00:00, 13.42it/s]


Loaded pretrained weights for efficientnet-b4


100%|██████████| 687/687 [00:50<00:00, 13.55it/s]


Loaded pretrained weights for efficientnet-b4


100%|██████████| 687/687 [00:51<00:00, 13.43it/s]


In [11]:
# Blend the five fold models with an unweighted mean of their test outputs.
# The sum starts from p1 and adds p2..p5 left to right.
fold_predictions = (p1, p2, p3, p4, p5)
predictions = sum(fold_predictions[1:], fold_predictions[0]) / len(fold_predictions)

# Fill the competition's submission template and write it out.
# NOTE(review): assumes the template rows are in the same order as test.csv -- confirm.
submission_template_path = "../input/siim-isic-melanoma-classification/sample_submission.csv"
sample = pd.read_csv(submission_template_path)
sample.loc[:, "target"] = predictions
sample.to_csv("submission.csv", index=False)