In [1]:
import sys
sys.path.insert(0, '../src/')
from collections import Counter
import os
import numpy as np 
import pandas
import torch
import pytorch_lightning as pl
from torchvision import models, transforms
from config_file import config
import albumentations as A 
from albumentations.pytorch import ToTensorV2
from torch.utils.data.dataset import Dataset
import pandas as pd
from torch.utils.data import DataLoader
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import torch.nn as nn
from torch.optim.lr_scheduler import ReduceLROnPlateau
import matplotlib.pyplot as plt
%matplotlib inline
# to_pi

In [13]:
args = {
    'phase': 'train',
    'sample': False
}

In [2]:
data = np.load(os.path.join(config['data_dir'], config['data_train']))

In [4]:
checkpoint_callback = ModelCheckpoint(
    filepath=os.getcwd(),
    save_top_k=1,
    verbose=True,
    monitor='val_loss',
    mode='min',
    prefix=''
)

early_stop_callback = EarlyStopping(
   monitor='val_loss',
   min_delta=0.00,
   patience=7,
   verbose=False,
   mode='min',
)

augmentations = {
    'train':A.Compose([
            A.HorizontalFlip(p = 0.5),
            A.OneOf([
            A.RandomContrast(),
            A.RandomGamma(),
            A.RandomBrightness()],
            p = 0.5),            
            A.Normalize(mean=config['mean'], std=config['std']),            
            A.pytorch.ToTensor()],
            p = 1),
    'valid':A.Compose([            
            A.Normalize(mean=config['mean'], std=config['std']),            
            A.pytorch.ToTensor()],
            p = 1)
}

In [5]:
class MelanomaDataset(Dataset):
    def __init__(self, images, labels, transforms):
        self.images = images
        self.labels = labels
        self.transforms = transforms
        
    def __getitem__(self, index):
        image = self.images[index]
        image = self.transforms(image=image)['image']
        label = self.labels[index]*1.0
        return image, label

    def __len__(self):
        return len(self.images)

In [6]:
class MelanomaDetector(pl.LightningModule):
    def __init__(self, train_dl, val_dl, test_dl, criterion):
        super(MelanomaDetector, self).__init__()        
        self.model = self.get_model()
        self.criterion = criterion
        self.learning_rate = 0.0001     
        self.train_dl, self.val_dl, self.test_dl = train_dl, val_dl, test_dl

    def get_model(self):
        model = models.resnet50(pretrained=True)        
        model.fc = nn.Linear(2048, 1)
        return model
        
    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_nb):
        x, y = batch
        y_hat = self.forward(x).view(y.size())
        loss = self.criterion(y_hat, y)
        tensorboard_logs = {'train_loss': loss}
        return {'loss': loss, 'log': tensorboard_logs}

    def validation_step(self, batch, batch_nb):
        x, y = batch
        y_hat = self.forward(x).view(y.size())
        return {'val_loss': self.criterion(y_hat, y)}

    def validation_end(self, outputs):
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        tensorboard_logs = {'val_loss': avg_loss}
        return {'avg_val_loss': avg_loss, 'log': tensorboard_logs}

    def test_step(self, batch, batch_nb):
        x, y = batch
        y_hat = self.forward(x).view(y.size())
        return {'test_loss': self.criterion(y_hat, y)}

    def test_end(self, outputs):
        avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
        logs = {'test_loss': avg_loss}
        return {'avg_test_loss': avg_loss, 'log': logs, 'progress_bar': logs}

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5,patience=3, verbose=True)
        return {"optimizer": optimizer, "lr_scheduler": scheduler, "monitor": "val_loss"}
        

    def train_dataloader(self):
        return self.train_dl

    def val_dataloader(self):
        return self.val_dl

    def test_dataloader(self):
        return self.test_dl

In [7]:
def get_data_loader(train_imgs, train_lbls, valid_imgs, valid_lbls):
    train_imgs = train_imgs[:100]
    train_lbls = train_lbls[:100]
    
    valid_imgs = valid_imgs[:100]
    valid_lbls = valid_lbls[:100]
    
    train_ds = MelanomaDataset(train_imgs, train_lbls, augmentations['train'])
    train_dl = DataLoader(train_ds, batch_size=config['train_bs'], shuffle=True, num_workers=4)

    valid_ds = MelanomaDataset(valid_imgs, valid_lbls, augmentations['valid'])
    valid_dl = DataLoader(valid_ds, batch_size=config['val_bs'], shuffle=False, num_workers=4)

    test_dl = valid_dl

    return train_dl, valid_dl, test_dl

In [8]:
def train(fold):
    df = pd.read_csv(os.path.join(config['data_dir'], 'train_folds.csv'))
    train_idx = df[df.kfold != fold].index.values
    valid_idx = df[df.kfold == fold].index.values

    train_imgs, train_lbls = data[train_idx], df['target'][train_idx].to_list()
    valid_imgs, valid_lbls = data[valid_idx], df['target'][valid_idx].to_list()
    
    cnt = Counter(train_lbls)
    pos_weight = torch.tensor(cnt[0]/cnt[1])
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    train_dl, valid_dl, test_dl = get_data_loader(train_imgs, train_lbls, valid_imgs, valid_lbls)

    model = MelanomaDetector(train_dl, valid_dl, test_dl, criterion)
    trainer = pl.Trainer(max_epochs=config['epochs'],
                        gpus=1, 
                        check_val_every_n_epoch=2,
                        auto_lr_find=True,
                        callbacks=[early_stop_callback, checkpoint_callback])    
    trainer.fit(model)




In [9]:
def get_data_loader(fold=0):
    
    df = pd.read_csv(os.path.join(config['data_dir'], 'train_folds.csv'))
    train_idx = df[df.kfold != fold].index.values
    valid_idx = df[df.kfold == fold].index.values

    train_imgs, train_lbls = data[train_idx], df['target'][train_idx].to_list()
    valid_imgs, valid_lbls = data[valid_idx], df['target'][valid_idx].to_list()
    
    if args['sample']:
        train_imgs, train_lbls = train_imgs[:100], train_lbls[:100]
        valid_imgs, valid_lbls = valid_imgs[:100], valid_lbls[:100]
    
    train_ds = MelanomaDataset(train_imgs, train_lbls, augmentations['train'])
    train_dl = DataLoader(train_ds, batch_size=config['train_bs'], shuffle=True, num_workers=4)

    valid_ds = MelanomaDataset(valid_imgs, valid_lbls, augmentations['valid'])
    valid_dl = DataLoader(valid_ds, batch_size=config['val_bs'], shuffle=False, num_workers=4)
    
    test_dl = valid_dl

    return train_dl, valid_dl, test_dl

In [16]:
def train(fold):      
    train_dl, valid_dl, test_dl = get_data_loader(fold)
    cnt = Counter(train_dl.dataset.labels)
    pos_weight = torch.tensor(cnt[0]/cnt[1])
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    model = MelanomaDetector(train_dl, valid_dl, test_dl, criterion)
    trainer = pl.Trainer(max_epochs=config['epochs'],
                        gpus=1, 
                        check_val_every_n_epoch=2,
                        auto_lr_find=True,
                        callbacks=[early_stop_callback, checkpoint_callback])    
    trainer.fit(model)

In [18]:
train(0)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type              | Params
------------------------------------------------
0 | model     | ResNet            | 23.5 M
1 | criterion | BCEWithLogitsLoss | 0     


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

Please use self.log(...) inside the lightningModule instead.

# log on a step or aggregate epoch metric to the logger and/or progress bar
# (inside LightningModule)
self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
Epoch 0: val_loss was not in top 1



