# Melanoma Classifier

* Written for the Manning Live Project - ["Semi supervised deep learning with gans for melanoma detection"](https://liveproject.manning.com/project/146/29/semi-supervised-deep-learning-with-gans-for-melanoma-detection)
* Contains 3 models - base line, augmented model and transfer learning model

In [1]:
import os
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from torch.utils import data
import matplotlib.pyplot as plt
from operator import itemgetter
import torch
from torch import nn, optim
import numpy as np
import re
from PIL import Image
import random


dataset_path = "../Datasets/MelanomaDetection/"
train_dataset_path = dataset_path + "labeled"
test_dataset_path = dataset_path + "test"

## Download data to Google Colab

* Download data from Google Drive to local Google Colab disk
* Allows the code to pickup the data as if it is running locally

In [None]:
# Load the Drive helper and mount
from google.colab import drive

# This will prompt for authorization.
drive.mount('/GDrive')

# Adjust data set path to match where the data has been loaded
dataset_path = '/GDrive/MyDrive/Datasets/MelanomaDetection/"
train_dataset_path = dataset_path + "labeled"
test_dataset_path = dataset_path + "test"


## Allow it to run on the GPU

* Code below detects if a GPU is available - if it is will run model on GPU
* Code currently does not run on a GPU as apply_ is not supported

In [2]:
if torch.cuda.is_available():
   print("Notebook is configured to run on the GPU!")
else:
   print("Notebook is currently running on the CPU.")

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

Notebook is currently running on the CPU.


## Dataloading code

In [3]:
def get_splits(dataset, percentage_train):
    len_train_set = len(dataset)
    train_set = int(len_train_set*percentage_train)
    val_set = len_train_set - train_set
    return train_set, val_set


def data_loader(batch_size, train_transform, test_transform):
    train_dataset = MelanomaDataset(extract_label, train_dataset_path, transform=train_transform)
    train_set_len, val_set_len = get_splits(train_dataset, 0.7)
    train_dataset, val_dataset = data.random_split(train_dataset, [train_set_len, val_set_len])
    test_dataset = MelanomaDataset(extract_label, test_dataset_path, transform=test_transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)    
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader


def array_to_dictionary(array):
    return {k: v for k, v in enumerate(array)}


def extract_label(s):
    if re.findall(".*_1.jpg", s):
        return 1
    elif re.findall(".*_0.jpg", s):
        return 0
    else:
        raise RuntimeError("Invalid filename format: " + s)


class MelanomaDataset(Dataset):
    """Unlabelled Melanoma datasets"""

    def __init__(self, label_extractor, dir_path, transform=None):
        self.label_extractor = label_extractor
        self.dir_path = dir_path
        self.transform = transform
        file_list = filter(lambda e: e != ".DS_Store", os.listdir(dir_path))
        self.file_list = array_to_dictionary(file_list)
        self.len = len(self.file_list)

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        if index >= self.len:
            raise IndexError
        else:
            img_name = self.file_list[index]
            full_img_name = os.path.join(self.dir_path, img_name)
            image = Image.open(full_img_name)
            # image = io.read_image(full_img_name)
            # image = image.float()

            if self.transform:
                image = self.transform(image)

            result = {'name': img_name,
                      'image': image}

            if self.label_extractor:
                result['label'] = self.label_extractor(img_name)

            return result

## Training loop and validation

In [4]:
def validate(model, test_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = itemgetter('image', 'label')(data)
            images = images.to(device)
            labels = labels.to(device)
            
            outputs = model(images)
            predicted = outputs.apply_(lambda e: 1 if e > 0.5 else 0)
            total += labels.size(0)
            correct += (predicted.squeeze() == labels).sum().item()

    return correct / total, correct, total


def train(model, criterion, train_loader, val_loader, test_loader, lr, epochs, momentum):
    # Each iteration of the loader serves up a pair (images, labels)
    # The images are [64, 1, 28, 28] and the labels [64]
    # The batch size is 64 images and the images are 28 x 28.
    losses = []
    val_accuracies = []
    test_accuracies = []

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    for e in range(epochs):
        print("\nEpocs: ", e + 1)
        model.train()
        running_loss = 0
        for data in train_loader:
            images, labels = itemgetter('image', 'label')(data)
            
            # As data streams off the loader, push it onto the GPU so the
            # calculation happens on the GPU
            images = images.to(device)
            labels = labels.to(device)
            
            # zeros all the gradients of the weights
            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels.float().unsqueeze(1))

            # Calculates all the gradients via backpropagation
            loss.backward()

            # Adjust weights based on the gradients
            optimizer.step()

            running_loss += loss.item()

        loss = running_loss / len(train_loader)
        val_accuracy, _, _ = validate(model, val_loader)
        test_accuracy, _, _ = validate(model, test_loader)
        print("Loss: ", loss)
        print("Val accuracy:", val_accuracy)
        print("Test accuracy:", test_accuracy)        
        losses.append(loss)
        val_accuracies.append(val_accuracy)
        test_accuracies.append(test_accuracy)

    return losses, val_accuracies, test_accuracies

## Models

In [5]:
def create_basic_model():
    class Model(nn.Module):
        def __init__(self):
            super(Model, self).__init__()
            self.conv1 = nn.Conv2d(3, 24, (3, 3))
            self.mp = nn.MaxPool2d((2, 2))
            self.conv2 = nn.Conv2d(24, 48, (3, 3))
            self.flatten = nn.Flatten()
            self.re = nn.ReLU()
            self.l1 = nn.Linear(1728, 28)
            self.dropout = nn.Dropout(0.5)
            self.l2 = nn.Linear(28, 1)
            self.sigmoid = nn.Sigmoid()

        def forward(self, x):
            x = self.conv1(x)
            x = self.re(x)
            x = self.mp(x)

            x = self.conv2(x)
            x = self.re(x)
            x = self.mp(x)

            x = self.flatten(x)
            x = self.l1(x)
            x = self.dropout(x)
            x = self.l2(x)
            x = self.sigmoid(x)

            return x

    model = Model()
    model = model.to(device)
    return model


def create_trained_model():
    class PretrainedModel(nn.Module):
        def __init__(self):
            super(PretrainedModel, self).__init__()
            self.resnet = models.resnet18(pretrained=True)
            for param in self.resnet.parameters():
                param.requires_grad = False

            self.linear = nn.Linear(1000, 1)
            self.sigmoid = nn.Sigmoid()

        def forward(self, x):
            x = self.resnet(x)
            x = self.linear(x)
            x = self.sigmoid(x)
            return x
        
    model = PretrainedModel()
    model = model.to(device)
    return PretrainedModel()


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def scale_image(image):
    return image * 256

## Baseline model

* Simple CNN
* Test accuracy around 68%

In [7]:
def run_basic_model(batch_size):
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(12321)

    lr = 0.0003
    momentum = 0.9
    epochs = 50

    transform = transforms.Compose([transforms.ToTensor(), scale_image])
    train_loader, val_loader, test_loader = data_loader(batch_size, transform, transform)
    criterion = nn.BCELoss()
    model = create_basic_model()

    _, val_errors, test_errors = train(model, criterion, train_loader, val_loader, test_loader, lr, epochs, momentum)

    opt_epochs = np.argmax(val_errors)
    print()
    print("Number of epocs:", opt_epochs+1)
    print("Test accuracy:", test_errors[opt_epochs])

run_basic_model(32)


Epocs:  1
Loss:  4.7849242210388185
Val accuracy: 0.5166666666666667
Test accuracy: 0.5

Epocs:  2
Loss:  50.58139190673828
Val accuracy: 0.5166666666666667
Test accuracy: 0.5

Epocs:  3
Loss:  49.51835250854492
Val accuracy: 0.5166666666666667
Test accuracy: 0.5

Epocs:  4
Loss:  48.44468688964844
Val accuracy: 0.5166666666666667
Test accuracy: 0.5

Epocs:  5
Loss:  22.70093650817871
Val accuracy: 0.48333333333333334
Test accuracy: 0.505

Epocs:  6
Loss:  1.4338564038276673
Val accuracy: 0.5166666666666667
Test accuracy: 0.5

Epocs:  7
Loss:  0.8717656135559082
Val accuracy: 0.48333333333333334
Test accuracy: 0.5133333333333333

Epocs:  8
Loss:  0.7175518989562988
Val accuracy: 0.5333333333333333
Test accuracy: 0.5066666666666667

Epocs:  9
Loss:  0.687485682964325
Val accuracy: 0.5166666666666667
Test accuracy: 0.5416666666666666

Epocs:  10
Loss:  0.7006665587425231
Val accuracy: 0.5
Test accuracy: 0.5416666666666666

Epocs:  11
Loss:  0.6847898602485657
Val accuracy: 0.73333333333

## Augmented model

* Uses transforms on the input data to increase model accuracy
* Highest accuracy around 71%

In [8]:
def augmentation_transforms():
    rotation = transforms.RandomChoice(
        [transforms.RandomRotation([-3, 3]),
         transforms.RandomRotation([87, 93]),
         transforms.RandomRotation([177, 183]),
         transforms.RandomRotation([267, 273])])

    return transforms.Compose([transforms.RandomHorizontalFlip(),
                               transforms.RandomVerticalFlip(),
                               rotation])


def run_augmented_model(batch_size):
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(12321)

    lr = 0.0003
    momentum = 0.9
    epochs = 100

    base_transform = transforms.Compose([transforms.ToTensor(), scale_image])
    augmentation = augmentation_transforms()
    preprocess = transforms.Compose([base_transform, augmentation])

    train_loader, val_loader, test_loader = data_loader(batch_size, preprocess, base_transform)

    criterion = nn.BCELoss()
    model = create_basic_model()

    _, val_errors, test_errors = train(model, criterion, train_loader, val_loader, test_loader, lr, epochs, momentum)

    opt_epochs = np.argmax(val_errors)
    print()
    print("Number of epocs:", opt_epochs+1)
    print("Test accuracy:", test_errors[opt_epochs])
    
    
run_augmented_model(32)


Epocs:  1
Loss:  6.561855882406235
Val accuracy: 0.55
Test accuracy: 0.5866666666666667

Epocs:  2
Loss:  3.5356847286224364
Val accuracy: 0.5666666666666667
Test accuracy: 0.5833333333333334

Epocs:  3
Loss:  0.7793725490570068
Val accuracy: 0.48333333333333334
Test accuracy: 0.4816666666666667

Epocs:  4
Loss:  0.7813573479652405
Val accuracy: 0.5
Test accuracy: 0.5033333333333333

Epocs:  5
Loss:  0.724374783039093
Val accuracy: 0.6
Test accuracy: 0.6066666666666667

Epocs:  6
Loss:  0.6733001589775085
Val accuracy: 0.5833333333333334
Test accuracy: 0.59

Epocs:  7
Loss:  0.6777583122253418
Val accuracy: 0.6
Test accuracy: 0.59

Epocs:  8
Loss:  0.6635091066360473
Val accuracy: 0.7166666666666667
Test accuracy: 0.6616666666666666

Epocs:  9
Loss:  0.6497506380081177
Val accuracy: 0.6166666666666667
Test accuracy: 0.65

Epocs:  10
Loss:  0.6610543251037597
Val accuracy: 0.6833333333333333
Test accuracy: 0.6316666666666667

Epocs:  11
Loss:  0.6380577564239502
Val accuracy: 0.6666666

Loss:  0.3901583433151245
Val accuracy: 0.7666666666666667
Test accuracy: 0.7433333333333333

Epocs:  89
Loss:  0.45222275257110595
Val accuracy: 0.65
Test accuracy: 0.69

Epocs:  90
Loss:  0.3825852632522583
Val accuracy: 0.7
Test accuracy: 0.7316666666666667

Epocs:  91
Loss:  0.4535839557647705
Val accuracy: 0.75
Test accuracy: 0.6916666666666667

Epocs:  92
Loss:  0.4496836096048355
Val accuracy: 0.7166666666666667
Test accuracy: 0.7216666666666667

Epocs:  93
Loss:  0.4311529934406281
Val accuracy: 0.65
Test accuracy: 0.735

Epocs:  94
Loss:  0.42976906299591067
Val accuracy: 0.6833333333333333
Test accuracy: 0.6933333333333334

Epocs:  95
Loss:  0.3987817943096161
Val accuracy: 0.6333333333333333
Test accuracy: 0.685

Epocs:  96
Loss:  0.5153918564319611
Val accuracy: 0.7833333333333333
Test accuracy: 0.7133333333333334

Epocs:  97
Loss:  0.5484216451644898
Val accuracy: 0.65
Test accuracy: 0.6616666666666666

Epocs:  98
Loss:  0.5126830041408539
Val accuracy: 0.6166666666666667


## Pretrained model

* Starts with ResNet 18 model 
* Freezes the parameters
* Adds a fully connected layer consisting of a single neuron and a sigmoid
* Accuracy around 79-80%

In [9]:
def run_pretrained_model(batch_size):
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(1)

    base = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            )])
    augmentation = augmentation_transforms()
    preprocess = transforms.Compose([base, augmentation])

    train_loader, val_loader, test_loader = data_loader(batch_size, preprocess, base)

    criterion = nn.BCELoss()
    model = create_trained_model()

    lr = 0.0003
    momentum = 0.9
    epochs = 10

    _, val_errors, test_errors = train(model, criterion, train_loader, val_loader, test_loader, lr, epochs, momentum)

    opt_epochs = np.argmax(val_errors)
    print()
    print("Number of epocs:", opt_epochs+1)
    print("Test accuracy:", test_errors[opt_epochs])

    
run_pretrained_model(32)


Epocs:  1
Loss:  0.9931839346885681
Val accuracy: 0.5
Test accuracy: 0.5333333333333333

Epocs:  2
Loss:  0.6332376301288605
Val accuracy: 0.6833333333333333
Test accuracy: 0.7433333333333333

Epocs:  3
Loss:  0.5058987736701965
Val accuracy: 0.75
Test accuracy: 0.7766666666666666

Epocs:  4
Loss:  0.46236159205436705
Val accuracy: 0.75
Test accuracy: 0.79

Epocs:  5
Loss:  0.38968234658241274
Val accuracy: 0.7333333333333333
Test accuracy: 0.785

Epocs:  6
Loss:  0.4349686145782471
Val accuracy: 0.7333333333333333
Test accuracy: 0.7916666666666666

Epocs:  7
Loss:  0.33679367899894713
Val accuracy: 0.7
Test accuracy: 0.7883333333333333

Epocs:  8
Loss:  0.41119287014007566
Val accuracy: 0.7333333333333333
Test accuracy: 0.7933333333333333

Epocs:  9
Loss:  0.35547239482402804
Val accuracy: 0.7166666666666667
Test accuracy: 0.7866666666666666

Epocs:  10
Loss:  0.45358372330665586
Val accuracy: 0.7333333333333333
Test accuracy: 0.7933333333333333

Number of epocs: 3
Test accuracy: 0.7