# Melanoma Classifier

* Written for the Manning Live Project - ["Semi supervised deep learning with gans for melanoma detection"](https://liveproject.manning.com/project/146/29/semi-supervised-deep-learning-with-gans-for-melanoma-detection)
* Contains 3 models - base line, augmented model and transfer learning model

In [1]:
import os
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from operator import itemgetter
import torch
from torch import nn, optim
import numpy as np
import re
from PIL import Image
import random


dataset_path = "../Datasets/MelanomaDetection/"
train_dataset_path = dataset_path + "labeled"
test_dataset_path = dataset_path + "test"

## Download data to Google Colab

* Download data from Google Drive to local Google Colab disk
* Allows the code to pickup the data as if it is running locally

In [None]:
# Load the Drive helper and mount
from google.colab import drive

# This will prompt for authorization.
drive.mount('/GDrive')

# Adjust data set path to match where the data has been loaded
dataset_path = '/GDrive/MyDrive/Datasets/MelanomaDetection/"
train_dataset_path = dataset_path + "labeled"
test_dataset_path = dataset_path + "test"


## Allow it to run on the GPU

* Code below detects if a GPU is available - if it is will run model on GPU
* Code currently does not run on a GPU as apply_ is not supported

In [2]:
if torch.cuda.is_available():
   print("Notebook is configured to run on the GPU!")
else:
   print("Notebook is currently running on the CPU.")

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

Notebook is currently running on the CPU.


## Dataloading code

In [3]:
def data_loader(batch_size, train_transform, test_transform):
    train_dataset = MelanomaDataset(extract_label, train_dataset_path, transform=train_transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    test_dataset = MelanomaDataset(extract_label, test_dataset_path, transform=test_transform)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader


def array_to_dictionary(array):
    return {k: v for k, v in enumerate(array)}


def extract_label(s):
    if re.findall(".*_1.jpg", s):
        return 1
    elif re.findall(".*_0.jpg", s):
        return 0
    else:
        raise RuntimeError("Invalid filename format: " + s)


class MelanomaDataset(Dataset):
    """Unlabelled Melanoma datasets"""

    def __init__(self, label_extractor, dir_path, transform=None):
        self.label_extractor = label_extractor
        self.dir_path = dir_path
        self.transform = transform
        file_list = filter(lambda e: e != ".DS_Store", os.listdir(dir_path))
        self.file_list = array_to_dictionary(file_list)
        self.len = len(self.file_list)

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        if index >= self.len:
            raise IndexError
        else:
            img_name = self.file_list[index]
            full_img_name = os.path.join(self.dir_path, img_name)
            image = Image.open(full_img_name)
            # image = io.read_image(full_img_name)
            # image = image.float()

            if self.transform:
                image = self.transform(image)

            result = {'name': img_name,
                      'image': image}

            if self.label_extractor:
                result['label'] = self.label_extractor(img_name)

            return result

## Training loop and validation

In [4]:
def validate(model, test_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = itemgetter('image', 'label')(data)
            images = images.to(device)
            labels = labels.to(device)
            
            outputs = model(images)
            predicted = outputs.apply_(lambda e: 1 if e > 0.5 else 0)
            total += labels.size(0)
            correct += (predicted.squeeze() == labels).sum().item()

    return correct / total, correct, total


def train(model, criterion, train_loader, test_loader, lr, epochs, momentum):
    # Each iteration of the loader serves up a pair (images, labels)
    # The images are [64, 1, 28, 28] and the labels [64]
    # The batch size is 64 images and the images are 28 x 28.
    losses = []
    test_accuracies = []

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    for e in range(epochs):
        print("\nEpocs: ", e + 1)
        model.train()
        running_loss = 0
        for data in train_loader:
            images, labels = itemgetter('image', 'label')(data)
            
            # As data streams off the loader, push it onto the GPU so the
            # calculation happens on the GPU
            images = images.to(device)
            labels = labels.to(device)
            
            # zeros all the gradients of the weights
            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels.float().unsqueeze(1))

            # Calculates all the gradients via backpropagation
            loss.backward()

            # Adjust weights based on the gradients
            optimizer.step()

            running_loss += loss.item()

        loss = running_loss / len(train_loader)
        test_accuracy, test_correct, test_total = validate(model, test_loader)
        print("Loss: ", loss)
        print("Test accuracy:", test_accuracy, ", Correct: ", test_correct, ", Total:", test_total)
        losses.append(loss)
        test_accuracies.append(test_accuracy)

    return losses, test_accuracies

## Models

In [5]:
def create_basic_model():
    class Model(nn.Module):
        def __init__(self):
            super(Model, self).__init__()
            self.conv1 = nn.Conv2d(3, 24, (3, 3))
            self.mp = nn.MaxPool2d((2, 2))
            self.conv2 = nn.Conv2d(24, 48, (3, 3))
            self.flatten = nn.Flatten()
            self.re = nn.ReLU()
            self.l1 = nn.Linear(1728, 28)
            self.dropout = nn.Dropout(0.5)
            self.l2 = nn.Linear(28, 1)
            self.sigmoid = nn.Sigmoid()

        def forward(self, x):
            x = self.conv1(x)
            x = self.re(x)
            x = self.mp(x)

            x = self.conv2(x)
            x = self.re(x)
            x = self.mp(x)

            x = self.flatten(x)
            x = self.l1(x)
            x = self.dropout(x)
            x = self.l2(x)
            x = self.sigmoid(x)

            return x

    model = Model()
    model = model.to(device)
    return model


def create_trained_model():
    class PretrainedModel(nn.Module):
        def __init__(self):
            super(PretrainedModel, self).__init__()
            self.resnet = models.resnet18(pretrained=True)
            for param in self.resnet.parameters():
                param.requires_grad = False

            self.linear = nn.Linear(1000, 1)
            self.sigmoid = nn.Sigmoid()

        def forward(self, x):
            x = self.resnet(x)
            x = self.linear(x)
            x = self.sigmoid(x)
            return x
        
    model = PretrainedModel()
    model = model.to(device)
    return PretrainedModel()


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def scale_image(image):
    return image * 256

## Baseline model

* Simple CNN
* Test accuracy around 73%

In [6]:
def run_basic_model(batch_size):
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(12321)

    lr = 0.001
    momentum = 0
    epochs = 10

    transform = transforms.Compose([transforms.ToTensor(), scale_image])
    train_loader, test_loader = data_loader(batch_size, transform, transform)
    criterion = nn.BCELoss()
    model = create_basic_model()

    _, test_error = train(model, criterion, train_loader, test_loader, lr, epochs, momentum)

    print()
    print("Highest test accuracy:", max(test_error))
    print("Number of epocs:", np.argmax(test_error) + 1)

run_basic_model(32)


Epocs:  1
Loss:  18.509376491819108
Test accuracy: 0.5 , Correct:  300 , Total: 600

Epocs:  2
Loss:  9.840440162590571
Test accuracy: 0.5133333333333333 , Correct:  308 , Total: 600

Epocs:  3
Loss:  0.6630214835916247
Test accuracy: 0.705 , Correct:  423 , Total: 600

Epocs:  4
Loss:  0.5884216342653547
Test accuracy: 0.66 , Correct:  396 , Total: 600

Epocs:  5
Loss:  0.6349138702665057
Test accuracy: 0.555 , Correct:  333 , Total: 600

Epocs:  6
Loss:  0.5798675801072802
Test accuracy: 0.6933333333333334 , Correct:  416 , Total: 600

Epocs:  7
Loss:  0.5631925719124931
Test accuracy: 0.6283333333333333 , Correct:  377 , Total: 600

Epocs:  8
Loss:  0.5961653249604362
Test accuracy: 0.7283333333333334 , Correct:  437 , Total: 600

Epocs:  9
Loss:  0.5702534828867231
Test accuracy: 0.665 , Correct:  399 , Total: 600

Epocs:  10
Loss:  0.5593111557619912
Test accuracy: 0.5333333333333333 , Correct:  320 , Total: 600

Highest test accuracy: 0.7283333333333334
Number of epocs: 8


## Augmented model

* Uses transforms on the input data to increase model accuracy
* Highest accuracy around 76%

In [7]:
def augmentation_transforms():
    rotation = transforms.RandomChoice(
        [transforms.RandomRotation([-3, 3]),
         transforms.RandomRotation([87, 93]),
         transforms.RandomRotation([177, 183]),
         transforms.RandomRotation([267, 273])])

    return transforms.Compose([transforms.RandomHorizontalFlip(),
                               transforms.RandomVerticalFlip(),
                               rotation])


def run_augmented_model(batch_size):
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(12321)

    lr = 0.001
    momentum = 0.2
    epochs = 200

    base_transform = transforms.Compose([transforms.ToTensor(), scale_image])
    augmentation = augmentation_transforms()
    preprocess = transforms.Compose([base_transform, augmentation])

    train_loader, test_loader = data_loader(batch_size, preprocess, base_transform)

    criterion = nn.BCELoss()
    model = create_basic_model()

    _, test_error = train(model, criterion, train_loader, test_loader, lr, epochs, momentum)

    print()
    print("Highest test accuracy:", max(test_error))
    print("Number of epocs:", np.argmax(test_error) + 1)
    
    
run_augmented_model(32)


Epocs:  1
Loss:  41.45547103881836
Test accuracy: 0.5 , Correct:  300 , Total: 600

Epocs:  2
Loss:  47.32142857142857
Test accuracy: 0.5 , Correct:  300 , Total: 600

Epocs:  3
Loss:  51.339285714285715
Test accuracy: 0.5 , Correct:  300 , Total: 600

Epocs:  4
Loss:  48.28236280168806
Test accuracy: 0.5 , Correct:  300 , Total: 600

Epocs:  5
Loss:  53.62774058750698
Test accuracy: 0.5 , Correct:  300 , Total: 600

Epocs:  6
Loss:  48.66073226928711
Test accuracy: 0.5 , Correct:  300 , Total: 600

Epocs:  7
Loss:  51.52667454310826
Test accuracy: 0.5 , Correct:  300 , Total: 600

Epocs:  8
Loss:  19.03887845788683
Test accuracy: 0.52 , Correct:  312 , Total: 600

Epocs:  9
Loss:  0.9346129042761666
Test accuracy: 0.6133333333333333 , Correct:  368 , Total: 600

Epocs:  10
Loss:  0.7425960898399353
Test accuracy: 0.5216666666666666 , Correct:  313 , Total: 600

Epocs:  11
Loss:  0.6814041478293282
Test accuracy: 0.5516666666666666 , Correct:  331 , Total: 600

Epocs:  12
Loss:  0.590

Loss:  0.45847053612981525
Test accuracy: 0.72 , Correct:  432 , Total: 600

Epocs:  89
Loss:  0.48150940452303204
Test accuracy: 0.6266666666666667 , Correct:  376 , Total: 600

Epocs:  90
Loss:  0.44262742144720896
Test accuracy: 0.7433333333333333 , Correct:  446 , Total: 600

Epocs:  91
Loss:  0.4783282790865217
Test accuracy: 0.7366666666666667 , Correct:  442 , Total: 600

Epocs:  92
Loss:  0.4878224900790623
Test accuracy: 0.7133333333333334 , Correct:  428 , Total: 600

Epocs:  93
Loss:  0.43952041012900217
Test accuracy: 0.6966666666666667 , Correct:  418 , Total: 600

Epocs:  94
Loss:  0.43405922821589876
Test accuracy: 0.6783333333333333 , Correct:  407 , Total: 600

Epocs:  95
Loss:  0.4981179450239454
Test accuracy: 0.7366666666666667 , Correct:  442 , Total: 600

Epocs:  96
Loss:  0.4631384696279253
Test accuracy: 0.7283333333333334 , Correct:  437 , Total: 600

Epocs:  97
Loss:  0.4252978669745581
Test accuracy: 0.745 , Correct:  447 , Total: 600

Epocs:  98
Loss:  0.456

Loss:  0.40792656796319143
Test accuracy: 0.7216666666666667 , Correct:  433 , Total: 600

Epocs:  173
Loss:  0.4385403905596052
Test accuracy: 0.7383333333333333 , Correct:  443 , Total: 600

Epocs:  174
Loss:  0.3593711129256657
Test accuracy: 0.7116666666666667 , Correct:  427 , Total: 600

Epocs:  175
Loss:  0.41929927468299866
Test accuracy: 0.75 , Correct:  450 , Total: 600

Epocs:  176
Loss:  0.4246256394045694
Test accuracy: 0.7033333333333334 , Correct:  422 , Total: 600

Epocs:  177
Loss:  0.4149159405912672
Test accuracy: 0.6583333333333333 , Correct:  395 , Total: 600

Epocs:  178
Loss:  0.43273715887750897
Test accuracy: 0.75 , Correct:  450 , Total: 600

Epocs:  179
Loss:  0.4055129417351314
Test accuracy: 0.71 , Correct:  426 , Total: 600

Epocs:  180
Loss:  0.4037867433258465
Test accuracy: 0.6733333333333333 , Correct:  404 , Total: 600

Epocs:  181
Loss:  0.3779088258743286
Test accuracy: 0.705 , Correct:  423 , Total: 600

Epocs:  182
Loss:  0.37598549681050436
Test 

## Pretrained model

* Starts with ResNet 18 model 
* Freezes the parameters
* Adds a fully connected layer consisting of a single neuron and a sigmoid
* Accuracy around 79-80%

In [9]:
def run_pretrained_model(batch_size):
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(1)

    base = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            )])
    augmentation = augmentation_transforms()
    preprocess = transforms.Compose([base, augmentation])

    train_loader, test_loader = data_loader(batch_size, preprocess, base)

    criterion = nn.BCELoss()
    model = create_trained_model()

    lr = 0.001
    momentum = 0.9
    epochs = 3

    _, test_error = train(model, criterion, train_loader, test_loader, lr, epochs, momentum)

    print()
    print("Highest test accuracy:", max(test_error))
    print("Number of epocs:", np.argmax(test_error) + 1)

    
run_pretrained_model(32)


Epocs:  1
Loss:  0.5716858293328967
Test accuracy: 0.715 , Correct:  429 , Total: 600

Epocs:  2
Loss:  0.41775238939694
Test accuracy: 0.7583333333333333 , Correct:  455 , Total: 600

Epocs:  3
Loss:  0.6201600474970681
Test accuracy: 0.7883333333333333 , Correct:  473 , Total: 600

Highest test accuracy: 0.7883333333333333
Number of epocs: 3
