# Dermatologist AI - Skin Cancer Detector

## Dataloader

In [None]:
import os

import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import datasets

# Check whether a CUDA-capable GPU is available
use_cuda = torch.cuda.is_available()

# How many samples per batch to load
batch_size = 32

# Channel-wise normalisation shared by the training and evaluation pipelines
# (the standard ImageNet mean/std used throughout torchvision examples).
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Augmenting transformations applied to every image of the training set
transform_augm = transforms.Compose([
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.RandomHorizontalFlip(),
    transforms.RandomAffine(30),
    transforms.RandomResizedCrop(224, scale=(0.75, 1.0), ratio=(0.85, 1.15)),
    transforms.ToTensor(),
    normalize,
])

# Deterministic transformations applied to the validation and test sets
transform = transforms.Compose([
    transforms.Resize((230, 230)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

# Datasets: one sub-directory per class below each split directory
data = {
    'train': datasets.ImageFolder('data/train/', transform=transform_augm),
    'valid': datasets.ImageFolder('data/valid/', transform=transform),
    'test': datasets.ImageFolder('data/test/', transform=transform),
}

# Loaders. Only the training split is shuffled: the evaluation loops average
# per-batch losses, so a fixed batch composition keeps the validation loss
# comparable from one epoch to the next.
loaders = {
    'train': torch.utils.data.DataLoader(data['train'], shuffle=True, batch_size=batch_size),
    'valid': torch.utils.data.DataLoader(data['valid'], shuffle=False, batch_size=batch_size),
    'test': torch.utils.data.DataLoader(data['test'], shuffle=False, batch_size=batch_size),
}

## Model Architecture

In [None]:
import torch.nn as nn
import torch.nn.functional as F

# define the CNN architecture
class Net(nn.Module):
    """Five-stage convolutional classifier for 3-channel 224x224 images.

    Every stage is a padding-preserving convolution followed by LeakyReLU,
    batch normalisation and a 2x2 max-pool, so the spatial size is halved
    five times (224 -> 7) before the two fully connected layers.

    Args:
        num_classes: Width of the output layer. Defaults to 133, the value
            that used to be hard-coded, so ``Net()`` builds the same layers
            as before.
    """

    def __init__(self, num_classes=133):
        super().__init__()
        # Activation function shared by all hidden layers
        self.leakyRelu = nn.LeakyReLU(0.1)

        self.conv1_1 = nn.Conv2d(3, 16, 5, padding=2, stride=1)
        self.conv1_bn = nn.BatchNorm2d(16)
        self.pool1_1 = nn.MaxPool2d(2, 2)

        self.conv2_1 = nn.Conv2d(16, 32, 5, padding=2, stride=1)
        self.conv2_bn = nn.BatchNorm2d(32)
        self.pool2_1 = nn.MaxPool2d(2, 2)

        self.conv3_1 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3_bn = nn.BatchNorm2d(64)
        self.pool3_1 = nn.MaxPool2d(2, 2)

        self.conv4_1 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv4_bn = nn.BatchNorm2d(128)
        self.pool4_1 = nn.MaxPool2d(2, 2)

        self.conv5_1 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv5_bn = nn.BatchNorm2d(256)
        self.pool5_1 = nn.MaxPool2d(2, 2)

        self.fc1 = nn.Linear(7*7*256, 200)
        # NOTE: fc1_bn is not applied in forward(); it stays registered so the
        # state_dict keys keep matching previously saved checkpoints.
        self.fc1_bn = nn.BatchNorm1d(200)
        self.dropoutfc = nn.Dropout(0.2)
        self.fc2 = nn.Linear(200, num_classes)

    def forward(self, x):
        """Return raw class scores (logits) with one row per input sample.

        Raises:
            RuntimeError: if the input resolution does not leave a 7x7
                feature map, because ``fc1`` then receives the wrong number
                of features.
        """
        x = self.leakyRelu(self.conv1_1(x))
        x = self.conv1_bn(x)
        x = self.pool1_1(x)

        x = self.leakyRelu(self.conv2_1(x))
        x = self.conv2_bn(x)
        x = self.pool2_1(x)

        x = self.leakyRelu(self.conv3_1(x))
        x = self.conv3_bn(x)
        x = self.pool3_1(x)

        # Stages 4 and 5 pool before normalising, unlike stages 1-3. The
        # order is left untouched so existing checkpoints behave the same.
        x = self.leakyRelu(self.conv4_1(x))
        x = self.pool4_1(x)
        x = self.conv4_bn(x)

        x = self.leakyRelu(self.conv5_1(x))
        x = self.pool5_1(x)
        x = self.conv5_bn(x)

        # Flatten per sample. view(-1, 7*7*256) would silently merge samples
        # whenever the feature map is not 7x7; keeping the batch dimension
        # fixed turns that mistake into a shape error in fc1.
        x = x.view(x.size(0), -1)
        x = self.leakyRelu(self.fc1(x))
        x = self.dropoutfc(x)

        # No activation on the output layer: nn.CrossEntropyLoss expects
        # unnormalised scores, and squashing negative logits weakens them.
        return self.fc2(x)


# Build the from-scratch CNN; when CUDA is available its parameters are
# moved onto the GPU.
model_scratch = Net()
if use_cuda:
    model_scratch = model_scratch.cuda()

## Loss Function and Optimizer

In [None]:
import torch.optim as optim

# Loss: cross-entropy between the network's class scores and the integer labels
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over every parameter of the from-scratch model
optimizer = optim.Adam(params=model_scratch.parameters(), lr=1e-3)

## Train and Validate the Model

In [None]:
import os.path
from PIL import ImageFile
# Ask PIL to load image files whose data is truncated rather than fail on them.
ImageFile.LOAD_TRUNCATED_IMAGES = True

def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    """Train ``model`` and checkpoint it whenever the validation loss does not rise.

    Args:
        n_epochs: Number of passes over ``loaders['train']``.
        loaders: Mapping with ``'train'`` and ``'valid'`` iterables that yield
            ``(data, target)`` batches.
        model: Module to optimise; it is updated in place.
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Callable returning a scalar loss tensor for ``(output, target)``.
        use_cuda: When true, each batch is moved to the GPU before use.
        save_path: File that receives ``model.state_dict()`` after every epoch
            whose validation loss is less than or equal to the best so far.

    Returns:
        The same ``model`` object, in its state after the final epoch (which is
        not necessarily the checkpointed one).
    """
    # Plain float infinity: no NumPy dependency (np.Inf was removed in NumPy 2.0).
    valid_loss_min = float('inf')

    for epoch in range(1, n_epochs + 1):
        # Running means of the per-batch losses for this epoch
        train_loss = 0.0
        valid_loss = 0.0

        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            if use_cuda:
                data, target = data.cuda(), target.cuda()

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            # .item() yields a Python float, so the tracker stays off the GPU
            # and does not hold on to tensors.
            train_loss += (loss.item() - train_loss) / (batch_idx + 1)

        ######################
        # validate the model #
        ######################
        model.eval()
        # No gradients are needed for validation; skipping graph construction
        # saves memory and time.
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(loaders['valid']):
                if use_cuda:
                    data, target = data.cuda(), target.cuda()

                output = model(data)
                loss = criterion(output, target)
                valid_loss += (loss.item() - valid_loss) / (batch_idx + 1)

        # print training/validation statistics
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch,
            train_loss,
            valid_loss
            ))

        # Save the model if the validation loss has not increased
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
                valid_loss_min,
                valid_loss))
            torch.save(model.state_dict(), save_path)
            valid_loss_min = valid_loss

    return model

# Continue training from an existing checkpoint, if there is one.
# map_location='cpu' lets a checkpoint written on a GPU machine be read on a
# CPU-only one; load_state_dict then copies the values onto whichever device
# the model's parameters already live on.
if os.path.isfile('model.pt'):
    model_scratch.load_state_dict(torch.load('model.pt', map_location='cpu'))
# train the model
model = train(10, loaders, model_scratch, optimizer, criterion, use_cuda, 'model.pt')
# reload the checkpoint with the lowest validation loss seen during training
model.load_state_dict(torch.load('model.pt', map_location='cpu'))

## Test Final Model

In [None]:
def test(loaders, model, criterion, use_cuda):
    """Print the mean loss and the accuracy of ``model`` on ``loaders['test']``.

    Args:
        loaders: Mapping whose ``'test'`` entry yields ``(data, target)`` batches.
        model: Module to evaluate; it is switched to eval mode.
        criterion: Callable returning a scalar loss tensor for ``(output, target)``.
        use_cuda: When true, each batch is moved to the GPU before use.

    Returns:
        None. Results are reported on stdout only.
    """
    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0
    total = 0

    model.eval()
    # Evaluation needs no gradients; avoid building autograd graphs.
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(loaders['test']):
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            loss = criterion(output, target)
            # running mean of the per-batch losses
            test_loss += (loss.item() - test_loss) / (batch_idx + 1)
            # index of the highest score per sample is the predicted class
            pred = output.max(dim=1)[1]
            # count matches in torch itself; no NumPy round trip required
            correct += pred.eq(target).sum().item()
            total += data.size(0)

    print('Test Loss: {:.6f}\n'.format(test_loss))

    # An empty loader reports 0% instead of raising ZeroDivisionError.
    accuracy = 100. * correct / total if total else 0.
    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
        accuracy, correct, total))


# Despite its name this is an evaluation routine, not a unit test; the flag
# keeps pytest-style collectors from trying to run it.
test.__test__ = False

# Report loss and accuracy of the reloaded model on the test loader
test(loaders, model, criterion, use_cuda)