In [1]:
## All the imports

%matplotlib inline
# python libraries
import os, cv2,itertools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
from glob import glob
from PIL import Image

# pytorch libraries
import torch
import torch.nn.functional as F
from torch import optim,nn
from torch.autograd import Variable
from torch.utils.data import DataLoader,Dataset
from torchvision import models,transforms,datasets


# Data Loaders and transforms

In [2]:
## Data loaders for the training, validation, and test sets,
## together with the transforms and batch size each split uses.

## ALERT: an Inception model needs 299x299 images, so update the sizes
## below before training one.
## Inception was tried and did not perform very well;
## the pipeline now produces 224x224 images.
image_side = 224
loader_batch_size = 10


def _resize_crop_normalize():
    """Return fresh copies of the deterministic steps applied to every split."""
    return [transforms.Resize(image_side),
            transforms.CenterCrop(image_side),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])]


# Random augmentation is applied to training images only, ahead of the shared steps.
train_augmentations = [transforms.RandomRotation(10),
                       transforms.RandomHorizontalFlip(),
                       transforms.RandomVerticalFlip(),
                       transforms.ColorJitter(brightness=0.1, contrast=0.1, hue=0.1)]

train_transform = transforms.Compose(train_augmentations + _resize_crop_normalize())
valid_transform = transforms.Compose(_resize_crop_normalize())
test_transform = transforms.Compose(_resize_crop_normalize())


root_dir = 'data'

# One ImageFolder per split; each split lives in its own sub-directory of root_dir.
train_data, valid_data, test_data = (
    datasets.ImageFolder(os.path.join(root_dir, split_name), transform=split_transform)
    for split_name, split_transform in (('train', train_transform),
                                        ('valid', valid_transform),
                                        ('test', test_transform)))

# Only the training loader shuffles.
train_loader = DataLoader(train_data, batch_size=loader_batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=loader_batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=loader_batch_size, shuffle=False)

for caption, split_data in (('Training data size: ', train_data),
                            ('Valid data size: ', valid_data),
                            ('Test data size: ', test_data)):
    print(caption, len(split_data))

Training data size:  2000
Valid data size:  150
Test data size:  600


# Building my model

In [3]:
# Number of output classes for now; more images still need to be downloaded.
num_classes = 3


## Model architecture: DenseNet-121 with pretrained weights as the backbone.
model_transfer = models.densenet121(pretrained=True)

# Freeze every pretrained parameter so no gradients are computed for them.
for param in model_transfer.parameters():
    param.requires_grad_(False)

# Swap the classifier for a fresh linear layer sized for our classes.
# The new layer is created after the freeze, so its parameters stay trainable.
num_ftrs = model_transfer.classifier.in_features
model_transfer.classifier = nn.Linear(num_ftrs, num_classes)
input_size = 224

# Use the first GPU when CUDA is available, otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if use_cuda else 'cpu')
if use_cuda:
    model_transfer = model_transfer.cuda()

print(model_transfer)

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

)


# Loss function and optimizer

In [4]:
# Cross-entropy loss, placed on the same device the model was moved to.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
# Adam over the model's parameters with a learning rate of 1e-4.
optimizer = optim.Adam(params=model_transfer.parameters(), lr=1e-4)

# Helper class to calculate average values

In [5]:
# This class is used during training and validation to track the running
# loss and accuracy.
class AverageIt(object):
    """Running (optionally weighted) average of a stream of values.

    Attributes:
        val: the most recent value passed to ``update``.
        sum: the weighted sum of all values seen since the last ``reset``.
        count: the total weight (number of samples) seen since the last ``reset``.
        avg: ``sum / count``, or 0 while ``count`` is still 0.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running average.

        Args:
            val: the new value (for example a batch-mean loss).
            n: how many samples ``val`` represents; defaults to 1.
        """
        self.val = val
        self.sum += val * n
        self.count += n
        # Guard the division: a zero total weight (e.g. update(x, n=0) on a
        # fresh meter) would otherwise raise ZeroDivisionError.
        self.avg = self.sum / self.count if self.count else 0

# Training method

In [6]:
# Running training loss / accuracy, appended every 100 iterations by train().
total_loss_train, total_acc_train = [],[]
def train(train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Relies on the module-level ``device`` and ``AverageIt``. Every 100
    iterations the running averages are printed and appended to the
    module-level ``total_loss_train`` / ``total_acc_train`` lists.

    Args:
        train_loader: iterable yielding ``(images, labels)`` batches.
        model: the network to optimise; switched to training mode.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the progress message.

    Returns:
        ``(average loss, average accuracy)`` over the epoch, each weighted by
        batch size.
    """
    model.train()
    train_loss = AverageIt()
    train_acc = AverageIt()
    for i, (images, labels) in enumerate(train_loader):
        N = images.size(0)
        # Tensors carry autograd state themselves; no Variable wrapper needed.
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        # For an Inception model use: outputs, aux = model(images)
        outputs = model(images)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Index of the largest logit per sample is the predicted class.
        prediction = outputs.max(1, keepdim=True)[1]
        batch_correct = prediction.eq(labels.view_as(prediction)).sum().item()
        # Weight each batch by its size so a smaller final batch does not skew
        # the epoch averages.
        train_acc.update(batch_correct / N, N)
        train_loss.update(loss.item(), N)
        if (i + 1) % 100 == 0:
            print('[epoch %d], [iter %d / %d], [train loss %.5f], [train acc %.5f]' % (
                epoch, i + 1, len(train_loader), train_loss.avg, train_acc.avg))
            total_loss_train.append(train_loss.avg)
            total_acc_train.append(train_acc.avg)
    return train_loss.avg, train_acc.avg

# Validate method

In [7]:
def validate(val_loader, model, criterion, optimizer, epoch):
    """Evaluate ``model`` on ``val_loader`` and print the loss and accuracy.

    Relies on the module-level ``device`` and ``AverageIt``.

    Args:
        val_loader: iterable yielding ``(images, labels)`` batches.
        model: the network to evaluate; switched to eval mode.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: unused; kept so the signature mirrors ``train``.
        epoch: epoch number, used only in the printed message.

    Returns:
        ``(average loss, average accuracy)`` over the loader, each weighted by
        batch size.
    """
    model.eval()
    val_loss = AverageIt()
    val_acc = AverageIt()
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for images, labels in val_loader:
            N = images.size(0)
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            # Index of the largest logit per sample is the predicted class.
            prediction = outputs.max(1, keepdim=True)[1]
            batch_correct = prediction.eq(labels.view_as(prediction)).sum().item()

            # Weight each batch by its size so a smaller final batch does not
            # skew the averages.
            val_acc.update(batch_correct / N, N)
            val_loss.update(criterion(outputs, labels).item(), N)

    print('------------------------------------------------------------')
    print('[epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, val_loss.avg, val_acc.avg))
    print('------------------------------------------------------------')
    return val_loss.avg, val_acc.avg

# Run train, validate methods and save the model with best validation loss

In [8]:
epoch_num = 30
best_val_acc = 0
# Per-epoch validation history.
total_loss_val, total_acc_val = [],[]
# np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0.
valid_loss_min = np.inf

for epoch in range(1, epoch_num+1):
    loss_train, acc_train = train(train_loader, model_transfer, criterion, optimizer, epoch)
    loss_val, acc_val = validate(valid_loader, model_transfer, criterion, optimizer, epoch)
    total_loss_val.append(loss_val)
    total_acc_val.append(acc_val)
    # Report whenever validation accuracy reaches a new best.
    if acc_val > best_val_acc:
        best_val_acc = acc_val
        print('*****************************************************')
        print('best record: [epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, loss_val, acc_val))
        print('*****************************************************')
    # The checkpoint is driven by validation loss, not accuracy: save the
    # weights whenever the loss improves on the best seen so far.
    if loss_val < valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model...'.format(valid_loss_min, loss_val))
        torch.save(model_transfer.state_dict(), 'ai-skin-cancer-detector.pt')
        valid_loss_min = loss_val

[epoch 1], [iter 100 / 200], [train loss 0.87403], [train acc 0.66500]
[epoch 1], [iter 200 / 200], [train loss 0.84951], [train acc 0.67300]
------------------------------------------------------------
[epoch 1], [val loss 0.97630], [val acc 0.52000]
------------------------------------------------------------
*****************************************************
best record: [epoch 1], [val loss 0.97630], [val acc 0.52000]
*****************************************************
Validation loss decreased (inf --> 0.976297). Saving model...
[epoch 2], [iter 100 / 200], [train loss 0.79969], [train acc 0.68400]
[epoch 2], [iter 200 / 200], [train loss 0.79153], [train acc 0.68700]
------------------------------------------------------------
[epoch 2], [val loss 0.92172], [val acc 0.54000]
------------------------------------------------------------
*****************************************************
best record: [epoch 2], [val loss 0.92172], [val acc 0.54000]
**************************

*****************************************************
best record: [epoch 18], [val loss 0.70928], [val acc 0.72667]
*****************************************************
Validation loss decreased (0.725150 --> 0.709280). Saving model...
[epoch 19], [iter 100 / 200], [train loss 0.64278], [train acc 0.73200]
[epoch 19], [iter 200 / 200], [train loss 0.62629], [train acc 0.73250]
------------------------------------------------------------
[epoch 19], [val loss 0.71554], [val acc 0.72667]
------------------------------------------------------------
[epoch 20], [iter 100 / 200], [train loss 0.65719], [train acc 0.73000]
[epoch 20], [iter 200 / 200], [train loss 0.64851], [train acc 0.73350]
------------------------------------------------------------
[epoch 20], [val loss 0.73182], [val acc 0.70000]
------------------------------------------------------------
[epoch 21], [iter 100 / 200], [train loss 0.62891], [train acc 0.72500]
[epoch 21], [iter 200 / 200], [train loss 0.62699], [train

# Check for test accuracy

In [10]:
def test(loaders, model, criterion, use_cuda):

    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.

    model.eval()
    for batch_idx, (data, target) in enumerate(loaders):
        # move to GPU
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)       
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss 
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        
        # convert output probabilities to predicted class
        output = F.softmax(output, dim=1)
        pred = output.data.max(1, keepdim=True)[1]
        
        # compare predictions to true label
        correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)
            
    print('Test Loss: {:.6f}\n'.format(test_loss))
    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (100. * correct / total, correct, total))


# Report loss and accuracy on the held-out test split.
test(loaders=test_loader, model=model_transfer, criterion=criterion, use_cuda=use_cuda)

Test Loss: 0.718242


Test Accuracy: 68% (412/600)
