In [1]:
## All the imports

%matplotlib inline
# python libraries
import os, cv2,itertools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
from glob import glob
from PIL import Image

# pytorch libraries
import torch
import torch.nn.functional as F
from torch import optim,nn
from torch.autograd import Variable
from torch.utils.data import DataLoader,Dataset
from torchvision import models,transforms,datasets

# Data Loaders and transforms

In [2]:
## Data loaders for the training, validation, and test sets,
## together with the transforms and batch size each one uses.

## NOTE: every image is resized and center-cropped to 299x299 before being fed to the
## pretrained VGG16 network that is loaded further down in this notebook.
IMAGE_SIZE = 299
BATCH_SIZE = 10

# Per-channel mean / std applied after ToTensor(); shared by all three pipelines.
channel_norm = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225])


def _resize_and_normalize():
    """Return the deterministic steps every split ends with: resize, crop, tensor, normalize."""
    return [transforms.Resize(IMAGE_SIZE),
            transforms.CenterCrop(IMAGE_SIZE),
            transforms.ToTensor(),
            channel_norm]


# Random augmentation is applied to the training split only.
train_augmentation = [transforms.RandomRotation(10),
                      transforms.RandomHorizontalFlip(),
                      transforms.RandomVerticalFlip(),
                      transforms.ColorJitter(brightness=0.1, contrast=0.1, hue=0.1)]

train_transform = transforms.Compose(train_augmentation + _resize_and_normalize())
valid_transform = transforms.Compose(_resize_and_normalize())
test_transform = transforms.Compose(_resize_and_normalize())


# Each split lives in its own sub-directory of root_dir, one folder per class.
root_dir = 'data'

train_data = datasets.ImageFolder(root=os.path.join(root_dir, 'train'),
                                  transform=train_transform)
valid_data = datasets.ImageFolder(root=os.path.join(root_dir, 'valid'),
                                  transform=valid_transform)
test_data = datasets.ImageFolder(root=os.path.join(root_dir, 'test'),
                                 transform=test_transform)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(dataset=valid_data, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE)

for split_label, split_data in (('Training', train_data),
                                ('Valid', valid_data),
                                ('Test', test_data)):
    print(f'{split_label} data size: ', len(split_data))

Training data size:  2000
Valid data size:  150
Test data size:  600


# Building my model

In [3]:
## Model architecture: pretrained VGG16 with a replacement 3-class classifier head
model_transfer = models.vgg16(pretrained=True)

# Keep the pretrained weights fixed; only the head attached below stays trainable.
for frozen in model_transfer.parameters():
    frozen.requires_grad_(False)

# New head: 25088 input features -> 1024 hidden units -> 3 class scores.
head_layers = [nn.Linear(25088, 1024),
               nn.ReLU(),
               nn.Dropout(0.5),
               nn.Linear(1024, 3)]
classifier = nn.Sequential(*head_layers)
model_transfer.classifier = classifier

# Use the first GPU when CUDA is available, otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if use_cuda else 'cpu')
if use_cuda:
    model_transfer = model_transfer.cuda()

print(model_transfer)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

# Loss function and optimizer

In [4]:
# Cross-entropy on the raw class scores, placed on the same device as the model.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

# Adam over the model's parameters; the frozen ones never receive gradients.
optimizer = optim.Adam(params=model_transfer.parameters(), lr=1e-4)

# Train the model

In [5]:
from PIL import ImageFile
# Tell PIL to load image files whose data is truncated instead of raising an error —
# presumably so that a damaged file in the dataset does not abort a training run.
ImageFile.LOAD_TRUNCATED_IMAGES = True

def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    """Train ``model`` and checkpoint it whenever the validation loss improves.

    Args:
        n_epochs: Number of full passes over ``loaders['train']``.
        loaders: Mapping with ``'train'`` and ``'valid'`` entries, each an
            iterable yielding ``(data, target)`` batches.
        model: Module to optimise; its parameters are updated in place.
        optimizer: Optimizer stepped once per training batch.
        criterion: Callable ``criterion(output, target)`` returning the batch loss.
        use_cuda: When true, every batch is moved to the GPU with ``.cuda()``.
        save_path: File that receives ``model.state_dict()`` after every epoch
            whose mean validation loss is lower than in all earlier epochs.

    Returns:
        The same ``model`` object, holding the weights of the *last* epoch
        (which are not necessarily the checkpointed ones).
    """
    # Lowest mean validation loss seen so far. float('inf') is used instead of
    # np.Inf because that alias was removed in NumPy 2.0.
    valid_loss_min = float('inf')

    for epoch in range(1, n_epochs + 1):
        # Running means, kept as plain Python floats so no tensors (or GPU
        # memory) are held across batches.
        train_loss = 0.0
        valid_loss = 0.0

        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()

            # clear gradients of all optimized variables
            optimizer.zero_grad()

            # forward pass, loss, backward pass, parameter update
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            # incremental mean: new_mean = old_mean + (x - old_mean) / count
            train_loss += (loss.item() - train_loss) / (batch_idx + 1)

            if (batch_idx + 1) % 20 == 0:
                print(f'Epoch:{epoch} \tBatch:{batch_idx + 1} \tTrain Loss: {train_loss}\n')

        ######################
        # validate the model #
        ######################
        model.eval()
        # No gradients are needed for validation, so both the forward pass and
        # the loss are computed without building an autograd graph.
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(loaders['valid']):
                # move to GPU
                if use_cuda:
                    data, target = data.cuda(), target.cuda()
                output = model(data)
                loss = criterion(output, target)
                valid_loss += (loss.item() - valid_loss) / (batch_idx + 1)

        # print training/validation statistics
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch,
            train_loss,
            valid_loss
            ))

        # save the model if validation loss has decreased
        if valid_loss < valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model...'.format(valid_loss_min, valid_loss))
            torch.save(model.state_dict(), save_path)
            valid_loss_min = valid_loss

    # return trained model
    return model


# Bundle the three loaders under the split names that train() and test() look up.
loaders_transfer = dict(train=train_loader,
                        valid=valid_loader,
                        test=test_loader)

# Train for 15 epochs; the checkpoint file is rewritten whenever validation loss improves.
checkpoint_path = 'skin-cancer-model_transfer.pt'
model_transfer = train(15, loaders_transfer, model_transfer, optimizer, criterion, use_cuda, checkpoint_path)

# Restore the checkpointed weights, i.e. those with the lowest validation loss.
model_transfer.load_state_dict(torch.load(checkpoint_path))

Epoch:1 	Batch:20 	Train Loss: 1.0278104543685913

Epoch:1 	Batch:40 	Train Loss: 0.9111490249633789

Epoch:1 	Batch:60 	Train Loss: 0.8659690618515015

Epoch:1 	Batch:80 	Train Loss: 0.7995301485061646

Epoch:1 	Batch:100 	Train Loss: 0.7972770929336548

Epoch:1 	Batch:120 	Train Loss: 0.8148987293243408

Epoch:1 	Batch:140 	Train Loss: 0.8123986721038818

Epoch:1 	Batch:160 	Train Loss: 0.8140407204627991

Epoch:1 	Batch:180 	Train Loss: 0.8061633706092834

Epoch:1 	Batch:200 	Train Loss: 0.7990249395370483

Epoch: 1 	Training Loss: 0.799025 	Validation Loss: 0.776437
Validation loss decreased (inf --> 0.776437). Saving model...
Epoch:2 	Batch:20 	Train Loss: 0.6472934484481812

Epoch:2 	Batch:40 	Train Loss: 0.6642495393753052

Epoch:2 	Batch:60 	Train Loss: 0.674979567527771

Epoch:2 	Batch:80 	Train Loss: 0.700082004070282

Epoch:2 	Batch:100 	Train Loss: 0.6957595944404602

Epoch:2 	Batch:120 	Train Loss: 0.7068817019462585

Epoch:2 	Batch:140 	Train Loss: 0.7041639089584351

Epo


Epoch:14 	Batch:180 	Train Loss: 0.46300357580184937

Epoch:14 	Batch:200 	Train Loss: 0.45773959159851074

Epoch: 14 	Training Loss: 0.457740 	Validation Loss: 0.701392
Epoch:15 	Batch:20 	Train Loss: 0.4917726516723633

Epoch:15 	Batch:40 	Train Loss: 0.4135316014289856

Epoch:15 	Batch:60 	Train Loss: 0.432269424200058

Epoch:15 	Batch:80 	Train Loss: 0.4558653235435486

Epoch:15 	Batch:100 	Train Loss: 0.44621768593788147

Epoch:15 	Batch:120 	Train Loss: 0.44902488589286804

Epoch:15 	Batch:140 	Train Loss: 0.44585901498794556

Epoch:15 	Batch:160 	Train Loss: 0.4369741678237915

Epoch:15 	Batch:180 	Train Loss: 0.44436949491500854

Epoch:15 	Batch:200 	Train Loss: 0.44636186957359314

Epoch: 15 	Training Loss: 0.446362 	Validation Loss: 0.716523


<All keys matched successfully>

# Check for test accuracy

In [6]:
def test(loaders, model, criterion, use_cuda):

    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.

    model.eval()
    for batch_idx, (data, target) in enumerate(loaders['test']):
        # move to GPU
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)       
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss 
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        
        # convert output probabilities to predicted class
        output = F.softmax(output, dim=1)
        pred = output.data.max(1, keepdim=True)[1]
        
        # compare predictions to true label
        correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)
            
    print('Test Loss: {:.6f}\n'.format(test_loss))
    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (100. * correct / total, correct, total))

# Evaluate the transfer model (with the reloaded checkpoint weights) on the test split.
test(loaders_transfer, model_transfer, criterion, use_cuda)

Test Loss: 0.784682


Test Accuracy: 67% (403/600)
