In [1]:
import numpy as np
from glob import glob

In [2]:
import cv2                
import matplotlib.pyplot as plt                        
%matplotlib inline 

In [3]:
import torch
import torchvision.models as models
from PIL import Image
import torchvision.transforms.functional as TF
import os
from torchvision import datasets
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import ImageFile


In [4]:
ImageFile.LOAD_TRUNCATED_IMAGES = True

def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    """returns trained model"""
    # initialize tracker for minimum validation loss
    valid_loss_min = np.Inf 
    
    for epoch in range(1, n_epochs+1):
        # initialize variables to monitor training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        
        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):

            if use_cuda:
                data, target = data.cuda(), target.cuda()
                
            optimizer.zero_grad()
            
            # Forward pass
            output = model.forward(data)
            # Calculate the batch loss
            loss = criterion(output, target)
            # Back propagation
            loss.backward()
            # Perform weigth update
            optimizer.step()
            # Update training loss
            train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
        
            ## find the loss and update the model parameters accordingly
            ## record the average training loss, using something like
            ## train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
            
        ######################    
        # validate the model #
        ######################
        model.eval()
        for batch_idx, (data, target) in enumerate(loaders['valid']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            ## update the average validation loss
            # Prediction
            output = model.forward(data)
            # Batch loss
            loss = criterion(output, target)
            # Update average validation loss
            valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.data - valid_loss))
            
        # print training/validation statistics 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, 
            train_loss,
            valid_loss
            ))
        
        ## TODO: save the model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
            valid_loss_min,
            valid_loss))
            torch.save(model.state_dict(), save_path)
            valid_loss_min = valid_loss
        
    # return trained model
    return model

In [5]:
num_workers = 0
batch_size = 20
valid_size = 0.2

# check if CUDA is available
train_on_gpu = torch.cuda.is_available()
print(train_on_gpu)

True


In [6]:
import torchvision.transforms as transforms
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

In [7]:
transform = transforms.Compose([transforms.RandomAffine(15),
                                transforms.RandomResizedCrop(224),
                                          transforms.ToTensor(),
                                          transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                          std=[0.229, 0.224, 0.225])])

In [8]:
train_data = datasets.ImageFolder("images/train", transform=data_transforms['train'])
test_data = datasets.ImageFolder("images/test", transform=data_transforms['val'])
val_data = datasets.ImageFolder("images/valid", transform=data_transforms['val'])

In [9]:
next(iter(train_data))[0].shape

torch.Size([3, 224, 224])

In [10]:
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

# define samplers for obtaining training and validation batches
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# prepare data loaders (combine dataset and sampler)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=20,
    sampler=train_sampler, num_workers=num_workers)
valid_loader = torch.utils.data.DataLoader(train_data, batch_size=20, 
    sampler=valid_sampler, num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=20, 
    num_workers=num_workers)

loaders_transfer = {'train': train_loader, 'valid': valid_loader, 'test': test_loader}

In [11]:
model_transfer = models.resnet152(pretrained=True)
use_cuda = torch.cuda.is_available()
if use_cuda:
    model_transfer = model_transfer.cuda()

In [90]:
from collections import OrderedDict
for param in model_transfer.parameters():
    param.requires_grad = False

for param in model_transfer.fc.parameters():
    param.requires_grad = True 



classifier = nn.Sequential(OrderedDict([('fc1', nn.Linear(2048, 512)),
                                         ('relu', nn.ReLU()),
                                         ('drop', nn.Dropout(0.5)),
                                         ('fc2', nn.Linear(512, 3)), 
                                         ('output', nn.Softmax(dim=1))]))


model_transfer.fc = classifier
    
model_transfer.cuda()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [14]:
alternative_model = models.resnext101_32x8d(pretrained=True)

In [91]:
criterion_transfer = nn.CrossEntropyLoss()
optimizer_transfer = optim.Adadelta(model_transfer.fc.parameters(),lr=0.01)

In [92]:
# train the model
n_epochs = 50

model_transfer = train(n_epochs, loaders_transfer, model_transfer, optimizer_transfer, criterion_transfer, use_cuda, 'model_transfer.pt')

# load the model that got the best validation accuracy (uncomment the line below)
model_transfer.load_state_dict(torch.load('model_transfer.pt'))



Epoch: 1 	Training Loss: 0.941881 	Validation Loss: 0.871365
Validation loss decreased (inf --> 0.871365).  Saving model ...
Epoch: 2 	Training Loss: 0.879427 	Validation Loss: 0.862069
Validation loss decreased (0.871365 --> 0.862069).  Saving model ...
Epoch: 3 	Training Loss: 0.873951 	Validation Loss: 0.856899
Validation loss decreased (0.862069 --> 0.856899).  Saving model ...
Epoch: 4 	Training Loss: 0.871530 	Validation Loss: 0.855527
Validation loss decreased (0.856899 --> 0.855527).  Saving model ...
Epoch: 5 	Training Loss: 0.870803 	Validation Loss: 0.855502
Validation loss decreased (0.855527 --> 0.855502).  Saving model ...
Epoch: 6 	Training Loss: 0.869939 	Validation Loss: 0.855501
Validation loss decreased (0.855502 --> 0.855501).  Saving model ...
Epoch: 7 	Training Loss: 0.869587 	Validation Loss: 0.854667
Validation loss decreased (0.855501 --> 0.854667).  Saving model ...


KeyboardInterrupt: 

In [93]:
def test(loaders, model, criterion, use_cuda):

    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.

    model.eval()
    for batch_idx, (data, target) in enumerate(loaders['test']):
        # move to GPU
        data, target = data.cuda(), target.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss 
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        # convert output probabilities to predicted class
        pred = output.data.max(1, keepdim=True)[1]
        # compare predictions to true label
        correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)
            
    print('Test Loss: {:.6f}\n'.format(test_loss))

    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
        100. * correct / total, correct, total))

In [94]:
test(loaders_transfer, model_transfer, criterion_transfer, use_cuda)

Test Loss: 0.896661


Test Accuracy: 65% (393/600)


In [160]:
import numpy as np
from PIL import Image

def load_image(path):
  image = Image.open(path)
  image = data_transforms['val'](image)
  image = torch.tensor(image, requires_grad=True)
  image = image.unsqueeze(0)
  return image

def get_predictions(img_paths):
  """ Is melanoma? """
  pred_rank1 = [] # Is melanoma?
  pred_rank2 = [] # Is seborrheic?
  for img_path in img_paths:
    img = load_image(img_path).cuda()
    #img.to(device)
    prediction = model_transfer(img)
    #img.to("cpu")
    pred_rank1.append(float(prediction.data[0][0]))
    pred_rank2.append(float(prediction.data[0][2]))
  return pred_rank1, pred_rank2

In [156]:
import glob 
import cv2

train_dir = ["images/train/melanoma", 
               "images/train/nevus",
               "images/train/seborrheic_keratosis"]

test_dir = ["images/test/melanoma", 
              "images/test/nevus",
              "images/test/seborrheic_keratosis"]

def get_images_paths(directory, n_per_class):
  img_paths = []
  for i in range(3):
    cont = 1
    for file in glob.glob(directory[i]+"/*.jpg"):
      img_paths.append(file)
      if cont == n_per_class:
        break
      cont +=1 
  return img_paths

# Get a sample of 15 training images (5 for each class)


img_paths = [line.rstrip('\n') for line in open('results.csv')] 
img_paths = img_paths[1:]


img_paths = [line.split(',')[0] for line in img_paths] 



In [None]:
task1, task2 = get_predictions(img_paths)



In [145]:
print("Task 1 predictions: ", len(task1))
print("first five predictions (Task 1): ", task1[:5])

print("Task 2 predictions: ", len(task2))
print("first five predictions (Task 2): ", task2[:5])

Task 1 predictions:  600
first five predictions (Task 1):  [0.0006542829214595258, 0.0007437533349730074, 0.0049471259117126465, 0.0017734547145664692, 0.008357163518667221]
Task 2 predictions:  600
first five predictions (Task 2):  [0.0002893433265853673, 0.00042843481060117483, 0.003377052489668131, 0.0010951684089377522, 0.006376801058650017]


In [146]:
import csv
        
with open('results.csv', 'w', newline='') as csvfile:
  doc = csv.writer(csvfile)
  doc.writerow(['Id', 'task_1', 'task_2'])
  for i in range(len(img_paths)):
    doc.writerow([img_paths[i], task1[i], task2[i]])

In [127]:
import plot_metric
from plot_metric.functions import BinaryClassification
# Visualisation with plot_metric
bc = BinaryClassification(y_test, y_pred, labels=["Class 1", "Class 2"])

# Figures
plt.figure(figsize=(5,5))
bc.plot_roc_curve()
plt.show()

ModuleNotFoundError: No module named 'plot_metric'

In [151]:
from sklearn.metrics import confusion_matrix
threshold = 0.5 

# get ground truth labels for test dataset
truth = pd.read_csv('ground_truth.csv')
# y_true = truth.as_matrix(columns=["task_1", "task_2"])
y_true= pd.DataFrame(truth, columns = ['task_1', 'task_2'])

# get model predictions for test dataset

# y_pred = y_pred.as_matrix(columns=["task_1", "task_2"])

import sklearn.metrics as metrics
# calculate the fpr and tpr for all thresholds of the classification

y_pred = pd.read_csv('results.csv')
y_pred= pd.DataFrame(y_pred, columns = ['task_1', 'task_2'])
# fpr, tpr, threshold = metrics.roc_curve(y_true, y_pred)
print(y_true.head())
print(y_pred.head())


roc_auc = metrics.auc(fpr, tpr)

# method I: plt
import matplotlib.pyplot as plt
plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()


   task_1  task_2
0       1       0
1       1       0
2       1       0
3       1       0
4       1       0
     task_1    task_2
0  0.000654  0.000289
1  0.000744  0.000428
2  0.004947  0.003377
3  0.001773  0.001095
4  0.008357  0.006377


NameError: name 'fpr' is not defined

In [88]:
import pandas as pd
import sys
import itertools
import get_results as results

threshold = 0.5 

# get ground truth labels for test dataset
truth = pd.read_csv('ground_truth.csv')
y_true = truth.values

# get model predictions for test dataset
y_pred = pd.read_csv('results.csv')
y_pred = y_pred.values

# plot ROC curves and print scores
results.plot_roc_auc(y_true, y_pred)
# plot confusion matrix
classes = ['benign', 'malignant']
results.plot_confusion_matrix(y_true[:,0], y_pred[:,0], threshold, classes)

ValueError: multiclass format is not supported

<Figure size 432x288 with 0 Axes>

In [70]:
class_names = [item[4:].replace("_", " ") for item in train_data.classes]

def predict_skin_lesion(img):
    transformations = transforms.Compose([transforms.Resize(256), 
                                          transforms.CenterCrop(224),
                                          transforms.ToTensor(),
                                          transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                          std=[0.229, 0.224, 0.225])])
    image_tensor = transformations(img)[:3,:,:].unsqueeze(0)
    model_transfer.eval()
    image_tensor = image_tensor.cuda()
    output = model_transfer(image_tensor)
    pred = output.data.max(1, keepdim=True)[1]

    return class_names[pred[0]]

In [71]:
predict_skin_lesion(Image.open('images/test/seborrheic_keratosis/ISIC_0012974.jpg'))

['noma', 's', 'rrheic keratosis']


IndexError: invalid index of a 0-dim tensor. Use tensor.item() to convert a 0-dim tensor to a Python number