In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt 
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets
import torchvision.transforms as transforms
import torchvision.models as models
from sklearn.metrics import roc_auc_score

%matplotlib inline  

In [2]:
Data_dir = 'data'
num_workers = 0
batch_size = 64

transforms = {
    'train' : transforms.Compose([transforms.Resize(256), 
                                 transforms.CenterCrop(224),
                                 transforms.ToTensor(),
                                 transforms.Normalize([0.5, 0.5, 0.5], [0.229, 0.224, 0.225])]),
    'val_test' : transforms.Compose([transforms.Resize(256), 
                                 transforms.CenterCrop(224),
                                 transforms.ToTensor(),
                                 transforms.Normalize([0.5, 0.5, 0.5], [0.229, 0.224, 0.225])])
}

loaders = {
    'train' : torch.utils.data.DataLoader(datasets.ImageFolder(Data_dir + '/train', transform=transforms['train']),
                                         batch_size=batch_size, num_workers=num_workers, shuffle = True),
    'valid' : torch.utils.data.DataLoader(datasets.ImageFolder(Data_dir + '/valid', transform=transforms['val_test']),
                                         batch_size=batch_size, num_workers=num_workers, shuffle = True),
    'test' : torch.utils.data.DataLoader(datasets.ImageFolder(Data_dir + '/test', transform=transforms['val_test']),
                                        batch_size=batch_size, num_workers=num_workers, shuffle = True)
}

In [3]:
def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    """returns trained model"""
    # initialize tracker for minimum validation loss
    valid_loss_min = np.Inf 
    
    for epoch in range(1, n_epochs+1):
        # initialize variables to monitor training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        
        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            ## find the loss and update the model parameters accordingly
            ## record the average training loss, using something like
            ## train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output , target)
            loss.backward()
            optimizer.step()
            train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data- train_loss))
        ######################    
        # validate the model #
        ######################
        model.eval()
        for batch_idx, (data, target) in enumerate(loaders['valid']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            ## update the average validation loss
            optimizer.zero_grad()
            with torch.no_grad():
                output = model(data)
                loss_val = criterion(output,target)
                valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.data- valid_loss))

            
        # print training/validation statistics 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, 
            train_loss,
            valid_loss
            ))
        
        ## TODO: save the model if validation loss has decreased
        if valid_loss_min > valid_loss:
            print("Saving the model...")
            valid_loss_min = valid_loss
            torch.save(model.state_dict(), save_path)
            
    # return trained model
    return model

In [4]:
def test(loaders, model, criterion, use_cuda):

    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.

    model.eval()
    for batch_idx, (data, target) in enumerate(loaders['test']):
        # move to GPU
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss 
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        # convert output probabilities to predicted class
        pred = output.data.max(1, keepdim=True)[1]
        # compare predictions to true label
        correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)
            
    print('Test Loss: {:.6f}\n'.format(test_loss))

    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
        100. * correct / total, correct, total))

In [5]:
model = models.vgg16(pretrained=True)
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [6]:
for param in model.features.parameters():
    param.requires_grad = False

n_inputs = model.classifier[6].in_features
last_layer = nn.Linear(n_inputs, 3)

model.classifier[6] = last_layer
print(model)

use_cuda = torch.cuda.is_available()
if use_cuda:
    model = model.cuda()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [7]:
criterion= nn.CrossEntropyLoss()
optimizer= optim.Adam(model.classifier.parameters(), lr=0.001)

In [8]:
# train the model


model= train(10, loaders, model, optimizer, criterion, use_cuda, 'model.pt')

# load the model that got the best validation accuracy (uncomment the line below)
model.load_state_dict(torch.load('model.pt'))

Epoch: 1 	Training Loss: 1.019250 	Validation Loss: 1.183824
Saving the model...
Epoch: 2 	Training Loss: 0.702109 	Validation Loss: 0.776870
Saving the model...
Epoch: 3 	Training Loss: 0.502969 	Validation Loss: 0.439655
Saving the model...
Epoch: 4 	Training Loss: 0.413789 	Validation Loss: 0.491001
Epoch: 5 	Training Loss: 0.256826 	Validation Loss: 0.315692
Saving the model...
Epoch: 6 	Training Loss: 0.253727 	Validation Loss: 0.041141
Saving the model...
Epoch: 7 	Training Loss: 0.199654 	Validation Loss: 0.468519
Epoch: 8 	Training Loss: 0.283064 	Validation Loss: 0.319264
Epoch: 9 	Training Loss: 0.175337 	Validation Loss: 0.003177
Saving the model...
Epoch: 10 	Training Loss: 0.144157 	Validation Loss: 0.011440


<All keys matched successfully>

In [9]:
results = test(loaders, model, criterion, use_cuda)

Test Loss: 3.081656


Test Accuracy: 60% (364/600)


In [10]:
from PIL import Image
from torchvision import transforms
from pylab import imread,subplot,imshow,show
import  pandas as pd
import os
from pathlib import Path
import torch.nn.functional as F

img_path = Path('data/test/')

sample_predictions = pd.DataFrame(columns =['Id', 'task_1', 'task_2'])

# pred_to =[]
for path, subdirs, files in os.walk(img_path):
    
    for name in files:
        path2 = os.path.join(path, name)
        transform = transforms.Compose([transforms.Resize(256), 
                                 transforms.CenterCrop(224),
                                 transforms.ToTensor(),
                                 transforms.Normalize([0.5, 0.5, 0.5], [0.229, 0.224, 0.225])])
        image_pil = Image.open(path2)
        img_tensor = transform(image_pil)
        image = img_tensor.unsqueeze_(0)
        image = image.cuda()
        prediction = model(image)
        pred = F.softmax(prediction)
        pred2 = np.where(pred.cpu().detach().numpy()>0.5 , 1 , 0)

        sample_predictions = sample_predictions.append({'Id' : path2 , 'task_1' : pred2[0][0] , 'task_2' : pred2[0][2]}, ignore_index=True)
       




In [14]:
sample_predictions.to_csv('D:/Udacity/Repos/dermatologist-ai/sample_predictions.csv')