# Imports

In [1]:
# %pip install pandas
# %pip install matplotlib
# %pip install opencv-python-headless
# %pip install scikit-image
# %pip install basic-image-eda
# %pip install seaborn
# %pip install torchvision

In [2]:
import os
import glob
import time
import copy

import pandas as pd
import numpy as np

import cv2

import matplotlib.pyplot as plt
import matplotlib.image as mp_image
import seaborn as sns

from IPython.display import Image, display

from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
import torchvision.models as models

import torch
from torch.utils.data import DataLoader
import torch.nn as nn

  from .autonotebook import tqdm as notebook_tqdm


# Data Load

In [3]:
# credits: https://github.com/yuliyabohdan/Skin-diseases-classification-Dermnet-/blob/main/skin_diseases_clas_ResNet50.ipynb

DIR = 'dermnet'
DIR_TRAIN = f'{DIR}/train/'
DIR_TEST = f'{DIR}/test/'

classes = os.listdir(DIR_TRAIN)
print(f'Total classes: {len(classes)}')

# total train and test images
train_count = 0
test_count = 0

classes_df = []
for _class in classes:
    class_dict = {}
    train_count += len(os.listdir(DIR_TRAIN + _class))
    test_count += len(os.listdir(DIR_TEST + _class))
    class_dict.update({'Class': _class, 
                       'Train': len(os.listdir(DIR_TRAIN + _class)), 
                       'Test': len(os.listdir(DIR_TEST + _class)) })
    classes_df.append(class_dict)

print(f'Total num train images: {train_count}')
print(f'Total num test images: {test_count}')
print(pd.DataFrame(classes_df))

Total classes: 23
Total num train images: 15557
Total num test images: 4002
                                                Class  Train  Test
0                    Herpes HPV and other STDs Photos    405   102
1          Lupus and other Connective Tissue diseases    420   105
2                 Melanoma Skin Cancer Nevi and Moles    463   116
3                                     Urticaria Hives    212    53
4                                     Vascular Tumors    482   121
5                                    Systemic Disease    606   152
6                  Nail Fungus and other Nail Disease   1040   261
7   Psoriasis pictures Lichen Planus and related d...   1405   352
8   Cellulitis Impetigo and other Bacterial Infect...    288    73
9   Tinea Ringworm Candidiasis and other Fungal In...   1300   325
10  Scabies Lyme Disease and other Infestations an...    431   108
11  Hair Loss Photos Alopecia and other Hair Diseases    239    60
12         Warts Molluscum and other Viral Infections

In [4]:
# map class labels to integer index

train_imgs = []
test_imgs = []

for _class in classes:
    
    for img in os.listdir(DIR_TRAIN + _class):
        train_imgs.append(f'{DIR_TRAIN}{_class}/{img}')
    
    for img in os.listdir(DIR_TEST + _class):
        test_imgs.append(f'{DIR_TEST}{_class}/{img}')

classToInt = {classes[i]: i for i in range(len(classes))}
intToClass = dict(map(reversed, classToInt.items()))
classToInt

{'Herpes HPV and other STDs Photos': 0,
 'Lupus and other Connective Tissue diseases': 1,
 'Melanoma Skin Cancer Nevi and Moles': 2,
 'Urticaria Hives': 3,
 'Vascular Tumors': 4,
 'Systemic Disease': 5,
 'Nail Fungus and other Nail Disease': 6,
 'Psoriasis pictures Lichen Planus and related diseases': 7,
 'Cellulitis Impetigo and other Bacterial Infections': 8,
 'Tinea Ringworm Candidiasis and other Fungal Infections': 9,
 'Scabies Lyme Disease and other Infestations and Bites': 10,
 'Hair Loss Photos Alopecia and other Hair Diseases': 11,
 'Warts Molluscum and other Viral Infections': 12,
 'Eczema Photos': 13,
 'Actinic Keratosis Basal Cell Carcinoma and other Malignant Lesions': 14,
 'Vasculitis Photos': 15,
 'Poison Ivy Photos and other Contact Dermatitis': 16,
 'Exanthems and Drug Eruptions': 17,
 'Atopic Dermatitis Photos': 18,
 'Light Diseases and Disorders of Pigmentation': 19,
 'Acne and Rosacea Photos': 20,
 'Seborrheic Keratoses and other Benign Tumors': 21,
 'Bullous Disease

# Data Split/Transforms

In [5]:
train_dataset = ImageFolder(root = DIR_TRAIN, transform=transforms.Compose([
    transforms.RandomRotation([-8, +8]),                                           # if augmentation
    transforms.ColorJitter(brightness=0, contrast=0.4, saturation=0, hue=0),      # if augmentation
    transforms.RandomHorizontalFlip(),                                            # if augmentation
    transforms.Resize(255),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.541, 0.414, 0.382], std=[0.256, 0.215, 0.209])
]))
test_dataset = ImageFolder(root = DIR_TEST, transform=transforms.Compose([
    transforms.Resize(255),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.541, 0.414, 0.382], std=[0.256, 0.215, 0.209])
]))

In [6]:
test_size = int(0.5 * len(test_dataset))
valid_size = len(test_dataset) - test_size
valid_dataset, test_dataset = torch.utils.data.random_split(test_dataset, 
                                                            [valid_size, test_size])

# Train/Val Data Loader

In [7]:
dataloaders_dict = {}
dataloaders_dict['train'] = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
dataloaders_dict['val'] = DataLoader(valid_dataset, batch_size=32, shuffle=False, num_workers=2, drop_last=False)
dataloader_test = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2, drop_last=False)

# Modeling

In [8]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs):
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0


    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:               
                model.eval()   # Set model to evaluate mode
                #update_bn_stats(model=model, data_loader=dataloaders[phase])  # if update_bn_stats
                
            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    # Special case for inception because in training it has an auxiliary output. In train
                    #   mode we calculate the loss by summing the final output and the auxiliary output
                    #   but in testing we only consider the final output.
                    
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                      # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

               # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
   
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [9]:
def test_model(model, dl, normalize=True):
    model.eval()
    true_labels = []
    predictions = []
    total = 0
    num_correct = 0
    with torch.no_grad():
        for images, labels in dl:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            predicted = torch.argmax(outputs.data,-1)        
            true_labels.append(labels.cpu().numpy())
            predictions.append(predicted.cpu().numpy())
            total += labels.size(0)
            num_correct += (predicted == labels).sum()
        print(f"Test Accuracy of the model: {float(num_correct)/float(total)*100:.2f}")    
        true_labels = np.hstack(true_labels)
        predictions = np.hstack(predictions)

    return true_labels, predictions

In [10]:
# x, y - find the img from class x labelled as class y 
def test(model, dl, x, y, normalize=True):
    model.eval()
    true_labels = []
    predictions = []
    images_list = []

    with torch.no_grad():
        for images, labels in dl:
            images_list.append(images.cpu().numpy())
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            predicted = torch.argmax(outputs.data,-1)        
            true_labels.append(labels.cpu().numpy())
            predictions.append(predicted.cpu().numpy())
    
    for n in range(60):
        for i in range(32):
            if (true_labels[n][i] == x)  & (predictions[n][i] == y):
                #inv_tensor = inv_normalize(image_list[n][i]])
                plt.imshow(np.transpose(images_list[n][i], (1, 2, 0)))
                plt.show()

In [11]:
# Number of epochs to train for
num_epochs = 100

model = models.resnet50(weights='DEFAULT')
model.fc = nn.Linear(2048, 23, bias=True)

# Detect if we have a GPU available
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)
model = model.to(device)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.0001
)
# Setup the loss fxn
criterion = nn.CrossEntropyLoss()

cuda:0


In [None]:
# Train and evaluate
model, hist = train_model(model, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)

Epoch 0/99
----------
train Loss: 2.3664 Acc: 0.3048
val Loss: 1.9634 Acc: 0.4233

Epoch 1/99
----------
train Loss: 1.7468 Acc: 0.4813
val Loss: 1.6473 Acc: 0.5097

Epoch 2/99
----------
train Loss: 1.3629 Acc: 0.5912
val Loss: 1.4972 Acc: 0.5467

Epoch 3/99
----------
train Loss: 1.0588 Acc: 0.6760
val Loss: 1.4109 Acc: 0.5907

Epoch 4/99
----------
train Loss: 0.7884 Acc: 0.7581
val Loss: 1.3815 Acc: 0.6072

Epoch 5/99
----------
train Loss: 0.5833 Acc: 0.8198
val Loss: 1.4147 Acc: 0.6202

Epoch 6/99
----------
train Loss: 0.4586 Acc: 0.8567
val Loss: 1.4312 Acc: 0.6312

Epoch 7/99
----------
train Loss: 0.3670 Acc: 0.8823
val Loss: 1.4947 Acc: 0.6147

Epoch 8/99
----------
train Loss: 0.2912 Acc: 0.9038
val Loss: 1.5187 Acc: 0.6252

Epoch 9/99
----------
train Loss: 0.2514 Acc: 0.9125
