In [None]:
import torch
import torch.nn as nn
from torch.utils import data
from torchvision import transforms as trf
from torchvision import models
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, recall_score, precision_score, roc_auc_score
from PIL import Image
import os
from glob import glob
import time
import copy

In [None]:
# Seed NumPy's and PyTorch's global random number generators so that the
# stochastic steps below (splits, shuffling, weight init) are repeatable.
# NOTE(review): the two seeds differ (32 vs 42) - confirm that this is intentional.
np.random.seed(32)
torch.manual_seed(42)

In [None]:
# Use the first CUDA device when PyTorch can see one, otherwise fall back to the CPU.
CUDA = torch.cuda.is_available()
if CUDA:
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")

In [None]:
# Load the HAM10000 metadata table; the trailing expression previews its first rows.
df = pd.read_csv('../data/HAM10000_metadata.csv')
df.head()

In [None]:
# Human-readable name for every diagnosis code found in the metadata `dx` column.
lesion_type = {
    'nv': 'Melanocytic nevi',
    'mel': 'Melanoma',
    'bkl': 'Benign keratosis-like lesions ',
    'bcc': 'Basal cell carcinoma',
    'akiec': 'Actinic keratoses',
    'vasc': 'Vascular lesions',
    'df': 'Dermatofibroma'
}

# Map image id (file name without extension) -> path of the JPEG on disk.
# The search root is assembled from its components so the platform's own
# separator is used; the previous hard-coded "..\\data" only matched on Windows.
imageid_path = {
    os.path.splitext(os.path.basename(x))[0]: x
    for x in glob(os.path.join('..', 'data', '*', '*.jpg'))
}

In [None]:
# Derive the columns used downstream:
#   path      - image file for each row (looked up by image id)
#   cell_type - readable lesion name for the diagnosis code
#   target    - integer class code of cell_type
df['path'] = df['image_id'].map(imageid_path.get)
df['cell_type'] = df['dx'].map(lesion_type.get)
df['target'] = pd.Categorical(df['cell_type']).codes

# Fail fast: an unmatched lookup yields None (and a target code of -1), which
# would otherwise only surface later as an obscure image-loading or loss error.
n_missing_paths = int(df['path'].isna().sum())
if n_missing_paths:
    raise FileNotFoundError(
        f'{n_missing_paths} of {len(df)} metadata rows have no matching image file'
    )
n_unknown_dx = int(df['cell_type'].isna().sum())
if n_unknown_dx:
    raise ValueError(f'{n_unknown_dx} metadata rows have a dx code missing from lesion_type')

In [None]:
# Number of rows per lesion type (shows how balanced the classes are).
df['cell_type'].value_counts()

In [None]:
# Plot a grid with one row per lesion type and `n_samples` random images per row.
n_samples = 5
type_groups = df.sort_values(['cell_type']).groupby('cell_type')
n_types = type_groups.ngroups  # derived from the data instead of a hard-coded 7
fig, m_axs = plt.subplots(n_types, n_samples, figsize=(4*n_samples, 3*n_types), squeeze=False)
for n_axs, (type_name, type_rows) in zip(m_axs, type_groups):
    n_axs[0].set_title(type_name)
    # Fixed random_state so the same example images are drawn on every run.
    for c_ax, (_, c_row) in zip(n_axs, type_rows.sample(n_samples, random_state=128).iterrows()):
        img = mpimg.imread(c_row['path'])
        c_ax.imshow(img)
        c_ax.axis('off')
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('./output', exist_ok=True)
fig.savefig('./output/category_samples.png', dpi=300)
# plt.show() renders under notebook backends; fig.show() targets GUI backends.
plt.show()

In [None]:
class SkinData(data.Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs described by a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``'path'`` column (image file to open) and a ``'target'``
        column (value convertible to ``int``).
    transform : callable, optional
        Applied to the loaded PIL image before it is returned.
    """

    def __init__(self, df, transform=None):
        """Initialization"""
        self.df = df
        self.transform = transform

    def __len__(self):
        """Denotes the total number of samples"""
        return len(self.df)

    def __getitem__(self, index):
        """Generates one sample of data.

        ``index`` is positional (0 .. len-1), so the frame does not need a
        freshly reset index for the lookup to succeed.
        """
        path = self.df['path'].iloc[index]
        # Close the file handle promptly and always hand a 3-channel image to the
        # transform (the normalisation used in this notebook has three channels).
        with Image.open(path) as img:
            X = img.convert('RGB')
        y = torch.tensor(int(self.df['target'].iloc[index]))

        if self.transform is not None:
            X = self.transform(X)

        return X, y

## Data Preparation

In [None]:
# Hold out 10% of the rows. A fixed random_state makes the split independent of
# how often earlier cells were executed; stratifying on the label keeps the class
# proportions of the imbalanced dataset the same on both sides of the split.
train, test = train_test_split(df, test_size=0.1, random_state=32, stratify=df['target'])

In [None]:
# Split the held-out rows in half: one half for validation, one for the final test.
# A fixed random_state keeps the split reproducible.
# NOTE: this cell rebinds `test`; re-running it alone halves the test set again,
# so re-run the previous cell first.
validation, test = train_test_split(test, test_size=0.5, random_state=32)

In [None]:
# Renumber every split 0..n-1. drop=True discards the old index instead of
# inserting it as an extra 'index' column, which also keeps this cell re-runnable.
train = train.reset_index(drop=True)
validation = validation.reset_index(drop=True)
test = test.reset_index(drop=True)

## Train Model

In [None]:
def train_model(name, model, data_gen, val_gen, optimizer, criterion, num_epochs=20,
                max_val_batches=10):
    """Train ``model`` and return the per-epoch loss history.

    Parameters
    ----------
    name : str
        Tag embedded in the checkpoint file names.
    model : torch.nn.Module
        Model to optimise; expected to already live on ``DEVICE``.
    data_gen : iterable of (inputs, labels)
        Training batches.
    val_gen : iterable of (inputs, labels)
        Validation batches.
    optimizer : torch.optim.Optimizer
        Optimizer bound to the model parameters.
    criterion : callable
        Loss function called as ``criterion(output, labels)``.
    num_epochs : int
        Number of passes over ``data_gen``.
    max_val_batches : int or None
        Upper bound on the validation batches evaluated per epoch
        (``None`` evaluates all of them).

    Returns
    -------
    (list of float, list of float)
        Mean training loss and mean validation loss, one entry per epoch.
        An epoch without batches is recorded as ``nan``.
    """
    since = time.time()
    fpath = '../model/'
    # torch.save does not create directories.
    os.makedirs(fpath, exist_ok=True)

    # The histories live outside the epoch loop so they cover every epoch.
    trainings_error = []
    validation_error = []

    for epoch in range(num_epochs):
        # Checkpoint the weights every fifth epoch (before that epoch's updates).
        if epoch % 5 == 0:
            print('Saving model...')
            torch.save(model.state_dict(), f'{fpath}_{name}_{epoch}_{time.time()}__.pth')
        print(f'Epoch {epoch} / {num_epochs-1}')
        print('--' * 4)

        model.train()
        batch_losses = []
        for inputs, label in data_gen:
            inputs = inputs.to(DEVICE)
            label = label.to(DEVICE)
            # Gradients accumulate across backward() calls unless cleared.
            optimizer.zero_grad()
            output = model(inputs)
            err = criterion(output, label)
            err.backward()
            optimizer.step()
            batch_losses.append(err.item())
        mean_trainings_error = float(np.mean(batch_losses)) if batch_losses else float('nan')
        trainings_error.append(mean_trainings_error)
        print('trainings error:', mean_trainings_error)

        model.eval()
        val_losses = []
        with torch.no_grad():
            for batch_idx, (inputs, label) in enumerate(val_gen):
                if max_val_batches is not None and batch_idx >= max_val_batches:
                    break
                inputs = inputs.to(DEVICE)
                label = label.to(DEVICE)
                output = model(inputs)
                val_losses.append(criterion(output, label).item())
        # Recorded every epoch, even when fewer than max_val_batches batches exist.
        mean_val_error = float(np.mean(val_losses)) if val_losses else float('nan')
        validation_error.append(mean_val_error)
        print('validation error:', mean_val_error)

    time_since = time.time() - since
    print(f'Train completed in {time_since//60:.0f}m {time_since%60:.0f}s')
    finite_val_errors = [v for v in validation_error if v == v]  # drop nan entries
    if finite_val_errors:
        print(f'Least loss: {min(finite_val_errors)}')
    return trainings_error, validation_error

## Predict

In [None]:
def predict(model, data_gen):
    """Run ``model`` over ``data_gen`` and collect predicted and true class indices.

    Parameters
    ----------
    model : torch.nn.Module
        Model producing one score per class along dimension 1.
    data_gen : iterable of (inputs, labels)
        Batches of any size.

    Returns
    -------
    (list of int, list of int)
        Predicted class index and ground-truth label for every sample, in
        iteration order.
    """
    results = []
    targets = []
    model.eval()
    with torch.no_grad():
        for inputs, label in data_gen:
            outputs = model(inputs.to(DEVICE))
            _, preds = torch.max(outputs, 1)
            # extend(...tolist()) handles any batch size; .item() only works
            # when the batch holds exactly one sample.
            results.extend(preds.reshape(-1).tolist())
            targets.extend(label.reshape(-1).tolist())
    return results, targets

## Data augmentation

In [None]:
# Per-channel mean and standard deviation passed to trf.Normalize below.
# NOTE(review): these look like the values torchvision documents for its
# ImageNet-pretrained models - confirm against the torchvision docs.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

In [None]:
# Augmenting pipeline: random flips, a 256px centre crop, a random 224px crop,
# then conversion to a normalised tensor.
composed = trf.Compose([
    trf.RandomHorizontalFlip(),
    trf.RandomVerticalFlip(),
    trf.CenterCrop(256),
    trf.RandomCrop(224),
    trf.ToTensor(),
    trf.Normalize(mean=mean, std=std),
])

## To Tensor

In [None]:
# Plain pipeline without augmentation: tensor conversion followed by normalisation.
normalized = trf.Compose([
    trf.ToTensor(),
    trf.Normalize(mean=mean, std=std),
])

## Loading Data

In [None]:
# Keyword arguments shared by the training and validation DataLoaders.
params = dict(batch_size=30, shuffle=True)

# ResNet34

### **without augmentation**

In [None]:
# Datasets that only tensorise and normalise the images ...
train_set_wo = SkinData(train, transform=normalized)
val_set_wo = SkinData(validation, transform=normalized)

# ... and the loaders that batch them with the shared settings.
train_gen_wo = data.DataLoader(train_set_wo, **params)
val_gen_wo = data.DataLoader(val_set_wo, **params)

In [None]:
# Load the pretrained ResNet-34 and freeze its weights. The attribute autograd
# reads is `requires_grad`; the previous `require_grad` spelling only attached
# an unused attribute, leaving every parameter trainable.
res_net = models.resnet34(pretrained=True)
for param in res_net.parameters():
    param.requires_grad = False

In [None]:
# Replace the final fully connected layer with a fresh 7-output layer.
num_ftrs = res_net.fc.in_features
res_net.fc = nn.Linear(in_features=num_ftrs, out_features=7)

In [None]:
# Move the model to the selected device before the optimizer is created.
res_net = res_net.to(DEVICE)

In [None]:
# Cross-entropy loss, optimised with Adam at a learning rate of 1e-6.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(res_net.parameters(), lr=1e-6)

In [None]:
# Train on the un-augmented loaders for 7 epochs and keep both loss histories.
train_loss_res_wo, val_loss_res_wo = train_model(
    'ResNet', res_net, train_gen_wo, val_gen_wo, optimizer, criterion, num_epochs=7
)

**Evaluation on the test set**

In [None]:
# Held-out test split, served one image per batch and in dataset order
# (the DataLoader defaults, spelled out).
test_set = SkinData(test, transform=normalized)
test_gen = data.DataLoader(test_set, batch_size=1, shuffle=False)

In [None]:
# Predicted classes of the un-augmented ResNet on the test loader, plus the true labels.
result_res_wo, target = predict(res_net, test_gen)

### **with augmentation**

In [None]:
# Datasets that run the augmenting pipeline ...
# NOTE(review): validation images are randomly augmented too - confirm this is intended.
train_set_w = SkinData(train, transform=composed)
val_set_w = SkinData(validation, transform=composed)

# ... and the loaders that batch them with the shared settings.
train_gen_w = data.DataLoader(train_set_w, **params)
val_gen_w = data.DataLoader(val_set_w, **params)

In [None]:
# Start again from a fresh pretrained ResNet-34 and freeze its weights. The
# attribute autograd reads is `requires_grad`; the previous `require_grad`
# spelling had no effect and left every parameter trainable.
res_net = models.resnet34(pretrained=True)
for param in res_net.parameters():
    param.requires_grad = False

In [None]:
# Replace the final fully connected layer with a fresh 7-output layer.
num_ftrs = res_net.fc.in_features
res_net.fc = nn.Linear(in_features=num_ftrs, out_features=7)

In [None]:
# Move the model to the selected device before the optimizer is created.
res_net = res_net.to(DEVICE)

In [None]:
# Cross-entropy loss, optimised with Adam at a learning rate of 1e-6.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(res_net.parameters(), lr=1e-6)

In [None]:
# Train on the augmented loaders for 7 epochs and keep both loss histories.
train_loss_res_w, val_loss_res_w = train_model(
    'ResNet_with_augment', res_net, train_gen_w, val_gen_w, optimizer, criterion, num_epochs=7
)

**Evaluation on the test set**

In [None]:
# Predicted classes of the augmented ResNet on the test loader; `target` is re-assigned.
result_res_w, target = predict(res_net, test_gen)

# VGG16

### **without augmentation**

In [None]:
# Load the pretrained VGG-16 and freeze its weights. The attribute autograd
# reads is `requires_grad`; the previous `require_grad` spelling had no effect
# and left every parameter trainable.
vgg_16 = models.vgg16(pretrained=True)
for param in vgg_16.parameters():
    param.requires_grad = False

In [None]:
# Rebuild the classifier: keep every layer except the last and append a fresh
# 7-output linear layer sized to the old last layer's input.
num_ftrs = vgg_16.classifier[6].in_features
features = [*list(vgg_16.classifier.children())[:-1], nn.Linear(num_ftrs, 7)]
vgg_16.classifier = nn.Sequential(*features)

In [None]:
# Move the model to the selected device before the optimizer is created.
vgg_16 = vgg_16.to(DEVICE)

In [None]:
# Cross-entropy loss, optimised with Adam at a learning rate of 1e-6.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(vgg_16.parameters(), lr=1e-6)

In [None]:
# Train on the un-augmented loaders for 7 epochs and keep both loss histories.
train_loss_vgg_wo, val_loss_vgg_wo = train_model(
    'vgg', vgg_16, train_gen_wo, val_gen_wo, optimizer, criterion, num_epochs=7
)

**Evaluation on the test set**

In [None]:
# Predicted classes of the un-augmented VGG-16 on the test loader; `target` is re-assigned.
result_vgg_wo, target = predict(vgg_16, test_gen)

### **with augmentation**

In [None]:
# Start again from a fresh pretrained VGG-16 and freeze its weights. The
# attribute autograd reads is `requires_grad`; the previous `require_grad`
# spelling had no effect and left every parameter trainable.
vgg_16 = models.vgg16(pretrained=True)
for param in vgg_16.parameters():
    param.requires_grad = False

In [None]:
# Rebuild the classifier: keep every layer except the last and append a fresh
# 7-output linear layer sized to the old last layer's input.
num_ftrs = vgg_16.classifier[6].in_features
features = [*list(vgg_16.classifier.children())[:-1], nn.Linear(num_ftrs, 7)]
vgg_16.classifier = nn.Sequential(*features)

In [None]:
# Move the model to the selected device before the optimizer is created.
vgg_16 = vgg_16.to(DEVICE)

In [None]:
# Cross-entropy loss, optimised with Adam at a learning rate of 1e-6.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(vgg_16.parameters(), lr=1e-6)

In [None]:
# Train on the augmented loaders for 7 epochs and keep both loss histories.
train_loss_vgg_w, val_loss_vgg_w = train_model(
    'vgg_with_aug', vgg_16, train_gen_w, val_gen_w, optimizer, criterion, num_epochs=7
)

**Evaluation on the test set**

In [None]:
# Predicted classes of the augmented VGG-16 on the test loader; `target` is re-assigned.
result_vgg_w, target = predict(vgg_16, test_gen)

## Release GPU memory

In [None]:
# Ask PyTorch's CUDA caching allocator to release memory it holds but is not using.
torch.cuda.empty_cache()