In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import _LRScheduler
import torch.utils.data as data
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from PIL import Image
from sklearn import decomposition
from sklearn import manifold
from sklearn.metrics import confusion_matrix
from tqdm.notebook import tqdm, trange
import matplotlib.pyplot as plt
import numpy as np
import os
import random
from torchvision.models import resnet50, vgg16
from PIL import Image
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch
from torchvision import transforms

We'll also set the random seeds.

In [None]:
SEED = 1234

# Seed every random number generator the notebook touches (Python, NumPy,
# PyTorch CPU and PyTorch CUDA), in that order, with the same value.
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_rng(SEED)

# Ask cuDNN to pick deterministic kernels.
torch.backends.cudnn.deterministic = True

In [None]:

class SkinDataset(Dataset):
    """Labelled image dataset read from a ``<root>/<class>/<sub-folder>/<image>`` tree.

    Only the sub-folders explicitly listed for each class are loaded, which is
    how the caller selects a train or test subset.

    Args:
        dataset_path: Root directory that contains one folder per class.
        resize: Target size passed to ``transforms.Resize``.
        cropsize: Side length passed to ``transforms.CenterCrop``.
        BU_files, Le_files, Sc_files, Mc_files, Ya_files: Names of the
            sub-folders to load from the ``BU``, ``Leprosy``, ``Scabies``,
            ``Mycetoma`` and ``Yaws`` class folders. ``None`` means "none".

    Each item is ``(image_tensor, label)`` where ``label`` is the integer from
    ``CLASS_LABELS``.
    """

    # Class folder name -> integer label returned by __getitem__.
    CLASS_LABELS = {'BU': 0, 'Leprosy': 1, 'Scabies': 2, 'Mycetoma': 3, 'Yaws': 4}

    def __init__(self, dataset_path='./datapath', resize=256, cropsize=224, BU_files=None, Le_files=None, Sc_files=None, Mc_files=None, Ya_files=None):
        # None instead of [] as default avoids sharing one mutable list
        # between every instance created without that argument.
        self.BU_files = [] if BU_files is None else BU_files
        self.Le_files = [] if Le_files is None else Le_files
        self.Sc_files = [] if Sc_files is None else Sc_files
        self.Mc_files = [] if Mc_files is None else Mc_files
        self.Ya_files = [] if Ya_files is None else Ya_files
        self.dataset_path = dataset_path
        self.resize = resize
        self.cropsize = cropsize

        # load dataset
        self.x, self.y = self.load_dataset_folder()

        # set transforms
        # InterpolationMode.LANCZOS is the same filter the removed
        # Image.ANTIALIAS constant used to select.
        self.transform_x = transforms.Compose([
            transforms.Resize(resize, interpolation=transforms.InterpolationMode.LANCZOS),
            transforms.CenterCrop(cropsize),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

    def __getitem__(self, idx):
        """Return the transformed RGB image tensor and integer label at ``idx``."""
        path = self.x[idx]
        y = self.y[idx]

        # The context manager closes the underlying file as soon as the
        # pixels have been decoded by convert().
        with Image.open(path) as img:
            x = img.convert('RGB')
        x = self.transform_x(x)

        return x, y

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.x)

    def load_dataset_folder(self):
        """Collect image paths and labels for the selected sub-folders.

        Entries of ``dataset_path`` that are not one of the known class names
        are ignored, and directory listings are sorted so the sample order is
        the same on every run and platform.

        Returns:
            ``(paths, labels)``: two lists of equal length.
        """
        files_by_class = {
            'BU': self.BU_files,
            'Leprosy': self.Le_files,
            'Scabies': self.Sc_files,
            'Mycetoma': self.Mc_files,
            'Yaws': self.Ya_files,
        }

        x = []
        y = []

        for class_name in sorted(os.listdir(self.dataset_path)):
            if class_name not in files_by_class:
                # Stray file or unrelated folder: nothing to load.
                continue
            label = self.CLASS_LABELS[class_name]

            for sub_folder in files_by_class[class_name]:
                sub_dir = os.path.join(self.dataset_path, class_name, sub_folder)
                for image_name in sorted(os.listdir(sub_dir)):
                    image_path = os.path.join(sub_dir, image_name)
                    if not os.path.isfile(image_path):
                        # Nested directories cannot be opened as images.
                        continue
                    x.append(image_path)
                    y.append(label)

        return x, y


def get_dataset(data_path, train_ratio = 0.5):
    """Split every class folder into train/test sub-folders and build both datasets.

    The split is made on the sub-folders of each class directory (not on
    individual images): the first ``int(n * train_ratio)`` sub-folders, in
    sorted name order, go to the training set and the rest to the test set.
    Sorting makes the split reproducible, since ``os.listdir`` returns entries
    in arbitrary order.

    Args:
        data_path: Root directory containing the class folders ``BU``,
            ``Leprosy``, ``Scabies``, ``Mycetoma`` and ``Yaws``. Other entries
            are ignored; a missing class simply contributes no samples.
        train_ratio: Fraction of each class's sub-folders used for training.

    Returns:
        ``(train_dataset, test_dataset)`` as two ``SkinDataset`` instances.
    """
    print('The training ratio is', train_ratio)

    # Class folder name -> SkinDataset keyword argument receiving its sub-folders.
    keyword_for_class = {
        'BU': 'BU_files',
        'Leprosy': 'Le_files',
        'Scabies': 'Sc_files',
        'Mycetoma': 'Mc_files',
        'Yaws': 'Ya_files',
    }
    train_files = {keyword: [] for keyword in keyword_for_class.values()}
    test_files = {keyword: [] for keyword in keyword_for_class.values()}

    for class_name in sorted(os.listdir(data_path)):
        if class_name not in keyword_for_class:
            continue
        class_dir = os.path.join(data_path, class_name)
        # Keep directories only: SkinDataset lists the contents of every entry.
        files = [entry for entry in sorted(os.listdir(class_dir))
                 if os.path.isdir(os.path.join(class_dir, entry))]
        num_train = int(len(files) * train_ratio)
        print(class_name, len(files), num_train)

        keyword = keyword_for_class[class_name]
        train_files[keyword] = files[:num_train]
        test_files[keyword] = files[num_train:]

    train_dataset = SkinDataset(dataset_path=data_path, **train_files)
    test_dataset = SkinDataset(dataset_path=data_path, **test_files)

    return train_dataset, test_dataset

In [None]:
class VGG(nn.Module):
    """VGG-style classifier: convolutional ``features``, 7x7 adaptive pooling, 3-layer MLP head.

    Args:
        features: Module producing a feature map with 512 channels.
        output_dim: Number of output logits.
    """

    def __init__(self, features, output_dim):
        super().__init__()

        flat_dim = 512 * 7 * 7
        hidden_dim = 4096

        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d(7)

        # Layer positions inside the Sequential define the state-dict keys
        # (classifier.0, classifier.3, classifier.6), so the order matters.
        head = [
            nn.Linear(flat_dim, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(hidden_dim, output_dim),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return ``(logits, h)`` where ``h`` is the flattened pooled feature map."""
        pooled = self.avgpool(self.features(x))
        h = pooled.view(pooled.shape[0], -1)
        logits = self.classifier(h)
        return logits, h

In [None]:
# VGG19 layout: integers are conv output channels, 'M' is a max-pool layer.
vgg19_config = [
    64, 64, 'M',
    128, 128, 'M',
    256, 256, 256, 256, 'M',
    512, 512, 512, 512, 'M',
    512, 512, 512, 512, 'M',
]


def get_vgg_layers(config, batch_norm):
    """Build the convolutional part of a VGG network from a layout list.

    Args:
        config: Sequence whose items are either an ``int`` (a 3x3 convolution
            with that many output channels, followed by ReLU) or ``'M'``
            (a 2x2 max-pool).
        batch_norm: When true, insert ``BatchNorm2d`` between each
            convolution and its ReLU.

    Returns:
        ``nn.Sequential`` of the generated layers; the input is expected to
        have 3 channels.
    """
    modules = []
    prev_channels = 3

    for entry in config:
        assert entry == 'M' or isinstance(entry, int)

        if entry != 'M':
            modules.append(nn.Conv2d(prev_channels, entry, kernel_size=3, padding=1))
            if batch_norm:
                modules.append(nn.BatchNorm2d(entry))
            modules.append(nn.ReLU(inplace=True))
            # The next convolution consumes what this one produced.
            prev_channels = entry
        else:
            modules.append(nn.MaxPool2d(kernel_size=2))

    return nn.Sequential(*modules)

In [None]:
import torchvision.models as models

# Number of output logits; SkinDataset assigns labels 0-4.
OUTPUT_DIM = 5

# Our own VGG19 (with batch norm) whose head already has OUTPUT_DIM outputs.
vgg19_layers = get_vgg_layers(vgg19_config, batch_norm=True)

model = VGG(vgg19_layers, OUTPUT_DIM)

# torchvision's VGG19-BN with pretrained weights.
pretrained_model = models.vgg19_bn(pretrained=True)

# Replace the pretrained model's last classifier layer with a freshly
# initialised OUTPUT_DIM-way layer so that every tensor in its state dict has
# the same shape as the corresponding tensor in `model`.
IN_FEATURES = pretrained_model.classifier[-1].in_features

final_fc = nn.Linear(IN_FEATURES, OUTPUT_DIM)

pretrained_model.classifier[-1] = final_fc

# Copy all weights (pretrained layers + the new final layer) into `model`.
# Kept as the last expression so the cell displays the key-matching report.
model.load_state_dict(pretrained_model.state_dict())



<All keys matched successfully>

This model has considerably more parameters than the previous model, AlexNet - roughly 140M (VGG19 with batch normalization and a 5-class output layer) compared to 23M.

In [None]:
data_dir = './DataSet/Data5com'
ratio = 0.5

# Split each class's sub-folders into a training and a test part.
train_dataset, test_dataset = get_dataset(data_path=data_dir, train_ratio=ratio)

# Training batches are shuffled and the incomplete last batch is dropped;
# test batches keep their order and every sample.
train_loader_options = dict(batch_size=32, shuffle=True, drop_last=True)
test_loader_options = dict(batch_size=10, shuffle=False, drop_last=False)

train_iterator = data.DataLoader(train_dataset, **train_loader_options)
test_iterator = data.DataLoader(test_dataset, **test_loader_options)


The training ratio is 0.5
Yaws 32 16
Leprosy 38 19
Scabies 107 53
Mycetoma 12 6
BU 97 48


  self.transform_x = transforms.Compose([transforms.Resize(resize, Image.ANTIALIAS),


In [None]:
LR = 5e-4

# Run on the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = model.to(device)
criterion = nn.CrossEntropyLoss().to(device)

# Two parameter groups: the convolutional features use a 10x smaller learning
# rate than the classifier head, which falls back to the optimizer default LR.
params = [
    {'params': model.features.parameters(), 'lr': LR / 10},
    {'params': model.classifier.parameters()},
]

optimizer = optim.Adam(params, lr=LR)

In [None]:
def calculate_accuracy(y_pred, y):
    """Fraction of rows of ``y_pred`` whose arg-max along dim 1 equals the label in ``y``.

    Returns a 0-dim float tensor.
    """
    predicted = y_pred.argmax(dim=1)
    # Reshape the labels to the prediction shape so (N,) and (N, 1) both work.
    hits = predicted.eq(y.view(predicted.shape))
    return hits.sum().float() / y.shape[0]
    
def train(model, iterator, optimizer, criterion, device):
    """Train ``model`` for one pass over ``iterator``.

    Args:
        model: Module whose forward pass returns ``(logits, features)``.
        iterator: Iterable of ``(x, y)`` batches.
        optimizer: Optimizer stepping the model's parameters.
        criterion: Loss called as ``criterion(logits, y)``. The reported loss
            assumes it returns the mean over the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        device: Device the batches are moved to.

    Returns:
        ``(loss, accuracy)`` as Python floats, averaged over samples. Each
        batch is weighted by its size so a smaller final batch does not skew
        the result.

    Raises:
        ValueError: If ``iterator`` yields no samples.
    """
    total_loss = 0.0
    total_acc = 0.0
    total_samples = 0

    model.train()

    for (x, y) in tqdm(iterator, desc="Training", leave=False):

        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()

        y_pred, _ = model(x)

        loss = criterion(y_pred, y)

        acc = calculate_accuracy(y_pred, y)

        loss.backward()

        optimizer.step()

        # Turn the per-batch means back into sums before accumulating.
        batch_size = y.shape[0]
        total_loss += loss.item() * batch_size
        total_acc += acc.item() * batch_size
        total_samples += batch_size

    if total_samples == 0:
        raise ValueError("iterator yielded no samples; cannot compute training metrics")

    return total_loss / total_samples, total_acc / total_samples

def evaluate(model, iterator, criterion, device):
    """Evaluate ``model`` on ``iterator`` without computing gradients.

    Args:
        model: Module whose forward pass returns ``(logits, features)``.
        iterator: Iterable of ``(x, y)`` batches.
        criterion: Loss called as ``criterion(logits, y)``. The reported loss
            assumes it returns the mean over the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        device: Device the batches are moved to.

    Returns:
        ``(loss, accuracy)`` as Python floats, averaged over samples. Each
        batch is weighted by its size, so the result is exact even when the
        last batch is smaller than the others.

    Raises:
        ValueError: If ``iterator`` yields no samples.
    """
    total_loss = 0.0
    total_acc = 0.0
    total_samples = 0

    model.eval()

    with torch.no_grad():

        for (x, y) in tqdm(iterator, desc="Evaluating", leave=False):

            x = x.to(device)
            y = y.to(device)

            y_pred, _ = model(x)

            loss = criterion(y_pred, y)

            acc = calculate_accuracy(y_pred, y)

            # Turn the per-batch means back into sums before accumulating.
            batch_size = y.shape[0]
            total_loss += loss.item() * batch_size
            total_acc += acc.item() * batch_size
            total_samples += batch_size

    if total_samples == 0:
        raise ValueError("iterator yielded no samples; cannot compute evaluation metrics")

    return total_loss / total_samples, total_acc / total_samples

In [None]:
EPOCHS = 5

# Lowest test loss seen so far and the (0-based) epoch that produced it.
best_valid_loss = float('inf')
best_epoch = None

for epoch in trange(EPOCHS, desc="Epochs"):

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion, device)
    test_loss, test_acc = evaluate(model, test_iterator, criterion, device)

    # Training loss keeps falling even after the test loss starts to rise,
    # so remember which epoch generalised best.
    if test_loss < best_valid_loss:
        best_valid_loss = test_loss
        best_epoch = epoch

    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Test. Loss: {test_loss:.3f} |  Test. Acc: {test_acc*100:.2f}%')

Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Training:   0%|          | 0/14 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/53 [00:00<?, ?it/s]

	Train Loss: 1.093 | Train Acc: 56.47%
	 Test. Loss: 0.812 |  Test. Acc: 72.08%


Training:   0%|          | 0/14 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/53 [00:00<?, ?it/s]

	Train Loss: 0.388 | Train Acc: 86.83%
	 Test. Loss: 0.878 |  Test. Acc: 76.60%


Training:   0%|          | 0/14 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/53 [00:00<?, ?it/s]

	Train Loss: 0.114 | Train Acc: 97.10%
	 Test. Loss: 1.318 |  Test. Acc: 72.89%


Training:   0%|          | 0/14 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/53 [00:00<?, ?it/s]

	Train Loss: 0.019 | Train Acc: 99.78%
	 Test. Loss: 1.258 |  Test. Acc: 73.96%


Training:   0%|          | 0/14 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/53 [00:00<?, ?it/s]

	Train Loss: 0.005 | Train Acc: 100.00%
	 Test. Loss: 1.543 |  Test. Acc: 75.66%
