In [1]:
#Thành viên nhóm:
#-Lương Công Hoàn
#-Huỳnh Quốc Trọng
#-Lê Văn Thành
#-Đỗ Hoàng Giang

In [2]:
# Mount Google Drive at /content/drive (Colab-only helper); the dataset paths
# used later in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import models, transforms, utils
import time
import copy
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [4]:
class CustomMyDataset(Dataset):
    """Image dataset described by a ground-truth CSV file.

    The first CSV column holds the image identifier (file name without the
    ``.jpg`` extension); all remaining columns are read as the numeric label
    vector of that image.

    Args:
        csv_file: Path of the CSV file, loaded with ``pandas.read_csv``.
        root_dir: Directory containing the ``<image id>.jpg`` files. A trailing
            path separator is optional.
        transform: Optional callable applied to the loaded image. When given,
            the image is converted to a float tensor and the label to a tensor;
            when omitted, the raw image array and a NumPy label are returned.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.data_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the CSV."""
        return len(self.data_frame)

    def _image_path(self, idx):
        """Return the path of the ``.jpg`` file belonging to row ``idx``."""
        image_id = str(self.data_frame.iloc[idx, 0])
        # Join directory and file name as separate components so that a
        # root_dir without a trailing slash still yields a valid path.
        return os.path.join(self.root_dir, image_id + '.jpg')

    def __getitem__(self, idx):
        """Load and return the ``(image, label)`` pair for row ``idx``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        image = io.imread(self._image_path(idx))
        # Every column after the image id is part of the label vector.
        label = np.array(self.data_frame.iloc[idx, 1:]).astype('float')

        if self.transform:
            image = self.transform(image).type('torch.FloatTensor')
            label = torch.from_numpy(label)

        return image, label

In [5]:
# Per-split preprocessing pipelines, keyed by 'train' / 'val'.
# Both splits convert the image to a tensor and normalise each channel with the
# commonly used ImageNet mean / std; only the training split adds random
# augmentation (random resized crop + horizontal flip).
# NOTE(review): torchvision reads a two-element size as (height, width) —
# confirm that [600,450] is the intended orientation for these images.
data_transforms = dict(
    train=transforms.Compose([
        transforms.ToTensor(),
        transforms.RandomResizedCrop([600,450]),
        transforms.RandomHorizontalFlip(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
    val=transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize([600,450]),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
)



In [6]:
# Image folders and ground-truth CSV files on the mounted Drive.
train_data_dir = '/content/drive/MyDrive/data_val/ISIC2018_Task3_Training_Input/'
valid_data_dir = '/content/drive/MyDrive/data_val/ISIC2018_Task3_Validation_Input/'

train_groundtruth = "/content/drive/MyDrive/data_val/ISIC2018_Task3_Training_GroundTruth/ISIC2018_Task3_Training_GroundTruth.csv"
valid_groundtruth = "/content/drive/MyDrive/data_val/ISIC2018_Task3_Validation_GroundTruth/ISIC2018_Task3_Validation_GroundTruth.csv"

train_val_dataset_csv = {
    'train': train_groundtruth,
    'val': valid_groundtruth
}
train_val_dataset_dir = {
    'train': train_data_dir,
    'val': valid_data_dir
}

# One dataset per split, each with its own preprocessing pipeline.
train_val_datasets = {
    x: CustomMyDataset(train_val_dataset_csv[x], train_val_dataset_dir[x],
                       transform=data_transforms[x])
    for x in ['train', 'val']
}

# Only the training split is shuffled; evaluation metrics do not depend on
# sample order, so the validation loader keeps a fixed, reproducible order.
train_val_dataloaders = {
    x: torch.utils.data.DataLoader(train_val_datasets[x], batch_size=50,
                                   shuffle=(x == 'train'), num_workers=4)
    for x in ['train', 'val']
}
dataset_sizes = {x: len(train_val_datasets[x]) for x in ['train', 'val']}

# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [7]:
def train_model(dataloaders, model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a 'train' phase followed by a 'val' phase. Labels are
    expected to be per-class vectors (e.g. one-hot); the target class used for
    the accuracy computation is the argmax of each label vector.

    Args:
        dataloaders: Mapping with the keys 'train' and 'val', each an iterable
            yielding ``(inputs, labels)`` batches.
        model: The ``torch.nn.Module`` to optimise. Batches are moved to the
            device of its first parameter (CPU if it has no parameters).
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per training epoch.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, holding the weights that achieved the
        highest validation accuracy.
    """
    since = time.time()

    # Follow the model's own device rather than relying on a notebook global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is the same as eval()

            running_loss = 0.0
            running_corrects = 0
            n_seen = 0  # samples actually processed in this phase

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # Track gradient history only while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, -1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics, kept as plain Python numbers
                _, label = torch.max(labels, -1)
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += torch.sum(preds == label).item()
                n_seen += batch_size

            if is_train:
                scheduler.step()

            # Average over the samples seen, guarding against an empty loader.
            denom = max(n_seen, 1)
            epoch_loss = running_loss / denom
            epoch_acc = running_corrects / denom

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Remember the weights of the best validation epoch so far.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                print(f'Best val Acc: {best_acc:4f}')

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [8]:
import torch.nn as nn
import torchvision.models as models
class swin_v2_t_model(torch.nn.Module):
    """Pretrained torchvision Swin V2-T with a frozen backbone and a new head.

    The first five child modules of ``models.swin_v2_t`` are kept as a frozen
    feature extractor. The sixth child (presumably the original classification
    layer) is used only to read ``in_features`` and is replaced by a fresh
    ``nn.Linear`` with ``n_class`` outputs, which is the only trainable part.

    Args:
        n_class: Number of output classes of the new linear layer.
    """

    def __init__(self, n_class):
        super().__init__()
        # The weights enum replaces the deprecated ``pretrained=True`` flag and
        # selects the same ImageNet-1k checkpoint.
        swinv2t = models.swin_v2_t(weights=models.Swin_V2_T_Weights.IMAGENET1K_V1)
        base_layers = list(swinv2t.children())

        self.backbone = nn.Sequential(*base_layers[:5])
        # Freeze the backbone so that only the new linear layer is trained.
        for param in self.backbone.parameters():
            param.requires_grad = False

        num_ftrs = base_layers[5].in_features
        self.cls = nn.Linear(num_ftrs, n_class)

    def forward(self, x):
        """Run ``x`` through the frozen backbone, then the new linear layer."""
        x = self.backbone(x)
        return self.cls(x)

In [9]:
# Build the 7-class model and move it to the selected device.
model_ft = swin_v2_t_model(7)
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Every parameter is handed to SGD, but the backbone is frozen
# (requires_grad=False) and never receives gradients, so in practice only the
# new classification layer is updated.
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

Downloading: "https://download.pytorch.org/models/swin_v2_t-b137f0e2.pth" to /root/.cache/torch/hub/checkpoints/swin_v2_t-b137f0e2.pth
100%|██████████| 109M/109M [00:01<00:00, 64.6MB/s]


In [10]:
# Train for 9 epochs; train_model returns the model loaded with its best validation weights.
model_ft = train_model(train_val_dataloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=9)

Epoch 0/8
----------
train Loss: 0.8414 Acc: 0.7110
val Loss: 0.8305 Acc: 0.7098
Best val Acc: 0.709845

Epoch 1/8
----------
train Loss: 0.7084 Acc: 0.7458
val Loss: 0.8174 Acc: 0.7047

Epoch 2/8
----------
train Loss: 0.6723 Acc: 0.7566
val Loss: 0.7271 Acc: 0.7513
Best val Acc: 0.751295

Epoch 3/8
----------
train Loss: 0.6553 Acc: 0.7631
val Loss: 0.7184 Acc: 0.7617
Best val Acc: 0.761658

Epoch 4/8
----------
train Loss: 0.6354 Acc: 0.7789
val Loss: 0.7532 Acc: 0.7617

Epoch 5/8
----------
train Loss: 0.6260 Acc: 0.7762
val Loss: 0.7884 Acc: 0.7254

Epoch 6/8
----------
train Loss: 0.6226 Acc: 0.7747
val Loss: 0.7238 Acc: 0.7617

Epoch 7/8
----------
train Loss: 0.5989 Acc: 0.7852
val Loss: 0.7048 Acc: 0.7772
Best val Acc: 0.777202

Epoch 8/8
----------
train Loss: 0.5958 Acc: 0.7848
val Loss: 0.7007 Acc: 0.7876
Best val Acc: 0.787565

Training complete in 75m 22s
Best val Acc: 0.787565


In [11]:
# Test split: image folder and ground-truth CSV on the mounted Drive.
test_data_dir = '/content/drive/MyDrive/data_val/ISIC2018_Task3_Test_Input/'
test_groundtruth = '/content/drive/MyDrive/data_val/ISIC2018_Task3_Test_GroundTruth/ISIC2018_Task3_Test_GroundTruth.csv'
testdataset = CustomMyDataset(test_groundtruth , test_data_dir,transform=data_transforms['val'])
# Do NOT shuffle: predictions are later written out row by row against the
# image ids of the test CSV, so the loader must yield samples in CSV order.
testdataloader = torch.utils.data.DataLoader(testdataset, batch_size = 10, shuffle=False, num_workers=4)

In [12]:
import torch
import torch.nn.functional as func
def test_model(model, testdataloader):
  predictions = []
  predictions_onehot = []
  for inputs, labels in testdataloader:
    model.eval()
    inputs = inputs.to(device)
    scores = model(inputs)
    _, pred = torch.max(scores, -1)
    _, label = torch.max(labels, -1)
    for i in range(0,inputs.size(0)):
      predictions.append(pred[i])
    ## print(predictions)
  model_ft.train()
  for i in range(0,len(predictions)):
    predictions_onehot.append(func.one_hot(predictions[i], num_classes = 7))
  return predictions_onehot

In [13]:
# Run inference on the test split; `preds` holds one one-hot prediction per test sample.
preds = test_model(model_ft, testdataloader)

In [14]:
# Open Colab's file-upload dialog. The returned value is not used anywhere else
# in this notebook.
# NOTE(review): `upploaded` looks like a typo for `uploaded`, and the uploaded
# CSV appears to share its name with the file that write_csv later opens in 'w'
# mode (and therefore overwrites) — confirm this upload step is still needed.
from google.colab import files
upploaded = files.upload()

Saving SWIN_V2_T_LuongCongHoan.csv to SWIN_V2_T_LuongCongHoan.csv


In [15]:
# Load the test ground-truth table; its first column supplies the image ids
# used when the predictions are written out. Print one id as a sanity check.
testcsv = pd.read_csv(test_groundtruth)
print(testcsv.iat[2, 0])

ISIC_0034526


In [16]:
import csv
# Name of the prediction CSV produced by this notebook
filename = 'SWIN_V2_T_LuongCongHoan.csv'
def write_csv(preds, testcsv, filename ):
    """Write one-hot predictions to a CSV file, one row per image.

    Args:
        preds: Sequence of per-sample prediction vectors. Each vector must be
            indexable and its elements must provide ``.item()`` (e.g. a 1-D
            tensor with one entry per class).
        testcsv: DataFrame whose first column holds the image ids; row ``i``
            is paired with ``preds[i]``.
        filename: Path of the CSV file to create (overwritten if it exists).
    """
    # One column per diagnosis class. The second class column is 'NV'; the
    # previous header listed 'BCC' twice and omitted 'NV'.
    header = ['image', 'MEL', 'NV', 'BCC', 'AKIEC', 'BKL', 'DF', 'VASC']
    n_classes = len(header) - 1

    with open(filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(header)
        for i, pred in enumerate(preds):
            scores = [pred[k].item() for k in range(n_classes)]
            writer.writerow([testcsv.iloc[i, 0]] + scores)


In [17]:
# Write the test predictions, paired with the image ids from `testcsv`, to `filename`.
write_csv(preds, testcsv, filename )

In [18]:
# Download the generated prediction CSV from the Colab runtime through the browser.
files.download("SWIN_V2_T_LuongCongHoan.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>