In [28]:
# Install MedVit
!git clone https://github.com/Omid-Nejati/MedViT.git
# %cd /MedVit

Cloning into 'MedViT'...
remote: Enumerating objects: 130, done.[K
remote: Counting objects: 100% (129/129), done.[K
remote: Compressing objects: 100% (64/64), done.[K
remote: Total 130 (delta 58), reused 128 (delta 57), pack-reused 1[K
Receiving objects: 100% (130/130), 800.27 KiB | 22.86 MiB/s, done.
Resolving deltas: 100% (58/58), done.


In [29]:
%ls

[0m[01;34mMedViT[0m/


In [30]:
os.listdir('/kaggle/input/medvit-saved/MedViT_saved')

['config.json', 'preprocessor_config.json', 'model.safetensors']

In [31]:
import os
os.chdir('MedViT')

In [32]:
pip install chardet einops fire fvcore

Collecting chardet
  Downloading chardet-5.2.0-py3-none-any.whl.metadata (3.4 kB)
Collecting einops
  Downloading einops-0.8.0-py3-none-any.whl.metadata (12 kB)
Collecting fire
  Downloading fire-0.6.0.tar.gz (88 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.4/88.4 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting fvcore
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting yacs>=0.1.6 (from fvcore)
  Downloading yacs-0.1.8-py3-none-any.whl.metadata (639 bytes)
Collecting iopath>=0.1.7 (from fvcore)
  Downloading iopath-0.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Col

In [33]:
pip install evaluate transformers accelerate

Collecting evaluate
  Downloading evaluate-0.4.2-py3-none-any.whl.metadata (9.3 kB)
Downloading evaluate-0.4.2-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.2
Note: you may need to restart the kernel to use updated packages.


In [37]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1"

from PIL import Image
# from torchinfo import summary
import torch
import os
import warnings
warnings.filterwarnings("ignore")
from typing import Tuple

from PIL import Image
import torch
import torch.nn as nn
# import torch.nn.functional as F
import torchvision.transforms as T
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np

from datasets import load_dataset
import pandas as pd

import random

# import wandb
# wandb.login("1fa58b4e42c64c2531b3abeb43c04f5991be307e")

torch.cuda.empty_cache()


In [38]:
# 471257f8658cc55c4ec33930066c1c6d1f101821

In [39]:
print('Number CUDA Devices:', torch.cuda.device_count())
print ('Current cuda device: ', torch.cuda.current_device(), ' **May not correspond to nvidia-smi ID above, check visibility parameter')

Number CUDA Devices: 1
Current cuda device:  0  **May not correspond to nvidia-smi ID above, check visibility parameter


In [40]:
# Define configuration
from transformers import PretrainedConfig
from typing import List


class MedViTConfig(PretrainedConfig):
    model_type = "medvit"

    def __init__(
        self,
        stem_chs: List[int] = [64, 32, 64],
        depths: List[int] = [3, 4, 30, 3],
        path_dropout: float = 0.2,
        attn_drop: int = 0,
        drop: int = 0,
        num_classes: int = 5,
        strides: List[int] = [1, 2, 2, 2],
        sr_ratios: List[int] = [8, 4, 2, 1],
        head_dim: int = 32,
        mix_block_ratio: float = 0.75,
        use_checkpoint: bool = False,
        **kwargs
    ):
        self.stem_chs = stem_chs
        self.depths = depths
        self.path_dropout = path_dropout
        self.attn_drop = attn_drop
        self.drop = drop
        self.num_classes = num_classes
        self.strides = strides
        self.sr_ratios = sr_ratios
        self.head_dim = head_dim
        self.mix_block_ratio = mix_block_ratio
        self.use_checkpoint = use_checkpoint
        super().__init__(**kwargs)

medvit_config = MedViTConfig()

In [41]:
# Initialise a MedViT class
from transformers import PreTrainedModel
from MedViT import MedViT
print(MedViT)
class MedViTClassification(PreTrainedModel):
    config_class = MedViTConfig

    def __init__(self, config):
        super().__init__(config)
        self.model = MedViT(
        stem_chs = config.stem_chs,
        depths = config.depths,
        path_dropout = config.path_dropout,
        attn_drop = config.attn_drop,
        drop = config.drop,
        num_classes = config.num_classes,
        strides = config.strides,
        sr_ratios = config.sr_ratios,
        head_dim = config.head_dim,
        mix_block_ratio = config.mix_block_ratio,
        use_checkpoint = config.use_checkpoint,
        )

    def forward(self, pixel_values, labels=None):
        logits = self.model(pixel_values)
        # loss = torch.nn.CrossEntropyLoss(logits, labels)
        loss = torch.nn.functional.cross_entropy(logits, labels)
        return {"loss": loss, "logits": logits}

<class 'MedViT.MedViT'>


In [118]:
# Initialize a model
model = MedViTClassification(medvit_config)

initialize_weights...


In [43]:
%cd ../

/kaggle/working


In [44]:
os.listdir(r'../input/labels')

['image_labels.csv']

In [119]:
# dataset_folder_name = r"../input/short-imbalanced-dataset/short_imbalanced_dataset"
# dataset_folder_name = r"../input/dr-dataset"

def load_dataset_path2images(dataset_folder_name):
    train_test_folders = os.listdir(dataset_folder_name)
    datasets = {}
    for trts_split in train_test_folders:
        class_folders = os.listdir(dataset_folder_name+'/'+trts_split)
#         class_folders = os.listdir(dataset_folder_name + '\\' + trts_split)
        labels = []
        paths = []
        for class_folder in class_folders:
            image_names = os.listdir(dataset_folder_name+'/'+trts_split+'/'+class_folder)
            image_paths = [dataset_folder_name+'/'+trts_split+'/'+class_folder+'/'+x for x in image_names]
#             image_names = os.listdir(dataset_folder_name + '\\' + trts_split + '\\' + class_folder)
#             image_paths = [dataset_folder_name + '\\' + trts_split + '\\' + class_folder + '\\' + x for x in image_names]
            class_labels = [int(class_folder)] * len(image_paths)
            labels.extend(class_labels)
            paths.extend(image_paths)
        local_dataset = {'image_path' : paths, 'label' : labels}
        datasets[trts_split] = pd.DataFrame.from_dict(local_dataset)

    return datasets

# dataset = load_dataset_path2images(dataset_folder_name)
dataset = pd.read_csv(r'/kaggle/input/labels/image_labels.csv')
dataset['path'], dataset['ext'] = '/kaggle/input/dr-train/train/', '.jpeg'
dataset['image_path'] = dataset['path'] + dataset['image'] + dataset['ext']
dataset.drop(columns = ['image', 'path', 'ext'], inplace = True)
torch.cuda.empty_cache()

In [120]:

# oversampling just repeating minority class items
# enought times to be equal to major dataset in size

##############################################################################################

# max_size = train_dataset['label'].value_counts().max()
# lst = [train_dataset]
def resample(_dataset, ratio = 3):
    min_size = _dataset['label'].value_counts().min()
    lst = []
    added_unique_rows = 0
    all_n_rows = 0

    for class_index, group in _dataset.groupby('label'):
        # lst.append(group.sample(max_size-len(group), replace=True))
        all_n_rows += len(group)
        if class_index == 0:
            added_unique_rows += min_size*ratio
            lst.append(group.sample(min_size*ratio, replace=False))
        else:
            if len(group) > min_size*ratio:
                added_unique_rows += min_size*ratio
                lst.append(group.sample(min_size*ratio, replace=False))
            else:
                lst.append(group)
                added_unique_rows += len(group)
                lst.append(group.sample(min_size*ratio-len(group), replace=True))

    _dataset = pd.concat(lst)

    for class_index, group in _dataset.groupby('label'):
        print(f'{class_index}: length: {len(group)}')

    print('N_added_rows: ', added_unique_rows)
    print('N_all_rows: ', all_n_rows)
    print('Ratio of used rows: ', added_unique_rows/all_n_rows)
    return _dataset

In [121]:
dataset

Unnamed: 0,label,image_path
0,0,/kaggle/input/dr-train/train/10003_left.jpeg
1,0,/kaggle/input/dr-train/train/10003_right.jpeg
2,0,/kaggle/input/dr-train/train/10007_left.jpeg
3,0,/kaggle/input/dr-train/train/10007_right.jpeg
4,0,/kaggle/input/dr-train/train/10009_left.jpeg
...,...,...
8403,0,/kaggle/input/dr-train/train/19494_right.jpeg
8404,0,/kaggle/input/dr-train/train/19498_left.jpeg
8405,0,/kaggle/input/dr-train/train/19498_right.jpeg
8406,0,/kaggle/input/dr-train/train/194_left.jpeg


In [122]:
##############################################################################################
train_dataset = resample(dataset, ratio = 20)
# test_dataset = resample(dataset['short_test'], ratio = 1)

0: length: 3320
1: length: 3320
2: length: 3320
3: length: 3320
4: length: 3320
N_added_rows:  5578
N_all_rows:  8408
Ratio of used rows:  0.6634157944814463


In [123]:
mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
from datasets import Dataset
from transformers import ViTImageProcessor
from transformers import AutoImageProcessor

# model_name_or_path = 'google/vit-base-patch16-224-in21k'
# model_name_or_path = "microsoft/swinv2-tiny-patch4-window8-256"
model_name_or_path = "microsoft/swin-base-patch4-window12-384"
# model_name_or_path = "/kaggle/input/medvit-saved/MedViT_saved"

# processor = ViTImageProcessor.from_pretrained(model_name_or_path)
image_processor = AutoImageProcessor.from_pretrained(model_name_or_path)
# model = MedViTClassification.from_pretrained(model_name_or_path)

preprocessor_config.json:   0%|          | 0.00/255 [00:00<?, ?B/s]



In [124]:
from typing import Any
class Spot(object):
    def __init__(self, size, prob = 0.5):
        self.size = size
        self.prob = prob
        self.center = None
        self.radius = None
        self.zeros = torch.zeros((self.size, self.size)) #.cuda()
        self.ones = torch.ones((3, 1)) #.cuda()
        self.tensor_to_image = T.ToPILImage()
        self.image_to_tensor = T.ToTensor()

    def __call__(self, image_tensors, target = None):
        if random.random() < self.prob:
            image_tensors = self.image_to_tensor(image_tensors)
#             print('Yes')
#             modified_image_tensors = image_tensors.clone()
            # print(f'Min value: {torch.amin(modified_image_tensors)}')
            # print(f'Max value: {torch.amax(modified_image_tensors)}')
            n_spots = random.randint(5, 7)
            self.initial_mask = self.zeros.clone()

            self.dim1_offset = (image_tensors.shape[1] - self.size) // 2
            self.dim2_offset = (image_tensors.shape[2] - self.size) // 2

            for _ in range(n_spots):
                new_image_tensors = self.add_random_spot(image_tensors)
#                 modified_image_tensors = self.add_random_spot(modified_image_tensors)
            return torch.clamp(new_image_tensors, min = torch.amin(image_tensors), max = torch.amax(image_tensors))
#             return torch.clamp(modified_image_tensors, min = torch.amin(image_tensors), max = torch.amax(image_tensors))
        else: return image_tensors
        
    def add_random_spot(self, image_tensor):
        self.radius = random.randint(int(0.01 * self.size) + 1, int(0.05 * self.size))
        self.center = [random.randint(self.radius + 1, self.size - self.radius - 1), 
                       random.randint(self.radius + 1, self.size - self.radius - 1)]
        y, x = np.ogrid[: self.size, : self.size]
        dist_from_center = np.sqrt((x - self.center[0]) ** 2 + (y - self.center[1]) ** 2)
        circle = dist_from_center <= (self.radius // 2)

        k = 14 / 25 + (1.0 - self.radius / 25)
        beta = 0.5 + (1.5 - 0.5) * self.radius / 25
        A = k * self.ones.clone()
        d = 0.3 * self.radius / 25
        t = np.exp(-beta * d)

        spot_mask = self.zeros.clone()
        spot_mask[circle] = torch.multiply(A[0], torch.tensor(1 - t))

        self.initial_mask = self.initial_mask + spot_mask
        self.initial_mask[self.initial_mask != 0] = 1

        sigma = (5 + (2 - 0) * self.radius / 25) * 2
        rad_w = random.randint(int(sigma / 5), int(sigma / 4))
        rad_h = random.randint(int(sigma / 5), int(sigma / 4))

        if (rad_w % 2) == 0: rad_w = rad_w + 1
        if (rad_h % 2) == 0: rad_h = rad_h + 1

        spot_mask = F.gaussian_blur(torch.reshape(spot_mask, (1, self.size, self.size)), (rad_w, rad_h), sigma)
        spot_mask = torch.stack([spot_mask, spot_mask, spot_mask]) * 255
        
        image_tensor[:, self.dim1_offset : self.dim1_offset + self.size, self.dim2_offset : self.dim2_offset + self.size] += torch.reshape(spot_mask, (3, self.size, self.size))
        return image_tensor

class Halo(object):
    def __init__(self, size, prob = 0.5, intensity_range = (0.8, 1.2)):
        self.size = size
        self.prob = prob
        self.center = None
        self.radius = None
        self.intensity_range = intensity_range
        self.tensor_to_image = T.ToPILImage()
        self.image_to_tensor = T.ToTensor()

    def __call__(self, image_tensors, target = None):
        if random.random() < self.prob:
            image_tensors = self.image_to_tensor(image_tensors)
#             print('Yes')
#             modified_image_tensors = image_tensors.clone()
            # print(f'Min value: {torch.amin(modified_image_tensors)}')
            # print(f'Max value: {torch.amax(modified_image_tensors)}')
            n_halos = random.randint(5, 7)

            self.dim1_offset = (image_tensors.shape[1] - self.size) // 2
            self.dim2_offset = (image_tensors.shape[2] - self.size) // 2
            
            for _ in range(n_halos):
                image_tensors = self.add_random_halo(image_tensors)
#                 modified_image_tensors = self.add_random_halo(modified_image_tensors)
            return torch.clamp(image_tensors, min = torch.amin(image_tensors), max = torch.amax(image_tensors))
#             return torch.clamp(modified_image_tensors, min = torch.amin(image_tensors), max = torch.amax(image_tensors))
        else: return image_tensors

    def add_random_halo(self, image_tensor):
        self.radius = random.randint(int(0.01 * self.size), int(0.05 * self.size))
        self.center = [random.randint(self.radius + 1, self.size - self.radius - 1),
                        random.randint(self.radius + 1, self.size - self.radius - 1)]
        
        y, x = torch.meshgrid(torch.arange(self.size), torch.arange(self.size))
        dist_from_center = torch.sqrt(((x - self.center[0]) ** 2 + (y - self.center[1]) ** 2))
        normalized_dist = dist_from_center / self.radius
        
        halo_intensity = torch.clamp(self.intensity_range[0] + (self.intensity_range[1] - self.intensity_range[0]) * (1 - normalized_dist), min = 0, max = 1)
        halo_mask = dist_from_center <= self.radius // 2
        halo_effect = halo_intensity * (self.radius - dist_from_center) / self.radius
        halo_effect = np.clip(halo_effect, 0, 1)
        halo_effect = np.expand_dims(halo_effect, axis = 0)
        halo_effect = np.repeat(halo_effect, image_tensor.shape[0], axis = 0)
        image_tensor[:, halo_mask] = image_tensor[:, halo_mask] * (1 - halo_effect[:, halo_mask]) + halo_effect[:, halo_mask] * 255

        return image_tensor

class Hole(object):
    def __init__(self, size, prob = 0.5):
        self.size = size
        self.prob = prob
        self.center = None
        self.radius = None
        self.tensor_to_image = T.ToPILImage()
        self.image_to_tensor = T.ToTensor()

    def __call__(self, image_tensors, target = None):
        if random.random() < self.prob:
            image_tensors = self.image_to_tensor(image_tensors)
#             print('Yes')
#             modified_image_tensors = image_tensors.clone()
            # print(f'Min value: {torch.amin(modified_image_tensors)}')
            # print(f'Max value: {torch.amax(modified_image_tensors)}')
            n_halos = random.randint(5, 7)

            self.dim1_offset = (image_tensors.shape[1] - self.size) // 2
            self.dim2_offset = (image_tensors.shape[2] - self.size) // 2
            
            for _ in range(n_halos):
                image_tensors = self.add_random_hole(image_tensors)
#                 modified_image_tensors = self.add_random_hole(modified_image_tensors)
            return torch.clamp(image_tensors, min = torch.amin(image_tensors), max = torch.amax(image_tensors))
#             return torch.clamp(modified_image_tensors, min = torch.amin(image_tensors), max = torch.amax(image_tensors))
        else: return image_tensors

    def add_random_hole(self, image_tensor):
        self.radius = random.randint(int(0.01 * self.size), int(0.05 * self.size))
        self.center = [random.randint(self.radius + 1, self.size - self.radius - 1),
                        random.randint(self.radius + 1, self.size - self.radius - 1)]
        
        y, x = torch.meshgrid(torch.arange(self.size), torch.arange(self.size))
        dist_from_center = torch.sqrt(((x - self.center[0]) ** 2 + (y - self.center[1]) ** 2))
        
        hole_mask = dist_from_center <= self.radius // 2
        image_tensor[:, hole_mask] = 0

        return image_tensor

In [125]:
from PIL import ImageEnhance

def RandomSharpen(image, alpha = 0.2):
    sharpener = ImageEnhance.Sharpness(image)
    factor = 0.5  
    image = sharpener.enhance(1.0 + alpha * factor)
    return image

In [126]:
size = (
    image_processor.size["shortest_edge"]
    if "shortest_edge" in image_processor.size
    else (image_processor.size["height"], image_processor.size["width"])
)

print(size)

(384, 384)


In [127]:
_transforms_train = T.Compose([
    T.RandomHorizontalFlip(p = 0.5),
    T.RandomVerticalFlip(p = 0.5),
    T.RandomCrop(2000, padding_mode='symmetric', pad_if_needed=True),
#     Spot(size[0]),
    # Halo(),
    # Hole(),
#     T.Lambda(RandomSharpen),
    # Blur()
])

_transforms_test = T.Compose([
    T.CenterCrop(2000),
])

In [128]:
def load_image(path_image, label, mode):
    # load image
    try:
        image = Image.open(path_image)
#         print(image)
#         image.verify()  # Verify the image is valid
        if mode == 'train':
            image = _transforms_train(image)

            return image
        else:
            return image
    except (IOError, SyntaxError) as e:
        
        image = Image.open('/kaggle/input/dr-train/train/10003_left.jpeg')
#         image.verify()  # Verify the image is valid
        if mode == 'train':
            image = _transforms_train(image)

            return image
        else:
            return image


def func_transform(examples):

    # loaded_images = [load_image(path, lb, 'train').convert("RGB") for path, lb in zip(examples['image_path'], examples['label'])]
    # _transforms(img.convert("RGB"))
    inputs = image_processor([load_image(path, lb, 'train')
                                for path, lb in zip(examples['image_path'], examples['label'])], return_tensors='pt')
    inputs['label'] = examples['label']
    return inputs

    ###############################

    # examples["pixel_values"] = [load_image(path, lb, 'train')
    #                             for path, lb in zip(examples['image_path'], examples['label'])]
    # del examples["image_path"]
    # return examples



def func_transform_test(examples):

    # loaded_images = [load_image(path, lb, 'test').convert("RGB") for path, lb in zip(examples['image_path'], examples['label'])]
    inputs = image_processor([load_image(path, lb, 'test')
                                for path, lb in zip(examples['image_path'], examples['label'])], return_tensors='pt')
    inputs['label'] = examples['label']
    return inputs

    ########################################
    # examples["pixel_values"] = [load_image(path, lb, 'test')
    #                             for path, lb in zip(examples['image_path'], examples['label'])]
    # del examples["image_path"]
    # return examples

In [129]:
train_ds = Dataset.from_pandas(train_dataset, preserve_index=False)

In [130]:
train_test_dataset = train_ds.train_test_split(test_size = 0.20, seed = 42)
train_dataset, test_dataset = train_test_dataset['train'], train_test_dataset['test']
# train_dataset = Dataset.from_pandas(train_dataset, preserve_index=False)
# test_dataset = Dataset.from_pandas(test_dataset, preserve_index=False)

In [131]:
prepared_train_dataset = train_dataset.with_transform(func_transform)
prepared_test_dataset = test_dataset.with_transform(func_transform_test)
prepared_train_dataset = prepared_train_dataset.shuffle(seed = 42)
prepared_test_dataset = prepared_test_dataset.shuffle(seed = 42)

In [132]:
print("rows in train_dataset: ", len(prepared_train_dataset))
# print("rows in test_dataset: ", len(prepared_test_dataset))

# labels = prepared_ds_train.features["label"].names()
labels = [0, 1, 2, 3, 4]
label2id, id2label = dict(), dict()

for i, label in enumerate(labels):
    label2id[label] = i
    id2label[i] = label

print("ID2label: ", id2label)

rows in train_dataset:  13280
ID2label:  {0: 0, 1: 1, 2: 2, 3: 3, 4: 4}


In [133]:
def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        # 'tensor': torch.stack([x['tensor'] for x in batch]),
        'labels': torch.tensor([x['label'] for x in batch])
    }

def calculate_per_class_accuracy(confusion_matrix):
        num_classes = confusion_matrix.shape[0]
        per_class_accuracy = []

        for i in range(num_classes):
            TP = confusion_matrix[i, i]
            FN = np.sum(confusion_matrix[i, :]) - TP
            FP = np.sum(confusion_matrix[:, i]) - TP
            TN = np.sum(confusion_matrix) - (TP + FP + FN)

            accuracy = (TP + TN) / (TP + TN + FP + FN)
            per_class_accuracy.append(accuracy)

        return per_class_accuracy

In [134]:
from sklearn.metrics import cohen_kappa_score, confusion_matrix
from sklearn.metrics import f1_score #, kappa
# from sklearn import metrics

import evaluate

accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    predictions_proba, labels = eval_pred

    # print(predictions)
    predictions = np.argmax(predictions_proba, axis=1)
    # print(predictions)
    # print(labels)
    result_accuracy = accuracy.compute(predictions=predictions, references=labels)
    
    cm = confusion_matrix(labels, predictions)
    print(cm)
    perclass_acc = calculate_per_class_accuracy(cm)
    
#     print(f'per class accuracies: {perclass_acc}')
    
    
    result = {
             'accuracy': np.mean([result_accuracy['accuracy']]),
             'kappa': np.mean([cohen_kappa_score(labels, predictions, weights = "quadratic")]),
             # 'quadratic_kappa': np.mean([kappa(labels, predictions, weights = "quadratic")]),
             'f1': np.mean([f1_score(labels, predictions, average='weighted')]),
             'roc_auc': np.mean([roc_auc_score(labels, predictions_proba, multi_class='ovr')])
             'class_0' : perclass_acc[0],
             'class_1' : perclass_acc[1],
             'class_2' : perclass_acc[2],
             'class_3' : perclass_acc[3],
             'class_4' : perclass_acc[4],
             }

    
    
#     print(f"\nClass 0 Accuracy (vs Others): {class_0:.2f}%")
#     print(f"Other Classes Accuracy: {remaining_classes:.2f}%")
    
    # print(cohen_kappa_score(labels, predictions))
    # print(result)

    return result


In [135]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./MedViT-base",
    evaluation_strategy="steps",
    logging_steps=20,

    save_steps=20,
    eval_steps=20,
    save_total_limit=2,

    # report_to="wandb",  # enable logging to W&B
    # run_name="swin384_shrp_rt20",  # name of the W&B run (optional)

    remove_unused_columns=False,
    dataloader_num_workers = 2,

    learning_rate=2e-5,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    per_device_eval_batch_size=2,
    num_train_epochs=2,
    warmup_ratio=0.1,

    metric_for_best_model="kappa",
    greater_is_better = True,
    load_best_model_at_end=True,

    push_to_hub=False
)

In [136]:
sample_ids = np.random.choice(len(prepared_test_dataset), size=250, replace=False)
inv_sample_ids = np.setdiff1d(np.arange(len(prepared_test_dataset)), sample_ids)
val_ds = prepared_test_dataset.select(sample_ids)
test_ds = prepared_test_dataset.select(inv_sample_ids)

In [137]:
val_ds, test_ds

(Dataset({
     features: ['label', 'image_path'],
     num_rows: 250
 }),
 Dataset({
     features: ['label', 'image_path'],
     num_rows: 3070
 }))

In [138]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=prepared_train_dataset,
#     eval_dataset=prepared_test_dataset,
    eval_dataset=val_ds,
    tokenizer=image_processor,
)

In [139]:
torch.cuda.empty_cache()

In [None]:
# CUDA_LAUNCH_BLOCKING=1
train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Accuracy,Kappa,F1,Class 0,Class 1,Class 2,Class 3,Class 4
20,1.6169,1.633851,0.232,-0.003865,0.09401,0.76,0.784,0.832,0.856,0.232
40,1.6346,1.674526,0.208,0.006979,0.114595,0.756,0.7,0.828,0.808,0.324
60,1.6207,1.77085,0.16,-0.088584,0.133811,0.736,0.656,0.5,0.788,0.64
80,1.62,1.747423,0.22,0.037148,0.211334,0.724,0.612,0.716,0.832,0.556
100,1.6124,1.841839,0.224,-0.026912,0.196162,0.728,0.596,0.8,0.792,0.532
120,1.6223,1.752283,0.232,0.097714,0.185397,0.76,0.684,0.672,0.844,0.504
140,1.5858,1.721466,0.236,0.08424,0.219405,0.68,0.624,0.788,0.82,0.56
160,1.5734,1.792762,0.272,0.115527,0.258863,0.624,0.696,0.792,0.788,0.644
180,1.5871,1.713181,0.244,0.032871,0.209878,0.588,0.736,0.812,0.776,0.576
200,1.6442,1.778968,0.212,0.04272,0.20659,0.632,0.66,0.76,0.752,0.62


[[ 0  0  0  0 60]
 [ 0  0  0  0 54]
 [ 0  0  0  0 42]
 [ 0  0  0  1 35]
 [ 0  0  0  1 57]]
[[ 0 11  0  2 47]
 [ 0  7  0  3 44]
 [ 0  5  0  5 32]
 [ 1  2  0  0 33]
 [ 0 10  1  2 45]]
[[ 1 10 31  4 14]
 [ 0  7 27  6 14]
 [ 1 11 18  5  7]
 [ 1  3 21  2  9]
 [ 5 15 22  4 12]]
[[14 14 11  3 18]
 [ 9 15  9  0 21]
 [ 1 17  7  0 17]
 [ 5  6  9  1 15]
 [ 8 21  7  4 18]]
[[ 6 19  3  7 25]
 [ 4 23  1  4 22]
 [ 2 17  1  5 17]
 [ 4  9  2  6 15]
 [ 4 25  3  6 20]]
[[ 2 14 12  0 32]
 [ 1 12 17  0 24]
 [ 0  9  6  3 24]
 [ 1  2  9  2 22]
 [ 0 12  8  2 36]]
[[10 18  3  6 23]
 [13 13  7  1 20]
 [ 2 15  4  3 18]
 [ 7  4  2  5 18]
 [ 8 16  3  4 27]]
[[27  7  3  7 16]
 [21 10  9  4 10]
 [ 9 11  6  4 12]
 [15  2  1  5 13]
 [16 12  3  7 20]]
[[27  3  0  9 21]
 [19  3  7  3 22]
 [14  6  3  5 14]
 [16  0  1  4 15]
 [21  6  0  7 24]]
[[18 13  5  8 16]
 [13 11 12  2 16]
 [10 11  4  7 10]
 [15  4  2  4 11]
 [12 14  3 13 16]]
[[ 9 16  5  7 23]
 [12 15  6  1 20]
 [ 4 12  0  5 21]
 [ 7  4  1  4 20]
 [ 3 20  2  4 29]]

In [35]:
# 471257f8658cc55c4ec33930066c1c6d1f101821

In [None]:
metrics = trainer.evaluate(test_ds)
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)