# Task A: Fine-tune a pretrained model using the DeepDRiD dataset.
1.
Download the DeepDRiD dataset and the template code from the provided link; the template code includes an explanation of diabetic retinopathy and of the dataset itself. All images for training, validation, and evaluation are 512 × 512 pixels.

2.
Fine-tune an ImageNet-pretrained model (e.g., ResNet18, ResNet34, VGG, EfficientNet, DenseNet) on the DeepDRiD dataset. Evaluate it on the validation set and test it on the test set. Your goal is to reach the highest Cohen's kappa score you can.

3.
Experiment with different image augmentation techniques and check whether each one improves or degrades your evaluation metrics.

4.
Save the fine-tuned model.

In [None]:
import copy
import os
import random
import sys

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from PIL import Image
from sklearn.metrics import cohen_kappa_score, precision_score, recall_score, accuracy_score
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from torchvision.transforms.functional import to_pil_image
from tqdm import tqdm
import torch.optim as optim
import gdown

In [None]:
!pip install gdown
# Google Drive id of the DeepDRiD archive.
file_id = "1PK8X-rkPtE1fsSSF_nLUyjghzL7VPT-0"
# Build the URL from the id so the two can never drift apart.
url = f"https://drive.google.com/uc?id={file_id}"
# Local file name the archive is written to.
output = "DeepDRiD.zip"
gdown.download(url, output, quiet=False)




Downloading...
From (original): https://drive.google.com/uc?id=1PK8X-rkPtE1fsSSF_nLUyjghzL7VPT-0
From (redirected): https://drive.google.com/uc?id=1PK8X-rkPtE1fsSSF_nLUyjghzL7VPT-0&confirm=t&uuid=e8a6703a-a195-42b8-a2db-e55be34483cd
To: /content/DeepDRiD.zip
100%|██████████| 99.1M/99.1M [00:01<00:00, 59.3MB/s]


'DeepDRiD.zip'

In [None]:
!unzip DeepDRiD.zip

Archive:  DeepDRiD.zip
   creating: DeepDRiD/
  inflating: DeepDRiD/sample_submission.csv  
   creating: DeepDRiD/test/
   creating: DeepDRiD/test/347/
  inflating: DeepDRiD/test/347/347_l1.jpg  
  inflating: DeepDRiD/test/347/347_l2.jpg  
  inflating: DeepDRiD/test/347/347_r1.jpg  
  inflating: DeepDRiD/test/347/347_r2.jpg  
   creating: DeepDRiD/test/353/
  inflating: DeepDRiD/test/353/353_l1.jpg  
  inflating: DeepDRiD/test/353/353_l2.jpg  
  inflating: DeepDRiD/test/353/353_r1.jpg  
  inflating: DeepDRiD/test/353/353_r2.jpg  
   creating: DeepDRiD/test/354/
  inflating: DeepDRiD/test/354/354_l1.jpg  
  inflating: DeepDRiD/test/354/354_l2.jpg  
  inflating: DeepDRiD/test/354/354_r1.jpg  
  inflating: DeepDRiD/test/354/354_r2.jpg  
   creating: DeepDRiD/test/366/
  inflating: DeepDRiD/test/366/366_l1.jpg  
  inflating: DeepDRiD/test/366/366_l2.jpg  
  inflating: DeepDRiD/test/366/366_r1.jpg  
  inflating: DeepDRiD/test/366/366_r2.jpg  
   creating: DeepDRiD/test/368/
  inflating: Dee

In [None]:
# Hyperparameters.
# NOTE(review): in the visible part of this notebook the experiment loop passes
# its own literals (batch_size=32, lr=0.001, num_epochs=5) and never reads
# batch_size, learning_rate or num_epochs from here - confirm which values
# are the intended ones.
batch_size = 24
num_classes = 5  # 5 DR levels
learning_rate = 0.0001
num_epochs = 20

In [None]:
class RetinopathyDataset(Dataset):
    """Dataset of fundus images described by a CSV annotation file.

    With ``mode='single'`` every CSV row is one sample.  With any other mode
    the rows are grouped per patient and eye, and each sample consists of the
    first two images of one such group.

    Args:
        ann_file: Path of the CSV file.  It needs an ``img_path`` column, an
            ``image_id`` column for the dual layout, and a
            ``patient_DR_Level`` column unless ``test`` is true.
        image_dir: Directory that every ``img_path`` value is joined onto.
        transform: Optional callable applied to each loaded RGB image.
        mode: ``'single'`` for one image per sample; any other value selects
            the dual-image layout.
        test: When true, labels are neither read from the CSV nor returned.
    """

    def __init__(self, ann_file, image_dir, transform=None, mode='single', test=False):
        self.ann_file = ann_file
        self.image_dir = image_dir
        self.transform = transform

        self.test = test
        self.mode = mode

        # The sample index is built once, up front.
        build_index = self.load_data if self.mode == 'single' else self.load_data_dual
        self.data = build_index()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        fetch = self.get_item if self.mode == 'single' else self.get_item_dual
        return fetch(index)

    @staticmethod
    def _read_rgb(path):
        """Open the image stored at ``path`` and convert it to RGB."""
        return Image.open(path).convert('RGB')

    def _label_tensor(self, record):
        """Return the DR level of ``record`` as an int64 scalar tensor."""
        return torch.tensor(record['dr_level'], dtype=torch.int64)

    # 1. single image
    def load_data(self):
        """Return one ``{'img_path', 'dr_level'}`` record per CSV row.

        ``dr_level`` is left out when the dataset is in test mode.
        """
        records = []
        for row in pd.read_csv(self.ann_file).to_dict('records'):
            entry = {'img_path': os.path.join(self.image_dir, row['img_path'])}
            if not self.test:
                entry['dr_level'] = int(row['patient_DR_Level'])
            records.append(entry)
        return records

    def get_item(self, index):
        """Return ``(image, label)``, or only ``image`` in test mode."""
        record = self.data[index]
        image = self._read_rgb(record['img_path'])
        if self.transform:
            image = self.transform(image)

        if self.test:
            return image
        return image, self._label_tensor(record)

    # 2. dual image
    def load_data_dual(self):
        """Return one record per (patient, eye) group of the CSV.

        ``image_id`` is split on ``'_'``: the part before it is the patient
        id and the first character after it identifies the eye.  The first
        two rows of every group supply ``img_path1`` and ``img_path2``; the
        label, when present, is taken from the first row.
        """
        frame = pd.read_csv(self.ann_file)

        id_parts = frame['image_id'].str.split('_')
        frame['prefix'] = id_parts.str[0]  # The patient id of each image
        frame['suffix'] = id_parts.str[1].str[0]  # The left or right eye

        records = []
        for _, eye_rows in frame.groupby(['prefix', 'suffix']):
            first_row = eye_rows.iloc[0]
            entry = {
                'img_path1': os.path.join(self.image_dir, first_row['img_path']),
                'img_path2': os.path.join(self.image_dir, eye_rows.iloc[1]['img_path']),
            }
            if not self.test:
                entry['dr_level'] = int(first_row['patient_DR_Level'])
            records.append(entry)
        return records

    def get_item_dual(self, index):
        """Return ``([image1, image2], label)``, or only the list in test mode."""
        record = self.data[index]
        # Both files are opened before any transform runs.
        pair = [self._read_rgb(record[key]) for key in ('img_path1', 'img_path2')]

        if self.transform:
            pair = [self.transform(image) for image in pair]

        if self.test:
            return pair
        return pair, self._label_tensor(record)

In [None]:
class CutOut(object):
    """Zero out one random square patch of a ``(C, H, W)`` tensor image.

    Args:
        mask_size: Edge length of the square patch, in pixels.
        p: Probability of applying the mask to a given image.
    """

    def __init__(self, mask_size, p=0.5):
        self.mask_size = mask_size
        self.p = p

    def __call__(self, img):
        """Return ``img`` with a ``mask_size`` x ``mask_size`` patch set to 0.

        The tensor is modified in place and also returned.  With probability
        ``1 - p`` the input is returned untouched (before any type check).

        Raises:
            TypeError: If the mask is to be applied and ``img`` is not a
                ``torch.Tensor``.
        """
        if np.random.rand() > self.p:
            return img

        # Ensure the image is a tensor
        if not isinstance(img, torch.Tensor):
            raise TypeError('Input image must be a torch.Tensor')

        # Get height and width of the image
        h, w = img.shape[1], img.shape[2]
        mask_size_half = self.mask_size // 2
        offset = 1 if self.mask_size % 2 == 0 else 0

        # Centre range chosen so that the patch always fits inside the image.
        cx = np.random.randint(mask_size_half, w + offset - mask_size_half)
        cy = np.random.randint(mask_size_half, h + offset - mask_size_half)

        # Derive the far edge from the near edge so the patch is exactly
        # mask_size wide; deriving it from the centre made it one pixel too
        # large for even sizes and one pixel too small for odd sizes.
        xmin = cx - mask_size_half
        ymin = cy - mask_size_half
        xmax = xmin + self.mask_size
        ymax = ymin + self.mask_size

        xmin, xmax = max(0, xmin), min(w, xmax)
        ymin, ymax = max(0, ymin), min(h, ymax)

        img[:, ymin:ymax, xmin:xmax] = 0
        return img


class SLORandomPad:
    """Pad an image up to a minimum size, placing it at a random offset.

    Args:
        size: ``(width, height)`` the padded image must at least reach.
            Images that are already that large along an axis get no padding
            along that axis.
    """

    def __init__(self, size):
        self.size = size

    def __call__(self, img):
        """Return ``img`` padded on all four sides as needed.

        ``img`` must expose ``width`` and ``height`` attributes.
        """
        target_w, target_h = self.size[0], self.size[1]
        extra_w = max(0, target_w - img.width)
        extra_h = max(0, target_h - img.height)

        # Split the missing pixels randomly between the two opposite sides.
        left = random.randint(0, extra_w)
        top = random.randint(0, extra_h)
        padding = (left, top, extra_w - left, extra_h - top)
        return transforms.functional.pad(img, padding)


class FundRandomRotate:
    """Rotate an image by a random angle, with a given probability.

    Args:
        prob: Probability that a given image is rotated.
        degree: The angle is drawn uniformly from ``[-degree, degree]``.
    """

    def __init__(self, prob, degree):
        self.prob = prob
        self.degree = degree

    def __call__(self, img):
        """Return ``img`` rotated, or unchanged when the draw misses ``prob``."""
        if not random.random() < self.prob:
            return img
        angle = random.uniform(-self.degree, self.degree)
        return transforms.functional.rotate(img, angle)

In [None]:
# Training-time augmentation pipeline.
# NOTE: the experiment cell further down rebinds both transform_train (as its
# loop variable) and transform_test, so these definitions only matter to code
# that runs before that cell.
transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop((210, 210)),
    # Pads the 210x210 crop back up to 224x224 at a random offset.
    SLORandomPad((224, 224)),
    FundRandomRotate(prob=0.5, degree=30),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    # NOTE(review): per the torchvision docs a (min, max) tuple is the range
    # the brightness factor is sampled from, so (0.1, 0.9) only ever darkens
    # the image - confirm this is intended.
    transforms.ColorJitter(brightness=(0.1, 0.9)),
    transforms.ToTensor(),
    # Per-channel mean/std; presumably the ImageNet statistics that match the
    # pretrained backbones - TODO confirm.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Deterministic pipeline for validation and test images: resize and normalise
# only, no random augmentation.
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


def train_model(model, train_loader, val_loader, device, criterion, optimizer, lr_scheduler, num_epochs=25,
                checkpoint_path='model.pth'):
    """Train ``model`` and keep the weights of the best validation epoch.

    After every epoch the model is scored on ``val_loader`` with
    ``evaluate_model``.  Whenever the validation kappa improves, the weights
    are snapshotted and written to ``checkpoint_path``.

    Args:
        model: Network to train; called as ``model(images)``.
        train_loader: Yields ``(images, labels)`` batches, where ``images`` is
            a tensor (single mode) or a list of tensors (dual mode).
        val_loader: Loader handed to ``evaluate_model`` after each epoch.
        device: Device that batches are moved to.
        criterion: Loss called as ``criterion(outputs, labels.long())``.
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Scheduler stepped once per epoch.
        num_epochs: Number of epochs to run.
        checkpoint_path: File that the best ``state_dict`` is saved to.

    Returns:
        ``(model, best_val_kappa)``.  If at least one epoch improved on the
        initial kappa of -1.0, ``model`` carries the weights of the best
        epoch (the same ones stored in the checkpoint); otherwise it keeps
        the weights of the last epoch.
    """
    best_model = None  # deep-copied state_dict of the best epoch so far
    best_epoch = None
    best_val_kappa = -1.0  # Initialize the best kappa score

    for epoch in range(1, num_epochs + 1):
        print(f'\nEpoch {epoch}/{num_epochs}')
        running_loss = []
        all_preds = []
        all_labels = []

        # evaluate_model() leaves the model in eval mode, so switch back.
        model.train()

        with tqdm(total=len(train_loader), desc='Training', unit=' batch', file=sys.stdout) as pbar:
            for images, labels in train_loader:
                if not isinstance(images, list):
                    images = images.to(device)  # single image case
                else:
                    images = [x.to(device) for x in images]  # dual images case

                labels = labels.to(device)

                optimizer.zero_grad()

                outputs = model(images)
                loss = criterion(outputs, labels.long())

                loss.backward()
                optimizer.step()

                preds = torch.argmax(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

                batch_loss = loss.item()
                running_loss.append(batch_loss)

                pbar.set_postfix({'lr': f'{optimizer.param_groups[0]["lr"]:.1e}', 'Loss': f'{batch_loss:.4f}'})
                pbar.update(1)

        lr_scheduler.step()

        epoch_loss = sum(running_loss) / len(running_loss)

        train_metrics = compute_metrics(all_preds, all_labels, per_class=True)
        kappa, accuracy, precision, recall = train_metrics[:4]

        print(f'[Train] Kappa: {kappa:.4f} Accuracy: {accuracy:.4f} '
              f'Precision: {precision:.4f} Recall: {recall:.4f} Loss: {epoch_loss:.4f}')

        if len(train_metrics) > 4:
            precision_per_class, recall_per_class = train_metrics[4:]
            # Separate loop names so the epoch-level precision/recall above
            # are not overwritten.
            for i, (class_precision, class_recall) in enumerate(zip(precision_per_class, recall_per_class)):
                print(f'[Train] Class {i}: Precision: {class_precision:.4f}, Recall: {class_recall:.4f}')

        # Evaluation on the validation set at the end of each epoch
        val_metrics = evaluate_model(model, val_loader, device)
        val_kappa, val_accuracy, val_precision, val_recall = val_metrics[:4]
        print(f'[Val] Kappa: {val_kappa:.4f} Accuracy: {val_accuracy:.4f} '
              f'Precision: {val_precision:.4f} Recall: {val_recall:.4f}')

        if val_kappa > best_val_kappa:
            best_val_kappa = val_kappa
            best_epoch = epoch
            # state_dict() returns references to the live parameters, so it
            # has to be deep-copied to survive further optimizer steps.
            best_model = copy.deepcopy(model.state_dict())
            torch.save(best_model, checkpoint_path)

    print(f'[Val] Best kappa: {best_val_kappa:.4f}, Epoch {best_epoch}')

    # Hand back the weights that produced the reported best kappa.
    if best_model is not None:
        model.load_state_dict(best_model)

    return model, best_val_kappa

def evaluate_model(model, test_loader, device, test_only=False, prediction_path='./test_predictions.csv'):
    """Run ``model`` over ``test_loader`` and score or export its predictions.

    File names are recovered from ``test_loader.dataset.data`` using the
    batch index and ``test_loader.batch_size``, so the loader has to iterate
    the dataset in order.  In the dual-image case each prediction is recorded
    twice, once per image of the pair.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        test_loader: Yields ``(images, labels)`` batches, or only ``images``
            when ``test_only`` is true.
        device: Device that the images are moved to.
        test_only: When true, no labels are expected and the predictions are
            written to ``prediction_path`` instead of being scored.
        prediction_path: Destination CSV with ``ID`` and ``TARGET`` columns.

    Returns:
        The result of ``compute_metrics`` when ``test_only`` is false,
        otherwise ``None``.
    """
    model.eval()

    predictions = []
    targets = []
    image_names = []

    with tqdm(total=len(test_loader), desc='Evaluating', unit=' batch', file=sys.stdout) as pbar:
        for batch_idx, batch in enumerate(test_loader):
            if test_only:
                images, labels = batch, None
            else:
                images, labels = batch

            dual = isinstance(images, list)
            if dual:
                images = [x.to(device) for x in images]  # dual images case
            else:
                images = images.to(device)  # single image case

            with torch.no_grad():
                preds = torch.argmax(model(images), 1)

            # One (record key, batch length) entry per image of a sample.
            if dual:
                views = [(f'img_path{k + 1}', len(images[k])) for k in range(2)]
            else:
                views = [('img_path', len(images))]

            first = batch_idx * test_loader.batch_size
            for path_key, count in views:
                predictions.extend(preds.cpu().numpy())
                image_names.extend(
                    os.path.basename(test_loader.dataset.data[idx][path_key])
                    for idx in range(first, first + count)
                )
                if not test_only:
                    targets.extend(labels.numpy())

            pbar.update(1)

    if not test_only:
        return compute_metrics(predictions, targets)

    # Save predictions to csv file for Kaggle online evaluation
    submission = pd.DataFrame({
        'ID': image_names,
        'TARGET': predictions
    })
    submission.to_csv(prediction_path, index=False)
    print(f'[Test] Save predictions to {os.path.abspath(prediction_path)}')


def compute_metrics(preds, labels, per_class=False):
    """Score predictions against ground-truth labels.

    Args:
        preds: Predicted class ids.
        labels: True class ids, in the same order as ``preds``.
        per_class: When true, also compute precision and recall per class.

    Returns:
        ``(kappa, accuracy, precision, recall)`` with quadratic-weighted
        kappa and weighted-average precision/recall.  With ``per_class`` the
        tuple is extended by ``precision_per_class`` and ``recall_per_class``.
    """
    summary = (
        cohen_kappa_score(labels, preds, weights='quadratic'),
        accuracy_score(labels, preds),
        precision_score(labels, preds, average='weighted', zero_division=0),
        recall_score(labels, preds, average='weighted', zero_division=0),
    )
    if not per_class:
        return summary

    # average=None yields one score per class instead of a single aggregate.
    class_precision = precision_score(labels, preds, average=None, zero_division=0)
    class_recall = recall_score(labels, preds, average=None, zero_division=0)
    return summary + (class_precision, class_recall)



In [None]:
# Define model classes as given in the base code
class MyModel(nn.Module):
    """Single-image classifier: a backbone followed by a small MLP head.

    The backbone's ``fc`` attribute is replaced with ``nn.Identity`` so that
    the backbone emits features instead of its original class scores.  The
    backbone object passed in is modified in place.

    Args:
        backbone: Feature extractor.  If its ``fc`` layer exposes
            ``in_features`` (as a linear layer does), that width is used as
            the input size of the new head; otherwise 512 is assumed.
        num_classes: Number of output classes.
        dropout_rate: Dropout probability used inside the head.
    """

    def __init__(self, backbone, num_classes=5, dropout_rate=0.5):
        super().__init__()

        # Read the feature width from the layer about to be removed, so that
        # backbones wider than 512 also fit the new head.
        in_features = getattr(getattr(backbone, 'fc', None), 'in_features', 512)

        self.backbone = backbone
        self.backbone.fc = nn.Identity()  # Remove original classification layer

        self.fc = nn.Sequential(
            nn.Linear(in_features, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_rate),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_rate),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        """Return the class scores computed from the backbone features of ``x``."""
        x = self.backbone(x)
        x = self.fc(x)
        return x

class VGG16Model(nn.Module):
    """Classifier built on the convolutional ``features`` part of a backbone.

    The backbone's ``classifier`` is replaced with ``nn.Identity`` and is not
    used in the forward pass; only ``backbone.features`` is called.  Feature
    maps are pooled to 7x7, flattened and fed to a small MLP head whose input
    size is fixed to ``512 * 7 * 7``.

    Args:
        backbone: Module exposing a ``features`` sub-module.  It is modified
            in place.
        num_classes: Number of output classes.
        dropout_rate: Dropout probability used inside the head.
    """

    def __init__(self, backbone, num_classes=5, dropout_rate=0.5):
        super().__init__()

        # Drop the original classifier before adopting the backbone.
        backbone.classifier = nn.Identity()
        self.backbone = backbone

        # Pooling to a fixed grid keeps the flattened size constant.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((7, 7))

        pooled_features = 512 * 7 * 7
        head_layers = [
            nn.Linear(pooled_features, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_rate),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_rate),
            nn.Linear(128, num_classes),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return class scores for the image batch ``x``."""
        feature_maps = self.backbone.features(x)
        pooled = self.adaptive_pool(feature_maps)
        flat = torch.flatten(pooled, 1)
        return self.fc(flat)

class MyDualModel(nn.Module):
    """Two-image classifier: one backbone copy per image and a shared head.

    The backbone's ``fc`` attribute is replaced with ``nn.Identity`` and the
    backbone is then deep-copied twice, so the two branches do not share
    weights.  The features of both branches are concatenated and passed to
    an MLP head.  The backbone object passed in is modified in place but is
    not itself part of the model.

    Args:
        backbone: Feature extractor.  If its ``fc`` layer exposes
            ``in_features`` (as a linear layer does), that width is used as
            the per-branch feature size; otherwise 512 is assumed.
        num_classes: Number of output classes.
        dropout_rate: Dropout probability used inside the head.
    """

    def __init__(self, backbone, num_classes=5, dropout_rate=0.5):
        super().__init__()

        # Read the feature width from the layer about to be removed, so that
        # backbones wider than 512 also fit the new head.
        in_features = getattr(getattr(backbone, 'fc', None), 'in_features', 512)

        backbone.fc = nn.Identity()

        self.backbone1 = copy.deepcopy(backbone)
        self.backbone2 = copy.deepcopy(backbone)

        self.fc = nn.Sequential(
            nn.Linear(in_features * 2, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_rate),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_rate),
            nn.Linear(128, num_classes)
        )

    def forward(self, images):
        """Return class scores for a pair ``(image1, image2)`` of batches."""
        image1, image2 = images

        x1 = self.backbone1(image1)
        x2 = self.backbone2(image2)

        # Concatenate along the feature dimension.
        x = torch.cat((x1, x2), dim=1)
        x = self.fc(x)
        return x

In [None]:
def get_model(backbone_name, mode, num_classes=5):
    """Build a classifier around a pretrained torchvision backbone.

    Args:
        backbone_name: Name of a constructor in ``torchvision.models``
            (for example ``'resnet18'``); ``'vgg16'`` gets its own wrapper.
        mode: ``'single'`` for one image per sample or ``'dual'`` for an
            image pair per sample.
        num_classes: Number of output classes.

    Returns:
        A ``VGG16Model``, ``MyModel`` or ``MyDualModel`` instance.

    Raises:
        ValueError: If ``mode`` is neither ``'single'`` nor ``'dual'``.
    """
    # Reject bad modes before any weights are downloaded; previously the
    # function fell through and returned None.
    if mode not in ('single', 'dual'):
        raise ValueError(f"Unknown mode {mode!r}; expected 'single' or 'dual'.")

    if backbone_name == 'vgg16':
        backbone = models.vgg16(pretrained=True)
        if mode == 'single':
            return VGG16Model(backbone, num_classes=num_classes)
        # NOTE(review): MyDualModel only replaces a `.fc` attribute, which
        # VGG16 presumably does not use as its head - modify if you need a
        # dual-mode VGG16.
        return MyDualModel(backbone, num_classes=num_classes)

    backbone = getattr(models, backbone_name)(pretrained=True)
    if mode == 'single':
        return MyModel(backbone, num_classes=num_classes)
    return MyDualModel(backbone, num_classes=num_classes)

In [None]:

# Experiment driver: trains every backbone / mode / augmentation combination
# and records the best validation kappa of each run.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
models_to_test = ['resnet18', 'resnet34']
# 'vgg16', 'densenet121' and 'efficientnet_b0' did not work (they expected
# other input sizes).
results = {}

# Set to True to skip any combination whose checkpoint file already exists.
# Skipped combinations do not get an entry in `results`.
skip_existing_checkpoints = False

# Default transform_train for reference
default_transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop((210, 210)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ColorJitter(brightness=(0.1, 0.9)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# (display name, training transform) pairs; the name is also used in the
# checkpoint file name and in the keys of `results`.
augmentations = [
    ("Default", default_transform_train),
    ("With CutOut", transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomCrop((210, 210)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ColorJitter(brightness=(0.1, 0.9)),
        transforms.ToTensor(),
        # CutOut requires a tensor, so it has to come after ToTensor.
        CutOut(mask_size=32, p=0.5),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])),
    # NOTE(review): the image is already 256x256 when it reaches
    # SLORandomPad((224, 224)), so no padding is added in this pipeline -
    # confirm this is intended.
    ("With SLO Padding", transforms.Compose([
        transforms.Resize((256, 256)),
        SLORandomPad((224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ColorJitter(brightness=(0.1, 0.9)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])),
    ("With Fundus Rotation", transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomCrop((210, 210)),
        FundRandomRotate(prob=0.7, degree=45),  # Increased rotation range for this experiment
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ColorJitter(brightness=(0.1, 0.9)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])),
    ("Combined", transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomCrop((210, 210)),
        SLORandomPad((224, 224)),
        FundRandomRotate(prob=0.5, degree=30),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ColorJitter(brightness=(0.1, 0.9)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]))
]

transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

for backbone_name in models_to_test:
    for mode in ['single', 'dual']:
        for aug_name, transform_train in augmentations:
            # Load datasets with augmentations
            train_dataset = RetinopathyDataset('./DeepDRiD/train.csv', './DeepDRiD/train/', transform_train, mode)
            val_dataset = RetinopathyDataset('./DeepDRiD/val.csv', './DeepDRiD/val/', transform_test, mode)
            test_dataset = RetinopathyDataset('./DeepDRiD/test.csv', './DeepDRiD/test/', transform_test, mode, test=True)

            train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
            val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
            # Built here but not used inside this loop.
            test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

            checkpoint_path = f"./{backbone_name}_{mode}_{aug_name.replace(' ', '_')}.pth"

            if skip_existing_checkpoints and os.path.exists(checkpoint_path):
                print(f"Checkpoint for {backbone_name} in {mode} mode with {aug_name} augmentation already exists. Skipping training.")
                continue

            # A fresh model, optimizer and scheduler for every combination.
            model = get_model(backbone_name, mode).to(device)
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=0.001)
            lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

            print(f"Training {backbone_name} in {mode} mode with {aug_name} augmentations...")
            model, best_kappa = train_model(model, train_loader, val_loader, device, criterion, optimizer, lr_scheduler, num_epochs=5, checkpoint_path=checkpoint_path)
            results[f"{backbone_name}_{mode}_{aug_name}"] = best_kappa

# Summarize results
if results:
    best_model_name = max(results, key=results.get)
    print(f"Best model: {best_model_name} with score {results[best_model_name]}")
print("Results:", results)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 104MB/s]


Training resnet18 in single mode with Default augmentations...

Epoch 1/5
Training: 100%|██████████| 38/38 [00:23<00:00,  1.65 batch/s, lr=1.0e-03, Loss=1.0995]
[Train] Kappa: 0.4783 Accuracy: 0.4250 Precision: 0.3882 Recall: 0.4250 Loss: 1.3730
[Train] Class 0: Precision: 0.6599, Recall: 0.8083
[Train] Class 1: Precision: 0.3250, Recall: 0.2708
[Train] Class 2: Precision: 0.2802, Recall: 0.2417
[Train] Class 3: Precision: 0.2921, Recall: 0.3833
[Train] Class 4: Precision: 0.1081, Recall: 0.0333
Evaluating: 100%|██████████| 13/13 [00:03<00:00,  3.57 batch/s]
[Val] Kappa: 0.5564 Accuracy: 0.5025 Precision: 0.3436 Recall: 0.5025

Epoch 2/5
Training: 100%|██████████| 38/38 [00:10<00:00,  3.60 batch/s, lr=1.0e-03, Loss=1.1421]
[Train] Kappa: 0.5727 Accuracy: 0.4658 Precision: 0.4256 Recall: 0.4658 Loss: 1.2589
[Train] Class 0: Precision: 0.7483, Recall: 0.9083
[Train] Class 1: Precision: 0.3398, Recall: 0.3625
[Train] Class 2: Precision: 0.2278, Recall: 0.1708
[Train] Class 3: Precision: 0



Training resnet18 in single mode with With CutOut augmentations...

Epoch 1/5
Training: 100%|██████████| 38/38 [00:12<00:00,  3.05 batch/s, lr=1.0e-03, Loss=1.2961]
[Train] Kappa: 0.4375 Accuracy: 0.3992 Precision: 0.3767 Recall: 0.3992 Loss: 1.4132
[Train] Class 0: Precision: 0.6499, Recall: 0.7889
[Train] Class 1: Precision: 0.2329, Recall: 0.2125
[Train] Class 2: Precision: 0.2600, Recall: 0.1625
[Train] Class 3: Precision: 0.2656, Recall: 0.4250
[Train] Class 4: Precision: 0.3000, Recall: 0.0250
Evaluating: 100%|██████████| 13/13 [00:03<00:00,  3.45 batch/s]
[Val] Kappa: 0.3607 Accuracy: 0.4150 Precision: 0.4056 Recall: 0.4150

Epoch 2/5
Training: 100%|██████████| 38/38 [00:10<00:00,  3.78 batch/s, lr=1.0e-03, Loss=1.2581]
[Train] Kappa: 0.5054 Accuracy: 0.4333 Precision: 0.3709 Recall: 0.4333 Loss: 1.3162
[Train] Class 0: Precision: 0.7152, Recall: 0.9000
[Train] Class 1: Precision: 0.2412, Recall: 0.2000
[Train] Class 2: Precision: 0.2511, Recall: 0.2417
[Train] Class 3: Precisio



Training resnet18 in single mode with With SLO Padding augmentations...

Epoch 1/5
Training: 100%|██████████| 38/38 [00:11<00:00,  3.19 batch/s, lr=1.0e-03, Loss=1.1157]
[Train] Kappa: 0.4167 Accuracy: 0.4275 Precision: 0.3916 Recall: 0.4275 Loss: 1.3477
[Train] Class 0: Precision: 0.6892, Recall: 0.8500
[Train] Class 1: Precision: 0.2558, Recall: 0.1833
[Train] Class 2: Precision: 0.2872, Recall: 0.3375
[Train] Class 3: Precision: 0.2770, Recall: 0.3208
[Train] Class 4: Precision: 0.2083, Recall: 0.0417
Evaluating: 100%|██████████| 13/13 [00:02<00:00,  6.01 batch/s]
[Val] Kappa: 0.5655 Accuracy: 0.3325 Precision: 0.3647 Recall: 0.3325

Epoch 2/5
Training: 100%|██████████| 38/38 [00:12<00:00,  3.10 batch/s, lr=1.0e-03, Loss=1.3773]
[Train] Kappa: 0.5729 Accuracy: 0.4792 Precision: 0.4289 Recall: 0.4792 Loss: 1.2229
[Train] Class 0: Precision: 0.7405, Recall: 0.9750
[Train] Class 1: Precision: 0.3865, Recall: 0.3333
[Train] Class 2: Precision: 0.2297, Recall: 0.2000
[Train] Class 3: Pre



Training resnet18 in single mode with With Fundus Rotation augmentations...

Epoch 1/5
Training: 100%|██████████| 38/38 [00:10<00:00,  3.62 batch/s, lr=1.0e-03, Loss=1.2634]
[Train] Kappa: 0.4390 Accuracy: 0.4258 Precision: 0.3974 Recall: 0.4258 Loss: 1.3712
[Train] Class 0: Precision: 0.6599, Recall: 0.8139
[Train] Class 1: Precision: 0.2982, Recall: 0.2708
[Train] Class 2: Precision: 0.2696, Recall: 0.2583
[Train] Class 3: Precision: 0.3011, Recall: 0.3375
[Train] Class 4: Precision: 0.2564, Recall: 0.0833
Evaluating: 100%|██████████| 13/13 [00:02<00:00,  4.61 batch/s]
[Val] Kappa: 0.2777 Accuracy: 0.4450 Precision: 0.2654 Recall: 0.4450

Epoch 2/5
Training: 100%|██████████| 38/38 [00:10<00:00,  3.68 batch/s, lr=1.0e-03, Loss=1.3714]
[Train] Kappa: 0.6162 Accuracy: 0.4983 Precision: 0.4569 Recall: 0.4983 Loss: 1.2073
[Train] Class 0: Precision: 0.7164, Recall: 0.9333
[Train] Class 1: Precision: 0.3889, Recall: 0.3792
[Train] Class 2: Precision: 0.3048, Recall: 0.2375
[Train] Class 3:



Training resnet18 in single mode with Combined augmentations...

Epoch 1/5
Training: 100%|██████████| 38/38 [00:10<00:00,  3.47 batch/s, lr=1.0e-03, Loss=1.7790]
[Train] Kappa: 0.3938 Accuracy: 0.3975 Precision: 0.3513 Recall: 0.3975 Loss: 1.4055
[Train] Class 0: Precision: 0.5743, Recall: 0.7944
[Train] Class 1: Precision: 0.2374, Recall: 0.2542
[Train] Class 2: Precision: 0.2915, Recall: 0.2708
[Train] Class 3: Precision: 0.3073, Recall: 0.2625
[Train] Class 4: Precision: 0.1176, Recall: 0.0167
Evaluating: 100%|██████████| 13/13 [00:02<00:00,  5.66 batch/s]
[Val] Kappa: 0.4264 Accuracy: 0.4125 Precision: 0.3430 Recall: 0.4125

Epoch 2/5
Training: 100%|██████████| 38/38 [00:11<00:00,  3.43 batch/s, lr=1.0e-03, Loss=1.6788]
[Train] Kappa: 0.5324 Accuracy: 0.4700 Precision: 0.4282 Recall: 0.4700 Loss: 1.2899
[Train] Class 0: Precision: 0.7438, Recall: 0.9111
[Train] Class 1: Precision: 0.3021, Recall: 0.2417
[Train] Class 2: Precision: 0.3252, Recall: 0.2792
[Train] Class 3: Precision: 



Training resnet18 in dual mode with Default augmentations...

Epoch 1/5
Training: 100%|██████████| 19/19 [00:10<00:00,  1.78 batch/s, lr=1.0e-03, Loss=1.3856]
[Train] Kappa: 0.3247 Accuracy: 0.3950 Precision: 0.3585 Recall: 0.3950 Loss: 1.4489
[Train] Class 0: Precision: 0.5773, Recall: 0.7056
[Train] Class 1: Precision: 0.2840, Recall: 0.3833
[Train] Class 2: Precision: 0.2778, Recall: 0.1667
[Train] Class 3: Precision: 0.3386, Recall: 0.3583
[Train] Class 4: Precision: 0.0526, Recall: 0.0167
Evaluating: 100%|██████████| 7/7 [00:02<00:00,  3.25 batch/s]
[Val] Kappa: 0.3119 Accuracy: 0.4250 Precision: 0.3519 Recall: 0.4250

Epoch 2/5
Training: 100%|██████████| 19/19 [00:10<00:00,  1.82 batch/s, lr=1.0e-03, Loss=1.2603]
[Train] Kappa: 0.4750 Accuracy: 0.4500 Precision: 0.3988 Recall: 0.4500 Loss: 1.3549
[Train] Class 0: Precision: 0.7476, Recall: 0.8556
[Train] Class 1: Precision: 0.2587, Recall: 0.3083
[Train] Class 2: Precision: 0.2787, Recall: 0.1417
[Train] Class 3: Precision: 0.335



Training resnet18 in dual mode with With CutOut augmentations...

Epoch 1/5
Training: 100%|██████████| 19/19 [00:10<00:00,  1.78 batch/s, lr=1.0e-03, Loss=1.5238]
[Train] Kappa: 0.3044 Accuracy: 0.3767 Precision: 0.3356 Recall: 0.3767 Loss: 1.4332
[Train] Class 0: Precision: 0.5690, Recall: 0.7556
[Train] Class 1: Precision: 0.2339, Recall: 0.2417
[Train] Class 2: Precision: 0.2846, Recall: 0.2917
[Train] Class 3: Precision: 0.2874, Recall: 0.2083
[Train] Class 4: Precision: 0.0370, Recall: 0.0167
Evaluating: 100%|██████████| 7/7 [00:02<00:00,  3.19 batch/s]
[Val] Kappa: 0.4672 Accuracy: 0.4300 Precision: 0.3144 Recall: 0.4300

Epoch 2/5
Training: 100%|██████████| 19/19 [00:10<00:00,  1.79 batch/s, lr=1.0e-03, Loss=1.1916]
[Train] Kappa: 0.4516 Accuracy: 0.4133 Precision: 0.3757 Recall: 0.4133 Loss: 1.3964
[Train] Class 0: Precision: 0.7050, Recall: 0.7833
[Train] Class 1: Precision: 0.2653, Recall: 0.2167
[Train] Class 2: Precision: 0.2595, Recall: 0.3417
[Train] Class 3: Precision: 0



Training resnet18 in dual mode with With SLO Padding augmentations...

Epoch 1/5
Training: 100%|██████████| 19/19 [00:11<00:00,  1.59 batch/s, lr=1.0e-03, Loss=1.7952]
[Train] Kappa: 0.3595 Accuracy: 0.3633 Precision: 0.3354 Recall: 0.3633 Loss: 1.4877
[Train] Class 0: Precision: 0.6055, Recall: 0.7333
[Train] Class 1: Precision: 0.2407, Recall: 0.2167
[Train] Class 2: Precision: 0.2000, Recall: 0.2667
[Train] Class 3: Precision: 0.2653, Recall: 0.2167
[Train] Class 4: Precision: 0.1250, Recall: 0.0333
Evaluating: 100%|██████████| 7/7 [00:02<00:00,  2.79 batch/s]
[Val] Kappa: 0.0587 Accuracy: 0.3050 Precision: 0.1165 Recall: 0.3050

Epoch 2/5
Training: 100%|██████████| 19/19 [00:11<00:00,  1.64 batch/s, lr=1.0e-03, Loss=1.2298]
[Train] Kappa: 0.4197 Accuracy: 0.4267 Precision: 0.3920 Recall: 0.4267 Loss: 1.3435
[Train] Class 0: Precision: 0.7500, Recall: 0.8667
[Train] Class 1: Precision: 0.2955, Recall: 0.4333
[Train] Class 2: Precision: 0.1957, Recall: 0.1500
[Train] Class 3: Precisi



Training resnet18 in dual mode with With Fundus Rotation augmentations...

Epoch 1/5
Training: 100%|██████████| 19/19 [00:10<00:00,  1.77 batch/s, lr=1.0e-03, Loss=1.3232]
[Train] Kappa: 0.3790 Accuracy: 0.3967 Precision: 0.3468 Recall: 0.3967 Loss: 1.3888
[Train] Class 0: Precision: 0.6025, Recall: 0.8167
[Train] Class 1: Precision: 0.2530, Recall: 0.1750
[Train] Class 2: Precision: 0.2958, Recall: 0.1750
[Train] Class 3: Precision: 0.2816, Recall: 0.4083
[Train] Class 4: Precision: 0.0000, Recall: 0.0000
Evaluating: 100%|██████████| 7/7 [00:02<00:00,  3.24 batch/s]
[Val] Kappa: 0.6254 Accuracy: 0.4900 Precision: 0.2760 Recall: 0.4900

Epoch 2/5
Training: 100%|██████████| 19/19 [00:10<00:00,  1.76 batch/s, lr=1.0e-03, Loss=1.6672]
[Train] Kappa: 0.4976 Accuracy: 0.4483 Precision: 0.4513 Recall: 0.4483 Loss: 1.3295
[Train] Class 0: Precision: 0.7413, Recall: 0.8278
[Train] Class 1: Precision: 0.2798, Recall: 0.3917
[Train] Class 2: Precision: 0.2889, Recall: 0.2167
[Train] Class 3: Pre



Training resnet18 in dual mode with Combined augmentations...

Epoch 1/5
Training: 100%|██████████| 19/19 [00:11<00:00,  1.72 batch/s, lr=1.0e-03, Loss=1.2841]
[Train] Kappa: 0.3073 Accuracy: 0.3583 Precision: 0.3213 Recall: 0.3583 Loss: 1.4380
[Train] Class 0: Precision: 0.6150, Recall: 0.7278
[Train] Class 1: Precision: 0.2143, Recall: 0.2000
[Train] Class 2: Precision: 0.2013, Recall: 0.2583
[Train] Class 3: Precision: 0.2685, Recall: 0.2417
[Train] Class 4: Precision: 0.0000, Recall: 0.0000
Evaluating: 100%|██████████| 7/7 [00:02<00:00,  3.20 batch/s]
[Val] Kappa: 0.5161 Accuracy: 0.4450 Precision: 0.2771 Recall: 0.4450

Epoch 2/5
Training: 100%|██████████| 19/19 [00:11<00:00,  1.72 batch/s, lr=1.0e-03, Loss=1.2895]
[Train] Kappa: 0.4658 Accuracy: 0.4400 Precision: 0.4166 Recall: 0.4400 Loss: 1.3509
[Train] Class 0: Precision: 0.7130, Recall: 0.8833
[Train] Class 1: Precision: 0.2328, Recall: 0.2250
[Train] Class 2: Precision: 0.2969, Recall: 0.1583
[Train] Class 3: Precision: 0.29

Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 93.0MB/s]


Training resnet34 in single mode with Default augmentations...

Epoch 1/5
Training: 100%|██████████| 38/38 [00:12<00:00,  2.93 batch/s, lr=1.0e-03, Loss=1.4693]
[Train] Kappa: 0.4413 Accuracy: 0.4142 Precision: 0.3685 Recall: 0.4142 Loss: 1.3831
[Train] Class 0: Precision: 0.6818, Recall: 0.8333
[Train] Class 1: Precision: 0.2594, Recall: 0.2875
[Train] Class 2: Precision: 0.2601, Recall: 0.2417
[Train] Class 3: Precision: 0.2805, Recall: 0.2875
[Train] Class 4: Precision: 0.0400, Recall: 0.0083
Evaluating: 100%|██████████| 13/13 [00:02<00:00,  5.40 batch/s]
[Val] Kappa: 0.5947 Accuracy: 0.4775 Precision: 0.4659 Recall: 0.4775

Epoch 2/5
Training: 100%|██████████| 38/38 [00:12<00:00,  2.95 batch/s, lr=1.0e-03, Loss=1.5012]
[Train] Kappa: 0.4922 Accuracy: 0.4350 Precision: 0.3885 Recall: 0.4350 Loss: 1.3193
[Train] Class 0: Precision: 0.7332, Recall: 0.9083
[Train] Class 1: Precision: 0.2339, Recall: 0.2125
[Train] Class 2: Precision: 0.2222, Recall: 0.1667
[Train] Class 3: Precision: 0



Training resnet34 in single mode with With CutOut augmentations...

Epoch 1/5
Training: 100%|██████████| 38/38 [00:12<00:00,  2.96 batch/s, lr=1.0e-03, Loss=1.3464]
[Train] Kappa: 0.3079 Accuracy: 0.3492 Precision: 0.3395 Recall: 0.3492 Loss: 1.4664
[Train] Class 0: Precision: 0.6415, Recall: 0.6361
[Train] Class 1: Precision: 0.2103, Recall: 0.2208
[Train] Class 2: Precision: 0.2137, Recall: 0.2083
[Train] Class 3: Precision: 0.2757, Recall: 0.3458
[Train] Class 4: Precision: 0.0714, Recall: 0.0333
Evaluating: 100%|██████████| 13/13 [00:02<00:00,  5.38 batch/s]
[Val] Kappa: 0.6648 Accuracy: 0.4850 Precision: 0.3266 Recall: 0.4850

Epoch 2/5
Training: 100%|██████████| 38/38 [00:12<00:00,  2.95 batch/s, lr=1.0e-03, Loss=1.1851]
[Train] Kappa: 0.4789 Accuracy: 0.4375 Precision: 0.3856 Recall: 0.4375 Loss: 1.3477
[Train] Class 0: Precision: 0.6320, Recall: 0.9444
[Train] Class 1: Precision: 0.2818, Recall: 0.2125
[Train] Class 2: Precision: 0.2400, Recall: 0.1250
[Train] Class 3: Precisio



Training resnet34 in single mode with With SLO Padding augmentations...

Epoch 1/5
Training: 100%|██████████| 38/38 [00:14<00:00,  2.58 batch/s, lr=1.0e-03, Loss=1.2173]
[Train] Kappa: 0.3980 Accuracy: 0.4017 Precision: 0.3630 Recall: 0.4017 Loss: 1.4043
[Train] Class 0: Precision: 0.6750, Recall: 0.7500
[Train] Class 1: Precision: 0.2667, Recall: 0.2667
[Train] Class 2: Precision: 0.2500, Recall: 0.2917
[Train] Class 3: Precision: 0.2857, Recall: 0.3250
[Train] Class 4: Precision: 0.0000, Recall: 0.0000
Evaluating: 100%|██████████| 13/13 [00:03<00:00,  4.09 batch/s]
[Val] Kappa: 0.5604 Accuracy: 0.4000 Precision: 0.3185 Recall: 0.4000

Epoch 2/5
Training: 100%|██████████| 38/38 [00:14<00:00,  2.58 batch/s, lr=1.0e-03, Loss=1.1921]
[Train] Kappa: 0.4833 Accuracy: 0.4367 Precision: 0.3946 Recall: 0.4367 Loss: 1.3490
[Train] Class 0: Precision: 0.6694, Recall: 0.9000
[Train] Class 1: Precision: 0.2687, Recall: 0.2250
[Train] Class 2: Precision: 0.2682, Recall: 0.2458
[Train] Class 3: Pre



Training resnet34 in single mode with With Fundus Rotation augmentations...

Epoch 1/5
Training: 100%|██████████| 38/38 [00:13<00:00,  2.90 batch/s, lr=1.0e-03, Loss=1.1312]
[Train] Kappa: 0.4338 Accuracy: 0.3967 Precision: 0.3718 Recall: 0.3967 Loss: 1.3867
[Train] Class 0: Precision: 0.6739, Recall: 0.7806
[Train] Class 1: Precision: 0.3118, Recall: 0.2417
[Train] Class 2: Precision: 0.1984, Recall: 0.2042
[Train] Class 3: Precision: 0.2755, Recall: 0.3375
[Train] Class 4: Precision: 0.1250, Recall: 0.0583
Evaluating: 100%|██████████| 13/13 [00:02<00:00,  5.22 batch/s]
[Val] Kappa: 0.5955 Accuracy: 0.4375 Precision: 0.2907 Recall: 0.4375

Epoch 2/5
Training: 100%|██████████| 38/38 [00:13<00:00,  2.87 batch/s, lr=1.0e-03, Loss=1.4538]
[Train] Kappa: 0.4959 Accuracy: 0.4242 Precision: 0.3699 Recall: 0.4242 Loss: 1.3217
[Train] Class 0: Precision: 0.6978, Recall: 0.8917
[Train] Class 1: Precision: 0.2500, Recall: 0.2542
[Train] Class 2: Precision: 0.2033, Recall: 0.1542
[Train] Class 3:



Training resnet34 in single mode with Combined augmentations...

Epoch 1/5
Training: 100%|██████████| 38/38 [00:13<00:00,  2.77 batch/s, lr=1.0e-03, Loss=1.4773]
[Train] Kappa: 0.2986 Accuracy: 0.3767 Precision: 0.3368 Recall: 0.3767 Loss: 1.4455
[Train] Class 0: Precision: 0.6210, Recall: 0.7056
[Train] Class 1: Precision: 0.2607, Recall: 0.2792
[Train] Class 2: Precision: 0.2574, Recall: 0.3250
[Train] Class 3: Precision: 0.2345, Recall: 0.2208
[Train] Class 4: Precision: 0.0000, Recall: 0.0000
Evaluating: 100%|██████████| 13/13 [00:02<00:00,  5.30 batch/s]
[Val] Kappa: 0.1382 Accuracy: 0.4025 Precision: 0.2017 Recall: 0.4025

Epoch 2/5
Training: 100%|██████████| 38/38 [00:13<00:00,  2.82 batch/s, lr=1.0e-03, Loss=1.7805]
[Train] Kappa: 0.4566 Accuracy: 0.4158 Precision: 0.3787 Recall: 0.4158 Loss: 1.3682
[Train] Class 0: Precision: 0.7214, Recall: 0.7694
[Train] Class 1: Precision: 0.2611, Recall: 0.1958
[Train] Class 2: Precision: 0.2716, Recall: 0.2625
[Train] Class 3: Precision: 



Training resnet34 in dual mode with Default augmentations...

Epoch 1/5
Training: 100%|██████████| 19/19 [00:12<00:00,  1.49 batch/s, lr=1.0e-03, Loss=1.2195]
[Train] Kappa: 0.3323 Accuracy: 0.3767 Precision: 0.3417 Recall: 0.3767 Loss: 1.4594
[Train] Class 0: Precision: 0.5845, Recall: 0.7111
[Train] Class 1: Precision: 0.3130, Recall: 0.3000
[Train] Class 2: Precision: 0.1880, Recall: 0.1833
[Train] Class 3: Precision: 0.2806, Recall: 0.3250
[Train] Class 4: Precision: 0.1000, Recall: 0.0167
Evaluating: 100%|██████████| 7/7 [00:02<00:00,  2.86 batch/s]
[Val] Kappa: -0.0659 Accuracy: 0.2300 Precision: 0.3250 Recall: 0.2300

Epoch 2/5
Training: 100%|██████████| 19/19 [00:12<00:00,  1.50 batch/s, lr=1.0e-03, Loss=1.4348]
[Train] Kappa: 0.4121 Accuracy: 0.4217 Precision: 0.3852 Recall: 0.4217 Loss: 1.3808
[Train] Class 0: Precision: 0.6696, Recall: 0.8444
[Train] Class 1: Precision: 0.2137, Recall: 0.2083
[Train] Class 2: Precision: 0.2715, Recall: 0.3417
[Train] Class 3: Precision: 0.38



Training resnet34 in dual mode with With CutOut augmentations...

Epoch 1/5
Training: 100%|██████████| 19/19 [00:12<00:00,  1.48 batch/s, lr=1.0e-03, Loss=1.3646]
[Train] Kappa: 0.3419 Accuracy: 0.3733 Precision: 0.3629 Recall: 0.3733 Loss: 1.4685
[Train] Class 0: Precision: 0.5687, Recall: 0.6667
[Train] Class 1: Precision: 0.3529, Recall: 0.2000
[Train] Class 2: Precision: 0.2533, Recall: 0.3167
[Train] Class 3: Precision: 0.2500, Recall: 0.3167
[Train] Class 4: Precision: 0.2105, Recall: 0.0667
Evaluating: 100%|██████████| 7/7 [00:02<00:00,  2.57 batch/s]
[Val] Kappa: 0.3134 Accuracy: 0.4450 Precision: 0.3433 Recall: 0.4450

Epoch 2/5
Training: 100%|██████████| 19/19 [00:12<00:00,  1.48 batch/s, lr=1.0e-03, Loss=1.8302]
[Train] Kappa: 0.5124 Accuracy: 0.4500 Precision: 0.4120 Recall: 0.4500 Loss: 1.3473
[Train] Class 0: Precision: 0.7378, Recall: 0.9222
[Train] Class 1: Precision: 0.2660, Recall: 0.2083
[Train] Class 2: Precision: 0.2945, Recall: 0.3583
[Train] Class 3: Precision: 0



Training resnet34 in dual mode with With SLO Padding augmentations...

Epoch 1/5
Training: 100%|██████████| 19/19 [00:14<00:00,  1.32 batch/s, lr=1.0e-03, Loss=1.5375]
[Train] Kappa: 0.3777 Accuracy: 0.3933 Precision: 0.3618 Recall: 0.3933 Loss: 1.5030
[Train] Class 0: Precision: 0.6124, Recall: 0.7111
[Train] Class 1: Precision: 0.2621, Recall: 0.2250
[Train] Class 2: Precision: 0.2095, Recall: 0.1833
[Train] Class 3: Precision: 0.3436, Recall: 0.4667
[Train] Class 4: Precision: 0.1500, Recall: 0.0500
Evaluating: 100%|██████████| 7/7 [00:02<00:00,  2.98 batch/s]
[Val] Kappa: 0.1996 Accuracy: 0.3850 Precision: 0.2750 Recall: 0.3850

Epoch 2/5
Training: 100%|██████████| 19/19 [00:14<00:00,  1.32 batch/s, lr=1.0e-03, Loss=1.3256]
[Train] Kappa: 0.5437 Accuracy: 0.4567 Precision: 0.4304 Recall: 0.4567 Loss: 1.3544
[Train] Class 0: Precision: 0.7571, Recall: 0.8833
[Train] Class 1: Precision: 0.2920, Recall: 0.3333
[Train] Class 2: Precision: 0.2901, Recall: 0.3167
[Train] Class 3: Precisi



Training resnet34 in dual mode with With Fundus Rotation augmentations...

Epoch 1/5
Training: 100%|██████████| 19/19 [00:12<00:00,  1.47 batch/s, lr=1.0e-03, Loss=1.2770]
[Train] Kappa: 0.3089 Accuracy: 0.3567 Precision: 0.3267 Recall: 0.3567 Loss: 1.4545
[Train] Class 0: Precision: 0.6077, Recall: 0.7056
[Train] Class 1: Precision: 0.2162, Recall: 0.1333
[Train] Class 2: Precision: 0.2000, Recall: 0.2333
[Train] Class 3: Precision: 0.2500, Recall: 0.3500
[Train] Class 4: Precision: 0.1111, Recall: 0.0167
Evaluating: 100%|██████████| 7/7 [00:02<00:00,  2.91 batch/s]
[Val] Kappa: 0.4669 Accuracy: 0.4450 Precision: 0.2726 Recall: 0.4450

Epoch 2/5
Training: 100%|██████████| 19/19 [00:12<00:00,  1.46 batch/s, lr=1.0e-03, Loss=1.1044]
[Train] Kappa: 0.4802 Accuracy: 0.4367 Precision: 0.4002 Recall: 0.4367 Loss: 1.3585
[Train] Class 0: Precision: 0.6695, Recall: 0.8889
[Train] Class 1: Precision: 0.3333, Recall: 0.2500
[Train] Class 2: Precision: 0.2613, Recall: 0.2417
[Train] Class 3: Pre



Training resnet34 in dual mode with Combined augmentations...

Epoch 1/5
Training: 100%|██████████| 19/19 [00:13<00:00,  1.40 batch/s, lr=1.0e-03, Loss=1.4038]
[Train] Kappa: 0.2854 Accuracy: 0.3650 Precision: 0.3218 Recall: 0.3650 Loss: 1.5121
[Train] Class 0: Precision: 0.5982, Recall: 0.7278
[Train] Class 1: Precision: 0.2611, Recall: 0.3417
[Train] Class 2: Precision: 0.1920, Recall: 0.2000
[Train] Class 3: Precision: 0.2584, Recall: 0.1917
[Train] Class 4: Precision: 0.0000, Recall: 0.0000
Evaluating: 100%|██████████| 7/7 [00:02<00:00,  2.91 batch/s]
[Val] Kappa: 0.0000 Accuracy: 0.3000 Precision: 0.0900 Recall: 0.3000

Epoch 2/5
Training: 100%|██████████| 19/19 [00:13<00:00,  1.44 batch/s, lr=1.0e-03, Loss=1.4376]
[Train] Kappa: 0.4972 Accuracy: 0.4450 Precision: 0.3876 Recall: 0.4450 Loss: 1.3758
[Train] Class 0: Precision: 0.7091, Recall: 0.8667
[Train] Class 1: Precision: 0.2969, Recall: 0.1583
[Train] Class 2: Precision: 0.2593, Recall: 0.2917
[Train] Class 3: Precision: 0.31

In [None]:
from tabulate import tabulate
# Rank every run by its stored kappa score, best first.
sorted_results = sorted(results.items(), key=lambda entry: entry[1], reverse=True)

# Each key is split on '_' into its model / mode / augmentation columns.
transformed_array = [
    (fields[0], fields[1], fields[2], score)
    for fields, score in ((run_name.split('_'), run_score) for run_name, run_score in sorted_results)
]

# Render the ranking as a grid table.
print(tabulate(
    transformed_array,
    headers=["Model", "Mode", "Augmentation", "Kappa"],
    tablefmt="grid",
))

+----------+--------+----------------------+----------+
| Model    | Mode   | Augmentation         |    Kappa |
| resnet18 | single | With Fundus Rotation | 0.734333 |
+----------+--------+----------------------+----------+
| resnet18 | single | With SLO Padding     | 0.701235 |
+----------+--------+----------------------+----------+
| resnet18 | dual   | With Fundus Rotation | 0.685714 |
+----------+--------+----------------------+----------+
| resnet34 | dual   | With CutOut          | 0.679217 |
+----------+--------+----------------------+----------+
| resnet34 | single | With CutOut          | 0.678935 |
+----------+--------+----------------------+----------+
| resnet34 | single | Combined             | 0.677118 |
+----------+--------+----------------------+----------+
| resnet34 | dual   | With SLO Padding     | 0.666294 |
+----------+--------+----------------------+----------+
| resnet34 | dual   | Combined             | 0.666151 |
+----------+--------+----------------------+----

# Task B: Two stage training with additional dataset(s)

1.  Choose a diabetic retinopathy dataset from either Kaggle DR Resized or APTOS 2019 Blindness Detection (links are provided below).
2.  Fine-tune an ImageNet pretrained model (e.g., ResNet18, ResNet34, VGG, EfficientNet, DenseNet) on the selected dataset by unfreezing all pretrained layers. If you have any difficulties, you can also use pretrained weights of Kaggle DR Resized (pretrained_DR_resize) to skip this step: https://www.kaggle.com/competitions/521153S-3005-final-project/data

3.   Next up, fine-tune this trained model on the DeepDRiD dataset (keep all the layers unfrozen) and see how it impacts your Cohen Kappa score.

4.   Save the fine-tuned model.

The goal of task (b) is to compare two approaches: a deep model that is fine-tuned directly on the task-specific dataset (as in task (a)), and a model that is first trained on a general dataset and then fine-tuned on the same task-specific dataset.



Download diabetic retinopathy dataset from kaggle

In [None]:
from google.colab import files
# Upload the Kaggle API Token (kaggle.json). You should create this token from: https://www.kaggle.com/settings
files.upload()


In [None]:
# Move kaggle API token to correct folder and verify
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets list
!pip install --upgrade kagglehub




In [None]:
# Download the APTOS 2019 diabetic retinopathy dataset
!kaggle datasets download -d mariaherrerot/aptos2019 -p /content/dataset_task_b/aptos2019
# !kaggle datasets download -d tanlikesmath/diabetic-retinopathy-resized -p /content/dataset_task_b/retinopathy-resized


In [None]:
!unzip /content/dataset_task_b/aptos2019/*.zip -d /content/dataset_task_b//aptos2019
# !unzip /content/dataset_task_b/retinopathy-resized/*.zip -d /content/dataset_task_b/retinopathy-resized


In [None]:
# Remove zip file
# !rm /content/dataset_task_b/aptos2019/*.zip
# !rm /content/dataset_task_b/retinopathy-resized/*.zip



In [None]:
import os
import pandas as pd
from torch.utils.data import Dataset
from PIL import Image

class APTOS2019Dataset(Dataset):
    """Image dataset driven by a CSV with ``id_code`` and ``diagnosis`` columns.

    Every ``id_code`` is resolved to ``<image_dir>/<id_code>.png`` and the
    resulting path is kept in an extra ``img_path`` column of ``self.data``.

    Args:
        csv_file: Path of the CSV file, read with ``pandas.read_csv``.
        image_dir: Directory containing the ``.png`` images.
        transform: Optional callable applied to each loaded RGB image.
    """

    def __init__(self, csv_file, image_dir, transform=None):
        self.image_dir = image_dir
        self.transform = transform

        def resolve_path(id_code):
            # Images are stored as "<id_code>.png" directly under image_dir.
            return os.path.join(image_dir, f"{id_code}.png")

        frame = pd.read_csv(csv_file)
        frame['img_path'] = frame['id_code'].apply(resolve_path)
        self.data = frame

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at position ``index``."""
        record = self.data.iloc[index]
        picture = Image.open(record['img_path']).convert('RGB')
        if self.transform:
            picture = self.transform(picture)
        return picture, record['diagnosis']


In [None]:
# Training-time pipeline for Task B: resize, random crop, the custom
# SLORandomPad / FundRandomRotate augmentations (defined in another cell),
# random flips, brightness jitter, then tensor conversion and normalisation.
# NOTE: `transform_train_b` and `transform_test_b` are both assigned again in a
# later cell; whichever cell ran last determines the pipeline actually used.
transform_train_b = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop((210, 210)),
    # SLORandomPad receives (224, 224) after the 210x210 crop.
    # presumably it pads back up to 224x224 — TODO confirm against its definition
    SLORandomPad((224, 224)),
    FundRandomRotate(prob=0.5, degree=30),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    # NOTE(review): a (0.1, 0.9) brightness range looks like it only ever
    # darkens images — confirm this is intended.
    transforms.ColorJitter(brightness=(0.1, 0.9)),
    transforms.ToTensor(),
    # Mean/std values commonly used with ImageNet-pretrained backbones.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Evaluation-time pipeline: deterministic resize and normalisation only.
transform_test_b = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


def train_model_b(model, train_loader, val_loader, device, criterion, optimizer, lr_scheduler, num_epochs=5,
                checkpoint_path='model.pth'):
    """Train ``model`` and keep the weights with the best validation kappa.

    After every epoch the model is evaluated with ``evaluate_model_b``. When
    the validation kappa improves, a snapshot of the weights is taken and
    written to ``checkpoint_path``. Before returning, the best snapshot is
    loaded back into ``model`` so the returned model matches the reported
    score rather than holding whatever the last epoch produced.

    Args:
        model: Network to train; called as ``model(images)``.
        train_loader: Iterable yielding ``(images, labels)`` training batches.
        val_loader: Loader passed to ``evaluate_model_b`` each epoch.
        device: Device the batches are moved to.
        criterion: Loss called as ``criterion(outputs, labels.long())``.
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Scheduler stepped once per epoch.
        num_epochs: Number of epochs to run.
        checkpoint_path: File the best ``state_dict`` is saved to.

    Returns:
        Tuple ``(model, best_val_kappa)``.
    """
    # state_dict() returns references to the live tensors, so a deep copy is
    # required to obtain a snapshot that later optimizer steps cannot change.
    best_model = copy.deepcopy(model.state_dict())
    best_epoch = None
    best_val_kappa = -1.0

    for epoch in range(1, num_epochs + 1):
        print(f'\nEpoch {epoch}/{num_epochs}')
        running_loss = []
        all_preds = []
        all_labels = []
        model.train()

        with tqdm(total=len(train_loader), desc=f'Training', unit=' batch', file=sys.stdout) as pbar:
            for images, labels in train_loader:
                images = images.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels.long())
                loss.backward()
                optimizer.step()

                preds = torch.argmax(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
                running_loss.append(loss.item())
                pbar.set_postfix({'lr': f'{optimizer.param_groups[0]["lr"]:.1e}', 'Loss': f'{loss.item():.4f}'})
                pbar.update(1)

        lr_scheduler.step()
        epoch_loss = sum(running_loss) / len(running_loss)
        train_metrics = compute_metrics(all_preds, all_labels, per_class=True)
        kappa, accuracy, precision, recall = train_metrics[:4]

        print(f'[Train] Kappa: {kappa:.4f} Accuracy: {accuracy:.4f} '
              f'Precision: {precision:.4f} Recall: {recall:.4f} Loss: {epoch_loss:.4f}')

        if len(train_metrics) > 4:
            precision_per_class, recall_per_class = train_metrics[4:]
            # Distinct loop names so the aggregate precision/recall above are not overwritten.
            for i, (class_precision, class_recall) in enumerate(zip(precision_per_class, recall_per_class)):
                print(f'[Train] Class {i}: Precision: {class_precision:.4f}, Recall: {class_recall:.4f}')

        # Evaluation on the validation set at the end of each epoch
        val_metrics = evaluate_model_b(model, val_loader, device)
        val_kappa, val_accuracy, val_precision, val_recall = val_metrics[:4]
        print(f'[Val] Kappa: {val_kappa:.4f} Accuracy: {val_accuracy:.4f} '
              f'Precision: {val_precision:.4f} Recall: {val_recall:.4f}')

        if val_kappa > best_val_kappa:
            best_val_kappa = val_kappa
            best_epoch = epoch
            best_model = copy.deepcopy(model.state_dict())
            torch.save(best_model, checkpoint_path)

    # Restore the best weights only if some epoch improved on the initial score;
    # otherwise keep the trained weights instead of rolling back to the start.
    if best_epoch is not None:
        model.load_state_dict(best_model)

    print(f'[Val] Best kappa: {best_val_kappa:.4f}, Epoch {best_epoch}')
    return model, best_val_kappa

def evaluate_model_b(model, val_loader, device):
    """Evaluate ``model`` on ``val_loader`` and return the aggregate metrics.

    The model is put in eval mode, predictions are taken as the argmax over
    dimension 1 of the model output, and the collected predictions and labels
    are passed to ``compute_metrics``.

    Args:
        model: Network to evaluate; called as ``model(images)``.
        val_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the image batches are moved to.

    Returns:
        Whatever ``compute_metrics(all_preds, all_labels)`` returns.
    """
    model.eval()

    all_preds = []
    all_labels = []

    # The previous per-batch image-id lookup was never used and tied this
    # function to `val_loader.dataset.data` and `val_loader.batch_size`;
    # it has been removed so any (images, labels) loader works.
    with tqdm(total=len(val_loader), desc=f'Evaluating', unit=' batch', file=sys.stdout) as pbar:
        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(device)
                outputs = model(images)
                preds = torch.argmax(outputs, 1)

                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
                pbar.update(1)

    return compute_metrics(all_preds, all_labels)


def compute_metrics(preds, labels, per_class=False):
    """Compute classification metrics for a set of predictions.

    Args:
        preds: Predicted class labels.
        labels: Ground-truth class labels.
        per_class: When true, also return per-class precision and recall.

    Returns:
        ``(kappa, accuracy, precision, recall)`` where kappa is quadratically
        weighted and precision/recall are weighted averages. With
        ``per_class=True`` the tuple is extended by the per-class precision
        and per-class recall arrays.
    """
    overall = (
        cohen_kappa_score(labels, preds, weights='quadratic'),
        accuracy_score(labels, preds),
        precision_score(labels, preds, average='weighted', zero_division=0),
        recall_score(labels, preds, average='weighted', zero_division=0),
    )
    if not per_class:
        return overall

    # average=None yields one score per class instead of a single aggregate.
    class_precision = precision_score(labels, preds, average=None, zero_division=0)
    class_recall = recall_score(labels, preds, average=None, zero_division=0)
    return overall + (class_precision, class_recall)



In [None]:
# Task B configuration: device, backbones to try, and the transform pipelines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
models_to_test = ['resnet18']
# Maps "<backbone>_single_<augmentation>" to the best stage-2 kappa.
results_2_stage = {}
# Stage-1 (APTOS 2019) training pipeline. This reassigns the
# `transform_train_b` defined in an earlier cell, here without the
# SLORandomPad / FundRandomRotate / vertical-flip steps.
transform_train_b = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop((210, 210)),
    transforms.RandomHorizontalFlip(p=0.5),
    # NOTE(review): a (0.1, 0.9) brightness range looks like it only ever
    # darkens images — confirm this is intended.
    transforms.ColorJitter(brightness=(0.1, 0.9)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# Deterministic evaluation pipeline, used for both validation sets.
transform_test_b = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Baseline augmentation pipeline; it is not referenced by the Task B cells
# visible here.
default_transform_train = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomCrop((210, 210)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ColorJitter(brightness=(0.1, 0.9)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# (name, transform) pairs; the training loop runs once per entry and uses the
# transform for the stage-2 (DeepDRiD) training set.
augmentations = [

        ("With SLO Padding", transforms.Compose([
            transforms.Resize((256, 256)),
            SLORandomPad((224, 224)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.5),
            transforms.ColorJitter(brightness=(0.1, 0.9)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]))

    ]


In [None]:
def training_stage1(model_stage1, backbone_name):
    """Run the stage-1 training pass and return the trained model.

    Reads the notebook-level globals ``train_loader``, ``val_loader``,
    ``device``, ``criterion``, ``optimizer`` and ``lr_scheduler``, so those
    must be defined before this function is called.

    Args:
        model_stage1: Model to train; all of its parameters are unfrozen.
        backbone_name: Name used to build the stage-1 checkpoint file name.

    Returns:
        The model returned by ``train_model_b``.
    """
    print("Stage 1: Fine-tuning on the specific dataset")

    # Make every layer trainable before fine-tuning.
    for weight in model_stage1.parameters():
        weight.requires_grad = True

    trained_model, stage1_kappa = train_model_b(
        model_stage1,
        train_loader,
        val_loader,
        device,
        criterion,
        optimizer,
        lr_scheduler,
        num_epochs=5,
        checkpoint_path=f"./{backbone_name}_stage1_specific.pth",
    )

    print(f"Stage 1 completed. Best Kappa score on specific dataset: {stage1_kappa}")
    return trained_model



In [None]:
# Two-stage experiment: for every backbone / augmentation pair, train on
# APTOS 2019 first (stage 1) and then continue training the same model on
# DeepDRiD (stage 2), recording the best stage-2 validation kappa.
for backbone_name in models_to_test:
  for aug_name, transform_train in augmentations:

    # Stage-1 data (APTOS 2019). It always uses transform_train_b, so it does
    # not depend on the augmentation being iterated.
    train_csv_stage1 = './dataset_task_b/aptos2019/train_1.csv'
    val_csv_stage1 = './dataset_task_b/aptos2019/valid.csv'
    dataset_path_stage1 =  './dataset_task_b/aptos2019'

    train_dataset_stage1 = APTOS2019Dataset(csv_file=train_csv_stage1, image_dir=f"{dataset_path_stage1}/train_images/train_images", transform=transform_train_b)
    val_dataset_stage1 = APTOS2019Dataset(csv_file = val_csv_stage1, image_dir =  f"{dataset_path_stage1}/val_images/val_images", transform = transform_test_b )

    # These loader names are read as globals by training_stage1().
    train_loader = DataLoader(train_dataset_stage1, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset_stage1, batch_size=32, shuffle=False)

    # Stage-2 data (DeepDRiD); only the training set uses the loop's augmentation.
    train_dataset_stage2 = RetinopathyDataset('./DeepDRiD/train.csv', './DeepDRiD/train/', transform_train, mode='single')
    val_dataset_stage2 = RetinopathyDataset('./DeepDRiD/val.csv', './DeepDRiD/val/', transform_test_b, mode='single')
    train_loader_stage2 = DataLoader(train_dataset_stage2, batch_size=32, shuffle=True)
    val_loader_stage2 = DataLoader(val_dataset_stage2, batch_size=32, shuffle=False)
    # Fresh model and stage-1 optimisation objects; training_stage1() also
    # reads criterion / optimizer / lr_scheduler as globals.
    model_stage1 = get_model(backbone_name, mode='single').to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model_stage1.parameters(), lr=0.001)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    print(f"Training {backbone_name} in single mode with {aug_name} augmentations...")

    # train stage1: Fine-tune the model on APTOS 2019
    model_stage1 = training_stage1(model_stage1, backbone_name)
    # train stage2: Fine-tune the trained model on DeepDRiD dataset
    print("Stage 2: Fine-tuning on the DeepDRiD dataset")

    # Same model object as stage 1, with a new optimizer at a 10x lower learning rate.
    model_stage2 = model_stage1
    optimizer_stage2 = optim.Adam(model_stage2.parameters(), lr=0.0001)
    lr_scheduler_stage2 = optim.lr_scheduler.StepLR(optimizer_stage2, step_size=10, gamma=0.1)

    # NOTE(review): the stage-1, stage-2 and "final" checkpoint names contain
    # only the backbone name, so runs with different augmentations overwrite
    # each other's files.
    checkpoint_stage2 = f"./{backbone_name}_stage2_deepdrid.pth"
    # Stage 2 uses train_model (defined in another cell), not train_model_b.
    model_stage2, best_kappa_stage2 = train_model(
        model_stage2,
        train_loader_stage2,
        val_loader_stage2,
        device,
        criterion,
        optimizer_stage2,
        lr_scheduler_stage2,
        num_epochs=5,
        checkpoint_path=checkpoint_stage2
    )

    print(f"Stage 2 completed. Best Kappa score on DeepDRiD dataset: {best_kappa_stage2}")
    results_2_stage[f"{backbone_name}_single_{aug_name}"] = best_kappa_stage2
    # Saves the weights of the model object returned by train_model.
    torch.save(model_stage2.state_dict(), f"./{backbone_name}_stage2_final.pth")
    # Running summary, printed after every backbone / augmentation pair.
    best_model_name = max(results_2_stage, key=results_2_stage.get)
    print(f"Best model: {best_model_name} with score {results_2_stage[best_model_name]}")
    print("Results:", results_2_stage)




In [None]:
from tabulate import tabulate
# Rank the two-stage runs by their best stage-2 kappa, best first.
sorted_results = sorted(results_2_stage.items(), key=lambda entry: entry[1], reverse=True)

# Each key is split on '_' into its model / mode / augmentation columns.
transformed_array = [
    (fields[0], fields[1], fields[2], score)
    for fields, score in ((run_name.split('_'), run_score) for run_name, run_score in sorted_results)
]

# Render the ranking as a grid table.
print(tabulate(
    transformed_array,
    headers=["Model", "Mode", "Augmentation", "Kappa"],
    tablefmt="grid",
))

# Task B, Implementation 2: Two-stage training *with* pretrained weights

Same as Task B, but the provided pretrained weights are loaded instead of running stage-1 training, which saves time.

In [None]:
# download pretained weights folder
!pip install gdown
# Google Drive id of the pretrained-weights archive; the download URL is
# built from it so the id is stated in exactly one place.
file_id = "1vjR2rCvkv-BdUCRd-KD_49fXZ0QlXo1q"
url = f"https://drive.google.com/uc?id={file_id}"
output = "pretrained.zip"
gdown.download(url, output, quiet=False)

In [None]:
!unzip pretrained.zip

In [None]:

# Task B, implementation 2: start from the downloaded pretrained weights and
# fine-tune on DeepDRiD only.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# models_to_test = ['resnet18', 'resnet34', 'vgg16']
models_to_test = ['resnet34']
# Maps "<backbone>_<mode>_<augmentation>" to the best validation kappa.
results_with_pretained = {}

# Default transform_train for reference
default_transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop((210, 210)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ColorJitter(brightness=(0.1, 0.9)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# (name, transform) pairs; one training run is performed per entry.
augmentations = [
    ("With SLO Padding", transforms.Compose([
        transforms.Resize((256, 256)),
        SLORandomPad((224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ColorJitter(brightness=(0.1, 0.9)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]))
]

# Deterministic pipeline for the validation and test sets.
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

for backbone_name in models_to_test:
    for mode in ['single']:
        for aug_name, transform_train in augmentations:

            # Load datasets with augmentations
            train_dataset = RetinopathyDataset('./DeepDRiD/train.csv', './DeepDRiD/train/', transform_train, mode)
            val_dataset = RetinopathyDataset('./DeepDRiD/val.csv', './DeepDRiD/val/', transform_test, mode)
            test_dataset = RetinopathyDataset('./DeepDRiD/test.csv', './DeepDRiD/test/', transform_test, mode, test=True)

            train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
            val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
            test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

            # NOTE: skipping runs whose checkpoint file already exists was
            # prototyped here but is disabled, so every run retrains.
            checkpoint_path = f"./{backbone_name}_{mode}_{aug_name.replace(' ', '_')}.pth"

            model = get_model(backbone_name, mode).to(device)
            print("Model keys:", model.state_dict().keys())

            # Read the checkpoint once, mapping its tensors onto the active
            # device so weights saved on a GPU also load on a CPU-only runtime.
            state_dict = torch.load(f"pretrained/{backbone_name}.pth", map_location=device)
            print("Weight keys:", state_dict.keys())

            # load weights on model: checkpoint keys are prefixed with
            # "backbone." before loading.
            renamed_state_dict = {f"backbone.{k}": v for k, v in state_dict.items()}
            # strict=False ignores mismatched keys, so report how many did not
            # line up instead of silently training from partly-loaded weights.
            load_result = model.load_state_dict(renamed_state_dict, strict=False)
            print(f"Pretrained weights loaded with {len(load_result.missing_keys)} missing "
                  f"and {len(load_result.unexpected_keys)} unexpected keys")

            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=0.001)
            lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

            print(f"Training {backbone_name} in {mode} mode with {aug_name} augmentations...")
            model, best_kappa = train_model(model, train_loader, val_loader, device, criterion, optimizer, lr_scheduler, num_epochs=25, checkpoint_path=checkpoint_path)
            results_with_pretained[f"{backbone_name}_{mode}_{aug_name}"] = best_kappa

# Summarize results
best_model_name_b = max(results_with_pretained, key=results_with_pretained.get)
print(f"Best model: {best_model_name_b} with score { results_with_pretained[best_model_name_b]}")
print("Results:", results_with_pretained)

In [None]:
from tabulate import tabulate
# Rank the pretrained-weight runs by their best validation kappa, best first.
sorted_results = sorted(results_with_pretained.items(), key=lambda entry: entry[1], reverse=True)

# Each key is split on '_' into its model / mode / augmentation columns.
transformed_array = [
    (fields[0], fields[1], fields[2], score)
    for fields, score in ((run_name.split('_'), run_score) for run_name, run_score in sorted_results)
]

# Render the ranking as a grid table.
print(tabulate(
    transformed_array,
    headers=["Model", "Mode", "Augmentation", "Kappa"],
    tablefmt="grid",
))

Combine with task E for visualization

In [None]:
# Plot the result for analysis
import matplotlib.pyplot as plt
import torch.nn.functional as F
# Run evaluate_model on the test loader in test-only mode; the return value is
# discarded. `model` and `test_loader` are whatever the previous training cell
# left behind.
# presumably this writes or prints test-set predictions — TODO confirm against evaluate_model
evaluate_model(model, test_loader, device, test_only=True)

def plot_history(history):
    """Show the train/validation accuracy and loss curves, one figure each.

    Args:
        history: Mapping with ``train_accuracy``, ``val_accuracy``,
            ``train_loss`` and ``val_loss`` sequences, one value per epoch.
    """
    # One row per figure: (train key, train label, val key, val label,
    # y-axis label, title). Accuracy is drawn first, then loss.
    panels = (
        ('train_accuracy', 'Train Accuracy', 'val_accuracy', 'Validation Accuracy',
         'Accuracy', 'Training and Validation Accuracy'),
        ('train_loss', 'Train Loss', 'val_loss', 'Validation Loss',
         'Loss', 'Training and Validation Loss'),
    )

    for train_key, train_label, val_key, val_label, y_label, title in panels:
        plt.figure(figsize=(12, 6))
        plt.plot(history[train_key], label=train_label, marker='o')
        plt.plot(history[val_key], label=val_label, marker='o')
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()
        plt.grid()
        plt.show()

In [None]:
def train_model(model, train_loader, val_loader, device, criterion, optimizer, lr_scheduler, num_epochs=25,
                checkpoint_path='model.pth'):
    """Train ``model`` and keep the weights of the epoch with the best validation kappa.

    Every epoch makes one pass over ``train_loader``, steps ``lr_scheduler`` once and
    scores the model on ``val_loader`` through ``evaluate_model``. Whenever the validation
    kappa improves, the weights are snapshotted and written to ``checkpoint_path``.

    Args:
        model: Network to optimise. It is called with a batch tensor, or with a list of
            tensors when the loader yields the images as a list.
        train_loader: Iterable of ``(images, labels)`` batches.
        val_loader: Loader handed to ``evaluate_model`` after each epoch.
        device: Device the batches are moved to.
        criterion: Loss, called as ``criterion(outputs, labels.long())``.
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Scheduler stepped once per epoch.
        num_epochs: Number of epochs to run.
        checkpoint_path: File that receives the best weights.

    Returns:
        ``(model, history)``. ``history`` holds the per-epoch lists ``train_loss``,
        ``train_accuracy``, ``val_loss``, ``val_accuracy`` and ``val_kappa`` plus the
        scalars ``best_kappa`` and ``best_epoch``. If any epoch set a best score, ``model``
        is returned with that epoch's weights loaded, so it matches the checkpoint file.
    """
    # state_dict() hands back references to the live tensors, which keep changing while
    # training continues; a deep copy is needed to really freeze a snapshot.
    best_model = copy.deepcopy(model.state_dict())
    best_epoch = None
    best_val_kappa = -1.0  # Initialize the best kappa score

    # Initialize containers for storing metrics
    history = {
        'train_loss': [],
        'train_accuracy': [],
        'val_loss': [],
        'val_accuracy': [],
        'val_kappa': [],  # Store kappa scores for validation
        'best_kappa': None,  # Best kappa score
        'best_epoch': None,  # Epoch with best kappa
    }

    for epoch in range(1, num_epochs + 1):
        print(f'\nEpoch {epoch}/{num_epochs}')
        running_loss = []
        all_preds = []
        all_labels = []

        # evaluate_model() switches to eval mode, so training mode is re-enabled every epoch.
        model.train()

        with tqdm(total=len(train_loader), desc=f'Training', unit=' batch', file=sys.stdout) as pbar:
            for images, labels in train_loader:
                if not isinstance(images, list):
                    images = images.to(device)  # single image case
                else:
                    images = [x.to(device) for x in images]  # dual images case

                labels = labels.to(device)

                optimizer.zero_grad()

                outputs = model(images)
                loss = criterion(outputs, labels.long())

                loss.backward()
                optimizer.step()

                preds = torch.argmax(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

                running_loss.append(loss.item())

                pbar.set_postfix({'lr': f'{optimizer.param_groups[0]["lr"]:.1e}', 'Loss': f'{loss.item():.4f}'})
                pbar.update(1)

        lr_scheduler.step()

        # Calculate epoch-level metrics
        epoch_loss = sum(running_loss) / len(running_loss)
        train_metrics = compute_metrics(all_preds, all_labels, per_class=False)
        train_kappa, train_accuracy, _, _ = train_metrics[:4]

        print(f'[Train] Kappa: {train_kappa:.4f} Accuracy: {train_accuracy:.4f} Loss: {epoch_loss:.4f}')

        # Store training metrics
        history['train_loss'].append(epoch_loss)
        history['train_accuracy'].append(train_accuracy)

        # Validation
        val_loss, val_metrics = evaluate_model(model, val_loader, device, return_loss=True)
        val_kappa, val_accuracy, _, _ = val_metrics[:4]
        print(f'[Val] Kappa: {val_kappa:.4f} Accuracy: {val_accuracy:.4f} Loss: {val_loss:.4f}')

        # Store validation metrics
        history['val_loss'].append(val_loss)
        history['val_accuracy'].append(val_accuracy)
        history['val_kappa'].append(val_kappa)

        # Save the best model
        if val_kappa > best_val_kappa:
            best_val_kappa = val_kappa
            best_epoch = epoch
            best_model = copy.deepcopy(model.state_dict())
            torch.save(best_model, checkpoint_path)

    # Store best kappa and epoch in history
    history['best_kappa'] = best_val_kappa
    history['best_epoch'] = best_epoch

    print(f'[Val] Best kappa: {best_val_kappa:.4f}, Epoch {best_epoch}')

    # Hand back the best-epoch weights rather than the last-epoch ones, so that test
    # predictions (and any re-save by the caller) agree with the reported best kappa.
    if best_epoch is not None:
        model.load_state_dict(best_model)

    return model, history


def evaluate_model(model, test_loader, device, test_only=False, prediction_path='./test_predictions.csv', return_loss=False):
    """Run ``model`` over ``test_loader`` and either score it or export its predictions.

    Args:
        model: Network to evaluate; it is put into eval mode.
        test_loader: Loader yielding ``(images, labels)`` batches, or only ``images`` when
            ``test_only`` is set. ``images`` is a tensor (single-image case) or a list of
            tensors (dual-image case).
        device: Device the images are moved to.
        test_only: When true, no labels are read; predictions are written to
            ``prediction_path`` as a CSV with columns ``ID`` and ``TARGET``.
        prediction_path: Destination of that CSV.
        return_loss: When true (and not ``test_only``), also return the mean of the
            per-batch cross-entropy losses.

    Returns:
        ``None`` when ``test_only`` is set; otherwise the result of ``compute_metrics``,
        or ``(avg_loss, metrics)`` when ``return_loss`` is set (``avg_loss`` is ``None``
        if the loader produced no batches).
    """
    model.eval()

    all_preds = []
    all_labels = []
    all_image_ids = []
    running_loss = []  # To calculate loss if return_loss is True

    # Build the loss module once instead of once per batch.
    track_loss = return_loss and not test_only
    loss_fn = nn.CrossEntropyLoss() if track_loss else None
    batch_size = test_loader.batch_size

    with tqdm(total=len(test_loader), desc=f'Evaluating', unit=' batch', file=sys.stdout) as pbar, torch.no_grad():
        for i, data in enumerate(test_loader):

            if test_only:
                images = data
            else:
                images, labels = data

            is_dual = isinstance(images, list)
            if is_dual:
                images = [x.to(device) for x in images]  # dual images case
            else:
                images = images.to(device)  # single image case

            outputs = model(images)
            batch_preds = torch.argmax(outputs, 1).cpu().numpy()

            if track_loss:
                running_loss.append(loss_fn(outputs, labels.to(device).long()).item())

            # In the dual case one prediction covers both images of a pair, so it is
            # recorded once per image (with that image's own file name / label copy).
            views = images if is_dual else [images]
            path_keys = ('img_path1', 'img_path2') if is_dual else ('img_path',)
            for path_key, view in zip(path_keys, views):
                all_preds.extend(batch_preds)
                if test_only:
                    # File names are recovered from the position in the dataset, which
                    # assumes the loader iterates it in order (shuffle=False).
                    first = i * batch_size
                    all_image_ids.extend(
                        os.path.basename(test_loader.dataset.data[idx][path_key])
                        for idx in range(first, first + len(view))
                    )
                else:
                    all_labels.extend(labels.cpu().numpy())

            pbar.update(1)

    # Save predictions to CSV file for Kaggle online evaluation
    if test_only:
        df = pd.DataFrame({
            'ID': all_image_ids,
            'TARGET': all_preds
        })
        df.to_csv(prediction_path, index=False)
        print(f'[Test] Predictions saved to {os.path.abspath(prediction_path)}')
    else:
        metrics = compute_metrics(all_preds, all_labels)
        if return_loss:
            avg_loss = sum(running_loss) / len(running_loss) if running_loss else None
            return avg_loss, metrics
        return metrics

In [None]:

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# models_to_test = ['resnet18', 'resnet34', 'vgg16']
models_to_test = ['resnet18']
results_with_pretained = {}

# Evaluation pipeline: plain resize, tensor conversion and normalisation, no augmentation.
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Default transform_train for reference (not part of `augmentations` below)
default_transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop((210, 210)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ColorJitter(brightness=(0.1, 0.9)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# (label, training pipeline) pairs; the label becomes part of result keys and checkpoint names.
augmentations = [
    (
        "With SLO Padding",
        transforms.Compose([
            transforms.Resize((256, 256)),
            SLORandomPad((224, 224)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.5),
            transforms.ColorJitter(brightness=(0.1, 0.9)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]),
    ),
]

for backbone_name in models_to_test:
    for mode in ['single']:
        for aug_name, transform_train in augmentations:

            # Load datasets with augmentations
            train_dataset = RetinopathyDataset('./DeepDRiD/train.csv', './DeepDRiD/train/', transform_train, mode)
            val_dataset = RetinopathyDataset('./DeepDRiD/val.csv', './DeepDRiD/val/', transform_test, mode)
            test_dataset = RetinopathyDataset('./DeepDRiD/test.csv', './DeepDRiD/test/', transform_test, mode, test=True)

            train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
            val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
            test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

            checkpoint_path = f"./{backbone_name}_{mode}_{aug_name.replace(' ', '_')}.pth"
            # Disabled: skip configurations whose checkpoint already exists.
            # if os.path.exists(checkpoint_path):
            #     print(f"Checkpoint for {backbone_name} in {mode} mode with {aug_name} augmentation already exists. Skipping training.")
            #     continue

            model = get_model(backbone_name, mode).to(device)

            print("Model keys:", model.state_dict().keys())

            # Read the pretrained file once. map_location lets a file that was saved from
            # a GPU be loaded on a CPU-only machine as well.
            state_dict = torch.load(f"pretrained/{backbone_name}.pth", map_location=device)
            print("Weight keys:", state_dict.keys())

            # load weights on model: the pretrained keys get a "backbone." prefix so that
            # they line up with the keys of the model built by get_model.
            renamed_state_dict = {f"backbone.{k}": v for k, v in state_dict.items()}
            # strict=False drops mismatching keys silently, so report what was not loaded.
            load_report = model.load_state_dict(renamed_state_dict, strict=False)
            print("Missing keys:", load_report.missing_keys)
            print("Unexpected keys:", load_report.unexpected_keys)

            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=0.001)
            lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

            print(f"Training {backbone_name} in {mode} mode with {aug_name} augmentations...")
            model, history = train_model(model, train_loader, val_loader, device, criterion, optimizer, lr_scheduler, num_epochs=25, checkpoint_path=checkpoint_path)
            results_with_pretained[f"{backbone_name}_{mode}_{aug_name}"] = history['best_kappa']
            # Export test predictions, then plot history
            evaluate_model(model, test_loader, device, test_only=True)
            plot_history(history)

 # Summarize results
# Pick the run with the highest best-kappa, report it, then dump every score.
best_model_name_b = max(results_with_pretained, key=lambda run_name: results_with_pretained[run_name])
print(f"Best model: {best_model_name_b} with score {results_with_pretained[best_model_name_b]}")
print("Results:", results_with_pretained)

# Task C: Incorporate attention mechanisms in the model
1. Implement attention mechanisms (e.g., self-attention, channel attention, or spatial attention)
in your DeepDRiD model architecture.
2. Evaluate the impact of attention mechanisms on model performance.

The goal of Task C is to apply attention mechanisms and observe how they affect the model and its performance.

In [None]:
# Self-Attention Model
class MyModelSelfAttention(nn.Module):
    """Backbone followed by a self-attention block and an MLP classification head.

    The backbone's ``fc`` attribute is replaced by ``nn.Identity`` so the backbone returns
    features instead of class scores (assumes the backbone exposes its head as ``.fc``
    -- TODO confirm for backbones other than the ResNet variants used here).

    NOTE: when the backbone returns a flat ``(batch, feature_dim)`` vector, the attention
    runs on a 1x1 map. The softmax then covers a single position and is always 1, so the
    block reduces to ``gamma * value(x) + x``. Pass a backbone that returns
    ``(batch, feature_dim, H, W)`` feature maps to get attention across positions.

    Args:
        backbone: Feature extractor module.
        num_classes: Number of output classes.
        dropout_rate: Dropout probability used in the head.
        feature_dim: Channel count of the backbone features (512 matches the previous
            hard-coded value).
    """

    def __init__(self, backbone, num_classes=5, dropout_rate=0.5, feature_dim=512):
        super().__init__()

        self.backbone = backbone
        self.backbone.fc = nn.Identity()

        # Query/key use a reduced width (feature_dim / 8, i.e. 64 for 512 features).
        attn_dim = max(feature_dim // 8, 1)
        self.query = nn.Conv2d(feature_dim, attn_dim, kernel_size=1)
        self.key = nn.Conv2d(feature_dim, attn_dim, kernel_size=1)
        self.value = nn.Conv2d(feature_dim, feature_dim, kernel_size=1)
        # Zero-initialised residual gate: the block starts out as an identity mapping.
        self.gamma = nn.Parameter(torch.zeros(1))

        self.fc = nn.Sequential(
            nn.Linear(feature_dim, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_rate),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_rate),
            nn.Linear(128, num_classes)
        )

    def self_attention(self, x):
        """Apply gated self-attention over the H*W positions of a ``(B, C, H, W)`` tensor."""
        batch_size, C, H, W = x.size()
        query = self.query(x).view(batch_size, -1, H * W).permute(0, 2, 1)
        key = self.key(x).view(batch_size, -1, H * W)
        energy = torch.bmm(query, key)
        # torch.softmax avoids relying on the `F` alias imported in a different cell.
        attention = torch.softmax(energy, dim=-1)
        value = self.value(x).view(batch_size, -1, H * W)

        out = torch.bmm(value, attention.permute(0, 2, 1))
        out = out.view(batch_size, C, H, W)
        return self.gamma * out + x

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        x = self.backbone(x)
        if x.dim() == 2:
            # Flat features: treat them as a 1x1 feature map.
            x = x.unsqueeze(-1).unsqueeze(-1)
        x = self.self_attention(x)
        # Global average pooling; a no-op for 1x1 maps, so flat-feature behaviour is unchanged.
        x = nn.functional.adaptive_avg_pool2d(x, 1).flatten(1)
        x = self.fc(x)
        return x

# Channel-Attention Model
class MyModelChannelAttention(nn.Module):
    """Backbone followed by a channel-attention gate and an MLP classification head.

    The backbone's ``fc`` attribute is replaced by ``nn.Identity`` so the backbone returns
    features instead of class scores (assumes the backbone exposes its head as ``.fc``
    -- TODO confirm for backbones other than the ResNet variants used here).

    NOTE: with a flat ``(batch, feature_dim)`` backbone output the gate sees a 1x1 map, so
    the average- and max-pooled descriptors are identical.

    Args:
        backbone: Feature extractor module.
        num_classes: Number of output classes.
        dropout_rate: Dropout probability used in the head.
        feature_dim: Channel count of the backbone features (512 matches the previous
            hard-coded value).
    """

    def __init__(self, backbone, num_classes=5, dropout_rate=0.5, feature_dim=512):
        super().__init__()

        self.backbone = backbone
        self.backbone.fc = nn.Identity()

        # Bottleneck of feature_dim / 16 channels (32 for 512 features).
        hidden_dim = max(feature_dim // 16, 1)
        self.fc1 = nn.Conv2d(feature_dim, hidden_dim, kernel_size=1)
        self.fc2 = nn.Conv2d(hidden_dim, feature_dim, kernel_size=1)

        self.fc = nn.Sequential(
            nn.Linear(feature_dim, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_rate),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_rate),
            nn.Linear(128, num_classes)
        )

    def channel_attention(self, x):
        """Scale every channel of a ``(B, C, H, W)`` tensor by a sigmoid gate in (0, 1)."""
        # nn.functional / torch.relu avoid relying on the `F` alias imported in another cell.
        avg_pool = nn.functional.adaptive_avg_pool2d(x, 1)
        max_pool = nn.functional.adaptive_max_pool2d(x, 1)
        # Both descriptors go through the same two-layer bottleneck.
        avg_out = self.fc2(torch.relu(self.fc1(avg_pool)))
        max_out = self.fc2(torch.relu(self.fc1(max_pool)))
        scale = torch.sigmoid(avg_out + max_out)
        return x * scale

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        x = self.backbone(x)
        if x.dim() == 2:
            # Flat features: treat them as a 1x1 feature map.
            x = x.unsqueeze(-1).unsqueeze(-1)
        x = self.channel_attention(x)
        # Global average pooling; a no-op for 1x1 maps, so flat-feature behaviour is unchanged.
        x = nn.functional.adaptive_avg_pool2d(x, 1).flatten(1)
        x = self.fc(x)
        return x

# Spatial-Attention Model
class MyModelSpatialAttention(nn.Module):
    """Backbone followed by a spatial-attention gate and an MLP classification head.

    The backbone's ``fc`` attribute is replaced by ``nn.Identity`` so the backbone returns
    features instead of class scores (assumes the backbone exposes its head as ``.fc``
    -- TODO confirm for backbones other than the ResNet variants used here).

    NOTE: with a flat ``(batch, feature_dim)`` backbone output there is only one spatial
    position, so the gate is a single scalar per sample. Pass a backbone that returns
    ``(batch, feature_dim, H, W)`` feature maps to weight positions individually.

    Args:
        backbone: Feature extractor module.
        num_classes: Number of output classes.
        dropout_rate: Dropout probability used in the head.
        feature_dim: Channel count of the backbone features (512 matches the previous
            hard-coded value).
    """

    def __init__(self, backbone, num_classes=5, dropout_rate=0.5, feature_dim=512):
        super().__init__()

        self.backbone = backbone
        self.backbone.fc = nn.Identity()

        # 2 input channels: the channel-wise mean map and the channel-wise max map.
        self.conv = nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False)

        self.fc = nn.Sequential(
            nn.Linear(feature_dim, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_rate),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_rate),
            nn.Linear(128, num_classes)
        )

    def spatial_attention(self, x):
        """Scale every position of a ``(B, C, H, W)`` tensor by a sigmoid gate in (0, 1)."""
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        scale = torch.sigmoid(self.conv(torch.cat([avg_out, max_out], dim=1)))
        return x * scale

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        x = self.backbone(x)
        if x.dim() == 2:
            # Flat features: treat them as a 1x1 feature map.
            x = x.unsqueeze(-1).unsqueeze(-1)
        x = self.spatial_attention(x)
        # Global average pooling; a no-op for 1x1 maps, so flat-feature behaviour is unchanged.
        x = nn.functional.adaptive_avg_pool2d(x, 1).flatten(1)
        x = self.fc(x)
        return x


In [None]:
import torch.optim as optim
from tabulate import tabulate
import torch.nn.functional as F

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Fixed parameters
backbone_name = "resnet18"
mode = "single"  # Assuming single input for simplicity
aug_name = "With SLO Padding"

# Training pipeline: resize, SLO padding, random flips and brightness jitter.
transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    SLORandomPad((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ColorJitter(brightness=(0.1, 0.9)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Evaluation pipeline: no augmentation.
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load datasets
train_dataset = RetinopathyDataset('./DeepDRiD/train.csv', './DeepDRiD/train/', transform_train, mode)
val_dataset = RetinopathyDataset('./DeepDRiD/val.csv', './DeepDRiD/val/', transform_test, mode)
test_dataset = RetinopathyDataset('./DeepDRiD/test.csv', './DeepDRiD/test/', transform_test, mode, test=True)

# Only the training split is shuffled; evaluation relies on the dataset order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define attention-enhanced models (display name -> model class)
attention_models = dict(
    SelfAttention=MyModelSelfAttention,
    ChannelAttention=MyModelChannelAttention,
    SpatialAttention=MyModelSpatialAttention,
)

results = {}

for attn_name, model_class in attention_models.items():
    print(f"Testing {backbone_name} with {attn_name} attention...")

    # Instantiate model
    backbone = get_model(backbone_name, mode)  # Load backbone (e.g., ResNet18)
    model = model_class(backbone=backbone).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    # Define checkpoint path
    checkpoint_path = f"./{backbone_name}_{mode}_{aug_name.replace(' ', '_')}_{attn_name}.pth"

    # Skip training if checkpoint exists
    if os.path.exists(checkpoint_path):
        print(f"Checkpoint for {attn_name} attention already exists. Skipping training.")
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        # Re-score the restored model so it still takes part in the comparison below;
        # the first entry of the metrics returned by evaluate_model is the kappa.
        best_kappa = evaluate_model(model, val_loader, device)[0]
    else:
        # Train the model. train_model returns (model, history) and already writes the
        # best-kappa weights to checkpoint_path.
        model, history = train_model(model, train_loader, val_loader, device, criterion, optimizer, lr_scheduler, num_epochs=5, checkpoint_path=checkpoint_path)
        best_kappa = history['best_kappa']
        # Do not re-save here: that would overwrite the best checkpoint with whatever
        # weights the model currently holds. Load the best weights for the test run instead.
        if os.path.exists(checkpoint_path):
            model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    results[attn_name] = best_kappa
    print(f"{attn_name} Attention best validation Kappa: {best_kappa:.4f}")

    # Evaluate the model
    # NOTE: every iteration writes to evaluate_model's default prediction file, so only
    # the predictions of the last attention model remain on disk.
    evaluate_model(model, test_loader, device, test_only=True)


# Summarize results
best_attention = max(results, key=results.get)
print(f"Best Attention Mechanism: {best_attention} with Kappa: {results[best_attention]:.4f}")
print("Results:")
print(tabulate([(k, v) for k, v in results.items()], headers=["Attention", "Kappa"], tablefmt="pretty"))


In [None]:
# Instantiate model
# Name this run explicitly. Relying on the `attn_name` / `checkpoint_path` left over from
# the comparison loop would file this self-attention model under the last name of that loop.
attn_name = "SelfAttention"
checkpoint_path = f"./{backbone_name}_{mode}_{aug_name.replace(' ', '_')}_{attn_name}.pth"

backbone = get_model(backbone_name, mode)  # Load backbone (e.g., ResNet18)
model = MyModelSelfAttention(backbone=backbone).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)


# Train the model
# NOTE(review): `num_epochs` is not assigned in this cell; it must already exist in the
# notebook namespace -- confirm before running.
model, history = train_model(model, train_loader, val_loader, device, criterion, optimizer, lr_scheduler, num_epochs=num_epochs, checkpoint_path=checkpoint_path)
# train_model returns (model, history) and already wrote the best-kappa weights to
# checkpoint_path; reload them instead of overwriting the file with the current weights.
if os.path.exists(checkpoint_path):
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
best_kappa = history['best_kappa']
results[attn_name] = best_kappa
print(f"{attn_name} Attention best validation Kappa: {best_kappa:.4f}")

# Evaluate the model
evaluate_model(model, test_loader, device, test_only=True)

# Task D: Compare the performance of different models and strategies
1. Use at least three transfer models that you've trained using task B and perform ensemble learning. Try out the following ensemble techniques (Stacking, Boosting, Weighted Average, Max Voting, Bagging) and analyze whether the performance increases or not.
2. Try out different image preprocessing techniques, such as Ben Graham preprocessing, circle cropping, CLAHE, Gaussian blur, and image sharpening.

The goal of task D is to perform ensemble learning by training various models and combining their predictions and analyzing whether it boosts the performance. Along with that, applying multiple preprocessing techniques to see if that has any effect on the model.

In [None]:
# Download the model weights trained from Task B
import gdown

file_id = "1lsF9AD0s9-cLdBa4ROj9_x-JGpB4stcu"
# Build the URL from file_id so the id is written in exactly one place.
url = f"https://drive.google.com/uc?id={file_id}"
output = "saved_model_task_b.zip"
gdown.download(url, output, quiet=False)

In [None]:
!unzip saved_model_task_b.zip

In [None]:
def load_model(model_name, num_classes, device):
    """Build a model with ``get_model`` in 'single' mode and move it to ``device``."""
    # assuming mode is always 'single'
    single_mode_model = get_model(model_name, 'single', num_classes)
    single_mode_model = single_mode_model.to(device)
    return single_mode_model

def load_model_weights(model, weights_path, device='cpu'):
    """Load the state dict stored at ``weights_path`` into ``model`` and return the model.

    The tensors are mapped to ``device`` while loading and the model is moved there too.
    NOTE: ``torch.load`` unpickles the file, so only load checkpoints from trusted sources.
    """
    saved_state = torch.load(weights_path, map_location=device)
    model.load_state_dict(saved_state)
    model.to(device)
    return model

In [None]:
import torch.nn as nn
import torch
from torchvision import transforms
import cv2
import numpy as np
from sklearn.metrics import cohen_kappa_score
from torch.utils.data import DataLoader

# Task D: Compare performance using preprocessing and ensemble learning

# Dataset mode passed to RetinopathyDataset further down in this cell.
mode = 'single'

# Load and Train Models with Preprocessing and Ensembles
# Here the device is a plain string ('cuda' or 'cpu'), not a torch.device object.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Preprocessing Functions
def ben_graham_preprocessing(image, sigma=10):
    """Apply Ben Graham's local-average-colour subtraction to ``image``.

    The image is blurred with a Gaussian of standard deviation ``sigma`` and the blur is
    subtracted from the original: ``4 * image - 4 * blur + 128``. This removes slow
    lighting variations and keeps fine detail around mid-gray. A plain Gaussian blur on
    its own, as this function did before, smooths the image and does the opposite.

    Args:
        image: Image array as accepted by OpenCV.
        sigma: Standard deviation of the Gaussian used as the local average.

    Returns:
        The processed image, same shape and dtype as ``image``.
    """
    # Kernel size (0, 0) lets OpenCV derive the kernel size from sigma.
    local_average = cv2.GaussianBlur(image, (0, 0), sigma)
    return cv2.addWeighted(image, 4, local_average, -4, 128)

def circle_cropping(image):
    """Black out everything outside the largest centred circle that fits inside ``image``."""
    height, width = image.shape[:2]
    cx, cy = int(width / 2), int(height / 2)
    # Distance from the centre to the nearest image border.
    radius = min(cx, cy, height - cy, width - cx)
    circle_mask = np.zeros((height, width), dtype=np.uint8)
    cv2.circle(circle_mask, (cx, cy), radius, (255, 255, 255), -1)  # thickness -1 = filled
    return cv2.bitwise_and(image, image, mask=circle_mask)

def apply_clahe(image):
    """Equalise the contrast of a BGR image with CLAHE on its lightness channel.

    The image is converted to LAB, CLAHE (clip limit 2.0, 8x8 tiles) is applied to the L
    channel only, and the result is converted back to BGR, so the caller gets an image in
    the same colour space it passed in.
    """
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    cl = clahe.apply(l)
    # Convert back: returning the merged LAB array would hand LAB data to code that
    # expects BGR.
    return cv2.cvtColor(cv2.merge((cl, a, b)), cv2.COLOR_LAB2BGR)

def sharpen_image(image):
    """Sharpen ``image`` by convolving it with a 3x3 kernel (centre 5, 4-neighbours -1)."""
    sharpen_kernel = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0],
    ])
    # ddepth -1 keeps the depth of the input image.
    sharpened = cv2.filter2D(image, -1, sharpen_kernel)
    return sharpened

# Ensemble Model
class EnsembleModel(nn.Module):
    """Combine the class scores of several models into one ``(batch, classes)`` output.

    Args:
        models: Sequence of modules that all return ``(batch, classes)`` scores for the
            same input.
        ensemble_type: ``'weighted_average'`` (weighted sum of the scores) or
            ``'max_voting'`` (majority vote over the predicted classes).
        weights: One weight per model for ``'weighted_average'``; uniform weights are
            used when omitted or empty.

    Raises:
        ValueError: If the number of weights differs from the number of models, or, at
            call time, if ``ensemble_type`` is unknown.
    """

    def __init__(self, models, ensemble_type='weighted_average', weights=None):
        super(EnsembleModel, self).__init__()
        self.models = nn.ModuleList(models)  # Ensure models are registered as submodules
        self.ensemble_type = ensemble_type
        # Check for None/empty explicitly: plain truthiness is ambiguous for arrays/tensors.
        if weights is None or len(weights) == 0:
            self.weights = [1 / len(models)] * len(models)
        else:
            self.weights = list(weights)
        if len(self.weights) != len(self.models):
            raise ValueError(
                f"Expected {len(self.models)} weights, got {len(self.weights)}"
            )

    def forward(self, x):
        """Return the combined ``(batch, classes)`` scores for input ``x``."""
        stacked_preds = torch.stack([model(x) for model in self.models], dim=0)  # (models, batch, classes)

        if self.ensemble_type == 'weighted_average':
            weights = torch.as_tensor(
                self.weights, dtype=stacked_preds.dtype, device=stacked_preds.device
            ).view(-1, 1, 1)
            return (stacked_preds * weights).sum(dim=0)
        elif self.ensemble_type == 'max_voting':
            # Vote on each model's predicted class. Taking the mode of the raw scores
            # does not work: float scores almost never coincide across models.
            num_classes = stacked_preds.size(-1)
            votes = nn.functional.one_hot(stacked_preds.argmax(dim=-1), num_classes).sum(dim=0)
            # The mean probability is below one vote, so it only breaks ties between
            # classes with equal vote counts; it also keeps the output differentiable.
            mean_probs = stacked_preds.softmax(dim=-1).mean(dim=0)
            return votes.to(stacked_preds.dtype) + mean_probs
        else:
            raise ValueError(f"Unknown ensemble type: {self.ensemble_type}")

# Load datasets (Assumed that RetinopathyDataset is already implemented)
# transform_train / transform_test are whatever earlier cells left in scope.
train_dataset = RetinopathyDataset('./DeepDRiD/train.csv', './DeepDRiD/train/', transform_train, mode)
val_dataset = RetinopathyDataset('./DeepDRiD/val.csv', './DeepDRiD/val/', transform_test, mode)
test_dataset = RetinopathyDataset('./DeepDRiD/test.csv', './DeepDRiD/test/', transform_test, mode, test=True)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Preprocessing functions (not used here directly but can be applied on images)
preprocess_functions = [
    ("Ben Graham", ben_graham_preprocessing),
    ("Circle Cropping", circle_cropping),
    ("CLAHE", apply_clahe),
    ("Sharpening", sharpen_image),
]

# List of models to test, created through get_model
models_list = [
    get_model(ensemble_backbone, 'single', num_classes=5)
    for ensemble_backbone in ('resnet18', 'resnet34', 'vgg16')
]

# Checkpoint of each model, in the same order as models_list
model_weights_paths = [
    'saved_model_task_b/resnet18_single_With_SLO_Padding.pth',
    'saved_model_task_b/resnet34_single_With_SLO_Padding.pth',
    'saved_model_task_b/vgg16_single_With_SLO_Padding.pth',
]

# Load weights for each model and ensure they are on the correct device
for model, weights_path in zip(models_list, model_weights_paths):
    # load_model_weights fills the model in place, so models_list holds the loaded models.
    model = load_model_weights(model, weights_path, device)
    model.to(device)  # Ensure the model is on the correct device

# Ensemble types to test
ensemble_results = []
ensemble_types = ['weighted_average']
for ensemble_type in ensemble_types:
    print(f"Evaluating with ensemble type: {ensemble_type}")

    # Wrap the loaded models; their parameters become parameters of the ensemble.
    ensemble_model = EnsembleModel(models_list, ensemble_type).to(device)

    # Optimizer, loss and scheduler for fine-tuning all member models jointly
    optimizer = torch.optim.Adam(ensemble_model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    # Train ensemble model using your `train_model` function
    ensemble_model, history = train_model(
        ensemble_model,
        train_loader,
        val_loader,
        device,
        criterion,
        optimizer,
        lr_scheduler,
        num_epochs=15,
        checkpoint_path='ensemble_model.pth',
    )

    # Score the returned model once more on the validation split
    val_loss, val_metrics = evaluate_model(ensemble_model, val_loader, device, return_loss=True)
    val_kappa, val_accuracy, _, _ = val_metrics[:4]

    print(f"[Val] Ensemble Model Kappa: {val_kappa:.4f} Accuracy: {val_accuracy:.4f} Loss: {val_loss:.4f}")

    # Store results
    ensemble_results.append(dict(
        type=ensemble_type,
        kappa=val_kappa,
        accuracy=val_accuracy,
        loss=val_loss,
        history=history,
    ))
    # Export the test predictions of this ensemble
    evaluate_model(ensemble_model, test_loader, device, test_only=True)


In [None]:
# Sort results by kappa
sorted_results = sorted(ensemble_results, key=lambda x: x['kappa'], reverse=True)

# Print sorted kappas
print("\nSorted Ensemble Model Kappa Scores:")
for result in sorted_results:
    print(f"Ensemble Type: {result['type']} | Kappa: {result['kappa']:.4f}")

# Plot accuracy and loss for each ensemble type, one panel per metric
fig, axes = plt.subplots(1, 2, figsize=(12, 6))

panel_specs = (
    (axes[0], 'train_accuracy', 'val_accuracy', "Accuracy over Epochs", "Accuracy"),
    (axes[1], 'train_loss', 'val_loss', "Loss over Epochs", "Loss"),
)
for panel, train_key, val_key, panel_title, y_label in panel_specs:
    # Solid line = training curve, dashed line = validation curve.
    for result in ensemble_results:
        history = result['history']
        panel.plot(history[train_key], label=f"Train ({result['type']})")
        panel.plot(history[val_key], label=f"Val ({result['type']})", linestyle='--')
    panel.set_title(panel_title)
    panel.set_xlabel("Epochs")
    panel.set_ylabel(y_label)
    panel.legend()

plt.tight_layout()
plt.show()

In [None]:
import cv2
import numpy as np
from PIL import Image, ImageFilter
import torch
from torchvision import transforms
from torch.utils.data import DataLoader

# Custom Transformations
class CLAHETransform:
    """Transform that applies CLAHE to the grayscale version of an RGB image.

    NOTE: the image is converted to grayscale first, so the returned RGB image has three
    identical channels and the colour information is gone.
    """

    def __init__(self, clip_limit=1.0, tile_grid_size=(8, 8)):  # Less aggressive CLAHE
        self.clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)

    def __call__(self, img):
        """Return a new PIL image holding the equalised, gray-valued RGB result."""
        gray = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2GRAY)
        equalized = self.clahe.apply(gray)
        as_rgb = cv2.cvtColor(equalized, cv2.COLOR_GRAY2RGB)
        return Image.fromarray(as_rgb)

class GaussianBlurTransform:
    """Transform that blurs a PIL image with a Gaussian filter of the given radius."""

    def __init__(self, radius=1):  # Reduced blur radius
        self.radius = radius

    def __call__(self, img):
        """Return the blurred copy of ``img``."""
        blur_filter = ImageFilter.GaussianBlur(self.radius)
        return img.filter(blur_filter)

class CircleCropTransform:
    """Transform that resizes a PIL image and zeroes every pixel outside a centred circle.

    The circle radius is ``crop_fraction`` times the distance from the centre to the
    nearer image border.
    """

    def __init__(self, size=(224, 224), crop_fraction=0.8):  # Slightly less aggressive circle crop
        self.size = size
        self.crop_fraction = crop_fraction

    def __call__(self, img):
        """Return the resized image with everything outside the circle set to 0."""
        pixels = np.array(img.resize(self.size))
        height, width = pixels.shape[:2]
        center = (width // 2, height // 2)
        radius = int(min(center) * self.crop_fraction)  # Reducing the mask size
        # Mask of ones inside the filled circle (thickness -1) and zeros outside.
        keep_mask = np.zeros_like(pixels)
        cv2.circle(keep_mask, center, radius, (1, 1, 1), -1)
        return Image.fromarray(pixels * keep_mask)

class SharpenTransform:
    """Transform that sharpens a PIL image with a 3x3 kernel of adjustable strength.

    The kernel is the identity plus ``strength`` times an 8-neighbour Laplacian: centre
    ``1 + 8 * strength``, every neighbour ``-strength``. Its entries always sum to 1, so
    the overall brightness is preserved for any strength. ``strength=1`` gives the
    classic kernel with centre 9 and neighbours -1; ``strength=0`` leaves the image
    unchanged.

    Args:
        strength: Amount of sharpening.
    """

    def __init__(self, strength=1):  # Reduced sharpening strength
        # Scaling only the centre (9 * strength) would make the kernel sum to
        # 9 * strength - 8, which darkens the image for any strength below 1.
        kernel = np.full((3, 3), -float(strength), dtype=np.float32)
        kernel[1, 1] = 1.0 + 8.0 * float(strength)
        self.kernel = kernel

    def __call__(self, img):
        """Return the sharpened copy of ``img`` as a PIL image."""
        img = np.array(img)
        img = cv2.filter2D(img, -1, self.kernel)  # ddepth -1 keeps the input depth
        return Image.fromarray(img)

# Setup transformations
# Evaluation pipeline: resize and normalise only.
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Training pipeline: augmentation followed by CLAHE and circle cropping.
# NOTE(review): CLAHE and circle cropping are applied to the training images only, not in
# transform_test -- confirm that this train/eval difference is intended.
transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    SLORandomPad((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ColorJitter(brightness=(0.1, 0.9)),
    CLAHETransform(clip_limit=0.5),  # Milder CLAHE
    #GaussianBlurTransform(radius=0.5),  # Less aggressive blur
    #FundRandomRotate(prob=0.7, degree=45),
    CircleCropTransform(crop_fraction=0.5),  # Milder circle cropping
    #SharpenTransform(strength=0.5),  # Less sharpening
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Instantiate model and other components
backbone_name = "resnet18"
mode = "single"  # Assuming single input for simplicity
# NOTE(review): the label says "Gaussian", but the Gaussian blur step is commented out in
# the training transform (CLAHE and circle cropping are the active steps). The label is
# kept because it determines the checkpoint file name.
aug_name = "SLO_Gaussian"

# Load datasets
train_dataset = RetinopathyDataset('./DeepDRiD/train.csv', './DeepDRiD/train/', transform_train, mode)
val_dataset = RetinopathyDataset('./DeepDRiD/val.csv', './DeepDRiD/val/', transform_test, mode)
test_dataset = RetinopathyDataset('./DeepDRiD/test.csv', './DeepDRiD/test/', transform_test, mode, test=True)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

model = get_model(backbone_name, mode).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# Train the model and record history
checkpoint_path = f"./{backbone_name}{mode}{aug_name.replace(' ', '_')}.pth"
model, history = train_model(
    model, train_loader, val_loader, device, criterion, optimizer, lr_scheduler, num_epochs=10, checkpoint_path=checkpoint_path
)
# train_model has already written the best-kappa weights to checkpoint_path. Saving the
# model again unconditionally would replace them with the weights it holds after the last
# epoch, so restore the best weights instead; only save if no checkpoint was written.
if os.path.exists(checkpoint_path):
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
else:
    torch.save(model.state_dict(), checkpoint_path)


# Task E Creating Visualizations and Explainable AI.



1.   Implement visualizations (e.g., scatter plots and line graphs) for your models' losses and accuracies on training and validation datasets to analyze the convergence and overall performance of the model.
2.   Use Explainable AI techniques such as GradCAM to analyze what features in the image are contributing the most and the least in the model's decision-making process. Please also include a few visualization results in the report.
The goal of Task E is to visualize the models' performance.






In [None]:
def train_model(model, train_loader, val_loader, device, criterion, optimizer, lr_scheduler, num_epochs=25,
                checkpoint_path='model.pth'):
    """Train ``model`` and keep the weights of the epoch with the best validation kappa.

    Every epoch makes one pass over ``train_loader``, steps ``lr_scheduler`` once and
    scores the model on ``val_loader`` through ``evaluate_model``. Whenever the validation
    kappa improves, the weights are snapshotted and written to ``checkpoint_path``.

    Args:
        model: Network to optimise. It is called with a batch tensor, or with a list of
            tensors when the loader yields the images as a list.
        train_loader: Iterable of ``(images, labels)`` batches.
        val_loader: Loader handed to ``evaluate_model`` after each epoch.
        device: Device the batches are moved to.
        criterion: Loss, called as ``criterion(outputs, labels.long())``.
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Scheduler stepped once per epoch.
        num_epochs: Number of epochs to run.
        checkpoint_path: File that receives the best weights.

    Returns:
        ``(model, history)``. ``history`` holds the per-epoch lists ``train_loss``,
        ``train_accuracy``, ``val_loss``, ``val_accuracy`` and ``val_kappa`` plus the
        scalars ``best_kappa`` and ``best_epoch``. If any epoch set a best score, ``model``
        is returned with that epoch's weights loaded, so it matches the checkpoint file.
    """
    # state_dict() hands back references to the live tensors, which keep changing while
    # training continues; a deep copy is needed to really freeze a snapshot.
    best_model = copy.deepcopy(model.state_dict())
    best_epoch = None
    best_val_kappa = -1.0  # Initialize the best kappa score

    # Initialize containers for storing metrics
    history = {
        'train_loss': [],
        'train_accuracy': [],
        'val_loss': [],
        'val_accuracy': [],
        'val_kappa': [],  # Store kappa scores for validation
        'best_kappa': None,  # Best kappa score
        'best_epoch': None,  # Epoch with best kappa
    }

    for epoch in range(1, num_epochs + 1):
        print(f'\nEpoch {epoch}/{num_epochs}')
        running_loss = []
        all_preds = []
        all_labels = []

        # evaluate_model() switches to eval mode, so training mode is re-enabled every epoch.
        model.train()

        with tqdm(total=len(train_loader), desc=f'Training', unit=' batch', file=sys.stdout) as pbar:
            for images, labels in train_loader:
                if not isinstance(images, list):
                    images = images.to(device)  # single image case
                else:
                    images = [x.to(device) for x in images]  # dual images case

                labels = labels.to(device)

                optimizer.zero_grad()

                outputs = model(images)
                loss = criterion(outputs, labels.long())

                loss.backward()
                optimizer.step()

                preds = torch.argmax(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

                running_loss.append(loss.item())

                pbar.set_postfix({'lr': f'{optimizer.param_groups[0]["lr"]:.1e}', 'Loss': f'{loss.item():.4f}'})
                pbar.update(1)

        lr_scheduler.step()

        # Calculate epoch-level metrics
        epoch_loss = sum(running_loss) / len(running_loss)
        train_metrics = compute_metrics(all_preds, all_labels, per_class=False)
        train_kappa, train_accuracy, _, _ = train_metrics[:4]

        print(f'[Train] Kappa: {train_kappa:.4f} Accuracy: {train_accuracy:.4f} Loss: {epoch_loss:.4f}')

        # Store training metrics
        history['train_loss'].append(epoch_loss)
        history['train_accuracy'].append(train_accuracy)

        # Validation
        val_loss, val_metrics = evaluate_model(model, val_loader, device, return_loss=True)
        val_kappa, val_accuracy, _, _ = val_metrics[:4]
        print(f'[Val] Kappa: {val_kappa:.4f} Accuracy: {val_accuracy:.4f} Loss: {val_loss:.4f}')

        # Store validation metrics
        history['val_loss'].append(val_loss)
        history['val_accuracy'].append(val_accuracy)
        history['val_kappa'].append(val_kappa)

        # Save the best model
        if val_kappa > best_val_kappa:
            best_val_kappa = val_kappa
            best_epoch = epoch
            best_model = copy.deepcopy(model.state_dict())
            torch.save(best_model, checkpoint_path)

    # Store best kappa and epoch in history
    history['best_kappa'] = best_val_kappa
    history['best_epoch'] = best_epoch

    print(f'[Val] Best kappa: {best_val_kappa:.4f}, Epoch {best_epoch}')

    # Hand back the best-epoch weights rather than the last-epoch ones, so that test
    # predictions (and any re-save by the caller) agree with the reported best kappa.
    if best_epoch is not None:
        model.load_state_dict(best_model)

    return model, history


def evaluate_model(model, test_loader, device, test_only=False, prediction_path='./test_predictions.csv', return_loss=False):
    """Run the model over a loader and either score it or dump its predictions.

    Args:
        model: Network to evaluate. It is switched to eval mode and left there.
        test_loader: DataLoader yielding ``images`` when ``test_only`` is true,
            otherwise ``(images, labels)``. ``images`` is either a tensor
            (single-image mode) or a list of two tensors (dual-image mode).
            Image file names are looked up by position in
            ``test_loader.dataset.data``, so the loader must walk the dataset
            in order (no shuffling) for IDs and predictions to line up.
        device: Device the image tensors (and labels, for the loss) are moved to.
        test_only: If true, no labels are read and the predictions are written
            to ``prediction_path`` instead of being scored.
        prediction_path: CSV file receiving the ``ID``/``TARGET`` columns.
        return_loss: If true (and ``test_only`` is false) also return the mean
            of the per-batch cross-entropy losses.

    Returns:
        ``None`` when ``test_only`` is true. Otherwise the result of
        ``compute_metrics``, or ``(avg_loss, metrics)`` when ``return_loss`` is
        true; ``avg_loss`` is ``None`` if no batch was evaluated.
    """
    model.eval()

    all_preds = []
    all_labels = []
    all_image_ids = []
    running_loss = []  # Per-batch losses, only filled when return_loss is True
    criterion = nn.CrossEntropyLoss()  # Built once rather than once per batch

    with tqdm(total=len(test_loader), desc='Evaluating', unit=' batch', file=sys.stdout) as pbar:
        for i, data in enumerate(test_loader):

            if test_only:
                images = data
                labels = None
            else:
                images, labels = data

            dual = isinstance(images, list)
            if dual:
                images = [x.to(device) for x in images]  # dual images case
            else:
                images = images.to(device)  # single image case

            with torch.no_grad():
                outputs = model(images)
                preds = torch.argmax(outputs, 1)

                if return_loss and not test_only:
                    loss = criterion(outputs, labels.to(device).long())
                    running_loss.append(loss.item())

            # Move the batch results to the host once; the dual case reuses them twice
            batch_preds = preds.cpu().numpy()
            batch_labels = None if test_only else labels.cpu().numpy()

            # Positional lookup of file names: batch i covers dataset rows
            # [i * batch_size, i * batch_size + actual batch length)
            batch_start = i * test_loader.batch_size
            records = test_loader.dataset.data

            if not dual:
                # Single image case
                all_preds.extend(batch_preds)
                all_image_ids.extend(
                    os.path.basename(records[idx]['img_path'])
                    for idx in range(batch_start, batch_start + len(images))
                )
                if not test_only:
                    all_labels.extend(batch_labels)
            else:
                # Dual images case: the single prediction for a pair is
                # reported once for each of the two image files
                for k in range(2):
                    all_preds.extend(batch_preds)
                    all_image_ids.extend(
                        os.path.basename(records[idx][f'img_path{k + 1}'])
                        for idx in range(batch_start, batch_start + len(images[k]))
                    )
                    if not test_only:
                        all_labels.extend(batch_labels)

            pbar.update(1)

    # Save predictions to CSV file for Kaggle online evaluation
    if test_only:
        df = pd.DataFrame({
            'ID': all_image_ids,
            'TARGET': all_preds
        })
        df.to_csv(prediction_path, index=False)
        print(f'[Test] Predictions saved to {os.path.abspath(prediction_path)}')
    else:
        metrics = compute_metrics(all_preds, all_labels)
        if return_loss:
            avg_loss = sum(running_loss) / len(running_loss) if running_loss else None
            return avg_loss, metrics
        return metrics

In [None]:
import matplotlib.pyplot as plt
import torch.nn.functional as F

# Train on the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Setup transformations
transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    SLORandomPad((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    # NOTE(review): the brightness factor is drawn from [0.1, 0.9], so images
    # are only ever darkened - confirm this is the intended augmentation.
    transforms.ColorJitter(brightness=(0.1, 0.9)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Validation/test images are only resized and normalized (no augmentation)
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Instantiate model and other components
backbone_name = "resnet18"
mode = "single"  # Assuming single input for simplicity
aug_name = "With SLO Padding"

# Load datasets
train_dataset = RetinopathyDataset('./DeepDRiD/train.csv', './DeepDRiD/train/', transform_train, mode)
val_dataset = RetinopathyDataset('./DeepDRiD/val.csv', './DeepDRiD/val/', transform_test, mode)
test_dataset = RetinopathyDataset('./DeepDRiD/test.csv', './DeepDRiD/test/', transform_test, mode, test=True)

# Only the training loader shuffles; evaluate_model maps predictions back to
# file names by position, so the val/test loaders must keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

backbone = get_model(backbone_name, mode)  # Load the backbone selected by backbone_name
model = MyModelSelfAttention(backbone=backbone).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
# NOTE(review): with step_size=10 and num_epochs=10 the first decay only happens
# after the last epoch, so the learning rate stays constant - confirm intended.
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# Train the model and record history
run_name = f"{backbone_name}{mode}{aug_name.replace(' ', '_')}"
checkpoint_path = f"./{run_name}.pth"
model, history = train_model(
    model, train_loader, val_loader, device, criterion, optimizer, lr_scheduler, num_epochs=10, checkpoint_path=checkpoint_path
)
# train_model writes the best-validation-kappa weights to checkpoint_path.
# Saving the last-epoch weights to that same file would overwrite them, so the
# final weights go to their own file and the best checkpoint is kept.
final_checkpoint_path = f"./{run_name}_final.pth"
torch.save(model.state_dict(), final_checkpoint_path)

In [None]:
def plot_history(history):
    """Draw the accuracy curves and then the loss curves recorded during training.

    Args:
        history: Mapping with the per-epoch series ``train_accuracy``,
            ``val_accuracy``, ``train_loss`` and ``val_loss``.
    """
    # One figure per metric: (train series key, validation series key, metric name)
    panels = (
        ('train_accuracy', 'val_accuracy', 'Accuracy'),
        ('train_loss', 'val_loss', 'Loss'),
    )

    for train_key, val_key, metric in panels:
        plt.figure(figsize=(12, 6))
        plt.plot(history[train_key], label=f'Train {metric}', marker='o')
        plt.plot(history[val_key], label=f'Validation {metric}', marker='o')
        plt.xlabel('Epochs')
        plt.ylabel(metric)
        plt.title(f'Training and Validation {metric}')
        plt.legend()
        plt.grid()
        plt.show()

In [None]:
# Write the test-set predictions to the default CSV path
evaluate_model(model=model, test_loader=test_loader, device=device, test_only=True)

# Plot training and validation accuracy, then loss
plot_history(history)


In [None]:
# Mount Google Drive at /content/drive (requires the google.colab package, i.e. a Colab runtime)
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import torch
import numpy as np
from PIL import Image

class GradCAM:
    """Grad-CAM heatmap generator for one named layer of a model.

    A forward hook on the target layer stores the layer output, and a tensor
    hook on that output stores the gradient flowing back into it. Calling the
    instance runs one forward/backward pass and turns both into a heatmap.

    Args:
        model: The network to explain.
        target_layer_name: Key of the layer in ``dict(model.named_modules())``.

    Raises:
        KeyError: If ``target_layer_name`` is not a module of ``model``.
    """

    def __init__(self, model, target_layer_name):
        self.model = model
        self.target_layer_name = target_layer_name
        self.activations = None
        self.gradients = None
        self._hook_handles = []  # handles of hooks registered on the model

        # Register hooks
        self.hook_layers()

    def hook_layers(self):
        """Attach the activation/gradient capturing hooks to the target layer."""
        def save_gradient(grad):
            self.gradients = grad.detach()  # Store gradients

        def forward_hook(module, input, output):
            self.activations = output.detach()  # Store activations
            # Capture the gradient w.r.t. the layer output with a tensor hook;
            # Module.register_backward_hook is deprecated. Skip it when the
            # forward pass is not tracking gradients (e.g. under no_grad),
            # where registering a tensor hook would raise.
            if output.requires_grad:
                output.register_hook(save_gradient)

        modules = dict(self.model.named_modules())
        if self.target_layer_name not in modules:
            raise KeyError(f"Layer '{self.target_layer_name}' not found in model.named_modules()")

        # Avoid stacking duplicate hooks if this method is called again
        self.remove_hooks()
        target_layer = modules[self.target_layer_name]
        self._hook_handles.append(target_layer.register_forward_hook(forward_hook))

    def remove_hooks(self):
        """Detach this instance's hooks from the model so they do not accumulate."""
        for handle in self._hook_handles:
            handle.remove()
        self._hook_handles.clear()

    def __call__(self, input_tensor, target_class=None):
        """Compute the heatmap for ``input_tensor``.

        Args:
            input_tensor: Model input; gradient tracking is switched on for it.
                The heatmap is resized to its last two dimensions.
            target_class: Class index to explain. Defaults to the predicted
                class (which requires a batch of exactly one sample).

        Returns:
            2-D numpy array scaled to [0, 1] (all zeros if the map is empty).

        Raises:
            ValueError: If the hooks captured nothing, or the layer output is
                not a 4-D feature map.
        """
        if not input_tensor.requires_grad:
            input_tensor.requires_grad = True

        # Drop results of earlier calls so the check below cannot pass on stale data
        self.activations = None
        self.gradients = None

        output = self.model(input_tensor)

        if target_class is None:
            target_class = torch.argmax(output, dim=1).item()

        # Backward pass to get gradients
        self.model.zero_grad()
        target = output[0][target_class]
        target.backward()

        # Check if gradients and activations are captured
        if self.gradients is None or self.activations is None:
            raise ValueError("Gradients or activations are None. Make sure hooks are correctly set.")

        # PIL sizes are (width, height); tensor layout is (..., height, width)
        output_size = (int(input_tensor.shape[-1]), int(input_tensor.shape[-2]))
        heatmap = self.generate_heatmap(self.gradients, self.activations, output_size=output_size)
        return heatmap

    def generate_heatmap(self, gradients, activations, output_size=(224, 224)):
        """Combine gradients and activations into a normalized heatmap.

        Args:
            gradients: Gradient of the target score w.r.t. the layer output, 4-D.
            activations: Layer output, 4-D; only the first sample is used.
            output_size: ``(width, height)`` of the returned map.

        Returns:
            2-D float numpy array of shape ``(height, width)`` in [0, 1].

        Raises:
            ValueError: If either tensor is not 4-D.
        """
        if gradients.dim() != 4 or activations.dim() != 4:
            raise ValueError(
                f"Grad-CAM needs 4-D feature maps, got gradients with {gradients.dim()} "
                f"and activations with {activations.dim()} dimensions."
            )

        # Pool gradients across spatial dimensions
        pooled_gradients = torch.mean(gradients, dim=[0, 2, 3])

        # Weight activations by gradients. Done out of place: the stored
        # activations share memory with the layer's real output tensor.
        weighted = activations[0] * pooled_gradients[:, None, None]

        # Create heatmap by averaging the weighted activations
        heatmap = torch.mean(weighted, dim=0).cpu().numpy()
        heatmap = np.maximum(heatmap, 0).astype(np.float32)  # Apply ReLU to heatmap

        # Resize heatmap to the requested size without cv2.
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        heatmap = Image.fromarray(heatmap)  # Convert to PIL Image
        heatmap = heatmap.resize(output_size, Image.LANCZOS)
        heatmap = np.array(heatmap)  # Convert back to numpy array
        heatmap = np.maximum(heatmap, 0)  # Lanczos resampling can undershoot below zero

        # Normalize heatmap; an all-zero map is returned as is instead of dividing by zero
        peak = np.max(heatmap)
        if peak > 0:
            heatmap = heatmap / peak
        return heatmap


In [None]:
def visualize_gradcam(input_image, heatmap, alpha=0.5):
    """Show the image, its Grad-CAM heatmap and their blend side by side.

    Args:
        input_image: PIL image the heatmap belongs to.
        heatmap: 2-D array that is scaled by 255 and rendered as a gray RGB image.
        alpha: Weight of the heatmap in the blend; the image gets ``1 - alpha``.
    """
    # Turn the single-channel map into a 3-channel 8-bit image of the input's size
    gray = np.uint8(255 * heatmap)
    heat_rgb = np.stack((gray, gray, gray), axis=2)
    heat_pil = to_pil_image(heat_rgb)
    heat_pil = heat_pil.resize(input_image.size, resample=Image.BILINEAR)

    # Blend heatmap and original image pixel by pixel
    heat_pixels = np.array(heat_pil)
    image_pixels = np.array(input_image)
    blended = np.uint8(image_pixels * (1 - alpha) + heat_pixels * alpha)

    panels = (
        ("Original Image", image_pixels),
        ("Grad-CAM Heatmap", heat_pixels),
        ("Overlay", blended),
    )

    plt.figure(figsize=(8, 8))
    for position, (title, pixels) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.title(title)
        plt.imshow(pixels)
        plt.axis('off')

    plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def visualize_gradcam_full(image, heatmaps, layer_names, num_columns=5):
    """
    Visualize multiple GradCAM heatmaps in a grid layout with a specified number of columns.

    The first cell of the grid shows the original image; each following cell
    shows one heatmap, so the grid is sized for ``len(heatmaps) + 1`` panels.

    Args:
        image: The original image.
        heatmaps: A list of heatmaps generated by GradCAM for each layer.
        layer_names: A list of layer names corresponding to the heatmaps.
            Heatmaps and names are paired with ``zip``, so surplus entries of
            the longer list are ignored.
        num_columns: Number of columns for the grid layout (default is 5).
    """
    num_layers = len(heatmaps)
    # The original image occupies the first cell, so one panel more than
    # there are heatmaps is needed; sizing by num_layers alone overflowed the
    # grid whenever num_layers was a multiple of num_columns.
    num_panels = num_layers + 1
    rows = (num_panels + num_columns - 1) // num_columns  # Ceiling division

    # Create a figure with a grid of subplots. squeeze=False keeps `axes` a
    # 2-D array even for a 1x1 grid, so it can always be flattened.
    fig, axes = plt.subplots(rows, num_columns, figsize=(num_columns * 5, 5 * rows), squeeze=False)
    axes = axes.flatten()  # Flatten the axes to make it easier to index

    # Show the original image in the first subplot
    axes[0].imshow(np.array(image))
    axes[0].set_title('Original Image')
    axes[0].axis('off')

    # Loop through the layers and their heatmaps, starting at the second cell
    for ax, heatmap, layer_name in zip(axes[1:], heatmaps, layer_names):
        ax.imshow(heatmap, cmap='jet', alpha=0.5)  # Semi-transparent heatmap on its own axes
        ax.set_title(f"Heatmap: {layer_name}")
        ax.axis('off')

    # Hide any unused subplots (if the panels do not fill the last row)
    for ax in axes[num_panels:]:
        ax.axis('off')

    # Adjust layout and show the plot
    plt.tight_layout()
    plt.show()


In [None]:
# Print the name of every module registered on the model; these names are the
# keys GradCAM looks up when it is given a target_layer_name.
for name, module in model.named_modules():
    print(name)

In [None]:
from PIL import Image
import torch
import torch.nn.functional as F
from torchvision import transforms

# Pick the device: GPU when available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load and preprocess image.
# Forward slashes resolve on Linux/Colab as well as on Windows; the previous
# backslash-separated path could only be opened on Windows.
image_path = "./DeepDRiD/test/347/347_l1.jpg"
image = Image.open(image_path).convert('RGB')
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# Add batch dimension (unsqueeze(0)) and move the tensor to the device
input_tensor = transform(image).unsqueeze(0).to(device)  # Now shape will be [1, 3, 224, 224]
print(input_tensor.shape)
# Ensure the model is on the same device (GPU or CPU)
model = model.to(device)
# Explain inference behaviour: put any dropout/batch-norm layers in eval mode
model.eval()

# Initialize GradCAM and generate heatmap
gradcam = GradCAM(model=model, target_layer_name='backbone.layer1')  # Adjust target_layer_name if needed
heatmap = gradcam(input_tensor)  # Pass the correct shaped tensor to GradCAM

# Visualize the result
visualize_gradcam(image, heatmap)


In [None]:
# Heatmaps that could be computed, and the layer each one belongs to
heatmaps = []
layer_names = []

# Try Grad-CAM on every module of the model; layers it cannot handle are
# reported and skipped (deliberate best effort).
for name, module in model.named_modules():
    print(f"Processing layer: {name}")

    try:
        # Hook the current layer and run one forward/backward pass on the input
        gradcam = GradCAM(model=model, target_layer_name=name)
        heatmap = gradcam(input_tensor)
    except Exception as e:
        # Print the error and move on to the next layer
        print(f"Error processing layer {name}: {e}")
    else:
        # Keep the heatmap together with its layer name
        heatmaps.append(heatmap)
        layer_names.append(name)

# Visualize the heatmaps in a grid (visualize_gradcam_full's default column count)
visualize_gradcam_full(image, heatmaps, layer_names)
