### Train for MICCAI challenge on colab using data on gDrive

In [1]:
# setup
!apt-get update
!apt-get install git
!pip install python-dotenv
!pip install loguru
!pip install efficientnet_pytorch
!pip install wandb
!pip install imbalanced-learn

0% [Working]            Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
0% [Waiting for headers] [Waiting for headers] [1 InRelease 3,626 B/3,626 B 100%] [Connecting to r2u0% [Waiting for headers] [Waiting for headers] [Connected to r2u.stat.illinois.edu (192.17.190.167)]                                                                                                    Hit:2 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
                                                                                                    Hit:3 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
0% [Waiting for headers] [Waiting for headers] [Connected to r2u.stat.illinois.edu (192.17.190.167)]                                                                                                    Hit:4 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
0% [Waiting for headers] [Waiting for head

### Clone the repository and add it to python path

In [2]:
# clone repo in order to have modules available
import os
import sys
from pathlib import Path
# Define the parameters
username = "bscheuringer"
access_token = "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"
repo_name = "AILS-MICCAI-UWF4DR-Challenge"
repo_clone_url = f"https://{username}:{access_token}@github.com/moritsih/{repo_name}.git"
repo_path = f'/content/{repo_name}'

# Check if the repository already exists
if not os.path.isdir(repo_path):
    !git clone {repo_clone_url}
else:
    print("Repository already exists.")

# navigate to repo directory in order to have working imports
%cd {repo_path}

#!git checkout bsc_colab  # TODO remove when branch is not needed anymore

# add repo path to sys path
if repo_path not in sys.path:
    sys.path.append(repo_path)

# Print sys.path to verify
print("Python Path:", sys.path)

Cloning into 'AILS-MICCAI-UWF4DR-Challenge'...
remote: Enumerating objects: 980, done.[K
remote: Counting objects: 100% (10/10), done.[K
remote: Compressing objects: 100% (10/10), done.[K
remote: Total 980 (delta 1), reused 2 (delta 0), pack-reused 970[K
Receiving objects: 100% (980/980), 86.40 MiB | 26.80 MiB/s, done.
Resolving deltas: 100% (637/637), done.
/content/AILS-MICCAI-UWF4DR-Challenge
Branch 'bsc_colab' set up to track remote branch 'bsc_colab' from 'origin'.
Switched to a new branch 'bsc_colab'
Python Path: ['/content', '/env/python', '/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/usr/local/lib/python3.10/dist-packages', '/usr/lib/python3/dist-packages', '/usr/local/lib/python3.10/dist-packages/IPython/extensions', '/root/.ipython', '/content/AILS-MICCAI-UWF4DR-Challenge']


In [None]:
# load and unzip data
!python ./tools/download_data_and_chkpts.py

### Optionally resize deepdrid images in order to save computation time
NOTE: this will REPLACE original images !

In [5]:
import os
from PIL import Image

def resize_image(image_path, size):
    with Image.open(image_path) as img:
        img = img.resize(size)
        img.save(image_path)

def process_directory(root_dir, size):
    if not os.path.isdir(root_dir):
        print(f"Error: The directory '{root_dir}' does not exist.")
        return

    image_count = 0
    
    for dirpath, _, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')):
                image_path = os.path.join(dirpath, filename)
                resize_image(image_path, size)
                image_count += 1

    print(f"Processed {image_count} images.")


process_directory('data/external/DeepDRiD', (1016, 800) )

Processed 256 images.


### Verify if project has been successfully cloned and added to python path

In [6]:
# test repo import
!ls {repo_path}

# try importing a custom class
try:
    from ails_miccai_uwf4dr_challenge.dataset_strategy import Task1Strategy

    print("Import successful!")
except ImportError as e:
    print("Import failed:", e)

'ls' is not recognized as an internal or external command,
operable program or batch file.

KeyboardInterrupt



In [1]:
# imports
import torch
import torch.nn as nn
from sklearn.metrics import roc_auc_score, average_precision_score
from torch import optim
from torch.utils.data import DataLoader
import time

from ails_miccai_uwf4dr_challenge.models.metrics import sensitivity_score, specificity_score
from ails_miccai_uwf4dr_challenge.models.trainer import Metric, DefaultMetricsEvaluationStrategy, Trainer, TrainingContext, MetricCalculatedHook, PersistBestModelOnEpochEndHook, EpochEndHook, ModelResultsAndLabels

In [2]:
# connect to gDrive
from pathlib import Path
run_on_colab = True
if run_on_colab:
    from google.colab import drive
    drive.mount('/content/drive')
    my_data_base_path = Path("/content/drive/My Drive/JKU/AILS_CHALLENGE_2024")
else:
    my_data_base_path = Path("local_runs")


In [3]:
# select device for training
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device: " + str(device))

Device: cpu


In [4]:
# login to wandb
use_wandb = True
if use_wandb:
    import wandb
    #wandb.login()

### Green Channel Enhancement
A Hybrid Algorithm to Enhance Colour Retinal Fundus Images Using a Wiener Filter and CLAHE

see https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8329119/

In [8]:
import cv2
import numpy as np
from skimage import restoration
import torch
from torchvision.transforms import v2

class GreenChannelEnhancement:
    def __call__(self, img):
        # Convert to numpy array if it's a tensor
        if isinstance(img, torch.Tensor):
            img = img.numpy().transpose((1, 2, 0))

        # Ensure the image is in the correct format
        img = img.astype(np.float32)

        # Separate the channels
        r, g, b = cv2.split(img)

        # Apply Wiener filter to the green channel
        psf = np.ones((5, 5)) / 25
        g_filtered = restoration.wiener(g, psf, balance=0.1)

        # Apply CLAHE to the filtered green channel
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        g_enhanced = clahe.apply((g_filtered * 255).astype(np.uint8))
        g_enhanced = g_enhanced / 255.0  # Normalize back to range [0, 1]

        # Ensure all channels are the same type
        r = r.astype(np.float32)
        g_enhanced = g_enhanced.astype(np.float32)
        b = b.astype(np.float32)

        # Merge the enhanced green channel back with the original red and blue channels
        enhanced_img = cv2.merge((r, g_enhanced, b))

        # Convert back to tensor
        enhanced_img = torch.from_numpy(enhanced_img.transpose((2, 0, 1)))
        return enhanced_img

### Model Definitions

In [13]:
# EfficientNet B0
from efficientnet_pytorch import EfficientNet
class Task1EfficientNetB0(nn.Module):
    def __init__(self, learning_rate=1e-3):
        super(Task1EfficientNetB0, self).__init__()

        self.learning_rate = learning_rate

        # Get model and replace the last layer
        self.model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=1)
        self.loss_fn = nn.BCEWithLogitsLoss()

        # Freeze all layers except the last one
        #for param in self.model.parameters():
        #    param.requires_grad = False

        # Unfreeze the last layer
        #for param in self.model._fc.parameters():
        #    param.requires_grad = True

    def forward(self, x):
        return self.model(x)

    def predict(self, x):
        with torch.no_grad():
            pred = torch.sigmoid(self(x))
        return pred

In [14]:
# Automorph
from efficientnet_pytorch import EfficientNet
class AutoMorphModel(nn.Module):
    def __init__(self, pretrained=True):
        super(AutoMorphModel, self).__init__()

        # code taken from https://github.com/rmaphoh/AutoMorph/blob/main/M1_Retinal_Image_quality_EyePACS/model.py
        self.model = EfficientNet.from_pretrained('efficientnet-b4')
        self.model._fc = nn.Identity()
        net_fl = nn.Sequential(
            nn.Linear(1792, 256),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(64, 3)
        )
        self.model._fc = net_fl
        if pretrained:
            checkpoint_path = Path().resolve() / "models" / "AutoMorph" / "automorph_best_loss_checkpoint.pth"
            self.model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
            print(f"Loaded pretrained Automorph model checkpoint from {checkpoint_path}")

        # add a final layer that outputs single value
        self.model._fc.add_module("7", nn.Linear(3, 1))

    def forward(self, x):
        return self.model(x)


In [9]:
# EfficientNet0 with extended classifier
from efficientnet_pytorch import EfficientNet

class Task1EfficientNetB0Extended(nn.Module):
    def __init__(self, learning_rate=1e-3):
        super(Task1EfficientNetB0Extended, self).__init__()

        self.learning_rate = learning_rate

        # Get model and replace the last layer
        self.model = EfficientNet.from_pretrained('efficientnet-b0')

        # Determine the number of input features for the classifier
        in_features = self.model._fc.in_features

        # Replace the last layer with a custom classifier block
        self.model._fc = nn.Sequential(
            nn.Linear(in_features, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(512, 64),
            nn.ReLU(),
            #nn.Dropout(p=0.4),
            nn.Linear(64, 1)
        )

        self.loss_fn = nn.BCEWithLogitsLoss()

        # Freeze all layers except the last one
        for param in self.model.parameters():
            param.requires_grad = False

        # Unfreeze the last layer
        for param in self.model._fc.parameters():
            param.requires_grad = True

    def forward(self, x):
        return self.model(x)

    def predict(self, x):
        with torch.no_grad():
            pred = torch.sigmoid(self(x))
        return pred




In [16]:
# EfficientNetV2

# Import necessary libraries
import torch
import torch.nn as nn
from torchvision.models import efficientnet_v2_s

class Task1EfficientNetV2(nn.Module):
    def __init__(self, learning_rate=1e-3):
        super(Task1EfficientNetV2, self).__init__()

        self.learning_rate = learning_rate

        # Get the EfficientNetV2 model
        self.model = efficientnet_v2_s(weights="IMAGENET1K_V1")

        # Replace the entire classifier block
        in_features = self.model.classifier[1].in_features
        self.model.classifier = nn.Sequential(
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(512, 64),
            nn.ReLU(),
            nn.Dropout(p=0.4),
            nn.Linear(64, 1)
        )
        self.loss_fn = nn.BCEWithLogitsLoss()

    def forward(self, x):
        return self.model(x)

    def predict(self, x):
        with torch.no_grad():
            pred = torch.sigmoid(self(x))
        return pred


### Transformations

In [10]:
from torchvision import transforms
import torch

augment_for_task_1_training = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),  # Convert to float32 tensor and scale
    #GreenChannelEnhancement(),  # Apply Wiener filter and CLAHE
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(degrees=5, expand=True),
    # transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.Resize(size=(224, 224)),
    transforms.Normalize(mean=[0.406, 0.456, 0.485], std=[0.225, 0.224, 0.229])
])

# Augmentation pipeline for validation
augment_for_task_1_validation = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),  # Convert to float32 tensor and scale
    #GreenChannelEnhancement(),  # Apply Wiener filter and CLAHE
    transforms.Resize(size=(224, 224)),
    transforms.Normalize(mean=[0.406, 0.456, 0.485], std=[0.225, 0.224, 0.229])
])

In [11]:
#model = Task1EfficientNetB0(1e-4)

#model = AutoMorphModel(pretrained=True)

model = Task1EfficientNetB0Extended(1e-4)

#state_dict = model.load_state_dict(torch.load(my_data_base_path / 'Task1EfficientNetB0Extended_best_weights_2024-07-23_06-34-11_tough-cosmos-713.pth', map_location='cpu'))

#model = Task1EfficientNetV2()


model.to(device)
model_name = model.__class__.__name__

Loaded pretrained weights for efficientnet-b0


In [None]:
# setup dataset
from ails_miccai_uwf4dr_challenge.dataset_strategy import CombinedDatasetStrategy, Task1Strategy, DatasetBuilder, CustomDataset

# setup dataset

dataset_strategy = CombinedDatasetStrategy() # ALL DATA
task_strategy = Task1Strategy() #TASK 1


# Build dataset
dataset_builder = DatasetBuilder(dataset_strategy, task_strategy, split_ratio=0.8)

In [12]:
# training config
print("Training model: ",model_name)

metrics = [
        Metric('auroc', roc_auc_score),
        Metric('auprc', average_precision_score),
        Metric('accuracy', lambda y_true, y_pred: (y_pred.round() == y_true).mean()),
        Metric('sensitivity', sensitivity_score),
        Metric('specificity', specificity_score)
    ]

class WandbLoggingHook(MetricCalculatedHook):
        def on_metric_calculated(self, training_context: TrainingContext, metric: Metric, result, last_metric_for_epoch: bool):
            import wandb
            wandb.log(data={metric.name: result}, commit=last_metric_for_epoch)

class FreezeSwitchHook(EpochEndHook):
    def __init__(self, unfreeze_on_epoch=5):
        self.epoch_cnt = 0
        self.unfreeze_on_epoch = unfreeze_on_epoch
    
    def on_epoch_end(self, training_context: TrainingContext, train_results: ModelResultsAndLabels, val_results: ModelResultsAndLabels):
        self.epoch_cnt += 1
        if self.epoch_cnt == self.unfreeze_on_epoch:
            for param in model.model.parameters():
                param.requires_grad = True
            print(f"Unfreezing all layers on epoch [{self.epoch_cnt}].")
                

metrics_eval_strategy = DefaultMetricsEvaluationStrategy(metrics)

if(use_wandb):
    metrics_eval_strategy.register_metric_calculated_hook(WandbLoggingHook())

config = {
    "learning_rate": 1e-4,
    "dataset": dataset_strategy.__class__.__name__,
    "epochs": 15,
    "batch_size": 4,
    "model_type": model.__class__.__name__
}

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.AdamW(model.parameters(), lr=config["learning_rate"])
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)

train_data, val_data = dataset_builder.build()
train_dataset = CustomDataset(train_data, transform=augment_for_task_1_training)
val_dataset = CustomDataset(val_data, transform=augment_for_task_1_validation)

train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=config['batch_size'], shuffle=False, num_workers=4)


trainer = Trainer(model, train_loader, val_loader, criterion, optimizer, lr_scheduler, device,
                        metrics_eval_strategy=metrics_eval_strategy)



Training model:  Task1EfficientNetB0Extended




In [13]:
# start training

if use_wandb:
    wandb.init(entity='miccai-challenge-2024' ,project='task1', config=config)
    postfix = wandb.run.name
    print(f'wandb run: {wandb.run.name}')
else:
    postfix = time.strftime("%H-%M-%S")

print(f"Start training [{config['model_type']}] on [{config['dataset']}] dataset for [{config['epochs']}] epochs with batch size [{config['batch_size']}]")

# build a file name for the model weights containing current timestamp and the model class
training_date = time.strftime("%Y-%m-%d")
persist_model_hook = PersistBestModelOnEpochEndHook(my_data_base_path / f"{model_name}_weights_{training_date}_{postfix}.pth", print_train_results=True)
trainer.add_epoch_end_hook(persist_model_hook)
#trainer.add_epoch_end_hook(FreezeSwitchHook(unfreeze_on_epoch=5))

trainer.train(num_epochs=config["epochs"])
print("Training finished.")


Start training [Task1EfficientNetB0Extended] on [UWF4DR-Original] dataset for [15] epochs with batch size [4]


Epoch 1/15 - Avg train Loss: 0.615525: 100%|██████████| 127/127 [04:16<00:00,  2.02s/it]
Epoch 1/15 - Avg val Loss: 0.541329: 100%|██████████| 32/32 [00:59<00:00,  1.86s/it]


New best weights found at epoch 1 with validation loss: 0.5413. Model saved to local_runs\Task1EfficientNetB0Extended_weights_2024-07-24_15-04-17.pth
Epoch 1/15 Summary : Train Loss: 0.6155, Val Loss: 0.5413, LR: 1.00e-04, auroc: 0.8915, auprc: 0.9376, accuracy: 0.6719, sensitivity: 0.7907, specificity: 0.8571, avg_train_loss: 0.6155, avg_val_loss: 0.5413


Epoch 2/15 - Starting training... :   0%|          | 0/127 [00:16<?, ?it/s]


KeyboardInterrupt: 

### Build Codalab submission (zip) file

In [15]:
# build a submission zip file
from tools.build_submission import SubmissionBuilder

path_to_codalab_model_file = './models/task_1_EfficientNet0/model.py'
path_to_checkpoint_file = persist_model_hook.save_path
submission_zip_label = f"{model_name}_submission_{training_date}_{postfix}"
output_dir = my_data_base_path

builder = SubmissionBuilder(model_file=path_to_codalab_model_file, checkpoint_file=path_to_checkpoint_file, label=submission_zip_label, output_dir=output_dir)
builder.create_submission_zip()

Testing instantiating the model and predict based on a random image...
Loaded pretrained weights for efficientnet-b0
Prediction on random image: 0.5337051153182983
Creating submission zip file 'local_runs\Task1EfficientNetB0Extended_submission_2024-07-24_15-04-17.zip'
-- Adding file metadata
-- Adding file model.py
-- Adding file Task1EfficientNetB0Extended_weights_2024-07-24_15-04-17.pth
Submission zip file 'local_runs\Task1EfficientNetB0Extended_submission_2024-07-24_15-04-17.zip' created successfully.


In [None]:
if use_wandb:
    wandb.finish()  # finish the run

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,█▄▃▄▃▃▃▂▂▂▂▂▁▂▂▁▂▂▂▁▁▁▂▂▂▂▂▂▂▁
auprc,▁▅▅▅▄▅▆▆▇▇▇▇▇▇███▇██▇▇▇▇▇▇▇▇▇▇
auroc,▁▅▄▄▄▅▆▆▇▇▇▆▇▇███▇██▇▇▇▇▇▇▇▇▇▇
avg_train_loss,█▆▅▆▅▄▅▄▃▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▃▃▃▃▂▂▂▂▁▁▁▂▁▂▁▂▂▁▁▁▁▂▂▂▂▂▂▂▂
sensitivity,▃▅▁▄▂▃▂▅▄▅▂▃▄█▂▂▃▄▄▄▃▃▂▃▂▃▃▅▂▃
specificity,▆▇█▇▇▇█▄█▇██▆▁█████████████▆██

0,1
accuracy,0.0
auprc,0.9831
auroc,0.97773
avg_train_loss,0.02176
avg_val_loss,0.23098
sensitivity,0.86957
specificity,1.0
