# Biomarker detection in OLIVES using pretrained Models


### Step 1: Import data
Consistent for all models. Only change output size!

In [1]:
import os
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ExponentialLR
from torch.utils.data import Dataset, Subset, DataLoader
from torchvision import transforms, models
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, f1_score, classification_report
import numpy as np
from tqdm import tqdm

# set the size of the image according to your model needs
imageSize = 224 # ResNet works with 224x224 pixels

# Custom Dataset
class BiomarkerDataset(Dataset):
    def __init__(self, label_file, transform=None, num_frames=0):
        """
        Args:
            label_file (str): Path to the CSV file.
            transform (callable, optional): Transform to be applied on a sample.
            num_frames (int): Number of adjacent frames to use in the input sequence (1 adjacent frame -> 3 consecutive images).
        """
        self.data = pd.read_csv(label_file)
        self.transform = transform
        self.num_frames = num_frames
        
        # Exclude indices which don't have enough adjacent images
        self.valid_indices = self.data[(self.data.iloc[:, 1] > num_frames) & (self.data.iloc[:, 1] < (50-num_frames))].index.tolist()

    def __len__(self):
        # we can't use the length of the data since we have to exclude the first and last image (for num_frames=1) of each OCT scan
        return len(self.valid_indices)

    def __getitem__(self, idx):
        
        # Base path
        img_base_path = '/storage/ice1/shared/d-pace_community/makerspace-datasets/MEDICAL/OLIVES/OLIVES'
        
        # Get the actual data index
        index = self.valid_indices[idx]
        
        # Initialize
        images = []
        
        # Load a sequence of consecutive images
        for i in range(index - self.num_frames, index + self.num_frames +1):
            img_path = img_base_path + self.data.iloc[i, 0]
            img = Image.open(img_path).convert("L") # 'L' is for grayscale; can be removed!?
            
            if self.transform is not None:
                # apply data transformations (transforms it to tensor)
                img = self.transform(img)
            
            # stack torch tensor
            img = img.squeeze(0)  # Removes the first dimension if it's 1
            images.append(img)
        
        # Stack the 3 grayscale images along the channel dimension
        # Resulting tensor shape will be [3, H, W]
        images = torch.stack(images, dim=0)
        # print(images.shape) # debugging
        
        # Biomarker columns
        labels = torch.tensor(self.data.iloc[index, 2:18].astype(float), dtype=torch.float32)
        
        # Get extra clinical data
        clinical_data = {
            "Eye_ID": self.data.iloc[index, 18],
            "BCVA": self.data.iloc[index, 19],
            "CST": self.data.iloc[index, 20],
            "Patient_ID": self.data.iloc[index, 21],
        }
        
        return images, labels, clinical_data
    
    
# Define transformers

# Values for normalization taken from example paper
mean = 0.1706
std = 0.2112

# train with data augmentation
train_transformer = transforms.Compose([   
    # transforms.RandomCrop((0.7, 1.0)),  # RandomCrop between 70% to 100% of original size
    # transforms.RandomPerspective(distortion_scale=0.2, p=0.5, fill=0),  # Add perspective shift
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Adjust color properties
    transforms.RandomRotation(degrees=10, fill=0),  # Rotates randomly between + and - degree and fills new pixels with black
    transforms.RandomHorizontalFlip(p=0.5),  # Random horizontal flip
    transforms.Resize(imageSize), # Resize to models needs
    transforms.ToTensor(),  # Convert image to tensor
    transforms.Normalize(mean, std) # we have to calculate these values for our dataset
])
# train without data augmentation
test_transformer = transforms.Compose([   
    transforms.Resize(imageSize), # Resize to models needs
    transforms.CenterCrop(imageSize), # shouldn't do anything
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])


# set up train loader (just example since cross validation uses new ones)
train_dataset = BiomarkerDataset(label_file='OLIVES_Dataset_Labels/BiomarkerLabel_train_data.csv', transform=train_transformer, num_frames=1)
trainloader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4, drop_last=True, pin_memory=True)

# set up test loader (this one actually is being used)
test_dataset = BiomarkerDataset(label_file='OLIVES_Dataset_Labels/BiomarkerLabel_train_data.csv', transform=test_transformer, num_frames=1)
testloader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=32, pin_memory=True)


### Step 2: Train model
Could be easily adapted for different models. Uses cross-validation.

In [2]:
## --- Settings ---
num_epochs=30
batch_size=64
num_workers=32 # need this amount of CPUs for parallel data loading
k_folds=5
patience=5  # Number of epochs to wait for improvement

# get to cuda
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## ---- Model ----
# Import pretrained model (choose one of them based on performance)
# model = models.resnet50(pretrained=True)
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT) # this is like pre-trained true
model.name = "ResNet50"
# Adapt it to the given task (update final layer)
model.fc = nn.Linear(model.fc.in_features, 16)  # Number of output classes: 16 (biomarkers)
# shift to GPU
model = model.to(device)

# Loss function, optimizer nad Learning rate sheduler
loss_fn = nn.BCEWithLogitsLoss()  # For multi-label classification
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# optimizer = optim.Adam(model.parameters(), lr=1e-5, weight_decay=0.9) # NOT GOOD: This set every f1 score to 0!!
    # weight decay to reduce overfitting 
scheduler = ExponentialLR(optimizer, gamma=0.9)

# Creates all needed folders to store the model weights if they don't exist already
os.makedirs("ModelWeights_TempSaves", exist_ok=True)
os.makedirs(f"TrainedModels/{model.name}", exist_ok=True)



In [3]:
# --- Train/Test Loops ---
# Training loop
def train_loop(model, train_loader, optimizer, loss_fn, device):
    # Set model to train mode
    model.train()
    
    # Initialize
    running_loss = 0.0
    all_preds = []
    all_labels = []
    
    # for images, labels, _ in train_loader:
    for images, labels, _ in tqdm(train_loader, desc="Training"):
        # shift to cuda
        images = images.to(device)
        labels = labels.to(device)
        
        # Zero the parameter gradients 
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        
        # Track predictions and labels for metrics calculation
        running_loss += loss.item() * images.size(0)
        all_preds.append(outputs)
        all_labels.append(labels)
    
    # Average loss
    avg_loss = running_loss / len(train_loader.dataset)
    
    return avg_loss

# test loop
def test_loop(model, test_loader, loss_fn, device):
    # Set model to evaluation mode
    model.eval()
    
    # Initialize
    running_loss = 0.0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for images, labels, _ in test_loader:
        # for images, labels, _ in tqdm(val_loader, desc="Validating"):
            # Store labels since they won't be altered
            # all_labels.append(labels.numpy())
            
            # Shift to cuda
            images = images.to(device)
            labels = labels.to(device)
            
            # Forward pass
            outputs = model(images)
            
            # Get metrics
            loss = loss_fn(outputs, labels)
            running_loss += loss.item() * images.size(0)
            
            # Sigmoid activation to get probabilities, then threshold at 0.5 for binary classification
            preds = torch.sigmoid(outputs) > 0.5 
            # preds = (torch.sigmoid(outputs) > 0.5).int()  # Apply sigmoid and threshold at 0.5

            # Store (numpy for easier processing later)
            all_preds.append(preds.cpu().numpy())
            all_labels.append(labels.cpu().numpy())

    # Calculate average loss
    avg_loss = running_loss / len(test_loader.dataset)

    # Convert lists of predictions and labels into a 2D array where each row is a sample, each column is a biomarker
    all_preds = np.concatenate(all_preds, axis=0)  # Shape: (num_samples, num_biomarkers)
    all_labels = np.concatenate(all_labels, axis=0)  # Shape: (num_samples, num_biomarkers)
    
    # Calculate F1 score for each biomarker (column) independently
#     f1_scores = []
#     for i in range(all_labels.shape[1]):  # Iterate over each biomarker
#         f1 = f1_score(all_labels[:, i], all_preds[:, i], average='binary')  # Compute F1 score for the ith biomarker
#         f1_scores.append(f1)

    # Average loss
    val_loss = running_loss / len(test_loader.dataset)
    # return val_loss, f1_scores, all_preds, all_labels
    return val_loss, all_preds, all_labels


# --- Cross-Validation ---

# Initialize object to split dataset in kfold
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=0)

fold_metrics = []
    
for fold, (train_idx, val_idx) in enumerate(kfold.split(train_dataset)):
    print(f"Fold {fold+1}/{k_folds}")

    # Split the training dataset into training and validation folds
    train_fold = Subset(train_dataset, train_idx)
    val_fold = Subset(train_dataset, val_idx)
    
    # Set up Dataloaders
    train_loader = DataLoader(train_fold, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_fold, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)

    # Reset parameters
    best_val_loss = float('inf')
    best_val_f1 = 0.0
    counter_NoImprovement = 0

    for epoch in range(num_epochs):
    # for epoch in tqdm(range(num_epochs), desc="Training Epochs", unit="epoch"):
        print(f"Epoch {epoch+1}/{num_epochs}")

        # Train the model for one epoch
        train_loss = train_loop(model, train_loader, optimizer, loss_fn, device)
        print(f"Train Loss: {train_loss:.4f}")

        # Validate the model after training using validation fold
        val_loss, all_preds, all_labels = test_loop(model, val_loader, loss_fn, device)
        val_f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)
        print(f"Validation Loss: {val_loss:.4f}, Validation F1: {val_f1:.4f}")
        
        # Update learning rate
        scheduler.step()
        
        # Early stopping logic: Check if F1 improved
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            no_improvement = 0  # Reset counter
            torch.save(model.state_dict(), f"ModelWeights_TempSaves/best_{model.name}_fold_{fold+1}.pth")
        else:
            no_improvement += 1

        # Stop training if no improvement for 'patience' epochs
        if no_improvement >= patience:
            print("Early stopping triggered.")
            break
        
#         # Save the best model based on validation loss
#         if val_loss < best_val_loss:
#             best_val_loss = val_loss
#             torch.save(model.state_dict(), f"ModelWeights_TempSaves/best_{model.name}_fold_{fold+1}.pth")

    # Load the weights of the model with the best validationg loss
    model.load_state_dict(torch.load(f"ModelWeights_TempSaves/best_{model.name}_fold_{fold+1}.pth", weights_only=True))
 
    # Get Accuracy
    # preds = (torch.sigmoid(torch.tensor(all_preds)) > 0.5).int()  # Apply sigmoid and threshold at 0.5
    val_accuracy = accuracy_score(all_labels, all_preds)
    fold_metrics.append(val_accuracy)
    print(f"Validation Accuracy for Fold {fold+1}: {val_accuracy:.4f}")

avg_accuracy = np.mean(fold_metrics)
print(f"\nAverage Accuracy over all folds: {avg_accuracy:.4f}")


Fold 1/5
Epoch 1/30


Training: 100%|██████████| 90/90 [00:10<00:00,  8.55it/s]


Train Loss: 0.3047
Validation Loss: 0.2011, Validation F1: 0.6926
Epoch 2/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.56it/s]

Train Loss: 0.1741





Validation Loss: 0.1617, Validation F1: 0.7725
Epoch 3/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.15it/s]

Train Loss: 0.1425





Validation Loss: 0.1419, Validation F1: 0.8074
Epoch 4/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.08it/s]

Train Loss: 0.1220





Validation Loss: 0.1272, Validation F1: 0.8427
Epoch 5/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.66it/s]

Train Loss: 0.1077





Validation Loss: 0.1174, Validation F1: 0.8493
Epoch 6/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.13it/s]

Train Loss: 0.0976





Validation Loss: 0.1083, Validation F1: 0.8633
Epoch 7/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.27it/s]

Train Loss: 0.0886





Validation Loss: 0.1028, Validation F1: 0.8693
Epoch 8/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.14it/s]

Train Loss: 0.0822





Validation Loss: 0.0974, Validation F1: 0.8840
Epoch 9/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.87it/s]

Train Loss: 0.0777





Validation Loss: 0.0927, Validation F1: 0.8902
Epoch 10/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.37it/s]

Train Loss: 0.0728





Validation Loss: 0.0914, Validation F1: 0.8916
Epoch 11/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.19it/s]

Train Loss: 0.0678





Validation Loss: 0.0913, Validation F1: 0.8960
Epoch 12/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.77it/s]

Train Loss: 0.0650





Validation Loss: 0.0897, Validation F1: 0.8990
Epoch 13/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.19it/s]

Train Loss: 0.0619





Validation Loss: 0.0892, Validation F1: 0.8972
Epoch 14/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.37it/s]

Train Loss: 0.0588





Validation Loss: 0.0865, Validation F1: 0.8997
Epoch 15/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.60it/s]

Train Loss: 0.0573





Validation Loss: 0.0872, Validation F1: 0.9022
Epoch 16/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.33it/s]

Train Loss: 0.0540





Validation Loss: 0.0887, Validation F1: 0.8972
Epoch 17/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.29it/s]

Train Loss: 0.0518





Validation Loss: 0.0826, Validation F1: 0.9046
Epoch 18/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.98it/s]

Train Loss: 0.0502





Validation Loss: 0.0858, Validation F1: 0.9024
Epoch 19/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.76it/s]

Train Loss: 0.0494





Validation Loss: 0.0857, Validation F1: 0.9037
Epoch 20/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.06it/s]

Train Loss: 0.0485





Validation Loss: 0.0819, Validation F1: 0.9110
Epoch 21/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.90it/s]

Train Loss: 0.0477





Validation Loss: 0.0849, Validation F1: 0.9088
Epoch 22/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.56it/s]

Train Loss: 0.0454





Validation Loss: 0.0837, Validation F1: 0.9059
Epoch 23/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.08it/s]

Train Loss: 0.0454





Validation Loss: 0.0838, Validation F1: 0.9053
Epoch 24/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.55it/s]

Train Loss: 0.0430





Validation Loss: 0.0853, Validation F1: 0.9034
Epoch 25/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.28it/s]

Train Loss: 0.0440





Validation Loss: 0.0848, Validation F1: 0.9081
Early stopping triggered.
Validation Accuracy for Fold 1: 0.6074
Fold 2/5
Epoch 1/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.92it/s]

Train Loss: 0.0558





Validation Loss: 0.0419, Validation F1: 0.9529
Epoch 2/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.17it/s]

Train Loss: 0.0545





Validation Loss: 0.0408, Validation F1: 0.9540
Epoch 3/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.18it/s]

Train Loss: 0.0533





Validation Loss: 0.0417, Validation F1: 0.9525
Epoch 4/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.69it/s]

Train Loss: 0.0522





Validation Loss: 0.0429, Validation F1: 0.9523
Epoch 5/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.00it/s]

Train Loss: 0.0507





Validation Loss: 0.0421, Validation F1: 0.9533
Epoch 6/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.99it/s]

Train Loss: 0.0503





Validation Loss: 0.0445, Validation F1: 0.9472
Epoch 7/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.35it/s]

Train Loss: 0.0502





Validation Loss: 0.0429, Validation F1: 0.9500
Early stopping triggered.
Validation Accuracy for Fold 2: 0.7670
Fold 3/5
Epoch 1/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.70it/s]

Train Loss: 0.0535





Validation Loss: 0.0423, Validation F1: 0.9499
Epoch 2/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.24it/s]

Train Loss: 0.0525





Validation Loss: 0.0423, Validation F1: 0.9503
Epoch 3/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.54it/s]

Train Loss: 0.0525





Validation Loss: 0.0421, Validation F1: 0.9522
Epoch 4/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.80it/s]

Train Loss: 0.0512





Validation Loss: 0.0429, Validation F1: 0.9499
Epoch 5/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.58it/s]

Train Loss: 0.0496





Validation Loss: 0.0428, Validation F1: 0.9521
Epoch 6/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.26it/s]

Train Loss: 0.0499





Validation Loss: 0.0421, Validation F1: 0.9504
Epoch 7/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.57it/s]

Train Loss: 0.0495





Validation Loss: 0.0428, Validation F1: 0.9481
Epoch 8/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.72it/s]

Train Loss: 0.0497





Validation Loss: 0.0438, Validation F1: 0.9495
Early stopping triggered.
Validation Accuracy for Fold 3: 0.7593
Fold 4/5
Epoch 1/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.75it/s]

Train Loss: 0.0524





Validation Loss: 0.0403, Validation F1: 0.9524
Epoch 2/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.26it/s]

Train Loss: 0.0514





Validation Loss: 0.0401, Validation F1: 0.9546
Epoch 3/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.55it/s]

Train Loss: 0.0507





Validation Loss: 0.0399, Validation F1: 0.9558
Epoch 4/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.63it/s]

Train Loss: 0.0507





Validation Loss: 0.0397, Validation F1: 0.9562
Epoch 5/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.87it/s]

Train Loss: 0.0500





Validation Loss: 0.0403, Validation F1: 0.9563
Epoch 6/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.95it/s]

Train Loss: 0.0502





Validation Loss: 0.0424, Validation F1: 0.9530
Epoch 7/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.92it/s]

Train Loss: 0.0500





Validation Loss: 0.0410, Validation F1: 0.9532
Epoch 8/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.91it/s]

Train Loss: 0.0500





Validation Loss: 0.0412, Validation F1: 0.9550
Epoch 9/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.26it/s]

Train Loss: 0.0500





Validation Loss: 0.0417, Validation F1: 0.9533
Epoch 10/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.97it/s]

Train Loss: 0.0496





Validation Loss: 0.0405, Validation F1: 0.9549
Early stopping triggered.
Validation Accuracy for Fold 4: 0.7831
Fold 5/5
Epoch 1/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.01it/s]

Train Loss: 0.0506





Validation Loss: 0.0388, Validation F1: 0.9525
Epoch 2/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.65it/s]

Train Loss: 0.0511





Validation Loss: 0.0391, Validation F1: 0.9545
Epoch 3/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.07it/s]

Train Loss: 0.0509





Validation Loss: 0.0395, Validation F1: 0.9524
Epoch 4/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.78it/s]

Train Loss: 0.0497





Validation Loss: 0.0398, Validation F1: 0.9553
Epoch 5/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.85it/s]

Train Loss: 0.0508





Validation Loss: 0.0391, Validation F1: 0.9551
Epoch 6/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.73it/s]

Train Loss: 0.0495





Validation Loss: 0.0390, Validation F1: 0.9560
Epoch 7/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.36it/s]

Train Loss: 0.0511





Validation Loss: 0.0389, Validation F1: 0.9581
Epoch 8/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.75it/s]

Train Loss: 0.0505





Validation Loss: 0.0397, Validation F1: 0.9523
Epoch 9/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.76it/s]

Train Loss: 0.0503





Validation Loss: 0.0402, Validation F1: 0.9517
Epoch 10/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.83it/s]

Train Loss: 0.0505





Validation Loss: 0.0400, Validation F1: 0.9523
Epoch 11/30


Training: 100%|██████████| 90/90 [00:05<00:00, 16.32it/s]

Train Loss: 0.0505





Validation Loss: 0.0397, Validation F1: 0.9533
Epoch 12/30


Training: 100%|██████████| 90/90 [00:05<00:00, 15.78it/s]

Train Loss: 0.0504





Validation Loss: 0.0393, Validation F1: 0.9527
Early stopping triggered.
Validation Accuracy for Fold 5: 0.7822

Average Accuracy over all folds: 0.7398


Now Test

In [15]:
# ============================
# 5. Evaluation
# ============================

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def evaluate_model(model, loader):
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch in tqdm(loader, desc="Evaluating", unit="batch"):
            if not batch:  # Handle empty batches
                continue

            images, labels, clinical_data = batch
            images, labels = images.to(device), labels.to(device)
            outputs = model(images) # .logits
            y_true.append(labels.cpu().numpy())
            y_pred.append(outputs.cpu().numpy())
    y_true = np.vstack(y_true)
    y_pred = np.vstack(y_pred)
    return y_true, y_pred

# Evaluate
y_true, y_pred = evaluate_model(model, testloader)

# Convert predicted probabilities to binary predictions
y_pred_binary = (sigmoid(y_pred) > 0.5).astype(int)
# y_pred_binary = (y_pred > 0.5).astype(int)

# Ensure `y_true` is binary
y_true_binary = (y_true > 0.5).astype(int)

# Metrics
report = classification_report(y_true_binary, y_pred_binary,zero_division=0)
report_data = classification_report(y_true_binary, y_pred_binary, output_dict=True,zero_division=0) # this is not clean to print but easier to extract
weighted_f1 = report_data['weighted avg']['f1-score']
samples_f1 = report_data['samples avg']['f1-score']
print("Classification Report:")
print(report)

# Store the predicitons in a csv file
# get the biomarker names
BiomarkerLabel_df = pd.read_csv('OLIVES_Dataset_Labels/BiomarkerLabel_train_data.csv')
biomarkers = BiomarkerLabel_df.columns[2:18] # Extract label columns names (biomarkers)

# Convert the predictions into a pandas DataFrame with biomarker names as columns
df_predictions = pd.DataFrame(y_pred_binary, columns=biomarkers)

# Add the "Index" name (as the row index)
df_predictions.insert(0, "Index", df_predictions.index.to_series().apply(lambda x: f"{x+1:04d}"))

# Save the DataFrame to a CSV file
df_predictions.to_csv("predictions_biomarkers.csv",  index=False)

Evaluating: 100%|██████████| 112/112 [00:03<00:00, 37.07batch/s]


Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.62      0.75        69
           1       0.90      0.65      0.76       516
           2       0.89      0.86      0.88        29
           3       0.96      0.66      0.78       277
           4       0.93      0.93      0.93      4699
           5       0.98      0.97      0.98      2130
           6       1.00      0.99      0.99      4068
           7       0.97      0.95      0.96       677
           8       0.90      0.89      0.90      2102
           9       0.00      0.00      0.00         7
          10       0.99      0.97      0.98      2285
          11       0.97      0.94      0.95      3028
          12       0.96      0.90      0.93       180
          13       0.00      0.00      0.00         9
          14       1.00      0.90      0.95        10
          15       0.75      0.21      0.33        57

   micro avg       0.96      0.93      0.95     20143
   

Save the trained model

In [6]:
# Save model weights to a file
torch.save(model.state_dict(), f"TrainedModels/{model.name}/{model.name}_f1weighted{weighted_f1:.4f}_k{k_folds}_e{num_epochs}_p{patience}_weights.pth")
# torch.save(model.state_dict(), f"TrainedModels/{model.name}/{model.name}_f1samples{samples_f1:.4f}_k{k_folds}_e{num_epochs}_p{patience}_weights.pth")