# Biomarker Detection in OLIVES using a Pretrained ResNet50 Model and Fusion Mechanisms for Clinical Data


### Step 1: Import Data
Consistent for all models. Only change output size!

In [1]:
import os
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ExponentialLR
from torch.utils.data import Dataset, Subset, DataLoader
from torchvision import transforms, models
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, f1_score, classification_report
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

# set the size of the image according to your model needs
imageSize = 224 # ResNet works with 224x224 pixels

# Custom Dataset
class BiomarkerDataset(Dataset):
    def __init__(self, label_file, transform=None, num_frames=0):
        """
        Args:
            label_file (str): Path to the CSV file.
            transform (callable, optional): Transform to be applied on a sample.
            num_frames (int): Number of adjacent frames to use in the input sequence (1 adjacent frame -> 3 consecutive images).
        """
        self.data = pd.read_csv(label_file)
        self.transform = transform
        self.num_frames = num_frames
        
        # Normalization
        self.bcva_mean = self.data.iloc[:, 19].mean()
        self.bcva_std = self.data.iloc[:, 19].std()
        self.cst_mean = self.data.iloc[:, 20].mean()
        self.cst_std = self.data.iloc[:, 20].std()

        
        # Exclude indices which don't have enough adjacent images
        self.valid_indices = self.data[(self.data.iloc[:, 1] > num_frames) & (self.data.iloc[:, 1] < (50-num_frames))].index.tolist()

    def __len__(self):
        # we can't use the length of the data since we have to exclude the first and last image (for num_frames=1) of each OCT scan
        return len(self.valid_indices)

    def __getitem__(self, idx):
        
        # Base path
        img_base_path = '/storage/ice1/shared/d-pace_community/makerspace-datasets/MEDICAL/OLIVES/OLIVES'
        
        # Get the actual data index
        index = self.valid_indices[idx]
        
        # Initialize
        images = []
        
        # Load a sequence of consecutive images
        for i in range(index - self.num_frames, index + self.num_frames +1):
            img_path = img_base_path + self.data.iloc[i, 0]
            img = Image.open(img_path).convert("L") # 'L' is for grayscale; can be removed!?
            
            if self.transform is not None:
                # apply data transformations (transforms it to tensor)
                img = self.transform(img)
            
            # stack torch tensor
            img = img.squeeze(0)  # Removes the first dimension if it's 1
            images.append(img)
        
        # Stack the 3 grayscale images along the channel dimension
        # Resulting tensor shape will be [3, H, W]
        images = torch.stack(images, dim=0)
        # print(images.shape) # debugging
        
        # Biomarker columns
        labels = torch.tensor(self.data.iloc[index, 2:18].astype(float), dtype=torch.float32)
        
        # Get clinical data:
        eye_id = self.data.iloc[index, 18]
        bcva = self.data.iloc[index, 19]
        cst = self.data.iloc[index, 20]
        patient_id = self.data.iloc[index, 21]
        
        # Normalize
        bcva = (bcva - self.bcva_mean) / self.bcva_std
        cst = (cst - self.cst_mean) / self.cst_std

        # Convert clinical data to tensor
        clinical_data = torch.tensor([eye_id, bcva, cst, patient_id], dtype=torch.float32)
        
        return images, labels, clinical_data
    
    
# Define transformers

# Values for normalization taken from example paper
mean = 0.1706
std = 0.2112

# train with data augmentation
train_transformer = transforms.Compose([
    # WORSE PERFORMANCE # transforms.RandomPerspective(distortion_scale=0.1, p=0.5, fill=0),  # Add perspective shift
    # WORSE PERFORMANCE # transforms.RandomResizedCrop(size=imageSize, scale=(0.9, 1.0)), # RandomCrop between 70% to 100% of original size
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Adjust color properties
    transforms.RandomHorizontalFlip(p=0.5),  # Random horizontal flip
    transforms.RandomRotation(degrees=10, fill=0),  # Rotates randomly between + and - degree and fills new pixels with black
    transforms.Resize(imageSize), # Resize to models needs
    transforms.ToTensor(),  # Convert image to tensor
    transforms.Normalize(mean, std) # we have to calculate these values for our dataset
])
# train without data augmentation
test_transformer = transforms.Compose([   
    transforms.Resize(imageSize), # Resize to models needs
    transforms.CenterCrop(imageSize), # shouldn't do anything
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])



# set up train loader (just example since cross validation uses new ones)
train_dataset = BiomarkerDataset(label_file='OLIVES_Dataset_Labels/BiomarkerLabel_train_data.csv', transform=train_transformer, num_frames=1)
trainloader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4, drop_last=True, pin_memory=True)

# set up test loader (this one actually is being used)
test_dataset = BiomarkerDataset(label_file='OLIVES_Dataset_Labels/BiomarkerLabel_train_data.csv', transform=test_transformer, num_frames=1)
testloader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=16, pin_memory=True)


### Step 2: Train Model
First we initialize our model as well as some training parameters.

In [2]:
## --- Settings ---
num_epochs=40
batch_size=64
num_workers=16 # need this amount of CPUs for parallel data loading
k_folds=5
patience=8  # Number of epochs to wait for improvement

# get to cuda
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## ---- Fusion model ----
class FusionModel(nn.Module):
    def __init__(self, num_eyeID, num_patientID):
        super(FusionModel, self).__init__()
        
        # Image feature extractor
        self.image_model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        self.image_model.fc = nn.Linear(self.image_model.fc.in_features, 16)  # Adapt it to the given task (update final layer)
        # Load the saved model weights
        self.image_model.load_state_dict(torch.load('TrainedModels/ResNet50/ResNet50_f1w0.9504_k5_e35_p7_weights.pth', weights_only=True))
        # Freeze the weights so they are not updated during training
#         for param in self.image_model.parameters():
#             param.requires_grad = False
        
        # Embedding layers for categorical data
        # These are only categorial data. Therefore just using the numerical value doesn't fit
        # The only difference a high patient ID f.e. should make, is that it's a new patient. Shouldn't lead to higher percentages of a biomaker
        # Alternativ: One-Hot Encoding
        self.patient_id_embedding = nn.Embedding(num_patientID, 16) # 16-dim embedding
        self.eye_id_embedding = nn.Embedding(num_eyeID, 16)
        
        # Fully connected layers for numerical clinical data (e.g., BCVA, CST)
        self.clinical_fc = nn.Sequential(
            nn.Linear(2, 16),
            nn.ReLU(),
            nn.Dropout(0.3)
        )
        
        # Combine and compress all clinical data
        self.clinical_fusion_fc = nn.Sequential(
            nn.Linear(16 + 16 + 16, 64),  # Combine clinical data
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 16)
        )
        
        # Attention mechanism
        self.attention = nn.Sequential(
            nn.Linear(16 + 16, 32),  # Combined image and clinical features
            nn.ReLU(),
            nn.Linear(32, 2),        # Output: attention weights for image and clinical features
            nn.Softmax(dim=1)        # Normalize weights
        )
        
        # Final fusion layers
        self.fusion_fc = nn.Sequential(
            nn.Linear(16, 64),  # Combine weighted features
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 16)  # 16 output biomarkers
        )
    
    def forward(self, image, clinical_data):
        # Extract features from the image using ResNet50
        image_features = self.image_model(image)
        
        # Clinical Data
        # Split clinical data into components
        eye_id = clinical_data[:, 0].long()
        bcva = clinical_data[:, 1].view(-1, 1).float()
        cst = clinical_data[:, 2].view(-1, 1).float()
        patient_id = clinical_data[:, 3].long()
        # Embed categorical clinical data
        eye_id_features = self.eye_id_embedding(eye_id).squeeze(1)
        patient_id_features = self.patient_id_embedding(patient_id).squeeze(1)
        # Process numerical clinical data
        numerical_features = torch.cat([bcva, cst], dim=1)
        clinical_features = self.clinical_fc(numerical_features)
        # Combine all clinical features
        combined_clinical_features = torch.cat([patient_id_features, eye_id_features, clinical_features], dim=1)
        # Compress combined clinical features
        fused_clinical_features = self.clinical_fusion_fc(combined_clinical_features)
        
        # Attention Mechanism
        # Compute attention weights
        combined_features = torch.cat([image_features, fused_clinical_features], dim=1)
        attention_weights = self.attention(combined_features)
        # Apply attention weights to image and clinical features
        image_weighted = attention_weights[:, 0].unsqueeze(1) * image_features
        clinical_weighted = attention_weights[:, 1].unsqueeze(1) * fused_clinical_features       
        # Combine weighted features
        fused_features = image_weighted + clinical_weighted
   
        # Final prediction through fusion layers
        output = self.fusion_fc(fused_features)
        
        return output

# Make sure there are enough different IDs available
num_eyeID = 100
num_patientID = 300
# Create the model
model = FusionModel(num_eyeID,num_patientID).to(device)
model.name = "FusionModel_ResNet50"

# DIDN'T IMPROVE # Compute class weights
# # We need to get some information on the class balance fro the original dataset
# BiomarkerLabel_df = pd.read_csv('OLIVES_Dataset_Labels/Biomarker_Clinical_Data_Images.csv')
# BiomarkerLabel_df = BiomarkerLabel_df.fillna(0) # NaN fix
# labels = BiomarkerLabel_df.columns[2:18] # Extract label columns names (biomarkers)
# df_labels = BiomarkerLabel_df[labels]
# # Compute class weights
# class_counts = df_labels.sum(axis=0).astype(int)
# # num_classes = len(class_counts)
# total_samples = len(df_labels)
# class_weights = total_samples / class_counts
# class_weights = torch.tensor(class_weights, dtype=torch.float32)          
                                
# Loss function, optimizer nad Learning rate sheduler
# loss_fn = nn.BCEWithLogitsLoss(pos_weight=class_weights.to(device)) # Loss function for multi-label classification
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# optimizer = optim.Adam(model.parameters(), lr=1e-5, weight_decay=0.9) # NOT GOOD: This set every f1 score to 0!!
    # weight decay to reduce overfitting 
scheduler = ExponentialLR(optimizer, gamma=0.9)

# Creates all needed folders to store the model weights if they don't exist already
os.makedirs("ModelWeights_TempSaves", exist_ok=True)
os.makedirs(f"TrainedModels/{model.name}", exist_ok=True)



Now we go over to the training process where we do a cross-validation.

In [3]:
# --- Train/Test Loops ---
# Training loop
def train_loop(model, train_loader, optimizer, loss_fn, device):
    # Set model to train mode
    model.train()
    
    # Initialize
    running_loss = 0.0
    all_preds = []
    all_labels = []
    
    # for images, labels, _ in train_loader:
    for images, labels, clinical_data in tqdm(train_loader, desc="Training"):
        # shift to cuda
        images = images.to(device)
        labels = labels.to(device)
        clinical_data = clinical_data.to(device)
        
        # Zero the parameter gradients 
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(images, clinical_data)
        loss = loss_fn(outputs, labels)
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        
        # Track predictions and labels for metrics calculation
        running_loss += loss.item() * images.size(0)
        all_preds.append(outputs)
        all_labels.append(labels)
    
    # Average loss
    avg_loss = running_loss / len(train_loader.dataset)
    
    return avg_loss

# test loop
def test_loop(model, test_loader, loss_fn, device):
    # Set model to evaluation mode
    model.eval()
    
    # Initialize
    running_loss = 0.0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for images, labels, clinical_data in test_loader:
        # for images, labels, _ in tqdm(val_loader, desc="Validating"):
            # Store labels since they won't be altered
            # all_labels.append(labels.numpy())
            
            # Shift to cuda
            images = images.to(device)
            labels = labels.to(device)
            clinical_data = clinical_data.to(device)
        
            # Forward pass
            outputs = model(images, clinical_data)
            
            # Get metrics
            loss = loss_fn(outputs, labels)
            running_loss += loss.item() * images.size(0)
            
            # Sigmoid activation to get probabilities, then threshold at 0.5 for binary classification
            preds = torch.sigmoid(outputs) > 0.5 
            # preds = (torch.sigmoid(outputs) > 0.5).int()  # Apply sigmoid and threshold at 0.5

            # Store (numpy for easier processing later)
            all_preds.append(preds.cpu().numpy())
            all_labels.append(labels.cpu().numpy())

    # Calculate average loss
    avg_loss = running_loss / len(test_loader.dataset)

    # Convert lists of predictions and labels into a 2D array where each row is a sample, each column is a biomarker
    all_preds = np.concatenate(all_preds, axis=0)  # Shape: (num_samples, num_biomarkers)
    all_labels = np.concatenate(all_labels, axis=0)  # Shape: (num_samples, num_biomarkers)
    
    # Calculate F1 score for each biomarker (column) independently
#     f1_scores = []
#     for i in range(all_labels.shape[1]):  # Iterate over each biomarker
#         f1 = f1_score(all_labels[:, i], all_preds[:, i], average='binary')  # Compute F1 score for the ith biomarker
#         f1_scores.append(f1)

    # Average loss
    val_loss = running_loss / len(test_loader.dataset)
    # return val_loss, f1_scores, all_preds, all_labels
    return val_loss, all_preds, all_labels


# --- Cross-Validation ---

# Initialize object to split dataset in kfold
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=0)

fold_metrics = []
    
for fold, (train_idx, val_idx) in enumerate(kfold.split(train_dataset)):
    print(f"Fold {fold+1}/{k_folds}")

    # Split the training dataset into training and validation folds
    train_fold = Subset(train_dataset, train_idx)
    val_fold = Subset(train_dataset, val_idx)
    
    # Set up Dataloaders
    train_loader = DataLoader(train_fold, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_fold, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)

    # Reset parameters
    best_val_loss = float('inf')
    best_val_f1 = 0.0
    counter_NoImprovement = 0

    for epoch in range(num_epochs):
    # for epoch in tqdm(range(num_epochs), desc="Training Epochs", unit="epoch"):
        print(f"Epoch {epoch+1}/{num_epochs}")

        # Train the model for one epoch
        train_loss = train_loop(model, train_loader, optimizer, loss_fn, device)
        print(f"Train Loss: {train_loss:.4f}")

        # Validate the model after training using validation fold
        val_loss, all_preds, all_labels = test_loop(model, val_loader, loss_fn, device)
        val_f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)
        print(f"Validation Loss: {val_loss:.4f}, Validation F1: {val_f1:.4f}")
        
        # Update learning rate
        scheduler.step()
        
        # Early stopping logic: Check if F1 improved
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            no_improvement = 0  # Reset counter
            torch.save(model.state_dict(), f"ModelWeights_TempSaves/best_{model.name}_fold_{fold+1}.pth")
        else:
            no_improvement += 1

        # Stop training if no improvement for 'patience' epochs
        if no_improvement >= patience:
            print("Early stopping triggered.")
            break
        
#         # Save the best model based on validation loss
#         if val_loss < best_val_loss:
#             best_val_loss = val_loss
#             torch.save(model.state_dict(), f"ModelWeights_TempSaves/best_{model.name}_fold_{fold+1}.pth")

    # Load the weights of the model with the best validationg loss
    model.load_state_dict(torch.load(f"ModelWeights_TempSaves/best_{model.name}_fold_{fold+1}.pth", weights_only=True))
 
    # Get Accuracy
    # preds = (torch.sigmoid(torch.tensor(all_preds)) > 0.5).int()  # Apply sigmoid and threshold at 0.5
    val_accuracy = accuracy_score(all_labels, all_preds)
    fold_metrics.append(val_accuracy)
    print(f"Validation Accuracy for Fold {fold+1}: {val_accuracy:.4f}")

avg_accuracy = np.mean(fold_metrics)
print(f"\nAverage Accuracy over all folds: {avg_accuracy:.4f}")


Fold 1/5
Epoch 1/40


Training: 100%|██████████| 90/90 [00:10<00:00,  8.45it/s]

Train Loss: 0.5967





Validation Loss: 0.3051, Validation F1: 0.5929
Epoch 2/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.65it/s]

Train Loss: 0.3597





Validation Loss: 0.2092, Validation F1: 0.7026
Epoch 3/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.47it/s]

Train Loss: 0.2961





Validation Loss: 0.1764, Validation F1: 0.7659
Epoch 4/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.36it/s]

Train Loss: 0.2547





Validation Loss: 0.1563, Validation F1: 0.7977
Epoch 5/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.50it/s]

Train Loss: 0.2295





Validation Loss: 0.1415, Validation F1: 0.8234
Epoch 6/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.85it/s]

Train Loss: 0.2109





Validation Loss: 0.1341, Validation F1: 0.8374
Epoch 7/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.98it/s]

Train Loss: 0.1948





Validation Loss: 0.1255, Validation F1: 0.8533
Epoch 8/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.14it/s]

Train Loss: 0.1811





Validation Loss: 0.1211, Validation F1: 0.8537
Epoch 9/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.96it/s]


Train Loss: 0.1707
Validation Loss: 0.1145, Validation F1: 0.8666
Epoch 10/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.90it/s]

Train Loss: 0.1640





Validation Loss: 0.1110, Validation F1: 0.8696
Epoch 11/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.11it/s]

Train Loss: 0.1564





Validation Loss: 0.1066, Validation F1: 0.8746
Epoch 12/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.07it/s]

Train Loss: 0.1495





Validation Loss: 0.1072, Validation F1: 0.8699
Epoch 13/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.91it/s]


Train Loss: 0.1447
Validation Loss: 0.1033, Validation F1: 0.8824
Epoch 14/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.14it/s]

Train Loss: 0.1423





Validation Loss: 0.1026, Validation F1: 0.8793
Epoch 15/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.91it/s]

Train Loss: 0.1377





Validation Loss: 0.1016, Validation F1: 0.8789
Epoch 16/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.91it/s]

Train Loss: 0.1351





Validation Loss: 0.0999, Validation F1: 0.8847
Epoch 17/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.94it/s]

Train Loss: 0.1316





Validation Loss: 0.0999, Validation F1: 0.8839
Epoch 18/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.81it/s]

Train Loss: 0.1294





Validation Loss: 0.0972, Validation F1: 0.8880
Epoch 19/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.00it/s]

Train Loss: 0.1273





Validation Loss: 0.0994, Validation F1: 0.8877
Epoch 20/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.02it/s]

Train Loss: 0.1246





Validation Loss: 0.0979, Validation F1: 0.8896
Epoch 21/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.02it/s]

Train Loss: 0.1222





Validation Loss: 0.0963, Validation F1: 0.8930
Epoch 22/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.03it/s]

Train Loss: 0.1216





Validation Loss: 0.0981, Validation F1: 0.8922
Epoch 23/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.72it/s]

Train Loss: 0.1205





Validation Loss: 0.0956, Validation F1: 0.8947
Epoch 24/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.85it/s]

Train Loss: 0.1184





Validation Loss: 0.0965, Validation F1: 0.8916
Epoch 25/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.97it/s]

Train Loss: 0.1156





Validation Loss: 0.0950, Validation F1: 0.8936
Epoch 26/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.02it/s]

Train Loss: 0.1164





Validation Loss: 0.0956, Validation F1: 0.8944
Epoch 27/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.86it/s]


Train Loss: 0.1168
Validation Loss: 0.0940, Validation F1: 0.8930
Epoch 28/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.91it/s]

Train Loss: 0.1149





Validation Loss: 0.0963, Validation F1: 0.8935
Epoch 29/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.89it/s]

Train Loss: 0.1138





Validation Loss: 0.0958, Validation F1: 0.8922
Epoch 30/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.11it/s]

Train Loss: 0.1139





Validation Loss: 0.0950, Validation F1: 0.8925
Epoch 31/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.83it/s]

Train Loss: 0.1137





Validation Loss: 0.0953, Validation F1: 0.8906
Early stopping triggered.
Validation Accuracy for Fold 1: 0.5731
Fold 2/5
Epoch 1/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.82it/s]

Train Loss: 0.1272





Validation Loss: 0.0602, Validation F1: 0.9360
Epoch 2/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.90it/s]

Train Loss: 0.1246





Validation Loss: 0.0606, Validation F1: 0.9362
Epoch 3/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.09it/s]

Train Loss: 0.1242





Validation Loss: 0.0606, Validation F1: 0.9372
Epoch 4/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.06it/s]

Train Loss: 0.1245





Validation Loss: 0.0613, Validation F1: 0.9333
Epoch 5/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.10it/s]

Train Loss: 0.1239





Validation Loss: 0.0604, Validation F1: 0.9347
Epoch 6/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.89it/s]

Train Loss: 0.1219





Validation Loss: 0.0611, Validation F1: 0.9371
Epoch 7/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.03it/s]

Train Loss: 0.1218





Validation Loss: 0.0604, Validation F1: 0.9382
Epoch 8/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.05it/s]

Train Loss: 0.1232





Validation Loss: 0.0611, Validation F1: 0.9344
Epoch 9/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.99it/s]


Train Loss: 0.1206
Validation Loss: 0.0610, Validation F1: 0.9343
Epoch 10/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.75it/s]

Train Loss: 0.1200





Validation Loss: 0.0621, Validation F1: 0.9335
Epoch 11/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.75it/s]

Train Loss: 0.1203





Validation Loss: 0.0601, Validation F1: 0.9370
Epoch 12/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.98it/s]


Train Loss: 0.1227
Validation Loss: 0.0596, Validation F1: 0.9369
Epoch 13/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.92it/s]

Train Loss: 0.1214





Validation Loss: 0.0603, Validation F1: 0.9376
Epoch 14/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.68it/s]


Train Loss: 0.1210
Validation Loss: 0.0611, Validation F1: 0.9358
Epoch 15/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.79it/s]

Train Loss: 0.1204





Validation Loss: 0.0603, Validation F1: 0.9363
Early stopping triggered.
Validation Accuracy for Fold 2: 0.7355
Fold 3/5
Epoch 1/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.06it/s]

Train Loss: 0.1214





Validation Loss: 0.0649, Validation F1: 0.9329
Epoch 2/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.16it/s]

Train Loss: 0.1207





Validation Loss: 0.0657, Validation F1: 0.9294
Epoch 3/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.08it/s]

Train Loss: 0.1202





Validation Loss: 0.0654, Validation F1: 0.9301
Epoch 4/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.01it/s]

Train Loss: 0.1217





Validation Loss: 0.0657, Validation F1: 0.9295
Epoch 5/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.91it/s]


Train Loss: 0.1210
Validation Loss: 0.0651, Validation F1: 0.9316
Epoch 6/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.93it/s]

Train Loss: 0.1204





Validation Loss: 0.0660, Validation F1: 0.9270
Epoch 7/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.81it/s]

Train Loss: 0.1197





Validation Loss: 0.0658, Validation F1: 0.9297
Epoch 8/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.81it/s]

Train Loss: 0.1195





Validation Loss: 0.0652, Validation F1: 0.9294
Epoch 9/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.87it/s]


Train Loss: 0.1206
Validation Loss: 0.0662, Validation F1: 0.9282
Early stopping triggered.
Validation Accuracy for Fold 3: 0.7159
Fold 4/5
Epoch 1/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.82it/s]

Train Loss: 0.1207





Validation Loss: 0.0623, Validation F1: 0.9354
Epoch 2/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.89it/s]

Train Loss: 0.1202





Validation Loss: 0.0632, Validation F1: 0.9336
Epoch 3/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.82it/s]

Train Loss: 0.1218





Validation Loss: 0.0631, Validation F1: 0.9337
Epoch 4/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.76it/s]

Train Loss: 0.1218





Validation Loss: 0.0629, Validation F1: 0.9353
Epoch 5/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.99it/s]

Train Loss: 0.1218





Validation Loss: 0.0639, Validation F1: 0.9340
Epoch 6/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.98it/s]


Train Loss: 0.1217
Validation Loss: 0.0624, Validation F1: 0.9349
Epoch 7/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.85it/s]

Train Loss: 0.1220





Validation Loss: 0.0639, Validation F1: 0.9339
Epoch 8/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.80it/s]

Train Loss: 0.1211





Validation Loss: 0.0638, Validation F1: 0.9346
Epoch 9/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.79it/s]

Train Loss: 0.1222





Validation Loss: 0.0628, Validation F1: 0.9340
Early stopping triggered.
Validation Accuracy for Fold 4: 0.7243
Fold 5/5
Epoch 1/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.86it/s]

Train Loss: 0.1212





Validation Loss: 0.0630, Validation F1: 0.9312
Epoch 2/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.87it/s]

Train Loss: 0.1220





Validation Loss: 0.0633, Validation F1: 0.9306
Epoch 3/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.67it/s]


Train Loss: 0.1222
Validation Loss: 0.0633, Validation F1: 0.9298
Epoch 4/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.74it/s]

Train Loss: 0.1210





Validation Loss: 0.0621, Validation F1: 0.9338
Epoch 5/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.90it/s]

Train Loss: 0.1205





Validation Loss: 0.0624, Validation F1: 0.9327
Epoch 6/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.86it/s]

Train Loss: 0.1235





Validation Loss: 0.0626, Validation F1: 0.9317
Epoch 7/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.95it/s]

Train Loss: 0.1216





Validation Loss: 0.0621, Validation F1: 0.9307
Epoch 8/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.70it/s]

Train Loss: 0.1216





Validation Loss: 0.0625, Validation F1: 0.9362
Epoch 9/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.82it/s]

Train Loss: 0.1237





Validation Loss: 0.0627, Validation F1: 0.9315
Epoch 10/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.95it/s]

Train Loss: 0.1218





Validation Loss: 0.0640, Validation F1: 0.9300
Epoch 11/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.82it/s]

Train Loss: 0.1225





Validation Loss: 0.0619, Validation F1: 0.9315
Epoch 12/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.90it/s]

Train Loss: 0.1215





Validation Loss: 0.0618, Validation F1: 0.9329
Epoch 13/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.98it/s]

Train Loss: 0.1217





Validation Loss: 0.0629, Validation F1: 0.9305
Epoch 14/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.00it/s]

Train Loss: 0.1218





Validation Loss: 0.0630, Validation F1: 0.9306
Epoch 15/40


Training: 100%|██████████| 90/90 [00:09<00:00,  9.92it/s]

Train Loss: 0.1206





Validation Loss: 0.0639, Validation F1: 0.9313
Epoch 16/40


Training: 100%|██████████| 90/90 [00:08<00:00, 10.01it/s]

Train Loss: 0.1206





Validation Loss: 0.0637, Validation F1: 0.9320
Early stopping triggered.
Validation Accuracy for Fold 5: 0.7360

Average Accuracy over all folds: 0.6970


### Step 3: Test Model
Simple evaluation

In [None]:
# OPTIONAL: Import weights from a previous training
# weightsPath = "TrainedModels/FusionModel_ResNet50/FusionModel_ResNet50_f1w0.9241_f1m0.5392_k5_e40_p8_weights.pth"
# model.load_state_dict(torch.load(weightsPath, weights_only=True)) # Load weights

In [4]:

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def evaluate_model(model, loader):
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch in tqdm(loader, desc="Evaluating", unit="batch"):
            if not batch:  # Handle empty batches
                continue

            images, labels, clinical_data = batch
            images, labels, clinical_data = images.to(device), labels.to(device), clinical_data.to(device)
            # outputs = model(images) # .logits
            outputs = model(images,clinical_data) # .logits
            y_true.append(labels.cpu().numpy())
            y_pred.append(outputs.cpu().numpy())
    y_true = np.vstack(y_true)
    y_pred = np.vstack(y_pred)
    return y_true, y_pred

# Evaluate
y_true, y_pred = evaluate_model(model, testloader)

# Convert predicted probabilities to binary predictions
y_pred_binary = (sigmoid(y_pred) > 0.5).astype(int)
# y_pred_binary = (y_pred > 0.5).astype(int)

# Ensure `y_true` is binary
y_true_binary = (y_true > 0.5).astype(int)

# Metrics
report = classification_report(y_true_binary, y_pred_binary,zero_division=0)
report_data = classification_report(y_true_binary, y_pred_binary, output_dict=True,zero_division=0) # this is not clean to print but easier to extract
weighted_f1 = report_data['weighted avg']['f1-score']
macro_f1 = report_data['macro avg']['f1-score']
print("Classification Report:")
print(report)

# Store the predicitons in a csv file
# get the biomarker names
BiomarkerLabel_df = pd.read_csv('OLIVES_Dataset_Labels/BiomarkerLabel_train_data.csv')
biomarkers = BiomarkerLabel_df.columns[2:18] # Extract label columns names (biomarkers)

# Convert the predictions into a pandas DataFrame with biomarker names as columns
df_predictions = pd.DataFrame(y_pred_binary, columns=biomarkers)

# Add the "Index" name (as the row index)
df_predictions.insert(0, "Index", df_predictions.index.to_series().apply(lambda x: f"{x+1:04d}"))

# Save the DataFrame to a CSV file
df_predictions.to_csv(f"TrainedModels/{model.name}/{model.name}_f1w{weighted_f1:.4f}_f1m{macro_f1:.4f}_predicitions.csv",  index=False)

Evaluating: 100%|██████████| 112/112 [00:04<00:00, 25.80batch/s]

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        69
           1       0.94      0.52      0.67       516
           2       0.00      0.00      0.00        29
           3       0.98      0.37      0.54       277
           4       0.92      0.91      0.92      4699
           5       0.99      0.96      0.97      2130
           6       0.99      0.99      0.99      4068
           7       0.99      0.84      0.91       677
           8       0.92      0.88      0.90      2102
           9       0.00      0.00      0.00         7
          10       0.97      0.96      0.97      2285
          11       0.96      0.91      0.93      3028
          12       0.99      0.70      0.82       180
          13       0.00      0.00      0.00         9
          14       0.00      0.00      0.00        10
          15       0.00      0.00      0.00        57

   micro avg       0.96      0.91      0.93     20143
   




Save the trained model

In [5]:
# Save model weights to a file
torch.save(model.state_dict(), f"TrainedModels/{model.name}/{model.name}_f1w{weighted_f1:.4f}_f1m{macro_f1:.4f}_k{k_folds}_e{num_epochs}_p{patience}_weights.pth")
# torch.save(model.state_dict(), f"TrainedModels/{model.name}/{model.name}_f1w{weighted_f1:.4f}_k{k_folds}_e{num_epochs}_p{patience}_weights.pth")
# torch.save(model.state_dict(), f"TrainedModels/{model.name}/{model.name}_f1m{macro_f1:.4f}_k{k_folds}_e{num_epochs}_p{patience}_weights.pth")