In [1]:
import os
import pandas as pd
from PIL import Image
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score
import torch.nn.functional as F
import random

## Task 4. Ensemble Method

Task 4 combines the fine-tuned models from Task 3.1 (ResNet18 SE and EfficientNet SE)
using a simple probability-averaging ensemble. No retraining is needed: each model
produces predictions, and the ensemble output is the mean of their probabilities.

Several helper classes and functions (datasets, model builders, attention blocks, losses, and metrics) are reused from previous tasks.





In [2]:
# ========================
# Dataset preparation
# ========================
class RetinaMultiLabelDataset(Dataset):
    """Labeled retina images described by a CSV file.

    The first CSV column holds the image file name (joined onto ``image_dir``);
    all remaining columns of a row are returned as one float32 label tensor.
    """

    def __init__(self, csv_file, image_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        record = self.data.iloc[idx]
        file_name = record.iloc[0]
        # Always hand a 3-channel image to the transform.
        image = Image.open(os.path.join(self.image_dir, file_name)).convert("RGB")
        # Everything after the file-name column is a label.
        targets = torch.tensor(record[1:].values.astype("float32"))
        if self.transform:
            image = self.transform(image)
        return image, targets

class RetinaTestDataset(Dataset):
    """Unlabeled retina images listed in a CSV file.

    Only the first CSV column (the image id / file name) is used. Each item is
    an ``(image, image_id)`` pair so predictions can be matched back to ids.
    """

    def __init__(self, csv_file, image_dir, transform=None):
        # first column -> id/ID
        self.ids = pd.read_csv(csv_file).iloc[:, 0].values
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, idx):
        image_id = self.ids[idx]
        image = Image.open(os.path.join(self.image_dir, image_id)).convert("RGB")
        if self.transform:
            return self.transform(image), image_id
        return image, image_id


In [3]:
# ========================
# build model
# ========================
from torchvision.models import resnet18, ResNet18_Weights
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights

def build_model(backbone="resnet18", num_classes=3, pretrained=True):
    """Create a torchvision backbone whose final linear layer has ``num_classes`` outputs.

    Args:
        backbone: "resnet18" or "efficientnet" (EfficientNet-B0).
        num_classes: number of output logits of the replaced head.
        pretrained: load the IMAGENET1K_V1 weights when True, otherwise no weights.

    Raises:
        ValueError: for any other backbone name.
    """
    if backbone == "resnet18":
        net = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1 if pretrained else None)
        net.fc = nn.Linear(net.fc.in_features, num_classes)
        return net

    if backbone == "efficientnet":
        net = efficientnet_b0(
            weights=EfficientNet_B0_Weights.IMAGENET1K_V1 if pretrained else None
        )
        head_inputs = net.classifier[1].in_features
        net.classifier[1] = nn.Linear(head_inputs, num_classes)
        return net

    raise ValueError("Unsupported backbone")


In [4]:
def get_probs_on_split(
    backbone,
    attention,
    ckpt_path,
    csv_path,
    image_dir,
    img_size,
    batch_size,
    device,
    num_heads=None,
):
    """
    Load a trained model and compute sigmoid probabilities + labels on a given split.
    Used for Task 4 ensemble.

    Args:
        backbone: backbone name passed to build_model ("resnet18" / "efficientnet"),
            or one of "vit", "vit_b16", "vit-b16" (case-insensitive).
        attention: attention type forwarded to add_attention ("none", "se", "mha").
        ckpt_path: path of the state-dict checkpoint to load.
        csv_path: CSV of the labeled split (read by RetinaMultiLabelDataset).
        image_dir: directory containing the images of the split.
        img_size: images are resized to (img_size, img_size).
        batch_size: batch size of the evaluation DataLoader.
        device: torch device used for inference.
        num_heads: optional number of attention heads; when None the default of
            add_attention is used.

    Returns:
        (labels, probs): two NumPy arrays with one row per sample, in CSV order.
    """
    # --- transforms (same as training) ---
    transform = transforms.Compose(
        [
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ]
    )

    ds = RetinaMultiLabelDataset(csv_path, image_dir, transform)
    # shuffle=False keeps the rows aligned with the CSV (and across ensemble members).
    loader = DataLoader(ds, batch_size=batch_size, shuffle=False, num_workers=0)

    # build model
    if backbone.lower() in ["vit", "vit_b16", "vit-b16"]:
        # NOTE(review): build_vit_b16_model is not defined in the visible part of
        # this notebook; this branch raises NameError unless it is provided elsewhere.
        model = build_vit_b16_model(num_classes=3, pretrained=False)
    else:
        model = build_model(backbone, num_classes=3, pretrained=False)
        # Forwarding num_heads=None would make nn.MultiheadAttention fail for "mha";
        # omit the argument instead so add_attention falls back to its own default.
        attn_kwargs = {} if num_heads is None else {"num_heads": num_heads}
        model = add_attention(model, backbone=backbone, attention=attention, **attn_kwargs)

    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    state_dict = torch.load(ckpt_path, map_location=device)
    # strict=False tolerates key mismatches, so surface them: a silently
    # unmatched checkpoint would leave (parts of) the model randomly initialised.
    load_result = model.load_state_dict(state_dict, strict=False)
    if load_result.missing_keys or load_result.unexpected_keys:
        print(
            f"WARNING: checkpoint {ckpt_path} does not fully match the model "
            f"(missing keys: {len(load_result.missing_keys)}, "
            f"unexpected keys: {len(load_result.unexpected_keys)})"
        )
    model.to(device)
    model.eval()

    all_probs = []
    all_labels = []

    with torch.no_grad():
        for imgs, labels in loader:
            imgs = imgs.to(device)
            outputs = model(imgs)
            probs = torch.sigmoid(outputs).cpu().numpy()
            all_probs.extend(probs)
            all_labels.extend(labels.numpy())

    return np.array(all_labels), np.array(all_probs)


In [5]:
def ensemble_offsite_task4(
    models_cfg,
    offsite_csv,
    offsite_img_dir,
    img_size,
    batch_size,
    device,
    weights=None,
):
    """
    Task 4 ensemble: weighted average of probabilities from multiple trained models.
    models_cfg: list of dicts, each like
        {
            "name": "resnet18_se",
            "backbone": "resnet18",
            "attention": "se",
            "ckpt_path": "./checkpoints/task3/csu_task3_1_resnet18_se.pt",
            "num_heads": None,  # if you want to extend
        }
    weights: list or None. If None -> equal weights. Otherwise one weight per
        entry of models_cfg (same order); the weights are normalised to sum to 1.

    Returns the DataFrame produced by evaluating_metrics for the ensemble.

    Raises:
        ValueError: if models_cfg is empty, if the number of weights differs from
            the number of models, or if the weights do not have a finite,
            non-zero sum.
    """
    # --- validate the ensemble definition before any model is loaded ---
    num_models = len(models_cfg)
    if num_models == 0:
        raise ValueError("models_cfg must contain at least one ensemble member")

    if weights is None:
        weights = np.ones(num_models, dtype=np.float32) / num_models
    else:
        weights = np.array(weights, dtype=np.float32)
        if weights.shape != (num_models,):
            raise ValueError(
                f"Expected {num_models} ensemble weights, got shape {weights.shape}"
            )
        weight_sum = weights.sum()
        if not np.isfinite(weight_sum) or weight_sum == 0:
            raise ValueError("Ensemble weights must have a finite, non-zero sum")
        weights = weights / weight_sum

    probs_list = []
    y_true_ref = None

    # --- collect probs from each model ---
    for cfg in models_cfg:
        backbone  = cfg["backbone"]
        attention = cfg.get("attention", "none")
        ckpt_path = cfg["ckpt_path"]

        print(f"Ensemble member: {backbone} | {attention} | {ckpt_path}")

        # NOTE(review): cfg["num_heads"] is not forwarded to get_probs_on_split here.
        y_true, probs = get_probs_on_split(
            backbone=backbone,
            attention=attention,
            ckpt_path=ckpt_path,
            csv_path=offsite_csv,
            image_dir=offsite_img_dir,
            img_size=img_size,
            batch_size=batch_size,
            device=device,
        )

        if y_true_ref is None:
            y_true_ref = y_true
        else:
            # sanity check: labels should match across models
            assert np.array_equal(y_true_ref, y_true), "Label mismatch between ensemble members!"

        probs_list.append(probs)

    probs_stack = np.stack(probs_list, axis=0)   # [num_models, N, 3]

    # weighted average over first axis (models)
    ensemble_probs = np.tensordot(weights, probs_stack, axes=(0, 0))  # [N, 3]

    # global threshold 0.5; ">=" matches the comparison used when the Kaggle
    # submission is generated, so a probability of exactly 0.5 is treated the same.
    y_pred_ens = (ensemble_probs >= 0.5).astype(int)

    df_offsite_ens = evaluating_metrics(
        y_true_ref,
        y_pred_ens,
        backbone="Ensemble",
        task_name="task4_ensemble",
        split_name="offsite",
    )

    return df_offsite_ens


In [6]:
# FocalLoss
class FocalLoss(nn.Module):
    """Focal loss on top of per-element sigmoid BCE (multi-label setting).

    Each element's BCE is scaled by ``(1 - p_t) ** gamma`` and, when ``alpha`` is
    given, by ``alpha`` for positive targets / ``1 - alpha`` for negative ones.
    ``alpha`` may be a scalar or a per-class sequence of length C.
    """

    def __init__(self, alpha=None, gamma=2.0, reduction="mean"):
        super().__init__()
        self.gamma = gamma
        self.reduction = reduction
        self.alpha = None if alpha is None else torch.tensor(alpha, dtype=torch.float32)

    def forward(self, logits, targets):
        """
        logits: [B, C] raw model outputs
        targets: [B, C] in {0,1}
        """
        elementwise_bce = F.binary_cross_entropy_with_logits(
            logits, targets, reduction="none"
        )
        probs = torch.sigmoid(logits)
        # probability the model assigns to the true state of each label
        p_t = probs * targets + (1 - probs) * (1 - targets)

        alpha_t = 1.0
        if self.alpha is not None:
            alpha = self.alpha.to(logits.device)
            if alpha.dim() == 1:
                # per-class alpha: make it broadcast over the batch, [1, C]
                alpha = alpha.view(1, -1)
            alpha_t = alpha * targets + (1 - alpha) * (1 - targets)

        # focal modulation down-weights elements that are already well classified
        loss = alpha_t * (1.0 - p_t) ** self.gamma * elementwise_bce  # [B, C]

        if self.reduction == "mean":
            return loss.mean()
        if self.reduction == "sum":
            return loss.sum()
        return loss  # [B, C]


In [7]:
# ClassBalancedBCELoss
def compute_class_frequency_weights_from_csv(train_csv_path, num_classes=3):
    """Derive per-class loss weights from the label frequencies in a training CSV.

    The first CSV column is skipped (ID); the next ``num_classes`` columns are
    treated as 0/1 labels. Each weight is the inverse of the class's positive
    frequency, rescaled so that the weights average to 1.

    Returns:
        float32 tensor of shape [num_classes].
    """
    table = pd.read_csv(train_csv_path)
    class_columns = table.columns[1 : 1 + num_classes]  # skip ID
    positives = table[class_columns].sum(axis=0).values.astype(np.float32)

    # positive frequency per class (epsilon guards an empty table)
    positive_rate = positives / (len(table) + 1e-6)

    # inverse frequency, normalised to mean 1 (epsilon guards a class with no positives)
    weights = 1.0 / (positive_rate + 1e-6)
    weights = weights / weights.mean()

    return torch.tensor(weights, dtype=torch.float32)
class ClassBalancedBCELoss(nn.Module):
    """Sigmoid BCE where each class column is scaled by a fixed weight.

    ``class_weights`` is a tensor with one entry per class; it is moved to the
    device of the logits on every call.
    """

    def __init__(self, class_weights, reduction="mean"):
        super().__init__()
        self.class_weights = class_weights
        self.reduction = reduction

    def forward(self, logits, targets):
        """
        logits: [B, C]
        targets: [B, C]
        """
        per_class_w = self.class_weights.to(logits.device).view(1, -1)  # [1, C]
        weighted = (
            F.binary_cross_entropy_with_logits(logits, targets, reduction="none")
            * per_class_w
        )

        if self.reduction == "mean":
            return weighted.mean()
        if self.reduction == "sum":
            return weighted.sum()
        return weighted


In [8]:
# Squeeze-and-Excitation
class SEBlock(nn.Module):
    """
    Squeeze-and-Excitation for 2D feature maps: (B, C, H, W) -> (B, C, H, W).

    Global-average-pools each channel, passes the result through a two-layer
    bottleneck (C -> C // reduction -> C) ending in a sigmoid, and rescales the
    input channels with the resulting gates.
    """
    def __init__(self, channels, reduction=16):
        super().__init__()
        bottleneck = channels // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channels, bottleneck, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(bottleneck, channels, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        batch, chans, _height, _width = x.size()
        squeezed = self.avg_pool(x).view(batch, chans)          # (B, C)
        gates = self.fc(squeezed).view(batch, chans, 1, 1)      # (B, C, 1, 1)
        return x * gates                                        # channel-wise rescale
        
# Multi-Head Attention
class MHABlock(nn.Module):
    """
    Multi-head self-attention over a token sequence, followed by a residual
    connection and LayerNorm.

    Input: x of shape (B, N, C)  (N = number of spatial locations, C = embed_dim)
    Output: same shape.
    """
    def __init__(self, embed_dim, num_heads=4):
        super().__init__()
        self.mha = nn.MultiheadAttention(
            embed_dim=embed_dim,
            num_heads=num_heads,
            batch_first=True,
        )
        self.norm = nn.LayerNorm(embed_dim)

    def forward(self, x):
        # x: (B, N, C); query, key and value are all x (self-attention)
        attended, _attn_weights = self.mha(x, x, x)
        # residual connection, then normalisation
        return self.norm(x + attended)


In [9]:
# Wrap model and insert attention to it
class ResNetWithAttention(nn.Module):
    """
    Wraps a ResNet-style model and inserts SE or MHA after layer4.

    The wrapper re-uses the base model's own submodules under the same attribute
    names (conv1, bn1, ..., fc) and adds the attention block as ``attn``.

    Args:
        base_model: model exposing conv1, bn1, relu, maxpool, layer1-4, avgpool, fc.
        attention: "se" -> SEBlock, "mha" -> MHABlock; any other value disables
            attention (``attn`` is None).
        num_heads: number of heads for the MHA block (ignored otherwise).
    """
    def __init__(self, base_model, attention="se", num_heads=4):
        super().__init__()
        self.attention = attention

        # Copy ResNet structure
        self.conv1   = base_model.conv1
        self.bn1     = base_model.bn1
        self.relu    = base_model.relu
        self.maxpool = base_model.maxpool
        self.layer1  = base_model.layer1
        self.layer2  = base_model.layer2
        self.layer3  = base_model.layer3
        self.layer4  = base_model.layer4
        self.avgpool = base_model.avgpool
        self.fc      = base_model.fc

        # Number of channels after layer4. The classifier consumes the pooled
        # layer4 output, so its input width is the channel count for every block
        # type; the last block's conv2 is only correct for BasicBlock (ResNet18/34)
        # and is kept as a fallback for heads that are not a plain Linear.
        channels = getattr(self.fc, "in_features", None)
        if channels is None:
            channels = self.layer4[-1].conv2.out_channels

        if attention == "se":
            self.attn = SEBlock(channels)
        elif attention == "mha":
            self.attn = MHABlock(embed_dim=channels, num_heads=num_heads)
        else:
            self.attn = None

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)    # (B, C, H, W)

        if self.attn is not None:
            if self.attention == "se":
                x = self.attn(x)
            else:  # MHA over spatial tokens
                b, c, h, w = x.shape
                tokens = x.flatten(2).transpose(1, 2)             # (B, H*W, C)
                tokens = self.attn(tokens)
                # reshape (not view): the transposed tensor is non-contiguous
                x = tokens.transpose(1, 2).reshape(b, c, h, w)    # (B, C, H, W)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x


class EfficientNetWithAttention(nn.Module):
    """
    Wraps an EfficientNet-like model and inserts SE or MHA after features.

    The wrapper re-uses the base model's ``features`` and ``classifier`` (and
    ``avgpool`` when present) under the same attribute names and adds the
    attention block as ``attn``.

    Args:
        base_model: model exposing ``features`` and ``classifier``.
        attention: "se" -> SEBlock, "mha" -> MHABlock; any other value disables
            attention (``attn`` is None).
        num_heads: number of heads for the MHA block (ignored otherwise).

    Raises:
        ValueError: if ``classifier`` is an nn.Sequential without any nn.Linear,
            so the channel count cannot be determined.
    """
    def __init__(self, base_model, attention="se", num_heads=4):
        super().__init__()
        self.attention = attention

        self.features = base_model.features
        # Some efficientnets have .avgpool, else use AdaptiveAvgPool2d(1)
        self.avgpool = getattr(base_model, "avgpool", nn.AdaptiveAvgPool2d(1))
        self.classifier = base_model.classifier

        # Get channel dim from classifier input: the first Linear of the head
        # consumes the pooled feature vector.
        if isinstance(self.classifier, nn.Sequential):
            first_linear = next(
                (m for m in self.classifier.modules() if isinstance(m, nn.Linear)),
                None,
            )
            if first_linear is None:
                # Previously this surfaced later as an unbound local variable.
                raise ValueError(
                    "Cannot infer feature channels: classifier contains no nn.Linear"
                )
            in_features = first_linear.in_features
        else:
            in_features = self.classifier.in_features

        channels = in_features  # after global pooling

        if attention == "se":
            self.attn = SEBlock(channels)
        elif attention == "mha":
            self.attn = MHABlock(embed_dim=channels, num_heads=num_heads)
        else:
            self.attn = None

    def forward(self, x):
        x = self.features(x)              # (B, C, H, W)

        # For SE/MHA we want to operate on spatial feature map
        if self.attn is not None:
            if self.attention == "se":
                # Apply SE in 2D form
                x = self.attn(x)
            else:
                # MHA over spatial tokens
                b, c, h, w = x.shape
                tokens = x.flatten(2).transpose(1, 2)             # (B, H*W, C)
                tokens = self.attn(tokens)
                # reshape (not view): the transposed tensor is non-contiguous
                x = tokens.transpose(1, 2).reshape(b, c, h, w)    # (B, C, H, W)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


In [10]:
# Small helper to attach attention to a backbone
def add_attention(model, backbone, attention="none", num_heads=4):
    """Wrap ``model`` with the attention variant matching its backbone.

    Args:
        model: backbone network to wrap.
        backbone: "resnet18" or "efficientnet".
        attention: None or "none" returns ``model`` unchanged; any other value
            is passed on to the backbone-specific wrapper.
        num_heads: number of attention heads handed to the wrapper. ``None``
            (as found in configs that do not specify a head count) falls back
            to 4, the wrappers' own default, because nn.MultiheadAttention
            cannot be built with ``num_heads=None``.

    Raises:
        ValueError: if attention is requested for an unsupported backbone.
    """
    if attention is None or attention == "none":
        return model

    if num_heads is None:
        num_heads = 4

    if backbone == "resnet18":
        return ResNetWithAttention(model, attention=attention, num_heads=num_heads)
    elif backbone == "efficientnet":
        return EfficientNetWithAttention(model, attention=attention, num_heads=num_heads)
    else:
        raise ValueError(f"Attention wrapper not implemented for backbone: {backbone}")


In [11]:
def generate_kaggle_submission_ensemble(
    models_cfg,
    onsite_csv,
    onsite_image_dir,
    img_size=256,
    batch_size=32,
    out_csv="submission_ensemble.csv",
    threshold=0.5,
    best=False,
    weights=None,       # optional list of weights per model; if None -> equal
):
    """
    Task 4: Kaggle submission using an ensemble of trained models.

    models_cfg: list of dicts, each like:
        {
            "backbone": "resnet18",
            "attention": "se",              # "none", "se", "mha"
            "ckpt_path": "./checkpoints/task3/csu_task3_1_resnet18_se.pt",
            "num_heads": None,              # or 4/5 for mha if needed
        }
    onsite_csv: Kaggle template CSV; its first column holds the image ids.
    onsite_image_dir: directory containing the onsite test images.
    img_size / batch_size: preprocessing size and inference batch size.
    out_csv: file name written inside the "submission" directory.
    threshold: scalar, or (with best=True) per-class thresholds; a label is
        predicted when the ensemble probability is >= the threshold.
    best: when True, `threshold` is converted to a float array and a 1-D value
        is applied per class.
    weights: one weight per entry of models_cfg (same order), normalised to
        sum to 1; None means equal weights.

    The template is filled with the binary predictions in columns "D", "G", "A"
    and written to "submission/<out_csv>". Returns None.

    Raises:
        ValueError: if models_cfg is empty, if the number of weights differs from
            the number of models, or if the weights do not have a finite,
            non-zero sum.
    """
    # --- validate the ensemble definition before any I/O or model loading ---
    num_models = len(models_cfg)
    if num_models == 0:
        raise ValueError("models_cfg must contain at least one ensemble member")

    if weights is None:
        weights = np.ones(num_models, dtype=np.float32) / num_models
    else:
        weights = np.array(weights, dtype=np.float32)
        if weights.shape != (num_models,):
            raise ValueError(
                f"Expected {num_models} ensemble weights, got shape {weights.shape}"
            )
        weight_sum = weights.sum()
        if not np.isfinite(weight_sum) or weight_sum == 0:
            raise ValueError("Ensemble weights must have a finite, non-zero sum")
        weights = weights / weight_sum

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    transform = transforms.Compose(
        [
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ]
    )

    # read Kaggle template
    template = pd.read_csv(onsite_csv)
    id_col_name = template.columns[0]

    # dataset & loader (shuffle=False keeps the id order stable across members)
    test_ds = RetinaTestDataset(onsite_csv, onsite_image_dir, transform)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=0)

    # collect probs from each model
    all_model_probs = []
    ids = None

    for mi, cfg in enumerate(models_cfg):
        backbone  = cfg["backbone"]
        attention = cfg.get("attention", "none")
        ckpt_path = cfg["ckpt_path"]
        num_heads = cfg.get("num_heads", None)

        print(f"[Ensemble member {mi+1}] {backbone} | attention={attention} | ckpt={ckpt_path}")

        # build model matching training
        model = build_model(backbone, num_classes=3, pretrained=False)
        # Forwarding num_heads=None would make nn.MultiheadAttention fail for "mha";
        # omit the argument instead so add_attention falls back to its own default.
        attn_kwargs = {} if num_heads is None else {"num_heads": num_heads}
        model = add_attention(model, backbone=backbone, attention=attention, **attn_kwargs)
        model = model.to(device)

        # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
        state_dict = torch.load(ckpt_path, map_location=device)
        # strict=False tolerates key mismatches, so surface them: a silently
        # unmatched checkpoint would leave (parts of) the model randomly initialised.
        load_result = model.load_state_dict(state_dict, strict=False)
        if load_result.missing_keys or load_result.unexpected_keys:
            print(
                f"WARNING: checkpoint {ckpt_path} does not fully match the model "
                f"(missing keys: {len(load_result.missing_keys)}, "
                f"unexpected keys: {len(load_result.unexpected_keys)})"
            )
        model.eval()

        model_probs = []
        model_ids = []

        with torch.no_grad():
            for imgs, img_ids in test_loader:
                imgs = imgs.to(device)
                outputs = model(imgs)
                probs = torch.sigmoid(outputs).cpu().numpy()
                model_probs.append(probs)
                model_ids.extend(img_ids)

        model_probs = np.concatenate(model_probs, axis=0)

        # On first model, record the ID order
        if mi == 0:
            ids = np.array(model_ids)
        else:
            # sanity: make sure IDs match
            assert np.array_equal(ids, np.array(model_ids)), "ID order mismatch between ensemble models!"

        all_model_probs.append(model_probs)

    all_model_probs = np.stack(all_model_probs, axis=0)  # [num_models, N, 3]

    # weighted average over models axis (0)
    probs_all = np.tensordot(weights, all_model_probs, axes=(0, 0))  # [N, 3]

    # align to template IDs
    template_ids = template[id_col_name].values

    if not np.array_equal(template_ids, ids):
        print("WARNING: IDs in template and predictions do not match exactly in order!")
        id_to_idx = {image_id: i for i, image_id in enumerate(ids)}
        reorder_idx = [id_to_idx[x] for x in template_ids]
        probs_all = probs_all[reorder_idx, :]

    # thresholding
    if best:
        thr = np.array(threshold, dtype=float)
        if thr.ndim == 0:  # scalar threshold
            bin_preds = (probs_all >= thr).astype(int)
        else:              # per-class thresholds
            bin_preds = (probs_all >= thr.reshape(1, -1)).astype(int)
    else:
        bin_preds = (probs_all >= threshold).astype(int)

    template["D"] = bin_preds[:, 0]
    template["G"] = bin_preds[:, 1]
    template["A"] = bin_preds[:, 2]

    out_dir = "submission"
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, out_csv)
    # write to a temporary file first so a failed write never leaves a partial CSV
    tmp_path = out_path + ".tmp"
    template.to_csv(tmp_path, index=False)
    os.replace(tmp_path, out_path)

    print(f"Kaggle ensemble submission saved to: {out_path}")


In [12]:
def evaluating_metrics(y_true, y_pred, backbone, task_name,split_name):
    """Compute per-disease binary metrics and their average F1.

    ``y_true`` and ``y_pred`` are 2-D arrays whose columns 0..2 correspond to
    DR, Glaucoma and AMD. Returns a DataFrame with one row per disease
    (accuracy, precision, recall, F1, Cohen's kappa) followed by an
    "Average F1" row in which only the F1 column is filled. The header line and
    the average F1 are also printed.
    """
    disease_names = ("DR", "Glaucoma", "AMD")
    # columns shared by every row of the result table
    shared = {"Backbone": backbone, "Task": task_name, "Split": split_name}

    print(f"\n{split_name.upper()} test results for {backbone} - {task_name}")

    rows = []
    for col, disease in enumerate(disease_names):
        truth, guess = y_true[:, col], y_pred[:, col]
        rows.append({
            **shared,
            "Disease": disease,
            "Accuracy": accuracy_score(truth, guess),
            "Precision": precision_score(truth, guess, zero_division=0),
            "Recall": recall_score(truth, guess, zero_division=0),
            "F1-score": f1_score(truth, guess, zero_division=0),
            "Kappa": cohen_kappa_score(truth, guess),
        })

    per_disease_f1 = [row["F1-score"] for row in rows]
    avg_f1 = sum(per_disease_f1) / len(per_disease_f1)
    print(f"Average F1 over 3 diseases ({split_name}): {avg_f1:.4f}\n")

    rows.append({
        **shared,
        "Disease": "Average F1",
        "Accuracy": None,
        "Precision": None,
        "Recall": None,
        "F1-score": avg_f1,
        "Kappa": None,
    })

    return pd.DataFrame(rows)

## Configuration

This section defines the dataset paths, the checkpoint directory, and the key hyperparameters (image size, epochs, batch size, learning rate). Adjust these values to match your local setup before running the notebook.

In [13]:
# Configuration (edit paths here)
# All paths below are relative, so they resolve against the working directory.


# Labeled splits
train_csv = "train.csv"
val_csv = "val.csv"
offsite_test_csv = "offsite_test.csv"

# Image folders matching the three labeled CSVs above
train_img_dir = "./images/train"
val_img_dir = "./images/val"
offsite_img_dir = "./images/offsite_test"

# unlabeled onsite test (for kaggle submission)
onsite_csv = "onsite_test_submission.csv"
onsite_img_dir = "./images/onsite_test"

img_size = 256  # images are resized to img_size x img_size
# NOTE(review): epochs, lr and save_dir are not referenced elsewhere in the
# visible notebook; presumably kept from the training tasks -- confirm.
epochs = 20
batch_size = 32
lr = 1e-4
save_dir = "checkpoints"

# per-class alpha based on disease frequency
df = pd.read_csv(train_csv)
label_cols = df.columns[1:4]  # the three columns after the first (ID) column
pos_counts = df[label_cols].sum(axis=0).values.astype(np.float32)  # positives per class
total = len(df)
freq = pos_counts / (total + 1e-6)  # positive rate per class; epsilon avoids division by zero
alpha_vec = 1.0 - freq  # rarer classes get a larger alpha
alpha_vec = alpha_vec / alpha_vec.max()  # rescale so the largest alpha equals 1

In [14]:
import os
import re

def load_trained_models_from_checkpoints(root="./checkpoints"):
    """Scan ``root`` recursively for ``*.pt`` checkpoints and describe each one.

    File names (spaces ignored, ``.pt`` suffix stripped) must start with
    ``csu_<task>_<backbone>[_se|_mha]`` where ``<task>`` looks like ``task3_1``.
    Files that do not match are skipped.

    Directories and files are visited in sorted order so the returned list has
    the same order on every platform; this matters when positional ensemble
    weights are paired with the list.

    Returns:
        list of dicts with keys "task_name", "backbone", "attention"
        ("none" when the name has no attention suffix), "num_heads" and
        "ckpt_path". "num_heads" is only set for Task 3.2 MHA checkpoints
        (4 for resnet18, 5 for efficientnet) and is None otherwise.
    """
    pattern = re.compile(r"csu_(task[1-3]_[0-9])_([a-zA-Z0-9]+)(?:_(se|mha))?")
    suffix = ".pt"
    # Task 3.2 special num_heads
    task3_2_heads = {"resnet18": 4, "efficientnet": 5}

    model_list = []

    for root_dir, dirs, files in os.walk(root):
        # os.walk yields entries in arbitrary (filesystem) order; sorting dirs
        # in place also fixes the order in which sub-directories are visited.
        dirs.sort()
        for fname in sorted(files):
            if not fname.endswith(suffix):
                continue

            # Strip only the trailing extension, then ignore spaces in the name.
            clean_name = fname[: -len(suffix)].replace(" ", "")
            m = pattern.match(clean_name)
            if m is None:
                continue

            task_name = m.group(1)
            backbone = m.group(2)
            attention = m.group(3) or "none"

            if attention == "mha" and task_name == "task3_2":
                num_heads = task3_2_heads.get(backbone)
            else:
                num_heads = None

            model_list.append({
                "task_name": task_name,
                "backbone": backbone,
                "attention": attention,
                "num_heads": num_heads,
                "ckpt_path": os.path.join(root_dir, fname)
            })

    return model_list


### Ensemble — Model Selection and Evaluation

For Task 4, we load the trained checkpoints from Task 3.1 (ResNet18 SE and EfficientNet SE)
and build an ensemble using equal-weight averaging of their prediction probabilities.
The same model list is used both for offsite evaluation and for generating the final
Kaggle submission to ensure consistency.

In [15]:
trained_models = load_trained_models_from_checkpoints("./checkpoints")

# Only the Task 3.1 SE models, only resnet18 + efficientnet
models_task4 = [
    m for m in trained_models
    if m["task_name"] == "task3_1"
    and m["backbone"] in ["resnet18", "efficientnet"]
    and m["attention"] == "se"
]

# Fail early with a readable message instead of an opaque error deep inside
# the ensemble code when the checkpoints are missing.
if not models_task4:
    raise FileNotFoundError(
        "No Task 3.1 SE checkpoints (resnet18 / efficientnet) found under ./checkpoints"
    )

print("Ensemble models for Task 4:")
for m in models_task4:
    print(m)



device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Offsite evaluation and the Kaggle submission use the same member list,
# the same preprocessing size and equal weights.
df_task4 = ensemble_offsite_task4(
    models_cfg=models_task4,
    offsite_csv=offsite_test_csv,
    offsite_img_dir=offsite_img_dir,
    img_size=img_size,
    batch_size=batch_size,
    device=device,
    weights=None,
)
generate_kaggle_submission_ensemble(
    models_cfg=models_task4,
    onsite_csv=onsite_csv,
    onsite_image_dir=onsite_img_dir,
    img_size=img_size,
    batch_size=batch_size,
    out_csv="submission_task4_ensemble.csv",
    threshold=0.5,
    best=False,
    weights=None,
)

display(df_task4)


Ensemble models for Task 4:
{'task_name': 'task3_1', 'backbone': 'efficientnet', 'attention': 'se', 'num_heads': None, 'ckpt_path': './checkpoints\\task3\\csu_task3_1_efficientnet_se.pt'}
{'task_name': 'task3_1', 'backbone': 'resnet18', 'attention': 'se', 'num_heads': None, 'ckpt_path': './checkpoints\\task3\\csu_task3_1_resnet18_se.pt'}
Ensemble member: efficientnet | se | ./checkpoints\task3\csu_task3_1_efficientnet_se.pt
Ensemble member: resnet18 | se | ./checkpoints\task3\csu_task3_1_resnet18_se.pt

OFFSITE test results for Ensemble - task4_ensemble
Average F1 over 3 diseases (offsite): 0.7942

[Ensemble member 1] efficientnet | attention=se | ckpt=./checkpoints\task3\csu_task3_1_efficientnet_se.pt
[Ensemble member 2] resnet18 | attention=se | ckpt=./checkpoints\task3\csu_task3_1_resnet18_se.pt
Kaggle ensemble submission saved to: submission\submission_task4_ensemble.csv


Unnamed: 0,Backbone,Task,Split,Disease,Accuracy,Precision,Recall,F1-score,Kappa
0,Ensemble,task4_ensemble,offsite,DR,0.855,0.872483,0.928571,0.899654,0.639303
1,Ensemble,task4_ensemble,offsite,Glaucoma,0.905,0.826087,0.77551,0.8,0.737786
2,Ensemble,task4_ensemble,offsite,AMD,0.935,0.736842,0.636364,0.682927,0.646931
3,Ensemble,task4_ensemble,offsite,Average F1,,,,0.794194,
