In [1]:
import os
import pandas as pd
from PIL import Image
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score
import torch.nn.functional as F


## Task 2. Functions, Focal Loss, and Class-Balanced Loss

This section contains the core utility functions and custom loss modules used in training.  
They provide consistent handling of class imbalance and can be reused across all backbones and experimental setups.

- **Focal Loss** focuses learning on hard examples by down-weighting easy predictions.  
- **Class-Balanced BCE Loss** adjusts each class’s contribution based on inverse frequency computed from the training data.  

These components help stabilize multi-label training when classes appear with different frequencies.


In [2]:
# ========================
# Dataset preparation
# ========================
class RetinaMultiLabelDataset(Dataset):
    """Labelled image dataset backed by a CSV file.

    The first CSV column is joined onto ``image_dir`` to locate the image;
    every remaining column of the row is converted to one float32 label.
    """

    def __init__(self, csv_file, image_dir, transform=None):
        """Read ``csv_file`` eagerly and remember where the images live."""
        self.data = pd.read_csv(csv_file)
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        """Number of rows in the CSV."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, labels)`` for row ``idx``.

        The image is opened as RGB and passed through ``transform`` when one
        was supplied; ``labels`` is a float32 tensor built from columns 1+.
        """
        record = self.data.iloc[idx]
        file_name, label_values = record.iloc[0], record.iloc[1:]

        image = Image.open(os.path.join(self.image_dir, file_name)).convert("RGB")
        target = torch.tensor(label_values.values.astype("float32"))

        if self.transform:
            image = self.transform(image)
        return image, target

class RetinaTestDataset(Dataset):
    """Unlabelled image dataset: yields ``(image, image_id)`` pairs.

    Only the first CSV column is used; each value is both the identifier
    returned to the caller and the file name joined onto ``image_dir``.
    """

    def __init__(self, csv_file, image_dir, transform=None):
        """Load the id column from ``csv_file`` and store the image location."""
        frame = pd.read_csv(csv_file)
        self.ids = frame.iloc[:, 0].values  # first column -> id/ID
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        """Number of ids read from the CSV."""
        return len(self.ids)

    def __getitem__(self, idx):
        """Open image ``idx`` as RGB, apply the optional transform, return it with its id."""
        img_id = self.ids[idx]
        image = Image.open(os.path.join(self.image_dir, img_id)).convert("RGB")
        return (self.transform(image) if self.transform else image), img_id


In [3]:
# ========================
# build model
# ========================
from torchvision.models import resnet18, ResNet18_Weights
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights

def build_model(backbone="resnet18", num_classes=3, pretrained=True):
    """Create a classifier whose final linear layer emits ``num_classes`` logits.

    Args:
        backbone: ``"resnet18"`` or ``"efficientnet"`` (EfficientNet-B0).
        num_classes: output width of the replaced classification head.
        pretrained: when true, start from the IMAGENET1K_V1 weights;
            otherwise the network is built without pretrained weights.

    Returns:
        The torchvision model with its last linear layer swapped out.

    Raises:
        ValueError: if ``backbone`` is not one of the supported names.
    """
    if backbone not in ("resnet18", "efficientnet"):
        raise ValueError("Unsupported backbone")

    if backbone == "resnet18":
        net = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1 if pretrained else None)
        # ResNet exposes its head directly as ``fc``.
        net.fc = nn.Linear(net.fc.in_features, num_classes)
        return net

    net = efficientnet_b0(weights=EfficientNet_B0_Weights.IMAGENET1K_V1 if pretrained else None)
    # The linear layer is replaced at index 1 of the ``classifier`` module.
    head_in = net.classifier[1].in_features
    net.classifier[1] = nn.Linear(head_in, num_classes)
    return net


In [4]:
# FocalLoss
class FocalLoss(nn.Module):
    """Focal loss for multi-label targets, computed from raw logits.

    Each element's BCE term is scaled by ``(1 - p_t) ** gamma`` where ``p_t``
    is the probability assigned to the true label, and optionally by an
    ``alpha`` balance factor (``alpha`` on positives, ``1 - alpha`` on
    negatives).
    """

    def __init__(self, alpha=None, gamma=2.0, reduction="mean"):
        """
        alpha: None, a scalar, or a per-class sequence; stored as a float32 tensor
        gamma: focusing exponent
        reduction: "mean", "sum", or anything else for the unreduced [B, C] loss
        """
        super().__init__()
        self.gamma = gamma
        self.reduction = reduction
        self.alpha = None if alpha is None else torch.tensor(alpha, dtype=torch.float32)

    def forward(self, logits, targets):
        """
        logits: [B, C] raw model outputs
        targets: [B, C] in {0,1}
        """
        elementwise_bce = F.binary_cross_entropy_with_logits(
            logits, targets, reduction="none"
        )

        # probability the model gives to the label that is actually correct
        prob_pos = torch.sigmoid(logits)
        p_t = prob_pos * targets + (1 - prob_pos) * (1 - targets)

        if self.alpha is None:
            alpha_t = 1.0
        else:
            alpha = self.alpha.to(logits.device)
            if alpha.dim() == 1:
                # per-class alpha -> [1, C] so it broadcasts over the batch
                alpha = alpha.view(1, -1)
            alpha_t = alpha * targets + (1 - alpha) * (1 - targets)

        # easy examples (p_t near 1) are down-weighted by the modulating factor
        modulator = (1.0 - p_t) ** self.gamma
        loss = alpha_t * modulator * elementwise_bce  # [B, C]

        if self.reduction == "mean":
            return loss.mean()
        if self.reduction == "sum":
            return loss.sum()
        return loss  # [B, C]


In [5]:
# ClassBalancedBCELoss
def compute_class_frequency_weights_from_csv(train_csv_path, num_classes=3):
    """Derive per-class weights from how often each label is positive.

    Reads the CSV, sums the ``num_classes`` columns that follow the first
    (ID) column, and turns each class's positive rate into an inverse-frequency
    weight. The weights are rescaled so that their mean is 1.

    Returns:
        A float32 tensor with one weight per class.
    """
    frame = pd.read_csv(train_csv_path)
    label_block = frame.iloc[:, 1 : 1 + num_classes]  # skip ID
    positives = label_block.sum(axis=0).values.astype(np.float32)

    # small epsilons guard the two divisions (empty file / class never positive)
    positive_rate = positives / (len(frame) + 1e-6)
    weights = 1.0 / (positive_rate + 1e-6)

    # normalise so the average class weight is 1
    weights = weights / weights.mean()
    return torch.tensor(weights, dtype=torch.float32)
class ClassBalancedBCELoss(nn.Module):
    """BCE-with-logits where every class column is scaled by a fixed weight."""

    def __init__(self, class_weights, reduction="mean"):
        """
        class_weights: tensor with one weight per class
        reduction: "mean", "sum", or anything else for the unreduced [B, C] loss
        """
        super().__init__()
        self.class_weights = class_weights
        self.reduction = reduction

    def forward(self, logits, targets):
        """
        logits: [B, C]
        targets: [B, C]
        """
        elementwise = F.binary_cross_entropy_with_logits(
            logits, targets, reduction="none"
        )

        # reshape to [1, C] so the same class weight applies to every row
        per_class = self.class_weights.to(logits.device).view(1, -1)
        weighted = elementwise * per_class

        if self.reduction == "mean":
            return weighted.mean()
        if self.reduction == "sum":
            return weighted.sum()
        return weighted


In [6]:
# ========================
# model training and val
# ========================
def _run_validation(model, loader, criterion, device):
    """Run one no-grad inference pass over ``loader``.

    Returns:
        (mean_loss, probs, labels): the sample-weighted mean of ``criterion``
        over ``loader.dataset``, plus arrays stacking the per-sample sigmoid
        probabilities and the ground-truth labels in loader order.
    """
    model.eval()
    running_loss = 0.0
    probs_all, labels_all = [], []
    with torch.no_grad():
        for imgs, labels in loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            running_loss += criterion(outputs, labels).item() * imgs.size(0)
            probs_all.extend(torch.sigmoid(outputs).cpu().numpy())
            labels_all.extend(labels.cpu().numpy())
    mean_loss = running_loss / len(loader.dataset)
    return mean_loss, np.array(probs_all), np.array(labels_all)


def train_one_backbone(
    backbone,
    train_csv,
    val_csv,
    test_csv,
    train_image_dir,
    val_image_dir,
    test_image_dir,
    epochs=10,
    batch_size=32,
    lr=1e-4,
    img_size=256,
    save_dir="checkpoints",
    pretrained_backbone=None,
    task="full_ft",
    loss="bce",  # "bce", "focal", "cb"
    alpha=None,
    gamma=2.0,
):
    """Train (or just evaluate) one backbone and score it on the labelled test split.

    Args:
        backbone: name understood by ``build_model``.
        train_csv, val_csv, test_csv: label CSVs for the three splits.
        train_image_dir, val_image_dir, test_image_dir: matching image folders.
        epochs, batch_size, lr, img_size: training hyperparameters.
        save_dir: directory that receives the checkpoint.
        pretrained_backbone: optional path to a state dict loaded before training.
        task: ``"no_finetune"`` (no training), ``"cls_only"`` (only the
            classifier head is trainable); any other value is treated as
            full fine-tuning.
        loss: ``"bce"``, ``"focal"`` or ``"cb"``.
        alpha, gamma: forwarded to ``FocalLoss`` when ``loss == "focal"``.

    Returns:
        ``(ckpt_path, y_true, y_pred, val_probs, val_labels)`` where
        ``y_true``/``y_pred`` are the test labels and 0.5-thresholded test
        predictions of the saved checkpoint, and ``val_probs``/``val_labels``
        are the validation probabilities and labels produced by the SAME
        weights that were saved to ``ckpt_path``.

    Raises:
        ValueError: if ``loss`` is not a supported loss name.
    """
    # Maps the loss name to the checkpoint file prefix. Checked first so a typo
    # fails immediately with a clear message instead of after data/model setup.
    loss_file_map = {
        "bce": "task1",
        "focal": "task2_1",
        "cb": "task2_2",
    }
    if loss not in loss_file_map:
        raise ValueError(f"Unknown loss_type: {loss}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    task_name_map = {
        "no_finetune": "Task1.1 No fine-tuning",
        "cls_only": "Task1.2 Frozen backbone, classifier only",
        "full_ft": "Task1.3 Full fine-tuning",
    }

    print("===========================================")
    print(f"Task 2 |  Backbone: {backbone} | loss: {loss}")
    print("===========================================")

    # transforms
    transform = transforms.Compose(
        [
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ]
    )

    # datasets & dataloaders
    train_ds = RetinaMultiLabelDataset(train_csv, train_image_dir, transform)
    val_ds = RetinaMultiLabelDataset(val_csv, val_image_dir, transform)
    test_ds = RetinaMultiLabelDataset(test_csv, test_image_dir, transform)

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=0)
    val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=0)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=0)

    # model
    model = build_model(backbone, num_classes=3, pretrained=False).to(device)

    if pretrained_backbone is not None:
        state_dict = torch.load(pretrained_backbone, map_location="cpu")
        model.load_state_dict(state_dict)
        print(f"Loaded pretrained weights from {pretrained_backbone}")

    # set which parameters are trainable
    if task == "no_finetune":
        # everything is frozen
        for p in model.parameters():
            p.requires_grad = False
        optimizer = None
    elif task == "cls_only":
        # freeze backbone
        for p in model.parameters():
            p.requires_grad = False
        # unfreeze classifier
        if backbone == "resnet18":
            for p in model.fc.parameters():
                p.requires_grad = True
        elif backbone == "efficientnet":
            for p in model.classifier.parameters():
                p.requires_grad = True
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
    else:  # full_ft (also the fallback for any unrecognised task value)
        for p in model.parameters():
            p.requires_grad = True
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)

    # ----- choose loss function -----
    if loss == "bce":
        # plain BCE
        criterion = nn.BCEWithLogitsLoss()
    elif loss == "focal":
        # Task 2.1: Focal Loss
        criterion = FocalLoss(alpha=alpha, gamma=gamma, reduction="mean")
    else:  # "cb" -- membership was validated at the top of the function
        # Task 2.2: Class-Balanced BCE Loss
        class_weights = compute_class_frequency_weights_from_csv(train_csv, num_classes=3)
        criterion = ClassBalancedBCELoss(class_weights=class_weights, reduction="mean")

    # checkpoint path (unique per loss + backbone)
    os.makedirs(save_dir, exist_ok=True)
    task_prefix = loss_file_map[loss]
    ckpt_path = os.path.join(save_dir, f"csu_{task_prefix}_{backbone}.pt")

    # Validation outputs belonging to the weights stored at ckpt_path.
    val_probs_all = None
    val_labels_all = None

    # ========= TRAINING (only for Task1.2 and Task1.3) =========
    if task != "no_finetune":
        best_val_loss = float("inf")

        for epoch in range(epochs):
            model.train()
            train_loss = 0.0
            for imgs, labels in train_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                optimizer.zero_grad()
                # named batch_loss so the ``loss`` argument (a string) is never rebound
                batch_loss = criterion(model(imgs), labels)
                batch_loss.backward()
                optimizer.step()
                train_loss += batch_loss.item() * imgs.size(0)

            train_loss /= len(train_loader.dataset)

            # validation
            val_loss, epoch_probs, epoch_labels = _run_validation(
                model, val_loader, criterion, device
            )

            print(f"[{backbone}] Epoch {epoch+1}/{epochs} Train Loss: {train_loss:.4f} Val Loss: {val_loss:.4f}")

            # save best, and keep the validation outputs of that same epoch so
            # thresholds tuned on them match the checkpoint that is returned
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                torch.save(model.state_dict(), ckpt_path)
                val_probs_all, val_labels_all = epoch_probs, epoch_labels
                print(f"Saved best model for {backbone} ({task}) at {ckpt_path}")

        if val_probs_all is None:
            # No epoch ran (epochs <= 0) or none beat the initial best (e.g. NaN
            # validation loss). Save the current weights so the evaluation below
            # never loads a stale checkpoint left behind by an earlier run.
            torch.save(model.state_dict(), ckpt_path)
            _, val_probs_all, val_labels_all = _run_validation(
                model, val_loader, criterion, device
            )
            print(f"[{backbone}] No epoch improved validation loss; saved current weights at {ckpt_path}")
    else:
        torch.save(model.state_dict(), ckpt_path)
        _, val_probs_all, val_labels_all = _run_validation(
            model, val_loader, criterion, device
        )
        print(f"[{backbone}] {task_name_map[task]}: no training, model saved at {ckpt_path}")

    # ========= OFFSITE TEST EVALUATION =========
    model.load_state_dict(torch.load(ckpt_path, map_location=device))
    model.to(device)
    model.eval()

    y_true, y_pred = [], []

    with torch.no_grad():
        for imgs, labels in test_loader:
            imgs = imgs.to(device)
            outputs = model(imgs)
            probs = torch.sigmoid(outputs).cpu().numpy()
            preds = (probs > 0.5).astype(int)
            y_true.extend(labels.numpy())
            y_pred.extend(preds)

    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    return ckpt_path, y_true, y_pred, val_probs_all, val_labels_all



In [7]:
def generate_kaggle_submission(
    backbone,
    ckpt_path,
    onsite_csv,
    onsite_image_dir,
    img_size=256,
    batch_size=32,
    out_csv="submission.csv",
    threshold=0.5,
    best = False,
):
    """Predict on the unlabelled split and write a submission CSV.

    The template at ``onsite_csv`` is read, its ``D``/``G``/``A`` columns are
    overwritten with thresholded predictions from the checkpoint at
    ``ckpt_path``, and the result is written to ``submission/<out_csv>``.

    Args:
        backbone: architecture name passed to ``build_model``.
        ckpt_path: state-dict file to load into the model.
        onsite_csv: template CSV; its first column supplies the image ids.
        onsite_image_dir: folder containing the images named by those ids.
        img_size, batch_size: preprocessing size and inference batch size.
        out_csv: file name inside the ``submission`` directory.
        threshold: cut-off applied with ``>=``.
        best: when equal to ``True``, ``threshold`` is converted to a float
            array and, if not a scalar, reshaped to one value per class.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # resize -> tensor -> ImageNet mean/std normalisation
    imagenet_norm = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    transform = transforms.Compose(
        [transforms.Resize((img_size, img_size)), transforms.ToTensor(), imagenet_norm]
    )

    # build model and load weights
    model = build_model(backbone, num_classes=3, pretrained=False).to(device)
    weights = torch.load(ckpt_path, map_location=device)
    model.load_state_dict(weights)
    model.eval()

    # read the original Kaggle template
    template = pd.read_csv(onsite_csv)
    id_col_name = template.columns[0]  # should be 'id'

    # the loader walks the same csv, so ids normally come back in template order
    loader = DataLoader(
        RetinaTestDataset(onsite_csv, onsite_image_dir, transform),
        batch_size=batch_size,
        shuffle=False,
        num_workers=0,
    )

    seen_ids, batch_probs = [], []
    with torch.no_grad():
        for batch_imgs, batch_ids in loader:
            scores = model(batch_imgs.to(device))
            batch_probs.append(torch.sigmoid(scores).cpu().numpy())
            seen_ids.extend(batch_ids)

    probs_all = np.concatenate(batch_probs, axis=0)

    template_ids = template[id_col_name].values
    seen_ids = np.array(seen_ids)

    if not np.array_equal(template_ids, seen_ids):
        print("WARNING: IDs in template and predictions do not match exactly in order!")
        # realign prediction rows to the template's id order
        position_of = {image_id: pos for pos, image_id in enumerate(seen_ids)}
        probs_all = probs_all[[position_of[t] for t in template_ids], :]

    # convert probabilities to 0/1 labels using threshold
    if best == True:
        cutoff = np.array(threshold, dtype=float)
        if cutoff.ndim != 0:
            # per-class thresholds -> one row that broadcasts over samples
            cutoff = cutoff.reshape(1, -1)
        bin_preds = (probs_all >= cutoff).astype(int)
    else:
        bin_preds = (probs_all >= threshold).astype(int)

    # overwrite D/G/A in template; keeps int dtypes and exact structure
    for col_idx, col_name in enumerate(("D", "G", "A")):
        template[col_name] = bin_preds[:, col_idx]

    # write to a temp file first, then swap it into place
    out_dir = "submission"
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, out_csv)
    tmp_path = out_path + ".tmp"
    template.to_csv(tmp_path, index=False)
    os.replace(tmp_path, out_path)

    print(f"Kaggle submission saved to: {out_path}")


In [8]:
def evaluating_metrics(y_true, y_pred, backbone, task_name,split_name):
    """Build a per-disease metrics table for one set of predictions.

    Column ``i`` of ``y_true`` / ``y_pred`` is scored for the i-th of
    DR, Glaucoma and AMD. A final "Average F1" row carries the unweighted
    mean of the three F1 scores; its other metric cells are ``None``.

    Returns:
        A DataFrame with one row per disease plus the average row.
    """
    print(f"\n{split_name.upper()} test results for {backbone} - {task_name}")

    # identifying columns shared by every row of the table
    shared = {"Backbone": backbone, "Task": task_name, "Split": split_name}

    rows = []
    for col, disease in enumerate(("DR", "Glaucoma", "AMD")):
        truth, pred = y_true[:, col], y_pred[:, col]
        rows.append({
            **shared,
            "Disease": disease,
            "Accuracy": accuracy_score(truth, pred),
            "Precision": precision_score(truth, pred, zero_division=0),
            "Recall": recall_score(truth, pred, zero_division=0),
            "F1-score": f1_score(truth, pred, zero_division=0),
            "Kappa": cohen_kappa_score(truth, pred),
        })

    avg_f1 = sum(r["F1-score"] for r in rows) / len(rows)
    print(f"Average F1 over 3 diseases ({split_name}): {avg_f1:.4f}\n")

    rows.append({
        **shared,
        "Disease": "Average F1",
        "Accuracy": None,
        "Precision": None,
        "Recall": None,
        "F1-score": avg_f1,
        "Kappa": None,
    })

    return pd.DataFrame(rows)

## Configuration

This section defines all dataset paths, pretrained backbone locations, and key training hyperparameters. Adjust these values to match your local setup before running the experiments. The class frequency–based alpha vector is also computed here for optional use in loss functions. In the version I ran, this vector was not used, but you may enable it if you wish to experiment with it.



In [9]:
# Configuration (edit paths here)
# Every name assigned in this cell is a notebook-level global; the training
# cells further down read these values directly.


# Labeled splits
train_csv = "train.csv"
val_csv = "val.csv"
offsite_test_csv = "offsite_test.csv"

train_img_dir = "./images/train"
val_img_dir = "./images/val"
offsite_img_dir = "./images/offsite_test"

# unlabeled onsite test (for kaggle submission)
onsite_csv = "onsite_test_submission.csv"
onsite_img_dir = "./images/onsite_test"

# optional: your own pretrained backbones
pretrained_resnet18 = "./pretrained_backbone/ckpt_resnet18_ep50.pt"
pretrained_efficient = "./pretrained_backbone/ckpt_efficientnet_ep50.pt"

# Default hyperparameters. Individual training cells may pass their own
# epochs / batch_size / lr to train_one_backbone instead of these.
img_size = 256
epochs = 20
batch_size = 32
lr = 1e-4
save_dir = "checkpoints"

# per-class alpha based on disease frequency
# NOTE: this reads train_csv when the cell runs, so the file must already exist.
df = pd.read_csv(train_csv)
label_cols = df.columns[1:4]  # the three columns after the first (ID) column
pos_counts = df[label_cols].sum(axis=0).values.astype(np.float32)
total = len(df)
freq = pos_counts / (total + 1e-6)  # epsilon avoids division by zero on an empty file
# alpha grows as a class's positive rate shrinks; rescaled so the largest entry is 1.0
alpha_vec = 1.0 - freq
alpha_vec = alpha_vec / alpha_vec.max()

In [10]:
def find_best_thresholds(probs_val, y_val, disease_names=("DR","Glaucoma","AMD")):
    """Pick, for each class column, the threshold with the highest F1.

    Thresholds are searched on an 81-point grid from 0.1 to 0.9 and applied
    with a strict ``>``. On ties the lowest threshold wins. One summary line
    is printed per class.

    Returns:
        Array with one threshold per entry of ``disease_names``.
    """
    grid = np.linspace(0.1, 0.9, 81)
    chosen = []

    for col, name in enumerate(disease_names):
        truth = y_val[:, col]
        col_probs = probs_val[:, col]

        top_f1, top_t = -1, 0.5
        for candidate in grid:
            score = f1_score(truth, (col_probs > candidate).astype(int), zero_division=0)
            # strict comparison keeps the first (lowest) threshold on ties
            if score > top_f1:
                top_f1, top_t = score, candidate

        print(f"{name}: best thr={top_t:.2f}, val F1={top_f1:.4f}")
        chosen.append(top_t)

    return np.array(chosen)


## Training

This section includes all training runs used to generate the results presented in the report and submitted to Kaggle. The preliminary results and metrics shown here were used directly in the report, and the hyperparameters in the code match the exact configurations of the final experiments. Because training is stochastic, your results may not match mine exactly, but repeated runs should produce similar outcomes.


Each task is organized into its own code block. For example:

- `# Task 2.1  resnet18, focal loss`  

A separate section includes the large-scale hyperparameter search used to identify the best-performing settings. These experiments are commented out because they require substantial compute time, but the preliminary results are shown for reference.

After running multiple trials, the best-performing configuration was selected for the final model and Kaggle submission. A single run may not reproduce the final score exactly, and matching it may take a large number of repeated runs. Make sure you have enough VRAM, as some of the configurations use `batch_size = 128`.


In [128]:
# Task 2.1  resnet18, focal Loss
# Trains resnet18 with focal loss starting from `checkpoint`, writes a
# submission with a fixed 0.5 threshold, then tabulates offsite metrics.

backbone = "resnet18" 

# NOTE(review): pretrained_path is assigned here but is not passed to any call
# in this cell; training below starts from `checkpoint` instead.
if backbone == "resnet18":
    pretrained_path = pretrained_resnet18
elif backbone == "efficientnet":
    pretrained_path = pretrained_efficient
else:
    raise ValueError("unknown backbone")
    
# starting weights for fine-tuning (passed as pretrained_backbone below)
checkpoint = "./checkpoints/task1/csu_task1_2_resnet18.pt"
#offsite
ckpt_task21, y_true_offsite, y_pred_offsite, val_probs, val_labels = train_one_backbone(
    backbone=backbone,
    train_csv=train_csv,
    val_csv=val_csv,
    test_csv=offsite_test_csv,
    train_image_dir=train_img_dir,
    val_image_dir=val_img_dir,
    test_image_dir=offsite_img_dir,
    epochs=12,
    batch_size=128,
    lr=5e-4,
    img_size=img_size,
    save_dir=save_dir,
    pretrained_backbone=checkpoint,
    task="full_ft",
    loss="focal", 
    alpha =None,
    gamma = 2,
)


# Per-class thresholds are computed (and printed) from the validation outputs,
# but the submission below is generated with threshold=0.5 and best=False,
# so best_thrs is not applied in this cell.
best_thrs = find_best_thresholds(val_probs, val_labels)

#onsite
# batch_size here is the global from the configuration cell, not the 128 used for training
generate_kaggle_submission(
    backbone=backbone,
    ckpt_path=ckpt_task21,
    onsite_csv=onsite_csv,          
    onsite_image_dir=onsite_img_dir,
    img_size=img_size,
    batch_size=batch_size,
    out_csv=f"submission_{backbone}_task2_1.csv",
    threshold = 0.5,
    best = False,
)

df_offsite = evaluating_metrics(
    y_true=y_true_offsite,
    y_pred=y_pred_offsite,
    backbone=backbone,
    task_name="full_ft",
    split_name="offsite",
)


torch.cuda.empty_cache()
# last expression of the cell: displays the metrics table
df_offsite

cuda
Task 2 |  Backbone: resnet18 | loss: focal
Loaded pretrained weights from ./checkpoints/task1/csu_task1_2_resnet18.pt
[resnet18] Epoch 1/12 Train Loss: 0.1121 Val Loss: 0.2575
Saved best model for resnet18 (full_ft) at checkpoints\csu_task2_1_resnet18.pt
[resnet18] Epoch 2/12 Train Loss: 0.0557 Val Loss: 0.3255
[resnet18] Epoch 3/12 Train Loss: 0.0307 Val Loss: 0.2073
Saved best model for resnet18 (full_ft) at checkpoints\csu_task2_1_resnet18.pt
[resnet18] Epoch 4/12 Train Loss: 0.0177 Val Loss: 0.1813
Saved best model for resnet18 (full_ft) at checkpoints\csu_task2_1_resnet18.pt
[resnet18] Epoch 5/12 Train Loss: 0.0125 Val Loss: 0.2346
[resnet18] Epoch 6/12 Train Loss: 0.0109 Val Loss: 0.2636
[resnet18] Epoch 7/12 Train Loss: 0.0072 Val Loss: 0.2360
[resnet18] Epoch 8/12 Train Loss: 0.0056 Val Loss: 0.2362
[resnet18] Epoch 9/12 Train Loss: 0.0033 Val Loss: 0.2976
[resnet18] Epoch 10/12 Train Loss: 0.0022 Val Loss: 0.3204
[resnet18] Epoch 11/12 Train Loss: 0.0021 Val Loss: 0.3075


Unnamed: 0,Backbone,Task,Split,Disease,Accuracy,Precision,Recall,F1-score,Kappa
0,resnet18,full_ft,offsite,DR,0.84,0.935484,0.828571,0.878788,0.646018
1,resnet18,full_ft,offsite,Glaucoma,0.905,0.8,0.816327,0.808081,0.744966
2,resnet18,full_ft,offsite,AMD,0.9,0.538462,0.636364,0.583333,0.526963
3,resnet18,full_ft,offsite,Average F1,,,,0.756734,


In [151]:
# Task 2.1  efficientnet, focal Loss
# Trains efficientnet with focal loss starting from `checkpoint`, writes a
# submission with a fixed 0.5 threshold, then tabulates offsite metrics.

backbone = "efficientnet" 

# NOTE(review): pretrained_path is assigned here but is not passed to any call
# in this cell; training below starts from `checkpoint` instead.
if backbone == "resnet18":
    pretrained_path = pretrained_resnet18
elif backbone == "efficientnet":
    pretrained_path = pretrained_efficient
else:
    raise ValueError("unknown backbone")
    
# starting weights for fine-tuning (passed as pretrained_backbone below)
checkpoint = "./checkpoints/task1/csu_task1_2_efficientnet.pt"
#offsite
ckpt_task21, y_true_offsite, y_pred_offsite, val_probs, val_labels = train_one_backbone(
    backbone=backbone,
    train_csv=train_csv,
    val_csv=val_csv,
    test_csv=offsite_test_csv,
    train_image_dir=train_img_dir,
    val_image_dir=val_img_dir,
    test_image_dir=offsite_img_dir,
    epochs=10,
    batch_size=128,
    lr=1e-3,
    img_size=img_size,
    save_dir=save_dir,
    pretrained_backbone=checkpoint,
    task="full_ft",
    loss="focal", 
    alpha =None,
    gamma = 2,
)


# Per-class thresholds are computed (and printed) from the validation outputs,
# but the submission below is generated with threshold=0.5 and best=False,
# so best_thrs is not applied in this cell.
best_thrs = find_best_thresholds(val_probs, val_labels)

#onsite
# batch_size here is the global from the configuration cell, not the 128 used for training
generate_kaggle_submission(
    backbone=backbone,
    ckpt_path=ckpt_task21,
    onsite_csv=onsite_csv,          
    onsite_image_dir=onsite_img_dir,
    img_size=img_size,
    batch_size=batch_size,
    out_csv=f"submission_{backbone}_task2_1.csv",
    threshold = 0.5,
    best = False,
)

df_offsite = evaluating_metrics(
    y_true=y_true_offsite,
    y_pred=y_pred_offsite,
    backbone=backbone,
    task_name="full_ft",
    split_name="offsite",
)


torch.cuda.empty_cache()
# last expression of the cell: displays the metrics table
df_offsite

cuda
Task 2 |  Backbone: efficientnet | loss: focal
Loaded pretrained weights from ./checkpoints/task1/csu_task1_2_efficientnet.pt
[efficientnet] Epoch 1/10 Train Loss: 0.1421 Val Loss: 0.2800
Saved best model for efficientnet (full_ft) at checkpoints\csu_task2_1_efficientnet.pt
[efficientnet] Epoch 2/10 Train Loss: 0.0755 Val Loss: 0.2633
Saved best model for efficientnet (full_ft) at checkpoints\csu_task2_1_efficientnet.pt
[efficientnet] Epoch 3/10 Train Loss: 0.0373 Val Loss: 0.2474
Saved best model for efficientnet (full_ft) at checkpoints\csu_task2_1_efficientnet.pt
[efficientnet] Epoch 4/10 Train Loss: 0.0284 Val Loss: 0.2853
[efficientnet] Epoch 5/10 Train Loss: 0.0198 Val Loss: 0.2969
[efficientnet] Epoch 6/10 Train Loss: 0.0108 Val Loss: 0.2705
[efficientnet] Epoch 7/10 Train Loss: 0.0070 Val Loss: 0.2642
[efficientnet] Epoch 8/10 Train Loss: 0.0060 Val Loss: 0.2716
[efficientnet] Epoch 9/10 Train Loss: 0.0032 Val Loss: 0.2793
[efficientnet] Epoch 10/10 Train Loss: 0.0058 Val 

Unnamed: 0,Backbone,Task,Split,Disease,Accuracy,Precision,Recall,F1-score,Kappa
0,efficientnet,full_ft,offsite,DR,0.825,0.877698,0.871429,0.874552,0.585308
1,efficientnet,full_ft,offsite,Glaucoma,0.885,0.809524,0.693878,0.747253,0.673388
2,efficientnet,full_ft,offsite,AMD,0.925,0.705882,0.545455,0.615385,0.574589
3,efficientnet,full_ft,offsite,Average F1,,,,0.74573,


In [23]:
# Task 2.2 resnet18: search over learning rates and batch sizes to find the best hyperparameters.
# You do not need to run this cell unless you are interested; the code is disabled (wrapped in a string) and the earlier output is preserved below only to show the exploration process.
"""import gc

backbone = "resnet18"
checkpoint = "./checkpoints/task1/csu_task1_2_resnet18.pt"  # starting checkpoint for full_ft

if backbone == "resnet18":
    pretrained_path = pretrained_resnet18
elif backbone == "efficientnet":
    pretrained_path = pretrained_efficient
else:
    raise ValueError("unknown backbone")

# -------------------------
# Hyperparameter grids
# -------------------------
lrs = [
    1e-3, 9e-4, 8e-4, 7e-4, 6e-4, 5e-4, 4e-4, 3e-4, 2e-4, 1e-4,
    9e-5, 8e-5, 7e-5, 6e-5, 5e-5, 4e-5, 3e-5, 2e-5, 1e-5
]
batch_sizes = [128, 64]

results = []  # will store dicts with ckpt_path, avg_f1, and hyperparams

exp_id = 0

for lr in lrs:
    for batch_size in batch_sizes:
        exp_id += 1
        print("=" * 60)
        print(f"Experiment {exp_id}: lr={lr}, batch_size={batch_size}")
        print("=" * 60)

        # ---- Train with given hyperparameters ----
        ckpt_path, y_true_offsite, y_pred_offsite, val_probs, val_labels = train_one_backbone(
            backbone=backbone,
            train_csv=train_csv,
            val_csv=val_csv,
            test_csv=offsite_test_csv,
            train_image_dir=train_img_dir,
            val_image_dir=val_img_dir,
            test_image_dir=offsite_img_dir,
            epochs=12,
            batch_size=batch_size,
            lr=lr,
            img_size=img_size,
            save_dir=save_dir,
            pretrained_backbone=checkpoint,  # or pretrained_path if you prefer
            task="full_ft",
            loss="cb",   # class-balanced loss, no alpha/gamma needed
        )

        # ---- Compute validation-based thresholds (for later use on onsite) ----
        best_thrs = find_best_thresholds(val_probs, val_labels)

        # ---- Compute OFFSITE metrics (used to rank models) ----
        df_off = evaluating_metrics(
            y_true=y_true_offsite,
            y_pred=y_pred_offsite,
            backbone=backbone,
            task_name=f"full_ft_cb_lr{lr}_bs{batch_size}",
            split_name="offsite",
        )

        # Extract average F1 row (assuming your df has 'Disease' == 'Average F1')
        try:
            avg_f1 = df_off.loc[df_off["Disease"] == "Average F1", "F1-score"].values[0]
        except Exception as e:
            print("WARNING: Could not extract average F1 from df_off, defaulting to 0.0")
            print("Error:", e)
            avg_f1 = 0.0

        print(f"OFFSITE Average F1 for this config: {avg_f1:.4f}")

        # ---- Store result ----
        results.append(
            {
                "ckpt_path": ckpt_path,
                "avg_f1": float(avg_f1),
                "lr": lr,
                "batch_size": batch_size,
                "best_thrs": best_thrs,
            }
        )

        # ---- Clean up GPU memory ----
        del y_true_offsite, y_pred_offsite, val_probs, val_labels
        gc.collect()
        torch.cuda.empty_cache()

# -------------------------
# Select top 3 configurations
# -------------------------
results_sorted = sorted(results, key=lambda x: x["avg_f1"], reverse=True)
top3 = results_sorted[:3]

print("\n" + "#" * 60)
print("TOP 3 CONFIGURATIONS (by OFFSITE average F1)")
print("#" * 60)

for rank, r in enumerate(top3, start=1):
    print(
        f"Rank {rank}: avg_F1={r['avg_f1']:.4f}, "
        f"lr={r['lr']}, batch_size={r['batch_size']}, "
        f"ckpt={r['ckpt_path']}"
    )

"""

Experiment 1: lr=0.001, batch_size=128
cuda
Task 2 |  Backbone: resnet18 | loss: cb
Loaded pretrained weights from ./checkpoints/task1/csu_task1_2_resnet18.pt
[resnet18] Epoch 1/12 Train Loss: 0.4272 Val Loss: 1.1608
Saved best model for resnet18 (full_ft) at checkpoints\csu_task2_2_resnet18.pt
[resnet18] Epoch 2/12 Train Loss: 0.2578 Val Loss: 1.7627
[resnet18] Epoch 3/12 Train Loss: 0.1589 Val Loss: 0.6746
Saved best model for resnet18 (full_ft) at checkpoints\csu_task2_2_resnet18.pt
[resnet18] Epoch 4/12 Train Loss: 0.1003 Val Loss: 0.7972
[resnet18] Epoch 5/12 Train Loss: 0.0754 Val Loss: 0.4941
Saved best model for resnet18 (full_ft) at checkpoints\csu_task2_2_resnet18.pt
[resnet18] Epoch 6/12 Train Loss: 0.0601 Val Loss: 0.6466
[resnet18] Epoch 7/12 Train Loss: 0.0481 Val Loss: 0.8278
[resnet18] Epoch 8/12 Train Loss: 0.0384 Val Loss: 0.5468
[resnet18] Epoch 9/12 Train Loss: 0.0292 Val Loss: 0.6384
[resnet18] Epoch 10/12 Train Loss: 0.0553 Val Loss: 0.8260
[resnet18] Epoch 11/12 

In [25]:
# Task 2.2  resnet18, cb
# Trains resnet18 with the class-balanced BCE loss, writes a submission with a
# fixed 0.5 threshold, then tabulates offsite metrics.

backbone = "resnet18" 

# NOTE(review): pretrained_path is assigned here but is not passed to any call in this cell.
if backbone == "resnet18":
    pretrained_path = pretrained_resnet18
elif backbone == "efficientnet":
    pretrained_path = pretrained_efficient
else:
    raise ValueError("unknown backbone")
    
# NOTE(review): `checkpoint` is assigned but unused in this cell; the call
# below passes pretrained_backbone=pretrained_resnet18 instead. Confirm which
# starting weights are intended before changing either one.
checkpoint = "./checkpoints/task1/csu_task1_2_resnet18.pt"
#offsite
# NOTE(review): the returned path is stored in `ckpt_task21` even though this
# run uses loss="cb" (Task 2.2); the name is kept as-is.
ckpt_task21, y_true_offsite, y_pred_offsite, val_probs, val_labels = train_one_backbone(
    backbone=backbone,
    train_csv=train_csv,
    val_csv=val_csv,
    test_csv=offsite_test_csv,
    train_image_dir=train_img_dir,
    val_image_dir=val_img_dir,
    test_image_dir=offsite_img_dir,
    epochs=10,
    batch_size=128,
    lr=0.0004,
    img_size=img_size,
    save_dir=save_dir,
    pretrained_backbone=pretrained_resnet18,
    task="full_ft",
    loss="cb", 
)


# Per-class thresholds are computed (and printed) from the validation outputs,
# but the submission below is generated with threshold=0.5 and best=False,
# so best_thrs is not applied in this cell.
best_thrs = find_best_thresholds(val_probs, val_labels)

#onsite
# batch_size here is the global from the configuration cell, not the 128 used for training
generate_kaggle_submission(
    backbone=backbone,
    ckpt_path=ckpt_task21,
    onsite_csv=onsite_csv,          
    onsite_image_dir=onsite_img_dir,
    img_size=img_size,
    batch_size=batch_size,
    out_csv=f"submission_{backbone}_task2_2.csv",
    threshold = 0.5,
    best = False,
)

df_offsite = evaluating_metrics(
    y_true=y_true_offsite,
    y_pred=y_pred_offsite,
    backbone=backbone,
    task_name="full_ft",
    split_name="offsite",
)


torch.cuda.empty_cache()
# last expression of the cell: displays the metrics table
df_offsite

cuda
Task 2 |  Backbone: resnet18 | loss: cb
Loaded pretrained weights from ./pretrained_backbone/ckpt_resnet18_ep50.pt
[resnet18] Epoch 1/10 Train Loss: 0.6216 Val Loss: 1.4709
Saved best model for resnet18 (full_ft) at checkpoints\csu_task2_2_resnet18.pt
[resnet18] Epoch 2/10 Train Loss: 0.2490 Val Loss: 0.6878
Saved best model for resnet18 (full_ft) at checkpoints\csu_task2_2_resnet18.pt
[resnet18] Epoch 3/10 Train Loss: 0.1555 Val Loss: 0.3907
Saved best model for resnet18 (full_ft) at checkpoints\csu_task2_2_resnet18.pt
[resnet18] Epoch 4/10 Train Loss: 0.0824 Val Loss: 0.3775
Saved best model for resnet18 (full_ft) at checkpoints\csu_task2_2_resnet18.pt
[resnet18] Epoch 5/10 Train Loss: 0.0471 Val Loss: 0.3842
[resnet18] Epoch 6/10 Train Loss: 0.0284 Val Loss: 0.4464
[resnet18] Epoch 7/10 Train Loss: 0.0169 Val Loss: 0.5416
[resnet18] Epoch 8/10 Train Loss: 0.0128 Val Loss: 0.5124
[resnet18] Epoch 9/10 Train Loss: 0.0096 Val Loss: 0.5084
[resnet18] Epoch 10/10 Train Loss: 0.0097 

Unnamed: 0,Backbone,Task,Split,Disease,Accuracy,Precision,Recall,F1-score,Kappa
0,resnet18,full_ft,offsite,DR,0.84,0.855263,0.928571,0.890411,0.59596
1,resnet18,full_ft,offsite,Glaucoma,0.88,0.820513,0.653061,0.727273,0.651619
2,resnet18,full_ft,offsite,AMD,0.935,0.695652,0.727273,0.711111,0.674512
3,resnet18,full_ft,offsite,Average F1,,,,0.776265,


In [15]:
# Task 2.2, efficientnet: hyperparameter sweep over learning rate and batch size.
# The sweep is disabled (wrapped in a triple-quoted string), so this cell does
# not need to be run. The output of an earlier run is kept below only to
# document the exploration process.
# NOTE(review): the disabled code ranks configurations by OFFSITE average F1,
# i.e. on the split passed as test_csv. Confirm that selecting hyperparameters
# on that split is acceptable, or rank on the validation split instead.
"""import gc

backbone = "efficientnet"
checkpoint = "./checkpoints/task1/csu_task1_2_efficientnet.pt"  # starting checkpoint for full_ft

if backbone == "resnet18":
    pretrained_path = pretrained_resnet18
elif backbone == "efficientnet":
    pretrained_path = pretrained_efficient
else:
    raise ValueError("unknown backbone")

# -------------------------
# Hyperparameter grids
# -------------------------
lrs = [
    1e-3, 9e-4, 8e-4, 7e-4, 6e-4, 5e-4, 4e-4, 3e-4, 2e-4, 1e-4,
    9e-5, 8e-5, 7e-5, 6e-5, 5e-5, 4e-5, 3e-5, 2e-5, 1e-5
]
batch_sizes = [64, 32]

results = []  # will store dicts with ckpt_path, avg_f1, and hyperparams

exp_id = 0

for lr in lrs:
    for batch_size in batch_sizes:
        exp_id += 1
        print("=" * 60)
        print(f"Experiment {exp_id}: lr={lr}, batch_size={batch_size}")
        print("=" * 60)

        # ---- Train with given hyperparameters ----
        ckpt_path, y_true_offsite, y_pred_offsite, val_probs, val_labels = train_one_backbone(
            backbone=backbone,
            train_csv=train_csv,
            val_csv=val_csv,
            test_csv=offsite_test_csv,
            train_image_dir=train_img_dir,
            val_image_dir=val_img_dir,
            test_image_dir=offsite_img_dir,
            epochs=12,
            batch_size=batch_size,
            lr=lr,
            img_size=img_size,
            save_dir=save_dir,
            pretrained_backbone=checkpoint,  # or pretrained_path if you prefer
            task="full_ft",
            loss="cb",   # class-balanced loss, no alpha/gamma needed
        )

        # ---- Compute validation-based thresholds (for later use on onsite) ----
        best_thrs = find_best_thresholds(val_probs, val_labels)

        # ---- Compute OFFSITE metrics (used to rank models) ----
        df_off = evaluating_metrics(
            y_true=y_true_offsite,
            y_pred=y_pred_offsite,
            backbone=backbone,
            task_name=f"full_ft_cb_lr{lr}_bs{batch_size}",
            split_name="offsite",
        )

        # Extract average F1 row (assuming your df has 'Disease' == 'Average F1')
        try:
            avg_f1 = df_off.loc[df_off["Disease"] == "Average F1", "F1-score"].values[0]
        except Exception as e:
            print("WARNING: Could not extract average F1 from df_off, defaulting to 0.0")
            print("Error:", e)
            avg_f1 = 0.0

        print(f"OFFSITE Average F1 for this config: {avg_f1:.4f}")

        # ---- Store result ----
        results.append(
            {
                "ckpt_path": ckpt_path,
                "avg_f1": float(avg_f1),
                "lr": lr,
                "batch_size": batch_size,
                "best_thrs": best_thrs,
            }
        )

        # ---- Clean up GPU memory ----
        del y_true_offsite, y_pred_offsite, val_probs, val_labels
        gc.collect()
        torch.cuda.empty_cache()

# -------------------------
# Select top 3 configurations
# -------------------------
results_sorted = sorted(results, key=lambda x: x["avg_f1"], reverse=True)
top3 = results_sorted[:3]

print("\n" + "#" * 60)
print("TOP 3 CONFIGURATIONS (by OFFSITE average F1)")
print("#" * 60)

for rank, r in enumerate(top3, start=1):
    print(
        f"Rank {rank}: avg_F1={r['avg_f1']:.4f}, "
        f"lr={r['lr']}, batch_size={r['batch_size']}, "
        f"ckpt={r['ckpt_path']}"
    )

"""

Experiment 1: lr=0.001, batch_size=64
cuda
Task 2 |  Backbone: efficientnet | loss: cb
Loaded pretrained weights from ./checkpoints/task1/csu_task1_2_efficientnet.pt
[efficientnet] Epoch 1/12 Train Loss: 0.3563 Val Loss: 0.4153
Saved best model for efficientnet (full_ft) at checkpoints\csu_task2_2_efficientnet.pt
[efficientnet] Epoch 2/12 Train Loss: 0.1859 Val Loss: 0.4633
[efficientnet] Epoch 3/12 Train Loss: 0.1064 Val Loss: 0.5348
[efficientnet] Epoch 4/12 Train Loss: 0.0540 Val Loss: 0.5947
[efficientnet] Epoch 5/12 Train Loss: 0.0520 Val Loss: 0.7034
[efficientnet] Epoch 6/12 Train Loss: 0.0496 Val Loss: 0.5380
[efficientnet] Epoch 7/12 Train Loss: 0.0353 Val Loss: 0.5433
[efficientnet] Epoch 8/12 Train Loss: 0.0341 Val Loss: 0.6265
[efficientnet] Epoch 9/12 Train Loss: 0.0299 Val Loss: 0.5302
[efficientnet] Epoch 10/12 Train Loss: 0.0283 Val Loss: 0.5359
[efficientnet] Epoch 11/12 Train Loss: 0.0165 Val Loss: 0.7011
[efficientnet] Epoch 12/12 Train Loss: 0.0153 Val Loss: 0.5916


In [24]:
# Task 2.2: full fine-tuning ("full_ft") of efficientnet with the "cb" loss.
# Steps: train and collect offsite/validation predictions, compute thresholds
# from the validation outputs, write the onsite submission CSV, then show the
# offsite metrics table as the cell output.

backbone = "efficientnet"

# Single batch size shared by training and onsite inference, so this cell does
# not depend on a `batch_size` global left behind by some other cell.
train_batch_size = 32

# NOTE(review): `pretrained_path` is selected here but not passed to any call
# in this cell; training below starts from `checkpoint` instead.
if backbone == "resnet18":
    pretrained_path = pretrained_resnet18
elif backbone == "efficientnet":
    pretrained_path = pretrained_efficient
else:
    raise ValueError("unknown backbone")

# Starting weights for this run.
checkpoint = "./checkpoints/task1/csu_task1_2_efficientnet.pt"

# offsite
ckpt_task21, y_true_offsite, y_pred_offsite, val_probs, val_labels = train_one_backbone(
    backbone=backbone,
    train_csv=train_csv,
    val_csv=val_csv,
    test_csv=offsite_test_csv,
    train_image_dir=train_img_dir,
    val_image_dir=val_img_dir,
    test_image_dir=offsite_img_dir,
    epochs=12,
    batch_size=train_batch_size,
    lr=0.0004,
    img_size=img_size,
    save_dir=save_dir,
    pretrained_backbone=checkpoint,
    task="full_ft",
    loss="cb",
)

# NOTE(review): `best_thrs` is not passed to any call in this cell; the
# submission below is generated with threshold=0.5 and best=False.
best_thrs = find_best_thresholds(val_probs, val_labels)

# onsite
generate_kaggle_submission(
    backbone=backbone,
    ckpt_path=ckpt_task21,
    onsite_csv=onsite_csv,
    onsite_image_dir=onsite_img_dir,
    img_size=img_size,
    batch_size=train_batch_size,
    out_csv=f"submission_{backbone}_task2_2.csv",
    threshold=0.5,
    best=False,
)

df_offsite = evaluating_metrics(
    y_true=y_true_offsite,
    y_pred=y_pred_offsite,
    backbone=backbone,
    task_name="full_ft",
    split_name="offsite",
)

# Release cached GPU memory before the next experiment cell runs.
torch.cuda.empty_cache()
df_offsite

cuda
Task 2 |  Backbone: efficientnet | loss: cb
Loaded pretrained weights from ./checkpoints/task1/csu_task1_2_efficientnet.pt
[efficientnet] Epoch 1/12 Train Loss: 0.3241 Val Loss: 0.4611
Saved best model for efficientnet (full_ft) at checkpoints\csu_task2_2_efficientnet.pt
[efficientnet] Epoch 2/12 Train Loss: 0.1322 Val Loss: 0.4055
Saved best model for efficientnet (full_ft) at checkpoints\csu_task2_2_efficientnet.pt
[efficientnet] Epoch 3/12 Train Loss: 0.0653 Val Loss: 0.4971
[efficientnet] Epoch 4/12 Train Loss: 0.0438 Val Loss: 0.5056
[efficientnet] Epoch 5/12 Train Loss: 0.0414 Val Loss: 0.6185
[efficientnet] Epoch 6/12 Train Loss: 0.0341 Val Loss: 0.6243
[efficientnet] Epoch 7/12 Train Loss: 0.0397 Val Loss: 0.6943
[efficientnet] Epoch 8/12 Train Loss: 0.0244 Val Loss: 0.6104
[efficientnet] Epoch 9/12 Train Loss: 0.0203 Val Loss: 0.6425
[efficientnet] Epoch 10/12 Train Loss: 0.0295 Val Loss: 0.5694
[efficientnet] Epoch 11/12 Train Loss: 0.0237 Val Loss: 0.6006
[efficientnet]

Unnamed: 0,Backbone,Task,Split,Disease,Accuracy,Precision,Recall,F1-score,Kappa
0,efficientnet,full_ft,offsite,DR,0.835,0.879433,0.885714,0.882562,0.605263
1,efficientnet,full_ft,offsite,Glaucoma,0.895,0.833333,0.714286,0.769231,0.701789
2,efficientnet,full_ft,offsite,AMD,0.93,0.7,0.636364,0.666667,0.62766
3,efficientnet,full_ft,offsite,Average F1,,,,0.77282,
