In [1]:
pip install tensorflow



In [2]:
# Import necessary libraries
import random
import numpy as np
from dataclasses import dataclass
from pathlib import Path

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms

import timm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression

from torch.amp import autocast, GradScaler

In [6]:
from google.colab import drive
import os

# Mount drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
# Configurations

@dataclass
class CFG:
    DATA_ROOT: str = "/content/drive/MyDrive/dataset/AMDNet23_Dataset"
    IMG_SIZE: int = 224
    BATCH_SIZE: int = 32
    NUM_WORKERS: int = 2

    SEED: int = 42
    DEVICE: str = "cuda" if torch.cuda.is_available() else "cpu"

    META_SPLIT: float = 0.20
    EPOCHS_HEAD: int = 5
    EPOCHS_FINE: int = 15
    LR_HEAD: float = 3e-4
    LR_FINE: float = 1e-5
    WEIGHT_DECAY: float = 1e-4
    LABEL_SMOOTHING: float = 0.05
    PATIENCE: int = 5
    TARGET_ACC: float = 0.98

cfg = CFG()


Path exists: True
Contents: [PosixPath('/content/drive/MyDrive/dataset/AMDNet23_Dataset/train'), PosixPath('/content/drive/MyDrive/dataset/AMDNet23_Dataset/valid')]


In [8]:
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = True

set_seed(cfg.SEED)

In [10]:
data_root = Path(cfg.DATA_ROOT)
train_dir = data_root / "train"
valid_dir = data_root / "valid"

assert train_dir.exists(), f"Train folder not found: {train_dir}"
assert valid_dir.exists(), f"Valid folder not found: {valid_dir}"


In [11]:
#Data Augmentation

train_tfms = transforms.Compose([
    transforms.Resize((cfg.IMG_SIZE, cfg.IMG_SIZE)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.15, contrast=0.15, saturation=0.10, hue=0.02),
    transforms.RandomAffine(degrees=0, translate=(0.03, 0.03), scale=(0.95, 1.05)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

eval_tfms = transforms.Compose([
    transforms.Resize((cfg.IMG_SIZE, cfg.IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])


In [12]:
# Datasets

train_ds_index = datasets.ImageFolder(str(train_dir))
class_names = train_ds_index.classes
num_classes = len(class_names)
assert num_classes == 4, f"Expected 4 classes, got {num_classes}: {class_names}"

# Get targets for stratified split
targets = np.array([y for _, y in train_ds_index.samples])
idx = np.arange(len(targets))

base_idx, meta_idx = train_test_split(
    idx,
    test_size=cfg.META_SPLIT,
    stratify=targets,
    random_state=cfg.SEED
)

# Actual datasets with transforms
train_ds_full = datasets.ImageFolder(str(train_dir), transform=train_tfms)
meta_ds_full  = datasets.ImageFolder(str(train_dir), transform=eval_tfms)  # meta should be eval-like
valid_ds      = datasets.ImageFolder(str(valid_dir), transform=eval_tfms)

# Subsets
base_train_ds = Subset(train_ds_full, base_idx)
meta_train_ds = Subset(meta_ds_full, meta_idx)

# sanity check: same class mapping
assert train_ds_full.class_to_idx == valid_ds.class_to_idx, "Train/Valid class mapping mismatch"
assert train_ds_full.class_to_idx == meta_ds_full.class_to_idx, "Train/Meta class mapping mismatch"

# Loaders
base_loader = DataLoader(base_train_ds, batch_size=cfg.BATCH_SIZE, shuffle=True,
                         num_workers=cfg.NUM_WORKERS, pin_memory=True)
meta_loader = DataLoader(meta_train_ds, batch_size=cfg.BATCH_SIZE, shuffle=False,
                         num_workers=cfg.NUM_WORKERS, pin_memory=True)
valid_loader = DataLoader(valid_ds, batch_size=cfg.BATCH_SIZE, shuffle=False,
                          num_workers=cfg.NUM_WORKERS, pin_memory=True)

print("Classes:", class_names)
print("base_train size:", len(base_train_ds), "| meta_train size:", len(meta_train_ds), "| valid size:", len(valid_ds))

Classes: ['amd', 'cataract', 'diabetes', 'normal']
base_train size: 1275 | meta_train size: 319 | valid size: 400


In [13]:
MODEL_SPECS = [
    ("tf_efficientnetv2_s", "EffNetV2-S"),
    ("densenet121", "DenseNet121"),
    ("swin_tiny_patch4_window7_224", "Swin-Tiny"),
]

def make_model(model_name: str, num_classes: int):
    return timm.create_model(model_name, pretrained=True, num_classes=num_classes)

def set_trainable_backbone(model, train_backbone: bool):
    for p in model.parameters():
        p.requires_grad = train_backbone
    for name, p in model.named_parameters():
        if any(k in name.lower() for k in ["classifier", "head", "fc"]):
            p.requires_grad = True

In [14]:
@torch.no_grad()
def predict_proba(model, loader, device):
    model.eval()
    all_probs, all_y = [], []
    for x, y in loader:
        x = x.to(device, non_blocking=True)
        logits = model(x)
        probs = torch.softmax(logits, dim=1).detach().cpu().numpy()
        all_probs.append(probs)
        all_y.append(y.numpy())
    return np.concatenate(all_probs, axis=0), np.concatenate(all_y, axis=0)

@torch.no_grad()
def eval_with_probs(probs, y):
    pred = probs.argmax(axis=1)
    acc = accuracy_score(y, pred)
    f1m = f1_score(y, pred, average="macro")
    return acc, f1m, pred

@torch.no_grad()
def eval_model(model, loader, device):
    probs, y = predict_proba(model, loader, device)
    acc, f1m, pred = eval_with_probs(probs, y)
    return acc, f1m, probs, y


In [15]:
def train_one_phase(model, train_loader, eval_loader, device, epochs, lr, weight_decay,
                    label_smoothing=0.0, patience=5):

    criterion = nn.CrossEntropyLoss(label_smoothing=label_smoothing)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.AdamW(params, lr=lr, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=max(1, epochs))

    scaler = GradScaler("cuda", enabled=device.startswith("cuda"))

    best_acc = -1.0
    best_state = None
    bad = 0

    for ep in range(1, epochs + 1):
        model.train()
        running_loss = 0.0

        for x, y in train_loader:
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)

            optimizer.zero_grad(set_to_none=True)

            with autocast("cuda", enabled=device.startswith("cuda")):
                logits = model(x)
                loss = criterion(logits, y)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item() * x.size(0)

        scheduler.step()

        eval_acc, eval_f1, _, _ = eval_model(model, eval_loader, device)
        tr_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch {ep:02d}/{epochs} | loss {tr_loss:.4f} | meta_acc {eval_acc:.4f} | meta_f1 {eval_f1:.4f}")

        if eval_acc > best_acc:
            best_acc = eval_acc
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
            bad = 0
        else:
            bad += 1
            if bad >= patience:
                print("Early stopping.")
                break

    if best_state is not None:
        model.load_state_dict(best_state)
    return model, best_acc

In [17]:
trained_models = []
meta_scores = []  # (model_name, meta_acc, meta_f1)

for timm_name, nice_name in MODEL_SPECS:
    print("\n==============================")
    print("Training:", nice_name, f"({timm_name})")
    print("==============================")

    model = make_model(timm_name, num_classes=num_classes).to(cfg.DEVICE)

    # Head training- feature frozen
    set_trainable_backbone(model, train_backbone=False)
    model, _ = train_one_phase(
        model, base_loader, meta_loader, cfg.DEVICE,
        epochs=cfg.EPOCHS_HEAD,
        lr=cfg.LR_HEAD,
        weight_decay=cfg.WEIGHT_DECAY,
        label_smoothing=cfg.LABEL_SMOOTHING,
        patience=max(2, cfg.PATIENCE // 2),
    )

    # Fine-tuning
    set_trainable_backbone(model, train_backbone=True)
    model, _ = train_one_phase(
        model, base_loader, meta_loader, cfg.DEVICE,
        epochs=cfg.EPOCHS_FINE,
        lr=cfg.LR_FINE,
        weight_decay=cfg.WEIGHT_DECAY,
        label_smoothing=cfg.LABEL_SMOOTHING,
        patience=cfg.PATIENCE,
    )

    meta_acc, meta_f1, _, _ = eval_model(model, meta_loader, cfg.DEVICE)
    print(f"[{nice_name}] META Acc: {meta_acc:.4f} | META Macro-F1: {meta_f1:.4f}")

    trained_models.append((nice_name, model))
    meta_scores.append((nice_name, meta_acc, meta_f1))



Training: EffNetV2-S (tf_efficientnetv2_s)
Epoch 01/5 | loss 4.5702 | meta_acc 0.6740 | meta_f1 0.6720
Epoch 02/5 | loss 2.3050 | meta_acc 0.6897 | meta_f1 0.6924
Epoch 03/5 | loss 1.9407 | meta_acc 0.6959 | meta_f1 0.7013
Epoch 04/5 | loss 1.7544 | meta_acc 0.6897 | meta_f1 0.6930
Epoch 05/5 | loss 1.6281 | meta_acc 0.7053 | meta_f1 0.7105
Epoch 01/15 | loss 1.5834 | meta_acc 0.7116 | meta_f1 0.7170
Epoch 02/15 | loss 1.4298 | meta_acc 0.7429 | meta_f1 0.7469
Epoch 03/15 | loss 1.3853 | meta_acc 0.7461 | meta_f1 0.7494
Epoch 04/15 | loss 1.2927 | meta_acc 0.7524 | meta_f1 0.7548
Epoch 05/15 | loss 1.2964 | meta_acc 0.7680 | meta_f1 0.7693
Epoch 06/15 | loss 1.1683 | meta_acc 0.7774 | meta_f1 0.7797
Epoch 07/15 | loss 1.1482 | meta_acc 0.7524 | meta_f1 0.7543
Epoch 08/15 | loss 1.0716 | meta_acc 0.7774 | meta_f1 0.7798
Epoch 09/15 | loss 1.1106 | meta_acc 0.7868 | meta_f1 0.7871
Epoch 10/15 | loss 1.0428 | meta_acc 0.7900 | meta_f1 0.7908
Epoch 11/15 | loss 0.9966 | meta_acc 0.7806 | 

model.safetensors:   0%|          | 0.00/32.3M [00:00<?, ?B/s]

Epoch 01/5 | loss 1.2027 | meta_acc 0.5956 | meta_f1 0.5675
Epoch 02/5 | loss 0.9399 | meta_acc 0.6897 | meta_f1 0.6761
Epoch 03/5 | loss 0.8303 | meta_acc 0.7586 | meta_f1 0.7533
Epoch 04/5 | loss 0.7928 | meta_acc 0.7586 | meta_f1 0.7534
Epoch 05/5 | loss 0.7735 | meta_acc 0.7649 | meta_f1 0.7599
Epoch 01/15 | loss 0.7485 | meta_acc 0.7680 | meta_f1 0.7645
Epoch 02/15 | loss 0.6906 | meta_acc 0.7962 | meta_f1 0.7921
Epoch 03/15 | loss 0.6608 | meta_acc 0.8025 | meta_f1 0.7981
Epoch 04/15 | loss 0.6186 | meta_acc 0.8088 | meta_f1 0.8057
Epoch 05/15 | loss 0.6020 | meta_acc 0.8150 | meta_f1 0.8106
Epoch 06/15 | loss 0.5678 | meta_acc 0.8307 | meta_f1 0.8278
Epoch 07/15 | loss 0.5489 | meta_acc 0.8339 | meta_f1 0.8308
Epoch 08/15 | loss 0.5326 | meta_acc 0.8401 | meta_f1 0.8371
Epoch 09/15 | loss 0.5296 | meta_acc 0.8339 | meta_f1 0.8300
Epoch 10/15 | loss 0.5283 | meta_acc 0.8433 | meta_f1 0.8403
Epoch 11/15 | loss 0.5287 | meta_acc 0.8433 | meta_f1 0.8403
Epoch 12/15 | loss 0.5221 | m

model.safetensors:   0%|          | 0.00/114M [00:00<?, ?B/s]

Epoch 01/5 | loss 0.7136 | meta_acc 0.9154 | meta_f1 0.9148
Epoch 02/5 | loss 0.4284 | meta_acc 0.9060 | meta_f1 0.9033
Epoch 03/5 | loss 0.3481 | meta_acc 0.9185 | meta_f1 0.9161
Epoch 04/5 | loss 0.2830 | meta_acc 0.9467 | meta_f1 0.9462
Epoch 05/5 | loss 0.2513 | meta_acc 0.9530 | meta_f1 0.9527
Epoch 01/15 | loss 0.2484 | meta_acc 0.9530 | meta_f1 0.9526
Epoch 02/15 | loss 0.2522 | meta_acc 0.9561 | meta_f1 0.9558
Epoch 03/15 | loss 0.2424 | meta_acc 0.9530 | meta_f1 0.9527
Epoch 04/15 | loss 0.2367 | meta_acc 0.9498 | meta_f1 0.9495
Epoch 05/15 | loss 0.2284 | meta_acc 0.9498 | meta_f1 0.9495
Epoch 06/15 | loss 0.2338 | meta_acc 0.9498 | meta_f1 0.9495
Epoch 07/15 | loss 0.2245 | meta_acc 0.9530 | meta_f1 0.9527
Early stopping.
[Swin-Tiny] META Acc: 0.9561 | META Macro-F1: 0.9558


In [18]:
meta_probs = {}
meta_y = None
for name, model in trained_models:
    probs, y = predict_proba(model, meta_loader, cfg.DEVICE)
    meta_probs[name] = probs
    meta_y = y

best_single = max(meta_scores, key=lambda x: x[1])
best_single_name, best_single_acc, best_single_f1 = best_single
print("\nBest single on META:", best_single)

# Candidate 1: choose single if meets target on META
chosen_strategy = None
chosen_meta = None
weights = None

if best_single_acc >= cfg.TARGET_ACC:
    chosen_strategy = f"SingleModel:{best_single_name}"
    print(f"\n Condition met on META: {best_single_name} >= {cfg.TARGET_ACC:.2f}. Using single model.")
else:
    # Candidate 2: weighted soft vote using (acc+f1)/2 weights computed on META
    weights = {}
    for n, a, f in meta_scores:
        weights[n] = max(1e-6, (a + f) / 2.0)
    s = sum(weights.values())
    for k in weights:
        weights[k] /= s

    soft_vote_meta = np.zeros_like(next(iter(meta_probs.values())))
    for n in meta_probs:
        soft_vote_meta += weights[n] * meta_probs[n]

    soft_acc, soft_f1, _ = eval_with_probs(soft_vote_meta, meta_y)
    print("\nWeighted soft-vote on META:", soft_acc, soft_f1, "weights:", weights)

    # Candidate 3: stacking on META. (using Logistic regression)
    X_meta = np.concatenate([meta_probs[n] for n, _ in trained_models], axis=1)
    meta_clf = LogisticRegression(max_iter=2000, n_jobs=-1, multi_class="auto")
    meta_clf.fit(X_meta, meta_y)
    stack_meta_probs = meta_clf.predict_proba(X_meta)
    stack_acc, stack_f1, _ = eval_with_probs(stack_meta_probs, meta_y)
    print("Stacking on META:", stack_acc, stack_f1)

    if stack_acc > soft_acc:
        chosen_strategy = "Stacking(LogReg)"
        chosen_meta = meta_clf
    else:
        chosen_strategy = "WeightedSoftVote"

print("\nChosen strategy (learned on META):", chosen_strategy)



Best single on META: ('Swin-Tiny', 0.9561128526645768, 0.9558435602189042)

Weighted soft-vote on META: 0.9498432601880877 0.9498529153848633 weights: {'EffNetV2-S': 0.3053888235352021, 'DenseNet121': 0.325245960811414, 'Swin-Tiny': 0.3693652156533839}




Stacking on META: 0.9623824451410659 0.9623203889651929

Chosen strategy (learned on META): Stacking(LogReg)


In [19]:
# Final evaluation on VALID(unseen)


valid_probs = {}
valid_y = None
for name, model in trained_models:
    probs, y = predict_proba(model, valid_loader, cfg.DEVICE)
    valid_probs[name] = probs
    valid_y = y

if chosen_strategy.startswith("SingleModel:"):
    chosen_name = chosen_strategy.split(":", 1)[1]
    final_probs = valid_probs[chosen_name]
elif chosen_strategy == "WeightedSoftVote":
    final_probs = np.zeros_like(next(iter(valid_probs.values())))
    for n in valid_probs:
        final_probs += weights[n] * valid_probs[n]
elif chosen_strategy == "Stacking(LogReg)":
    X_valid = np.concatenate([valid_probs[n] for n, _ in trained_models], axis=1)
    final_probs = chosen_meta.predict_proba(X_valid)
else:
    raise RuntimeError("Unknown strategy.")

valid_pred = final_probs.argmax(axis=1)
val_acc = accuracy_score(valid_y, valid_pred)
val_f1m = f1_score(valid_y, valid_pred, average="macro")
cm = confusion_matrix(valid_y, valid_pred)

print("\n==============================")
print("FINAL VALID RESULTS (NO LEAKAGE)")
print("==============================")
print("Strategy:", chosen_strategy)
print(f"Valid Accuracy: {val_acc:.4f}")
print(f"Valid Macro-F1: {val_f1m:.4f}")
print("\nConfusion Matrix:\n", cm)
print("\nClassification Report:\n", classification_report(valid_y, valid_pred, target_names=class_names))



FINAL VALID RESULTS (NO LEAKAGE)
Strategy: Stacking(LogReg)
Valid Accuracy: 0.9775
Valid Macro-F1: 0.9774

Confusion Matrix:
 [[100   0   0   0]
 [  1  99   0   0]
 [  2   0  93   5]
 [  0   0   1  99]]

Classification Report:
               precision    recall  f1-score   support

         amd       0.97      1.00      0.99       100
    cataract       1.00      0.99      0.99       100
    diabetes       0.99      0.93      0.96       100
      normal       0.95      0.99      0.97       100

    accuracy                           0.98       400
   macro avg       0.98      0.98      0.98       400
weighted avg       0.98      0.98      0.98       400

