In [1]:
%%capture
!pip install ultralytics timm wandb gradio scikit-learn
# Install required packages for the project

In [2]:
import os
from pathlib import Path
import cv2
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

import timm
from sklearn.metrics import f1_score, confusion_matrix
import wandb
from ultralytics import YOLO
import gradio as gr

In [3]:
# Dataset layout: <data_dir>/<split>/<class name>/<image files>
data_dir = Path('ODIR-2019/YOLO/preprocessed_merged')
train_root = data_dir / 'train'
val_root = data_dir / 'val'
test_root = data_dir / 'test'

# Class names are the sub-directories of the training split; sorting fixes the
# name -> index mapping so it is the same on every run.
CLASSES = sorted([d.name for d in train_root.iterdir() if d.is_dir()])
if not CLASSES:
    raise ValueError(f"No class directories found under {train_root}")
NUM_CLASSES = len(CLASSES)
CLASS_TO_IDX = {c: i for i, c in enumerate(CLASSES)}

# Compute class weights from folder counts (number of entries in each class folder).
counts = torch.tensor(
    [sum(1 for _ in (train_root / cls).glob('*')) for cls in CLASSES],
    dtype=torch.float32,
)

# A class with zero files would get an infinite weight below and turn the
# weighted loss into NaN, so fail early with a readable message instead.
empty_classes = [cls for cls, n in zip(CLASSES, counts.tolist()) if n == 0]
if empty_classes:
    raise ValueError(f"No training files found for classes: {empty_classes}")

# Inverse-frequency weights, scaled so the most frequent class has weight 1.0.
probs = counts / counts.sum()
weights = probs.max() / probs
print("Class counts:", counts)
print("Class weights:", weights)
# `weights` stays in memory as a module-level tensor (nothing is written to disk);
# it is passed to the loss function when the criterion is built.

Class counts: tensor([ 724.,  724., 2790., 1574.,  394.,  475., 2252.,  545.])
Class weights: tensor([3.8536, 3.8536, 1.0000, 1.7726, 7.0812, 5.8737, 1.2389, 5.1193])


In [4]:
import cv2
import numpy as np
from pathlib import Path
from PIL import Image
IMG_SIZE = 300  # ideal for B3 model


def _normalized_tensor_steps():
    """Return the tail shared by every pipeline: tensor conversion + per-channel normalisation."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]


def _eval_pipeline():
    """Build the deterministic pipeline: resize (short side), centre-crop to IMG_SIZE, normalise."""
    geometry = [
        transforms.Resize(IMG_SIZE + 32),
        transforms.CenterCrop(IMG_SIZE),
    ]
    return transforms.Compose(geometry + _normalized_tensor_steps())


# Training pipeline: same resize as evaluation, followed by light augmentation.
train_transform = transforms.Compose(
    [
        transforms.Resize(IMG_SIZE + 32),  # short side
        # zoom-in only (scale) with a near-square aspect ratio to avoid distortion
        transforms.RandomResizedCrop(IMG_SIZE, scale=(0.85, 1.0), ratio=(0.95, 1.05)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
    ]
    + _normalized_tensor_steps()
)

# Validation and test use the same augmentation-free pipeline (separate instances).
val_transform = _eval_pipeline()
test_transform = _eval_pipeline()

class ODIRImageProcessor:
    """Preprocessing pipeline for ODIR retinal (fundus) images.

    The helper methods work on 3-channel NumPy images and build their
    grayscale masks with ``cv2.COLOR_BGR2GRAY``, i.e. they treat the input as
    BGR (the channel order ``cv2.imread`` produces).  ``preprocess_image``
    chains the helpers and returns an RGB ``PIL.Image``.
    """

    # Gray level above which a pixel is treated as part of the fundus.
    _FUNDUS_THRESH = 15

    def preprocess_image(self, img, threshold=10, gamma=0.9, target_size=IMG_SIZE):
        """Run the full pipeline on one BGR image.

        Args:
            img: 3-channel BGR image array (assumed uint8, as read by ``cv2.imread``).
            threshold: gray level used by ``crop_black_border``.
            gamma: exponent passed to ``gamma_correction``.
            target_size: short-side / crop size passed to ``resize``.

        Returns:
            The processed image as an RGB ``PIL.Image``.
        """
        img = self.crop_black_border(img, thresh=threshold)
        img = self.resize(img, target_size=target_size)
        img = self.crop_fundus_circle(img)
        img = self.center_retina(img)
        img = self.mask_outside_retina(img)
        img = self.gamma_correction(img, gamma=gamma)
        # Convert to RGB last: every helper above derives its mask with
        # COLOR_BGR2GRAY, so it must see BGR data for the luma weights to be
        # applied to the right channels.  The remaining operations (crop,
        # resize, mask, gamma) do not depend on channel order.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        return Image.fromarray(img)

    def gamma_correction(self, img, gamma=0.9):
        """Apply ``out = in ** gamma`` on the image scaled to [0, 1]; returns uint8."""
        img_float = img.astype(np.float32) / 255.0
        # Gamma correction
        img_gamma = np.power(img_float, gamma)
        # Convert back to 0-255
        img_result = (img_gamma * 255).astype(np.uint8)
        return img_result

    def resize(self, img, target_size=300):
        """Scale the short side to ``target_size`` and centre-crop a square.

        Returns a ``target_size x target_size`` crop of the resized image.
        """
        h, w = img.shape[:2]
        scale = target_size / min(h, w)
        # int() truncates, and (target / n) * n can land just below `target`
        # in floating point; clamp so the crop below is never short by a pixel.
        new_w = max(target_size, int(w * scale))
        new_h = max(target_size, int(h * scale))

        img_resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
        start_x = (new_w - target_size) // 2
        start_y = (new_h - target_size) // 2

        return img_resized[
            start_y:start_y + target_size,
            start_x:start_x + target_size,
        ]

    def crop_black_border(self, img, thresh=10):
        """Crop to the bounding box of all pixels whose gray level exceeds ``thresh``.

        Returns the input unchanged when no pixel exceeds the threshold.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # mask of non-black pixels
        mask = gray > thresh

        if not np.any(mask):
            return img  # fallback safety

        coords = np.column_stack(np.where(mask))
        y_min, x_min = coords.min(axis=0)
        y_max, x_max = coords.max(axis=0)

        return img[y_min:y_max + 1, x_min:x_max + 1]

    def _largest_region_bbox(self, img):
        """Return ``(x, y, w, h)`` of the largest above-threshold contour, or ``None`` if there is none."""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, th = cv2.threshold(gray, self._FUNDUS_THRESH, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            return None
        return cv2.boundingRect(max(contours, key=cv2.contourArea))

    def crop_fundus_circle(self, img):
        """Crop to the bounding box of the largest bright region.

        Returns the input unchanged when no region is found (e.g. an all-dark image).
        """
        bbox = self._largest_region_bbox(img)
        if bbox is None:
            return img  # fallback safety, same policy as crop_black_border
        x, y, w, h = bbox
        return img[y:y + h, x:x + w]

    def mask_outside_retina(self, img):
        """Zero every pixel outside the thresholded (and morphologically closed) bright area."""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, th = cv2.threshold(gray, self._FUNDUS_THRESH, 255, cv2.THRESH_BINARY)

        # Closing fills small dark holes inside the mask so they are not zeroed out.
        mask = cv2.morphologyEx(th, cv2.MORPH_CLOSE, np.ones((15, 15), np.uint8))
        mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)

        return cv2.bitwise_and(img, mask)

    def center_retina(self, img):
        """Crop to the largest bright region and zero-pad it to a centred square.

        When no region is found the whole image is padded instead.
        """
        bbox = self._largest_region_bbox(img)
        if bbox is None:
            crop = img
        else:
            x, y, w, h = bbox
            crop = img[y:y + h, x:x + w]

        # pad to square
        h, w = crop.shape[:2]
        size = max(h, w)
        padded = np.zeros((size, size, 3), dtype=crop.dtype)

        y0 = (size - h) // 2
        x0 = (size - w) // 2
        padded[y0:y0 + h, x0:x0 + w] = crop

        return padded

In [5]:
class ODIRFolderDataset(Dataset):
    """Image-folder dataset: ``root/<class name>/<file>`` -> ``(image, label index)``.

    Args:
        root: directory containing one sub-directory per entry of ``CLASSES``.
        transform: optional callable applied to the image last.
        processor: optional object whose ``preprocess_image(img)`` is applied to
            the raw image before ``transform``.
    """

    def __init__(self, root, transform=None, processor=None):
        self.root = Path(root)
        self.transform = transform
        self.processor = processor
        self.samples = []
        for cls in CLASSES:
            # glob() order depends on the filesystem; sort for a reproducible sample order.
            for img_path in sorted((self.root / cls).glob('*')):
                self.samples.append((str(img_path), CLASS_TO_IDX[cls]))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        Raises:
            OSError: if the file cannot be read or decoded by ``cv2.imread``.
        """
        img_path, label = self.samples[idx]
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError(f"Could not read image: {img_path}")
        if self.processor:
            # The processor takes the BGR image from cv2.imread and performs the
            # BGR -> RGB conversion itself; converting here as well would swap
            # the channels twice and feed BGR data to the model.
            img = self.processor.preprocess_image(img)
        else:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if self.transform:
            img = self.transform(img)
        return img, label



In [6]:
# One shared preprocessing object for all three splits.
processor = ODIRImageProcessor()

# Aliases for the per-split transform pipelines.
train_tfms, val_tfms, test_tfms = train_transform, val_transform, test_transform

# Datasets: raw image -> processor -> split-specific transform.
train_ds = ODIRFolderDataset(root=train_root, transform=train_tfms, processor=processor)
val_ds = ODIRFolderDataset(root=val_root, transform=val_tfms, processor=processor)
test_ds = ODIRFolderDataset(root=test_root, transform=test_tfms, processor=processor)

# Loader options common to every split; only the training loader shuffles.
loader_opts = {'batch_size': 16, 'num_workers': 2, 'pin_memory': True}
train_loader = DataLoader(train_ds, shuffle=True, **loader_opts)
val_loader = DataLoader(val_ds, shuffle=False, **loader_opts)
test_loader = DataLoader(test_ds, shuffle=False, **loader_opts)

In [7]:
class EffNetB4(nn.Module):
    """Thin ``nn.Module`` wrapper around a timm classification model.

    NOTE: despite the class name, the default backbone is ``efficientnet_b3``.
    The name is kept so existing callers keep working.

    Args:
        num_classes: size of the classification head.
        model_name: timm model identifier passed to ``timm.create_model``.
        pretrained: whether timm should load pretrained weights.
    """

    def __init__(self, num_classes, model_name='efficientnet_b3', pretrained=True):
        super().__init__()
        self.model = timm.create_model(
            model_name,
            pretrained=pretrained,
            num_classes=num_classes,
        )

    def forward(self, x):
        """Return the wrapped model's output for batch ``x``."""
        return self.model(x)

# Build the classifier on the GPU, then its loss and optimizer.
model_eff = EffNetB4(num_classes=NUM_CLASSES).to('cuda')
# Cross-entropy weighted per class with the `weights` tensor (moved to the GPU).
criterion = nn.CrossEntropyLoss(weight=weights.to('cuda'))
optimizer = torch.optim.AdamW(params=model_eff.parameters(), lr=3e-4)

In [None]:
import copy

# Best validation macro-F1 seen so far; a checkpoint is written only on improvement.
best_f1 = 0
save_path = 'ODIR-2019/results/best_effnet_b3.pth'
# torch.save does not create missing directories.
Path(save_path).parent.mkdir(parents=True, exist_ok=True)

# Mixed precision: the forward pass runs largely in float16, which reduces
# activation memory on small GPUs.  GradScaler rescales the loss so float16
# gradients do not underflow.  If memory is still exhausted, lower the
# DataLoader batch size.
scaler = torch.cuda.amp.GradScaler()

for epoch in range(25):
    # ---- Training ----
    model_eff.train()
    losses = []
    for x, y in train_loader:
        x, y = x.cuda(non_blocking=True), y.cuda(non_blocking=True)
        # set_to_none frees the gradient tensors instead of zero-filling them.
        optimizer.zero_grad(set_to_none=True)
        with torch.autocast(device_type='cuda', dtype=torch.float16):
            logits = model_eff(x)
            loss = criterion(logits, y)
        scaler.scale(loss).backward()
        scaler.step(optimizer)  # skips the step if gradients contain inf/NaN
        scaler.update()
        losses.append(loss.item())
    avg_loss = np.mean(losses)

    # ---- Validation ----
    model_eff.eval()
    preds, labels = [], []
    with torch.no_grad(), torch.autocast(device_type='cuda', dtype=torch.float16):
        for x, y in val_loader:
            x = x.cuda(non_blocking=True)
            logits = model_eff(x)
            preds.extend(logits.argmax(1).cpu().numpy())
            labels.extend(y.numpy())
    macro_f1 = f1_score(labels, preds, average='macro')
    print(f'Epoch {epoch} | Loss {avg_loss:.4f} | Macro-F1 {macro_f1:.4f}')

    if macro_f1 > best_f1:
        best_f1 = macro_f1
        torch.save(model_eff.state_dict(), save_path)
        print(f'Saved best EfficientNet model at epoch {epoch}')

OutOfMemoryError: CUDA out of memory. Tried to allocate 14.00 MiB. GPU 0 has a total capacity of 3.68 GiB of which 5.56 MiB is free. Including non-PyTorch memory, this process has 3.56 GiB memory in use. Of the allocated memory 3.42 GiB is allocated by PyTorch, and 58.48 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)