In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file in it.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# ✅ Step 1: Install packages
!pip install -q transformers accelerate albumentations

# ✅ Step 2: Imports
import os, cv2, numpy as np, torch, torch.nn as nn
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from transformers import SegformerForSemanticSegmentation
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ✅ Step 3: Paths
# Training images and their object-level ground-truth masks.
IMG_DIR = "/kaggle/input/cod10k/COD10K-v3/Train/Image"
MASK_DIR = "/kaggle/input/cod10k/COD10K-v3/Train/GT_Object"

# ✅ Step 4: Dataset
class COD10KDataset(Dataset):
    """Paired image / binary-mask dataset read from two parallel path lists.

    Args:
        img_paths: Sequence of image file paths.
        mask_paths: Sequence of mask file paths; ``mask_paths[i]`` must be the
            mask for ``img_paths[i]``.
        image_size: Side length both image and mask are resized to.
        augment: When True, random flip / colour / geometric augmentations are
            applied in addition to resize and normalization.

    Each item is a dict with ``'pixel_values'`` (float tensor, 3 x H x W, RGB,
    normalized with mean 0.5 / std 0.5) and ``'labels'`` (float tensor,
    1 x H x W, values 0 or 1).

    Raises:
        ValueError: If the two path lists differ in length.
        FileNotFoundError: From ``__getitem__`` if a file cannot be decoded.
    """

    def __init__(self, img_paths, mask_paths, image_size=512, augment=True):
        if len(img_paths) != len(mask_paths):
            raise ValueError(
                f"Got {len(img_paths)} images but {len(mask_paths)} masks; "
                "the two lists must be paired one-to-one."
            )
        self.img_paths = img_paths
        self.mask_paths = mask_paths

        steps = [A.Resize(image_size, image_size)]
        if augment:
            steps += [
                A.HorizontalFlip(p=0.5),
                A.ColorJitter(p=0.2),
                A.RandomBrightnessContrast(p=0.2),
                A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=15, p=0.5),
            ]
        steps += [
            A.Normalize(mean=(0.5,), std=(0.5,)),
            ToTensorV2(),
        ]
        self.transform = A.Compose(steps)

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        mask_path = self.mask_paths[idx]

        # cv2.imread signals failure by returning None rather than raising.
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV decodes to BGR; the pretrained backbone and matplotlib expect RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask: {mask_path}")
        # Any non-zero pixel counts as foreground.
        mask = (mask > 0).astype(np.float32)

        transformed = self.transform(image=image, mask=mask)
        return {
            'pixel_values': transformed['image'],
            'labels': transformed['mask'].unsqueeze(0)  # add channel axis -> 1 x H x W
        }

# ✅ Step 5: Data Loaders
# Images and masks are listed and sorted independently, so pairing relies on
# both folders sorting into the same order.
all_images = sorted(os.path.join(IMG_DIR, name) for name in os.listdir(IMG_DIR))
all_masks = sorted(os.path.join(MASK_DIR, name) for name in os.listdir(MASK_DIR))

# Fixed-seed 80/20 split; images and masks are split together so pairs stay aligned.
train_imgs, val_imgs, train_masks, val_masks = train_test_split(
    all_images,
    all_masks,
    test_size=0.2,
    random_state=42,
)

# Augmentation is only enabled for the training split.
train_dataset = COD10KDataset(train_imgs, train_masks, augment=True)
val_dataset = COD10KDataset(val_imgs, val_masks, augment=False)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=8, num_workers=2)

# ✅ Step 6: Model
# Load the pretrained SegFormer checkpoint named below and move it to `device`.
# num_labels=1 requests a single output channel, which the rest of the notebook
# treats as a binary-mask logit (sigmoid followed by a 0.5 threshold).
# ignore_mismatched_sizes=True is presumably needed because the checkpoint's
# segmentation head was trained with a different number of labels — TODO confirm.
model = SegformerForSemanticSegmentation.from_pretrained(
    "nvidia/segformer-b2-finetuned-ade-512-512",
    num_labels=1,
    ignore_mismatched_sizes=True
).to(device)

# ✅ Step 7: Loss
class MixedLoss(nn.Module):
    """Sum of BCE, Tversky and Dice losses for binary segmentation.

    Args:
        alpha: Weight of false positives in the Tversky index.
        beta: Weight of false negatives in the Tversky index.

    ``forward(inputs, targets)`` expects ``inputs`` to be raw logits and
    ``targets`` to be a same-shaped tensor of 0/1 values, and returns a
    scalar tensor ``bce + (1 - tversky) + (1 - dice)``.
    """

    def __init__(self, alpha=0.7, beta=0.3):
        super().__init__()
        self.bce = nn.BCEWithLogitsLoss()
        self.alpha, self.beta = alpha, beta

    def forward(self, inputs, targets):
        eps = 1e-7
        # Work in float32 so the large spatial sums below stay well-conditioned
        # even when the caller runs the model under autocast.
        logits = inputs.float()
        targets = targets.float()

        # BCEWithLogitsLoss applies the sigmoid itself, so it must receive the
        # raw logits; feeding it probabilities would squash the signal twice.
        bce = self.bce(logits, targets)

        # The overlap-based terms operate on probabilities.
        probs = torch.sigmoid(logits)
        tp = (probs * targets).sum()
        fp = (probs * (1 - targets)).sum()
        fn = ((1 - probs) * targets).sum()

        tversky = (tp + eps) / (tp + self.alpha * fp + self.beta * fn + eps)
        dice = (2 * tp + eps) / (probs.sum() + targets.sum() + eps)
        return bce + (1 - tversky) + (1 - dice)

criterion = MixedLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
# T_max equals the 20 epochs passed to train() (one scheduler step per epoch),
# so the learning rate decays over the whole run. With a shorter T_max the
# cosine curve turns around and the rate climbs again in the final epochs.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)

# ✅ Step 8: Evaluation Metrics
def compute_metrics(pred, mask):
    """Score a batch of predicted probabilities against binary masks.

    Args:
        pred: 4-D tensor of probabilities; thresholded at 0.5 before scoring.
        mask: 4-D tensor of 0/1 ground-truth values, same shape as ``pred``.

    Returns:
        Dict of Python floats, each the batch mean of a per-sample score:
        ``iou``, ``dice``, ``precision``, ``recall``, ``f_beta``, ``mae``,
        ``s_measure`` and ``e_measure``.

    Note:
        ``s_measure`` here is simply the average of IoU and Dice, and
        ``e_measure`` is one minus the mean squared error of the binarized
        prediction; both are computed exactly as written below.
    """
    eps = 1e-7
    spatial = (1, 2, 3)  # reduce everything except the batch axis

    binary = (pred > 0.5).float()
    pred_area = binary.sum(spatial)
    mask_area = mask.sum(spatial)
    overlap = (binary * mask).sum(spatial)
    difference = binary - mask

    iou = (overlap + eps) / (pred_area + mask_area - overlap + eps)
    dice = (2 * overlap + eps) / (pred_area + mask_area + eps)
    precision = (overlap + eps) / (pred_area + eps)
    recall = (overlap + eps) / (mask_area + eps)
    f_beta = (1.25 * precision * recall + eps) / (0.25 * precision + recall + eps)

    per_sample = {
        'iou': iou,
        'dice': dice,
        'precision': precision,
        'recall': recall,
        'f_beta': f_beta,
        'mae': difference.abs().mean(spatial),
        's_measure': (iou + dice) / 2,
        'e_measure': 1 - difference.pow(2).mean(spatial),
    }
    return {key: values.mean().item() for key, values in per_sample.items()}

# ✅ Step 9: Train Function (AMP + Cosine + Clip)
def train(model, train_loader, val_loader, criterion, optimizer, epochs=20):
    """Train with mixed precision and gradient clipping, validating every epoch.

    Each epoch makes one pass over ``train_loader`` (autocast forward, scaled
    backward, gradient-norm clipping at 1.0, optimizer step), advances the
    module-level ``scheduler`` once, then scores ``val_loader`` with
    ``criterion`` and ``compute_metrics``. Whenever the mean validation
    ``s_measure`` exceeds the best value so far, the model's state dict is
    written to ``/kaggle/working/best_segformer_cod_smeasure.pth``.

    Relies on the module-level names ``device``, ``scheduler`` and
    ``compute_metrics``. Returns nothing; progress is printed.
    """
    metric_names = ['iou', 'dice', 'precision', 'recall', 'f_beta', 'mae', 's_measure', 'e_measure']
    resize = torch.nn.functional.interpolate
    best_s = 0
    scaler = torch.cuda.amp.GradScaler()

    for epoch in range(epochs):
        # ---- training pass ----
        model.train()
        total_loss = 0
        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1} Training"):
            imgs = batch["pixel_values"].to(device)
            masks = batch["labels"].to(device)

            with torch.cuda.amp.autocast():
                logits = model(pixel_values=imgs).logits
                # Bring the logits to mask resolution before computing the loss.
                logits = resize(logits, size=masks.shape[-2:], mode="bilinear")
                loss = criterion(logits, masks)

            scaler.scale(loss).backward()
            # Gradients must be unscaled before their norm is clipped.
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            total_loss += loss.item()

        print(f"Train Loss: {total_loss / len(train_loader):.4f}")
        scheduler.step()

        # ---- validation pass ----
        model.eval()
        val_loss = 0
        history = {name: [] for name in metric_names}
        with torch.no_grad():
            for batch in tqdm(val_loader, desc=f"Epoch {epoch+1} Validation"):
                imgs = batch["pixel_values"].to(device)
                masks = batch["labels"].to(device)
                logits = model(pixel_values=imgs).logits
                logits = resize(logits, size=masks.shape[-2:], mode="bilinear")
                val_loss += criterion(logits, masks).item()
                batch_scores = compute_metrics(torch.sigmoid(logits), masks)
                for name in history:
                    history[name].append(batch_scores[name])

        # Average the per-batch scores once and reuse them for printing and checkpointing.
        avg = {name: np.mean(values) for name, values in history.items()}

        print(f"📊 Val Loss: {val_loss/len(val_loader):.4f}")
        print(f"📈 IoU: {avg['iou']:.4f}, Dice: {avg['dice']:.4f}, "
              f"Precision: {avg['precision']:.4f}, Recall: {avg['recall']:.4f}")
        print(f"⭐ Fβ: {avg['f_beta']:.4f}, MAE: {avg['mae']:.4f}, "
              f"S-measure: {avg['s_measure']:.4f}, E-measure: {avg['e_measure']:.4f}\n")

        if avg['s_measure'] > best_s:
            best_s = avg['s_measure']
            torch.save(model.state_dict(), "/kaggle/working/best_segformer_cod_smeasure.pth")
            print(f"✅ Best model saved (S-measure: {best_s:.4f})\n")

# ✅ Step 10: Start Training
# Runs 20 epochs; the best checkpoint (by validation s_measure) is written by train().
train(model, train_loader, val_loader, criterion, optimizer, epochs=20)


In [None]:
def final_evaluation(model, loader, name="Dataset"):
    """Print the mean of every ``compute_metrics`` score over ``loader``.

    Args:
        model: Segmentation model whose output exposes ``.logits``.
        loader: Iterable of batches with ``"pixel_values"`` and ``"labels"``.
        name: Label used in the progress bar and the printed header.

    Scores are computed per batch and then averaged across batches. Uses the
    module-level ``device`` and ``compute_metrics``. Returns nothing.
    """
    model.eval()
    metric_names = ['iou', 'dice', 'precision', 'recall', 'f_beta', 'mae', 's_measure', 'e_measure']
    collected = {key: [] for key in metric_names}

    with torch.no_grad():
        for batch in tqdm(loader, desc=f"Evaluating on {name}"):
            pixel_values = batch["pixel_values"].to(device)
            targets = batch["labels"].to(device)
            logits = model(pixel_values=pixel_values).logits
            # Upsample to mask resolution, then convert logits to probabilities.
            probs = torch.sigmoid(
                torch.nn.functional.interpolate(logits, size=targets.shape[-2:], mode="bilinear")
            )

            batch_scores = compute_metrics(probs, targets)
            for key, bucket in collected.items():
                bucket.append(batch_scores[key])

    print(f"\n📊 Final {name} Metrics:")
    for key, bucket in collected.items():
        print(f"{key.upper():<12}: {np.mean(bucket):.4f}")

# ✅ Load best model weights (optional if already loaded)
# map_location keeps the load working when the checkpoint was written from a
# different device than the one in use now (e.g. saved on GPU, reloaded on CPU).
model.load_state_dict(
    torch.load("/kaggle/working/best_segformer_cod_smeasure.pth", map_location=device)
)

# ✅ Evaluate on Training and Validation sets
# NOTE: train_loader wraps the augmented, shuffled training dataset, so the
# "Training Set" numbers are measured on randomly augmented inputs.
final_evaluation(model, train_loader, name="Training Set")
final_evaluation(model, val_loader, name="Validation Set")


In [None]:
import matplotlib.pyplot as plt

def visualize_predictions(model, dataset, num_samples=5, title="Visualization"):
    """Plot input, ground truth and thresholded prediction side by side.

    Walks ``dataset`` from index 0, skips samples whose mask has no foreground
    pixels, and stops after ``num_samples`` figures or at the end of the
    dataset. ``title`` is used in each figure's super-title. Uses the
    module-level ``device``.
    """
    model.eval()
    displayed = 0

    for position in range(len(dataset)):
        if displayed >= num_samples:
            break

        item = dataset[position]
        pixels = item['pixel_values']
        truth = item['labels'].squeeze().numpy()

        # Nothing to compare against when the mask is empty.
        if np.sum(truth) == 0:
            continue

        with torch.no_grad():
            logits = model(pixel_values=pixels.unsqueeze(0).to(device)).logits
            logits = torch.nn.functional.interpolate(
                logits, size=truth.shape, mode="bilinear", align_corners=False
            )
            probability = torch.sigmoid(logits).squeeze().cpu().numpy()
            binary = (probability > 0.5).astype(np.uint8)

        # Channels-last layout, then undo the mean=0.5 / std=0.5 normalization.
        picture = pixels.permute(1, 2, 0).cpu().numpy()
        if picture.shape[2] == 1:  # single-channel input: replicate to three channels
            picture = np.repeat(picture, 3, axis=2)
        picture = (picture * 0.5 + 0.5).clip(0, 1)

        fig, axs = plt.subplots(1, 3, figsize=(15, 5))
        panels = (
            (picture, "Input Image", None),
            (truth, "Ground Truth", 'gray'),
            (binary, "Predicted Mask", 'gray'),
        )
        for ax, (data, caption, cmap) in zip(axs, panels):
            ax.imshow(data, cmap=cmap)
            ax.set_title(caption)
            ax.axis('off')
        plt.suptitle(f"{title} Example #{displayed + 1}", fontsize=14)
        plt.tight_layout()
        plt.show()

        displayed += 1


# Training-set predictions
visualize_predictions(model, train_dataset, num_samples=5, title="Training")

# Validation-set predictions
visualize_predictions(model, val_dataset, num_samples=5, title="Validation")


In [None]:
# ⚠️ Set your correct test image and mask paths
TEST_IMG_DIR = "/kaggle/input/cod10k/COD10K-v3/Test/Image"
TEST_MASK_DIR = "/kaggle/input/cod10k/COD10K-v3/Test/GT_Object"

# Both folders are listed and sorted independently; pairing by position
# assumes the two listings sort into the same order.
test_images = sorted([os.path.join(TEST_IMG_DIR, x) for x in os.listdir(TEST_IMG_DIR)])
test_masks = sorted([os.path.join(TEST_MASK_DIR, x) for x in os.listdir(TEST_MASK_DIR)])

# No augmentation and no shuffling for the held-out test split.
test_dataset = COD10KDataset(test_images, test_masks, augment=False)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=2)


In [None]:
# Load the best weights (make sure path is correct)
# map_location lets the checkpoint load on whichever device is in use now,
# even if it was saved from a different one.
model.load_state_dict(
    torch.load("/kaggle/working/best_segformer_cod_smeasure.pth", map_location=device)
)
model.eval()


In [None]:
def evaluate_model(model, loader, name="Test Set"):
    """Print the batch-averaged ``compute_metrics`` scores for ``loader``.

    Args:
        model: Segmentation model whose output exposes ``.logits``.
        loader: Iterable of batches with ``"pixel_values"`` and ``"labels"``.
        name: Label used in the progress bar and the printed header.

    Uses the module-level ``device`` and ``compute_metrics``. Returns nothing.
    """
    model.eval()
    metric_names = ['iou', 'dice', 'precision', 'recall', 'f_beta', 'mae', 's_measure', 'e_measure']
    scores = {key: [] for key in metric_names}

    with torch.no_grad():
        for batch in tqdm(loader, desc=f"Evaluating on {name}"):
            pixel_values = batch["pixel_values"].to(device)
            targets = batch["labels"].to(device)
            logits = model(pixel_values=pixel_values).logits
            # Match the mask resolution before turning logits into probabilities.
            logits = torch.nn.functional.interpolate(logits, size=targets.shape[-2:], mode="bilinear")
            batch_scores = compute_metrics(torch.sigmoid(logits), targets)
            for key, bucket in scores.items():
                bucket.append(batch_scores[key])

    print(f"\n📊 Final {name} Metrics:")
    for key, bucket in scores.items():
        print(f"{key.upper():<12}: {np.mean(bucket):.4f}")


evaluate_model(model, test_loader, name="Test Set")


In [None]:
import matplotlib.pyplot as plt

def visualize_test_predictions(model, dataset, num_samples=5):
    """Plot input, ground truth and thresholded prediction for test samples.

    Walks ``dataset`` from index 0, skips samples whose mask has no foreground
    pixels, and stops after ``num_samples`` figures or at the end of the
    dataset. Uses the module-level ``device``.
    """
    model.eval()
    displayed = 0

    for position in range(len(dataset)):
        if displayed >= num_samples:
            break

        item = dataset[position]
        batch = item['pixel_values'].unsqueeze(0).to(device)
        truth = item['labels'].squeeze().cpu().numpy()

        # Skip samples without any ground-truth foreground.
        if np.sum(truth) == 0:
            continue

        with torch.no_grad():
            logits = model(pixel_values=batch).logits
            logits = torch.nn.functional.interpolate(
                logits, size=truth.shape, mode="bilinear", align_corners=False
            )
            probability = torch.sigmoid(logits).squeeze().cpu().numpy()
            binary = (probability > 0.5).astype(np.uint8)

        # 🖼️ Channels-last layout, then undo the mean=0.5 / std=0.5 normalization.
        picture = batch.squeeze().cpu().permute(1, 2, 0).numpy()
        picture = (picture * 0.5 + 0.5).clip(0, 1)

        # 📊 One row: input, ground truth, prediction.
        fig, axs = plt.subplots(1, 3, figsize=(15, 5))
        panels = (
            (picture, 'Input Image', None),
            (truth, 'Ground Truth', 'gray'),
            (binary, 'Predicted Mask', 'gray'),
        )
        for ax, (data, caption, cmap) in zip(axs, panels):
            ax.imshow(data, cmap=cmap)
            ax.set_title(caption)
            ax.axis('off')
        plt.tight_layout()
        plt.show()

        displayed += 1


visualize_test_predictions(model, test_dataset, num_samples=6)


In [None]:
def visualize_predictions(model, dataset, num_samples=5, start_idx=0, dataset_name="Set"):
    """Plot predictions starting at ``start_idx`` and report where to resume.

    Samples whose mask has no foreground pixels are skipped. Plotting stops
    after ``num_samples`` figures or at the end of ``dataset``.

    Returns:
        The index of the first sample not yet visited, suitable as
        ``start_idx`` for the next call.

    Uses the module-level ``device``.
    """
    model.eval()
    displayed = 0
    cursor = start_idx
    total = len(dataset)

    while displayed < num_samples and cursor < total:
        item = dataset[cursor]
        cursor += 1  # advance now; every path below moves to the next sample

        batch = item['pixel_values'].unsqueeze(0).to(device)
        truth = item['labels'].squeeze().cpu().numpy()
        if np.sum(truth) == 0:
            continue

        with torch.no_grad():
            logits = model(pixel_values=batch).logits
            logits = torch.nn.functional.interpolate(
                logits, size=truth.shape, mode="bilinear", align_corners=False
            )
            probability = torch.sigmoid(logits).squeeze().cpu().numpy()
            binary = (probability > 0.5).astype(np.uint8)

        # Channels-last layout, then undo the mean=0.5 / std=0.5 normalization.
        picture = batch.squeeze().cpu().permute(1, 2, 0).numpy()
        picture = (picture * 0.5 + 0.5).clip(0, 1)

        fig, axs = plt.subplots(1, 3, figsize=(16, 5))
        panels = (
            (picture, f'{dataset_name} - Input Image', None),
            (truth, 'Ground Truth', 'gray'),
            (binary, 'Predicted Mask', 'gray'),
        )
        for ax, (data, caption, cmap) in zip(axs, panels):
            ax.imshow(data, cmap=cmap)
            ax.set_title(caption)
            ax.axis('off')
        plt.tight_layout()
        plt.show()

        displayed += 1

    return cursor  # resume point for the next call


# 🔍 Train set, two consecutive batches of five
train_index = 0
train_index = visualize_predictions(model, train_dataset, num_samples=5, start_idx=train_index, dataset_name="Train")
train_index = visualize_predictions(model, train_dataset, num_samples=5, start_idx=train_index, dataset_name="Train")

# 🔍 Validation set
val_index = 0
val_index = visualize_predictions(model, val_dataset, num_samples=5, start_idx=val_index, dataset_name="Validation")

# 🔍 Test set
test_index = 0
test_dataset = COD10KDataset(test_images, test_masks, augment=False)
test_index = visualize_predictions(model, test_dataset, num_samples=5, start_idx=test_index, dataset_name="Test")


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from torchvision.transforms.functional import to_pil_image
from tqdm import tqdm

def visualize_loader(loader, model, device, num_examples=5, thresh=0.5, title_prefix=""):
    """
    Shows up to `num_examples` valid (non‑empty GT & non‑empty pred) samples from `loader`.

    Args:
        loader: Iterable of batches with "pixel_values" and "labels".
        model: Segmentation model whose output exposes `.logits`.
        device: Device the batch is moved to before the forward pass.
        num_examples: Number of figures after which the function returns.
        thresh: Probability threshold used to binarize the prediction.
        title_prefix: Text placed before "Image" in the first panel's title.

    The logits are upsampled to the mask resolution before thresholding, and
    the image is de-normalized (mean=0.5 / std=0.5) before display so the
    colours are rendered correctly.
    """
    model.eval()
    shown = 0

    with torch.no_grad():
        for batch in loader:
            imgs = batch["pixel_values"].to(device)      # [B,3,H,W], normalized
            gts  = batch["labels"].to(device)            # [B,1,H,W]
            logits = model(pixel_values=imgs).logits     # may be lower resolution than the masks
            # Bring the prediction to mask resolution so both panels line up.
            logits = torch.nn.functional.interpolate(
                logits, size=gts.shape[-2:], mode="bilinear", align_corners=False
            )
            probs  = torch.sigmoid(logits)               # [B,1,H,W]

            for i in range(imgs.size(0)):
                gt_mask = gts[i, 0].cpu().numpy()
                if gt_mask.sum() == 0:
                    continue    # no GT, skip

                pred_prob = probs[i, 0].cpu().numpy()
                pred_bin  = (pred_prob > thresh).astype(np.uint8)
                if pred_bin.sum() == 0:
                    continue    # empty prediction, skip

                # Undo the normalization; the raw tensor lies in [-1, 1] and
                # would be displayed with corrupted colours otherwise.
                image_np = imgs[i].cpu().permute(1, 2, 0).numpy()
                image_np = (image_np * 0.5 + 0.5).clip(0, 1)

                # Plot
                fig, ax = plt.subplots(1, 3, figsize=(12, 4))
                ax[0].imshow(image_np)
                ax[0].set_title(f"{title_prefix} Image")
                ax[1].imshow(gt_mask, cmap="gray")
                ax[1].set_title("Ground Truth")
                ax[2].imshow(pred_bin, cmap="gray")
                ax[2].set_title("Predicted Mask")
                for a in ax:
                    a.axis("off")
                plt.tight_layout()
                plt.show()

                shown += 1
                if shown >= num_examples:
                    return

    if shown == 0:
        print("⚠️  No valid examples to show (all GT or preds empty).")

# Example usage:

visualize_loader(train_loader, model, device, num_examples=3, thresh=0.5, title_prefix="Train")
visualize_loader(val_loader,   model, device, num_examples=3, thresh=0.5, title_prefix="Val")
visualize_loader(test_loader,  model, device, num_examples=3, thresh=0.5, title_prefix="Test")


In [None]:
!cd /kaggle/working

In [None]:
# List the files in the notebook's current working directory.
!ls

In [None]:
# List the working directory, then render a download link for the checkpoint.
# The relative path is resolved against the notebook's current working directory.
!ls

from IPython.display import FileLink
FileLink('best_segformer_cod_smeasure.pth')


In [None]:
import random
import matplotlib.pyplot as plt

def visualize_examples(loader, model, device, num_examples=5, split_name="Split", thresh=0.5):
    """Plot randomly chosen samples with ground truth, prediction and an overlay.

    Args:
        loader: DataLoader; only its ``.dataset`` is used, indexed directly.
        model: Segmentation model whose output exposes ``.logits``.
        device: Device the sample is moved to for the forward pass.
        num_examples: Number of figures to draw before stopping.
        split_name: Text shown in the first panel's title.
        thresh: Probability threshold used to binarize the prediction.

    Samples whose mask has no foreground pixels are skipped.
    """
    model.eval()
    shown = 0
    # Visit the dataset in a shuffled order so each call shows different images.
    indices = list(range(len(loader.dataset)))
    random.shuffle(indices)

    for idx in indices:
        # Fetch the sample once: a second lookup would re-read the files and,
        # for an augmented dataset, draw a different random transform, leaving
        # the image and its mask misaligned.
        sample = loader.dataset[idx]
        img_tensor, gt_mask = sample['pixel_values'], sample['labels']
        # skip empty GT
        if gt_mask.sum() == 0:
            continue

        # run it through the model
        with torch.no_grad():
            inp = img_tensor.unsqueeze(0).to(device)
            logits = model(pixel_values=inp).logits
            pred_prob = torch.sigmoid(
                torch.nn.functional.interpolate(
                    logits, size=gt_mask.shape[-2:], mode="bilinear", align_corners=False
                )
            ).squeeze(0).cpu()
        pred_mask = (pred_prob > thresh).float()

        # prepare visuals
        img = img_tensor.permute(1, 2, 0).cpu().numpy()  # C,H,W -> H,W,C
        img = (img * 0.5 + 0.5).clip(0, 1)               # un-normalize
        gt = gt_mask.squeeze(0).cpu().numpy()
        pm = pred_mask.squeeze(0).numpy()

        # overlay: red = GT, green = PRED
        overlay = img.copy()
        overlay[gt > 0, 0] = 1.0   # red channel
        overlay[pm > 0, 1] = 1.0   # green channel

        # plot
        fig, axes = plt.subplots(1, 4, figsize=(16, 4))
        axes[0].imshow(img)
        axes[0].set_title(f"{split_name}\nInput")
        axes[1].imshow(gt, cmap='gray')
        axes[1].set_title("GT mask")
        axes[2].imshow(pm, cmap='gray')
        axes[2].set_title("Pred mask")
        axes[3].imshow(overlay)
        axes[3].set_title("Overlay (R=GT, G=Pred)")
        for axis in axes:
            axis.axis('off')
        plt.tight_layout()
        plt.show()

        shown += 1
        if shown >= num_examples:
            break

# Usage — call once per split:
visualize_examples(train_loader, model, device, num_examples=3, split_name="Train")
visualize_examples(val_loader,   model, device, num_examples=3, split_name="Val")
visualize_examples(test_loader,  model, device, num_examples=3, split_name="Test")


In [None]:
import random
import matplotlib.pyplot as plt

def visualize_examples(loader, model, device, num_examples=5, split_name="Split", thresh=0.5):
    """Plot randomly chosen samples with ground truth, prediction and an overlay.

    Args:
        loader: DataLoader; only its ``.dataset`` is used, indexed directly.
        model: Segmentation model whose output exposes ``.logits``.
        device: Device the sample is moved to for the forward pass.
        num_examples: Number of figures to draw before stopping.
        split_name: Text shown in the first panel's title.
        thresh: Probability threshold used to binarize the prediction.

    Samples whose mask has no foreground pixels are skipped.
    """
    model.eval()
    shown = 0
    # Visit the dataset in a shuffled order so each call shows different images.
    indices = list(range(len(loader.dataset)))
    random.shuffle(indices)

    for idx in indices:
        # One lookup per index: indexing the dataset twice would read the files
        # twice and, with random augmentation enabled, pair an image from one
        # random transform with a mask from another.
        sample = loader.dataset[idx]
        img_tensor = sample['pixel_values']
        gt_mask = sample['labels']
        if gt_mask.sum() == 0:
            continue

        # run it through the model
        with torch.no_grad():
            inp = img_tensor.unsqueeze(0).to(device)
            logits = model(pixel_values=inp).logits
            pred_prob = torch.sigmoid(
                torch.nn.functional.interpolate(
                    logits, size=gt_mask.shape[-2:], mode="bilinear", align_corners=False
                )
            ).squeeze(0).cpu()
        pred_mask = (pred_prob > thresh).float()

        # prepare visuals
        img = img_tensor.permute(1, 2, 0).cpu().numpy()  # C,H,W -> H,W,C
        img = (img * 0.5 + 0.5).clip(0, 1)               # un-normalize
        gt = gt_mask.squeeze(0).cpu().numpy()
        pm = pred_mask.squeeze(0).numpy()

        # overlay: red = GT, green = PRED
        overlay = img.copy()
        overlay[gt > 0, 0] = 1.0   # red channel
        overlay[pm > 0, 1] = 1.0   # green channel

        # plot
        fig, axes = plt.subplots(1, 4, figsize=(16, 4))
        axes[0].imshow(img)
        axes[0].set_title(f"{split_name}\nInput")
        axes[1].imshow(gt, cmap='gray')
        axes[1].set_title("GT mask")
        axes[2].imshow(pm, cmap='gray')
        axes[2].set_title("Pred mask")
        axes[3].imshow(overlay)
        axes[3].set_title("Overlay (R=GT, G=Pred)")
        for axis in axes:
            axis.axis('off')
        plt.tight_layout()
        plt.show()

        shown += 1
        if shown >= num_examples:
            break

# Usage — call once per split:
visualize_examples(train_loader, model, device, num_examples=3, split_name="Train")
visualize_examples(val_loader,   model, device, num_examples=3, split_name="Val")
visualize_examples(test_loader,  model, device, num_examples=3, split_name="Test")


In [None]:
from torchvision import transforms
from PIL import Image
import os

# Define transform (use same as training): resize, scale to [0, 1], then apply
# the mean=0.5 / std=0.5 normalization the training pipeline uses, so the
# model sees inputs in the value range it was trained on.
transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Image and ground-truth folders of the second dataset.
image_dir = "/kaggle/input/nc4kdataset/Imgs"
gt_dir = "/kaggle/input/nc4kdataset/GT"

# File names only (no directory prefix), each list sorted independently;
# pairing by position assumes both folders sort into the same order.
image_files = sorted(os.listdir(image_dir))
gt_files = sorted(os.listdir(gt_dir))

def load_image(path):
    """Open ``path`` as RGB, apply the module-level ``transform``, and return
    it as a batch of one on the module-level ``device``."""
    rgb = Image.open(path).convert("RGB")
    batch = transform(rgb).unsqueeze(0)  # add the batch axis
    return batch.to(device)

def load_mask(path):
    """Open ``path`` as a single-channel image and return it resized to 512x512.

    Returns a PIL image. Nearest-neighbour interpolation is used so a binary
    mask stays binary; the default bilinear filter would introduce
    intermediate grey values along object boundaries.
    """
    mask = Image.open(path).convert("L")
    resize = transforms.Resize(
        (512, 512), interpolation=transforms.InterpolationMode.NEAREST
    )
    return resize(mask)


In [None]:
import numpy as np
import torch
import cv2  # Import OpenCV for resizing
from tqdm import tqdm

# Define the Dice score function
def dice_score(pred, gt):
    """Return the Dice coefficient of two same-shaped mask arrays.

    Computed as ``2 * sum(pred * gt) / (sum(pred) + sum(gt) + 1e-7)``; the
    epsilon keeps the result defined when both masks are empty.
    """
    overlap = np.sum(pred * gt)
    total = np.sum(pred) + np.sum(gt)
    return 2.0 * overlap / (total + 1e-7)

# Evaluate the model on the validation set
def predict_and_evaluate():
    """Print the average per-batch Dice score of ``model`` on ``val_loader``.

    Uses the module-level ``model``, ``val_loader``, ``device`` and
    ``dice_score``. Logits are upsampled to the ground-truth resolution before
    thresholding, matching how the training and evaluation loops in this
    notebook compare predictions to masks. Returns nothing.
    """
    model.eval()  # Set the model to evaluation mode
    dice_list = []

    for batch in tqdm(val_loader, desc="Evaluating"):
        imgs = batch["pixel_values"].to(device)
        gt_masks = batch["labels"].to(device)

        with torch.no_grad():
            logits = model(imgs).logits  # Model returns a SemanticSegmenterOutput
            # Compare at mask resolution rather than shrinking the ground truth.
            logits = torch.nn.functional.interpolate(
                logits, size=gt_masks.shape[-2:], mode="bilinear", align_corners=False
            )
            # squeeze(1) drops only the channel axis, so a final batch holding
            # a single image keeps its batch dimension.
            pred_mask = torch.sigmoid(logits).squeeze(1).cpu().numpy() > 0.5
            gt_mask = gt_masks.squeeze(1).cpu().numpy() > 0.5

        dice_list.append(dice_score(pred_mask, gt_mask))

    # Output the average Dice score
    print(f"Average Dice: {np.mean(dice_list):.4f}")

# Example usage
predict_and_evaluate()


In [None]:
import numpy as np
import torch
import cv2  # Import OpenCV for resizing
from tqdm import tqdm
from sklearn.metrics import mean_absolute_error

# Define the Dice score function
def dice_score(pred, gt):
    """Return the Dice coefficient of two same-shaped mask arrays.

    The 1e-7 in the denominator keeps the result defined for two empty masks.
    """
    shared = np.sum(pred * gt)
    combined = np.sum(pred) + np.sum(gt)
    return 2.0 * shared / (combined + 1e-7)

# Define the IoU score function
def iou_score(pred, gt):
    """Return intersection-over-union of two same-shaped mask arrays.

    The 1e-7 in the denominator keeps the result defined for two empty masks.
    """
    overlap = np.sum(pred * gt)
    combined = np.sum(pred) + np.sum(gt) - overlap
    return overlap / (combined + 1e-7)

# Define the F1 score function
def f_measure(pred, gt, beta=1.0):
    """Return the F-beta score of ``pred`` against ``gt`` (F1 when beta is 1).

    Precision, recall and the final ratio each carry a 1e-7 term in the
    denominator so empty masks do not divide by zero.
    """
    hits = np.sum(pred * gt)
    precision = hits / (np.sum(pred) + 1e-7)
    recall = hits / (np.sum(gt) + 1e-7)
    beta_sq = beta**2
    return (1 + beta_sq) * (precision * recall) / (beta_sq * precision + recall + 1e-7)

# Define the S-measure function (Structural Similarity Measure)
def s_measure(pred, gt):
    """Return the number of mismatching pixels divided by the ground-truth area.

    Computed as ``sum(|pred - gt|) / (sum(gt) + 1e-7)`` on the integer-cast
    masks; a smaller value means a closer match. The epsilon keeps the result
    finite when the ground truth has no foreground pixels.

    NOTE(review): despite the name, this is a normalized error count rather
    than a structure-similarity score — confirm that this is intended.
    """
    pred_int = pred.astype(np.int32)  # booleans cannot be subtracted directly
    gt_int = gt.astype(np.int32)
    denominator = np.sum(gt_int) + 1e-7  # avoid division by zero on empty GT
    return np.sum(np.abs(pred_int - gt_int)) / denominator

# Define the E-measure function (Edge-based Measure)
def e_measure(pred, gt):
    """Return the number of mismatching pixels divided by the ground-truth area.

    Computed as ``sum(|pred - gt|) / (sum(|gt|) + 1e-7)`` on the integer-cast
    masks; a smaller value means a closer match.
    """
    guess = pred.astype(np.int32)  # booleans cannot be subtracted directly
    truth = gt.astype(np.int32)
    mismatches = np.sum(np.abs(guess - truth))
    truth_area = np.sum(np.abs(truth))
    return mismatches / (truth_area + 1e-7)

# Define the MAE (Mean Absolute Error) function
def mae_score(pred, gt):
    """Return the mean absolute error between two masks.

    Both arrays are cast to 0/1 integers and flattened before being handed to
    scikit-learn's ``mean_absolute_error``.
    """
    truth = gt.astype(np.int32).flatten()
    guess = pred.astype(np.int32).flatten()
    return mean_absolute_error(truth, guess)

# Evaluate the model on the validation set
def predict_and_evaluate(model, val_loader, device):
    """Print the per-batch average of six mask metrics over ``val_loader``.

    Args:
        model: Segmentation model whose output exposes ``.logits``.
        val_loader: Iterable of batches with ``"pixel_values"`` and ``"labels"``.
        device: Device the batches are moved to.

    Logits are upsampled to the ground-truth resolution before thresholding,
    matching how the training and evaluation loops in this notebook compare
    predictions to masks. Each metric is computed once per batch over all of
    its pixels and then averaged across batches. Uses the module-level metric
    helpers (``dice_score``, ``iou_score``, ``f_measure``, ``s_measure``,
    ``e_measure``, ``mae_score``). Returns nothing.
    """
    model.eval()  # Set the model to evaluation mode
    dice_list, iou_list, f1_list, s_list, e_list, mae_list = [], [], [], [], [], []

    for batch in tqdm(val_loader, desc="Evaluating"):
        imgs = batch["pixel_values"].to(device)
        gt_masks = batch["labels"].to(device)

        with torch.no_grad():
            logits = model(imgs).logits  # Model returns a SemanticSegmenterOutput
            # Compare at mask resolution rather than shrinking the ground truth.
            logits = torch.nn.functional.interpolate(
                logits, size=gt_masks.shape[-2:], mode="bilinear", align_corners=False
            )
            # squeeze(1) drops only the channel axis, so a final batch holding
            # a single image keeps its batch dimension.
            pred_mask = torch.sigmoid(logits).squeeze(1).cpu().numpy() > 0.5
            gt_mask = gt_masks.squeeze(1).cpu().numpy() > 0.5

        # Compute and record the metrics for this batch
        dice_list.append(dice_score(pred_mask, gt_mask))
        iou_list.append(iou_score(pred_mask, gt_mask))
        f1_list.append(f_measure(pred_mask, gt_mask))
        s_list.append(s_measure(pred_mask, gt_mask))
        e_list.append(e_measure(pred_mask, gt_mask))
        mae_list.append(mae_score(pred_mask, gt_mask))

    # Output the average of each metric
    print(f"Average Dice: {np.mean(dice_list):.4f}")
    print(f"Average IoU: {np.mean(iou_list):.4f}")
    print(f"Average F1 Score: {np.mean(f1_list):.4f}")
    print(f"Average S Measure: {np.mean(s_list):.4f}")
    print(f"Average E Measure: {np.mean(e_list):.4f}")
    print(f"Average MAE: {np.mean(mae_list):.4f}")

# Example usage
# Assuming you have the 'val_loader' (data loader for the validation set) and the model loaded
device = 'cuda' if torch.cuda.is_available() else 'cpu'
predict_and_evaluate(model, val_loader, device)


In [None]:
import numpy as np
import torch
import cv2  # Import OpenCV for resizing
from tqdm import tqdm
from sklearn.metrics import mean_absolute_error

# Define the Dice score function
def dice_score(pred, gt):
    """Return the Dice coefficient of two same-shaped mask arrays.

    A 1e-7 term in the denominator guards against two empty masks.
    """
    intersection_size = np.sum(pred * gt)
    size_sum = np.sum(pred) + np.sum(gt)
    return 2.0 * intersection_size / (size_sum + 1e-7)

# Define the IoU score function
def iou_score(pred, gt):
    """Return intersection-over-union of two same-shaped mask arrays.

    A 1e-7 term in the denominator guards against two empty masks.
    """
    shared = np.sum(pred * gt)
    either = np.sum(pred) + np.sum(gt) - shared
    return shared / (either + 1e-7)

# Define the F1 score function
def f_measure(pred, gt, beta=1.0):
    """Return the F-beta score of ``pred`` against ``gt`` (F1 when beta is 1).

    Every denominator carries a 1e-7 term so empty masks do not divide by zero.
    """
    true_positives = np.sum(pred * gt)
    precision = true_positives / (np.sum(pred) + 1e-7)
    recall = true_positives / (np.sum(gt) + 1e-7)
    weight = beta**2
    return (1 + weight) * (precision * recall) / (weight * precision + recall + 1e-7)

# Define the S-measure function (Structural Similarity Measure)
def s_measure(pred, gt):
    """Return the number of mismatching pixels divided by the ground-truth area.

    Computed as ``sum(|pred - gt|) / (sum(gt) + 1e-7)`` on the integer-cast
    masks; a smaller value means a closer match.
    """
    guess = pred.astype(np.int32)  # booleans cannot be subtracted directly
    truth = gt.astype(np.int32)
    mismatches = np.sum(np.abs(guess - truth))
    truth_area = np.sum(truth) + 1e-7  # epsilon avoids division by zero
    return mismatches / truth_area

# Define the E-measure function (Edge-based Measure)
def e_measure(pred, gt):
    """Return the number of mismatching pixels divided by the ground-truth area.

    Computed as ``sum(|pred - gt|) / (sum(|gt|) + 1e-7)`` on the integer-cast
    masks; a smaller value means a closer match. When the ground truth has no
    foreground pixels the function returns 0.
    """
    pred_int = pred.astype(np.int32)  # booleans cannot be subtracted directly
    gt_int = gt.astype(np.int32)
    gt_total = np.sum(np.abs(gt_int))
    # Test the raw sum: once the epsilon is added the value can never be zero,
    # so the empty-ground-truth case would go undetected.
    if gt_total == 0:
        return 0
    return np.sum(np.abs(pred_int - gt_int)) / (gt_total + 1e-7)

# Define the MAE (Mean Absolute Error) function
def mae_score(pred, gt):
    """Return the mean absolute error between two masks.

    The masks are cast to 0/1 integers and flattened, then scored with
    scikit-learn's ``mean_absolute_error``.
    """
    flat_truth = gt.astype(np.int32).flatten()
    flat_guess = pred.astype(np.int32).flatten()
    return mean_absolute_error(flat_truth, flat_guess)

# Evaluate the model on the validation set
def predict_and_evaluate(model, val_loader, device):
    """Print the per-batch average of six mask metrics over ``val_loader``.

    Args:
        model: Segmentation model whose output exposes ``.logits``.
        val_loader: Iterable of batches with ``"pixel_values"`` and ``"labels"``.
        device: Device the batches are moved to.

    Logits are upsampled to the ground-truth resolution before thresholding,
    matching how the training and evaluation loops in this notebook compare
    predictions to masks. Each metric is computed once per batch over all of
    its pixels and then averaged across batches. Uses the module-level metric
    helpers (``dice_score``, ``iou_score``, ``f_measure``, ``s_measure``,
    ``e_measure``, ``mae_score``). Returns nothing.
    """
    model.eval()  # Set the model to evaluation mode
    dice_list, iou_list, f1_list, s_list, e_list, mae_list = [], [], [], [], [], []

    for batch in tqdm(val_loader, desc="Evaluating"):
        imgs = batch["pixel_values"].to(device)
        gt_masks = batch["labels"].to(device)

        with torch.no_grad():
            logits = model(imgs).logits  # Model returns a SemanticSegmenterOutput
            # Score at mask resolution instead of downsizing the ground truth.
            logits = torch.nn.functional.interpolate(
                logits, size=gt_masks.shape[-2:], mode="bilinear", align_corners=False
            )
            # Only the channel axis is removed; a bare squeeze() would also
            # drop the batch axis whenever a batch holds a single image.
            pred_mask = torch.sigmoid(logits).squeeze(1).cpu().numpy() > 0.5
            gt_mask = gt_masks.squeeze(1).cpu().numpy() > 0.5

        # Compute and record the metrics for this batch
        dice_list.append(dice_score(pred_mask, gt_mask))
        iou_list.append(iou_score(pred_mask, gt_mask))
        f1_list.append(f_measure(pred_mask, gt_mask))
        s_list.append(s_measure(pred_mask, gt_mask))
        e_list.append(e_measure(pred_mask, gt_mask))
        mae_list.append(mae_score(pred_mask, gt_mask))

    # Output the average of each metric
    print(f"Average Dice: {np.mean(dice_list):.4f}")
    print(f"Average IoU: {np.mean(iou_list):.4f}")
    print(f"Average F1 Score: {np.mean(f1_list):.4f}")
    print(f"Average S Measure: {np.mean(s_list):.4f}")
    print(f"Average E Measure: {np.mean(e_list):.4f}")
    print(f"Average MAE: {np.mean(mae_list):.4f}")

# Example usage
# Assuming you have the 'val_loader' (data loader for the validation set) and the model loaded
device = 'cuda' if torch.cuda.is_available() else 'cpu'
predict_and_evaluate(model, val_loader, device)
