In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import random
import shutil
import tifffile
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

In [3]:
def create_cvc_clinicdb_splits(
    original_dir,     # Path to the "Original/" folder
    mask_dir,         # Path to the "Ground Truth/" folder
    output_dir,       # Create "train/", "val/", "test/" subfolders
    train_ratio=0.8,
    val_ratio=0.1,
    test_ratio=0.1,
    seed=42
):
    """
    Splits the CVC-ClinicDB dataset into train/val/test.
    Each image in 'original_dir' must have a matching mask in 'mask_dir'.
    We copy them into separate subfolders in 'output_dir'.
    """
    random.seed(seed)

    # Gather all image filenames
    # Adjust extensions if your images are .jpg, .png, etc.
    valid_ext = ('.jpg', '.jpeg', '.png', '.tif')
    all_images = [
        f for f in sorted(os.listdir(original_dir))
        if f.lower().endswith(valid_ext)
    ]
    if not all_images:
        raise ValueError(f"No image files found in {original_dir} with extensions {valid_ext}")

    # Shuffle and determine split indices
    random.shuffle(all_images)
    total = len(all_images)
    train_end = int(total * train_ratio)
    val_end   = int(total * (train_ratio + val_ratio))

    train_imgs = all_images[:train_end]
    val_imgs   = all_images[train_end:val_end]
    test_imgs  = all_images[val_end:]

    print(f"Total images: {total}")
    print(f"Train: {len(train_imgs)}, Val: {len(val_imgs)}, Test: {len(test_imgs)}")

    # Create output directories
    for split in ['train', 'val', 'test']:
        os.makedirs(os.path.join(output_dir, split, 'images'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, split, 'masks'), exist_ok=True)

    # Helper to copy files
    def copy_files(file_list, split):
        for fname in file_list:
            src_img_path = os.path.join(original_dir, fname)
            dst_img_path = os.path.join(output_dir, split, 'images', fname)

            src_mask_path = os.path.join(mask_dir, fname)
            dst_mask_path = os.path.join(output_dir, split, 'masks', fname)

            # Copy the original image
            shutil.copy2(src_img_path, dst_img_path)
            # Copy the corresponding mask
            shutil.copy2(src_mask_path, dst_mask_path)

    # Copy to train, val, test
    copy_files(train_imgs, 'train')
    copy_files(val_imgs,   'val')
    copy_files(test_imgs,  'test')

    print("Data splitting complete!")

if __name__ == "__main__":
    original_dir = "/content/drive/MyDrive/CSCI_6967/HW5/CVC-ClinicDB/Original"
    mask_dir     = "/content/drive/MyDrive/CSCI_6967/HW5/CVC-ClinicDB/Ground_Truth"
    output_dir   = "/content/drive/MyDrive/CSCI_6967/HW5/CVC-ClinicDB/splits"

    create_cvc_clinicdb_splits(
        original_dir,
        mask_dir,
        output_dir,
        train_ratio=0.8,
        val_ratio=0.1,
        test_ratio=0.1,
        seed=42
    )


Total images: 219
Train: 175, Val: 22, Test: 22
Data splitting complete!


In [5]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
import tifffile

class PolypDataset(Dataset):
    def __init__(self, image_dir, mask_dir, transform=None):
        """
        image_dir: path to .tif colonoscopy images
        mask_dir:  path to matching .tif masks
        transform: optional transform that works on tensors or PIL images
        """
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.images = sorted(os.listdir(image_dir))

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = self.images[idx]
        img_path = os.path.join(self.image_dir, img_name)
        mask_path = os.path.join(self.mask_dir, img_name)

        # Read the .tif image as a NumPy array using tifffile
        img_arr = tifffile.imread(img_path)  # shape could be (H,W) or (H,W,C)
        mask_arr = tifffile.imread(mask_path)

        # Convert them to PyTorch tensors
        # If the image is grayscale => shape (H, W). If color => (H, W, 3).
        if img_arr.ndim == 2:
            # grayscale => expand to (H, W, 1)
            img_arr = np.expand_dims(img_arr, axis=-1)
        if mask_arr.ndim == 2:
            mask_arr = np.expand_dims(mask_arr, axis=-1)

        # Convert to float32, scale if needed
        img_tensor = torch.from_numpy(img_arr).float()
        mask_tensor = torch.from_numpy(mask_arr).float()

        # Rearrange from (H, W, C) => (C, H, W)
        img_tensor = img_tensor.permute(2, 0, 1)
        mask_tensor = mask_tensor.permute(2, 0, 1)

        # Threshold the mask
        mask_tensor = (mask_tensor > 0.5).float()

        return img_tensor, mask_tensor




# U-Net model definition (short version)
class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
    def forward(self, x):
        return self.conv(x)

class DownBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.pool = nn.MaxPool2d(2)
        self.conv = DoubleConv(in_channels, out_channels)
    def forward(self, x):
        x = self.pool(x)
        x = self.conv(x)
        return x

class UpBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.up = nn.ConvTranspose2d(in_channels, out_channels, 2, stride=2)
        self.conv = DoubleConv(out_channels*2, out_channels)
    def forward(self, x1, x2):
        x1 = self.up(x1)
        # Pad if needed
        diffY = x2.size(2) - x1.size(2)
        diffX = x2.size(3) - x1.size(3)
        x1 = nn.functional.pad(x1, [diffX // 2, diffX - diffX//2, diffY//2, diffY - diffY//2])
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)

class UNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=1, features=[64, 128, 256, 512]):
        super().__init__()
        self.init_conv = DoubleConv(in_channels, features[0])
        self.down1 = DownBlock(features[0], features[1])
        self.down2 = DownBlock(features[1], features[2])
        self.down3 = DownBlock(features[2], features[3])
        self.bottleneck = DoubleConv(features[3], features[3]*2)
        self.up1 = UpBlock(features[3]*2, features[3])
        self.up2 = UpBlock(features[3], features[2])
        self.up3 = UpBlock(features[2], features[1])
        self.up4 = nn.ConvTranspose2d(features[1], features[0], kernel_size=2, stride=2)
        self.conv_last = DoubleConv(features[0]*2, features[0])
        self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)

    def forward(self, x):
        x0 = self.init_conv(x)
        x1 = self.down1(x0)
        x2 = self.down2(x1)
        x3 = self.down3(x2)
        bottleneck = self.bottleneck(x3)
        up1 = self.up1(bottleneck, x3)
        up2 = self.up2(up1, x2)
        up3 = self.up3(up2, x1)
        up4 = self.up4(up3)
        # pad if needed
        diffY = x0.size(2) - up4.size(2)
        diffX = x0.size(3) - up4.size(3)
        up4 = nn.functional.pad(up4, [diffX//2, diffX - diffX//2, diffY//2, diffY - diffY//2])
        x_out = torch.cat([x0, up4], dim=1)
        x_out = self.conv_last(x_out)
        return self.final_conv(x_out)

# Metrics
def compute_segmentation_metrics(pred_mask, true_mask):
    pred = pred_mask.view(pred_mask.size(0), -1).float()
    truth = true_mask.view(true_mask.size(0), -1).float()
    eps = 1e-7
    tp = (pred * truth).sum(dim=1)
    fp = (pred * (1 - truth)).sum(dim=1)
    fn = ((1 - pred) * truth).sum(dim=1)

    precision = (tp + eps) / (tp + fp + eps)
    recall = (tp + eps) / (tp + fn + eps)
    iou = (tp + eps) / (tp + fp + fn + eps)
    dice = (2*tp + eps) / (2*tp + fp + fn + eps)

    return {
        'dice': dice.mean().item(),
        'iou': iou.mean().item(),
        'precision': precision.mean().item(),
        'recall': recall.mean().item()
    }

# Evaluate function
def evaluate(model, loader, device):
    model.eval()
    all_dice, all_iou, all_prec, all_rec = 0, 0, 0, 0
    count = 0
    with torch.no_grad():
        for images, masks in loader:
            images, masks = images.to(device), masks.to(device)
            logits = model(images)
            probs = torch.sigmoid(logits)
            preds = (probs > 0.5).float()
            metrics = compute_segmentation_metrics(preds, masks)
            all_dice += metrics['dice']
            all_iou  += metrics['iou']
            all_prec += metrics['precision']
            all_rec  += metrics['recall']
            count += 1
    return {
        'dice': all_dice/count,
        'iou': all_iou/count,
        'precision': all_prec/count,
        'recall': all_rec/count
    }

# Training loop
def train_one_epoch(model, loader, optimizer, criterion, device):
    model.train()
    running_loss = 0
    for images, masks in loader:
        images, masks = images.to(device), masks.to(device)
        optimizer.zero_grad()
        logits = model(images)
        loss = criterion(logits, masks)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(loader)

def train_unet(model, train_loader, val_loader, device, epochs=20):
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    criterion = nn.BCEWithLogitsLoss()
    for epoch in range(1, epochs+1):
        train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
        val_metrics = evaluate(model, val_loader, device)
        print(f"Epoch {epoch}/{epochs}, Train Loss={train_loss:.4f}, "
              f"Dice={val_metrics['dice']:.4f}, IoU={val_metrics['iou']:.4f}, "
              f"Prec={val_metrics['precision']:.4f}, Rec={val_metrics['recall']:.4f}")

# Main
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = UNet(in_channels=3, out_channels=1).to(device)

    # Prepare data (train, val, test) after splitting
    train_transform = transforms.Compose([
        transforms.Resize((256,256)),
        transforms.ToTensor(),
    ])
    val_transform = transforms.Compose([
        transforms.Resize((256,256)),
        transforms.ToTensor(),
    ])

    train_dataset = PolypDataset(
        image_dir="/content/drive/MyDrive/CSCI_6967/HW5/CVC-ClinicDB/splits/train/images",
        mask_dir="/content/drive/MyDrive/CSCI_6967/HW5/CVC-ClinicDB/splits/train/masks",
        transform=train_transform
    )
    val_dataset = PolypDataset(
        image_dir="/content/drive/MyDrive/CSCI_6967/HW5/CVC-ClinicDB/splits/val/images",
        mask_dir="/content/drive/MyDrive/CSCI_6967/HW5/CVC-ClinicDB/splits/val/masks",
        transform=val_transform
    )
    test_dataset = PolypDataset(
        image_dir="/content/drive/MyDrive/CSCI_6967/HW5/CVC-ClinicDB/splits/test/images",
        mask_dir="/content/drive/MyDrive/CSCI_6967/HW5/CVC-ClinicDB/splits/test/masks",
        transform=val_transform
    )

    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2)
    val_loader   = DataLoader(val_dataset,   batch_size=4, shuffle=False, num_workers=2)
    test_loader  = DataLoader(test_dataset,  batch_size=4, shuffle=False, num_workers=2)

    # Train
    train_unet(model, train_loader, val_loader, device, epochs=20)

    # Evaluate on test set
    test_metrics = evaluate(model, test_loader, device)
    print("Final Test Metrics:")
    print(f"Dice={test_metrics['dice']:.4f}, IoU={test_metrics['iou']:.4f}, "
          f"Precision={test_metrics['precision']:.4f}, Recall={test_metrics['recall']:.4f}")

if __name__ == "__main__":
    main()


Epoch 1/20, Train Loss=0.4362, Dice=0.3264, IoU=0.2534, Prec=0.5485, Rec=0.3685
Epoch 2/20, Train Loss=0.3544, Dice=0.3389, IoU=0.2625, Prec=0.6367, Rec=0.3449
Epoch 3/20, Train Loss=0.3215, Dice=0.4335, IoU=0.3400, Prec=0.5487, Rec=0.4429
Epoch 4/20, Train Loss=0.2940, Dice=0.3682, IoU=0.3054, Prec=0.8798, Rec=0.3347
Epoch 5/20, Train Loss=0.2870, Dice=0.4250, IoU=0.3457, Prec=0.6656, Rec=0.3912
Epoch 6/20, Train Loss=0.2743, Dice=0.4109, IoU=0.3145, Prec=0.6415, Rec=0.4492
Epoch 7/20, Train Loss=0.2525, Dice=0.4268, IoU=0.3287, Prec=0.6907, Rec=0.4707
Epoch 8/20, Train Loss=0.2418, Dice=0.4893, IoU=0.4039, Prec=0.7630, Rec=0.4361
Epoch 9/20, Train Loss=0.2352, Dice=0.4591, IoU=0.3833, Prec=0.8427, Rec=0.4148
Epoch 10/20, Train Loss=0.2136, Dice=0.5200, IoU=0.4275, Prec=0.6227, Rec=0.5211
Epoch 11/20, Train Loss=0.2056, Dice=0.5070, IoU=0.4020, Prec=0.7779, Rec=0.4960
Epoch 12/20, Train Loss=0.2028, Dice=0.4996, IoU=0.4080, Prec=0.8689, Rec=0.4335
Epoch 13/20, Train Loss=0.1844, Dice=

The final results show a Dice score of approximately 0.71 and an IoU of around 0.61, which are notably lower than the 0.88–0.90 (Dice) and 0.82–0.84 (IoU) typically reported by state-of-the-art methods such as PraNet or ResUNet++. This performance gap can arise from factors like limited data augmentation, fewer training epochs, or the absence of additional polyp datasets (e.g., Kvasir-SEG, CVC-ColonDB) that are often used in combination with CVC-ClinicDB. It may also reflect architectural differences, since many high-performing models incorporate advanced features like attention mechanisms or residual connections. Interestingly, my precision is roughly 0.91—on par with or higher than the ~0.88–0.91 cited in the literature, suggesting that the model is skilled at avoiding false positives. However, the recall of about 0.68 indicates it fails to capture some polyp regions, thus reducing its overall Dice and IoU. In essence, the model tends to be conservative (high precision) but misses certain parts of the polyp (lower recall), which explains the discrepancy with more advanced architectures that excel in boundary refinement and small polyp detection.

Citations: J. M. J. Valanarasu et al., ResUNet++: An Advanced Architecture for Medical Image Segmentation, D. P. Fan et al., Parallel Reverse Attention Network for Polyp Segmentation, MICCAI 2020.