In [1]:
import os
import cv2
import torch as T
from torch.utils.data import Dataset, DataLoader
import numpy as np
from glob import glob
import re
from sklearn.decomposition import PCA
from skimage.filters import threshold_multiotsu
from tqdm.notebook import tqdm
import time

In [2]:
# 📁 Base image directory.
# Set the CWF788_IMAGE_DIR environment variable before starting the kernel to
# point the notebook at a different location; when it is unset, the original
# absolute path below is used, so existing setups keep working unchanged.
BASE_PATH = os.environ.get("CWF788_IMAGE_DIR", r"D:\AAU Internship\Code\CWF-788\IMAGE512x384")
# Use the GPU when PyTorch reports one is available, otherwise fall back to CPU.
DEVICE = T.device("cuda" if T.cuda.is_available() else "cpu")
print(f"🚀 Using device: {DEVICE}")

# 📦 Dataset class
class VegetationDataset(Dataset):
    """Dataset of RGB images read from a single folder.

    Collects every ``*.jpg`` and ``*.png`` file directly inside ``folder_path``
    and orders the paths with a natural sort, so ``2_image.jpg`` comes before
    ``10_image.jpg``. Each item is an ``(image_tensor, image_path)`` pair where
    ``image_tensor`` is float32, laid out as ``[3, height, width]``, scaled to
    ``[0, 1]`` and moved to the module-level ``DEVICE``.

    Args:
        folder_path: Directory to scan for images (not recursive).
        image_size: ``(width, height)`` each image is resized to, in the order
            ``cv2.resize`` expects. Defaults to ``(512, 384)``.
    """

    def __init__(self, folder_path, image_size=(512, 384)):
        self.image_size = tuple(image_size)
        candidates = glob(os.path.join(folder_path, "*.jpg")) + glob(os.path.join(folder_path, "*.png"))
        self.image_paths = sorted(candidates, key=self._natural_key)

    @staticmethod
    def _natural_key(path):
        """Split a path into text and integer chunks so digit runs compare numerically."""
        # isdecimal() accepts exactly the characters that r'\d' matches, so every
        # chunk converted here is one int() can parse.
        return [int(chunk) if chunk.isdecimal() else chunk for chunk in re.split(r'(\d+)', path)]

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, convert and normalise the image at position ``idx``.

        Raises:
            OSError: if OpenCV cannot read the file (missing, unreadable or corrupt).
        """
        img_path = self.image_paths[idx]
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise OSError(f"Could not read image: {img_path}")

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; downstream expects RGB
        img = cv2.resize(img, self.image_size, interpolation=cv2.INTER_AREA)
        img = img.astype(np.float32) / 255.0
        img_tensor = T.from_numpy(img).permute(2, 0, 1)  # HWC -> CHW

        # NOTE(review): the tensor is moved to DEVICE per item; presumably this relies on
        # the DataLoader running in the main process (no worker processes) - confirm
        # before raising num_workers.
        return img_tensor.to(DEVICE), img_path

def get_loader(folder_path, batch_size=4):
    """Return a DataLoader that walks the images of ``folder_path`` in dataset order.

    Batches hold ``batch_size`` items and shuffling is disabled.
    """
    return DataLoader(
        VegetationDataset(folder_path),
        batch_size=batch_size,
        shuffle=False,
    )

# Resolve the three split folders that live under BASE_PATH.
train_path, val_path, test_path = (
    os.path.join(BASE_PATH, split_dir)
    for split_dir in ("train_new", "validation_new", "test_new")
)

# One loader per split, each built with get_loader's default batch size.
train_loader, val_loader, test_loader = (
    get_loader(split_path) for split_path in (train_path, val_path, test_path)
)

# Sanity check: report the first training batch only, then stop iterating.
for batch_imgs, batch_paths in train_loader:
    print(f"📦 Loaded batch: {batch_imgs.shape} on {batch_imgs.device}")
    print(f"🖼️ First image: {batch_paths[0]}")
    break


🚀 Using device: cuda
📦 Loaded batch: torch.Size([4, 3, 384, 512]) on cuda:0
🖼️ First image: D:\AAU Internship\Code\CWF-788\IMAGE512x384\train_new\1_image.jpg


In [3]:
def compute_vegetation_indices(batch_imgs):
    """
    Derive six colour-based vegetation indices for every pixel of an RGB batch.

    Input:  batch_imgs [B, 3, H, W], float32 in range [0, 1]
    Output: tensor [B, 6, H, W] with channels ordered
            ExG, ExR, CIVE, VEG, NDI, GLI
    """
    red = batch_imgs[:, 0, :, :]
    green = batch_imgs[:, 1, :, :]
    blue = batch_imgs[:, 2, :, :]

    tiny = 1e-6  # added to each denominator so the ratio indices never divide by zero

    # ExG = 2G - R - B; the same expression is the numerator of GLI.
    excess_green = 2 * green - red - blue

    channels = [
        excess_green,                                            # ExG
        1.4 * red - green,                                       # ExR
        0.441 * red - 0.811 * green + 0.385 * blue + 18.787,     # CIVE
        green / ((red**0.667) * (blue**0.333) + tiny),           # VEG
        (green - red) / (green + red + tiny),                    # NDI
        excess_green / (2 * green + red + blue + tiny),          # GLI
    ]

    # New axis 1 holds the six indices: [B, 6, H, W].
    return T.stack(channels, dim=1)


In [4]:
def fit_pca_on_sample(indices_tensor, n_components=1, sample_size=100, seed=None):
    """
    Fit a PCA model on a random subset of the pixels in a batch of index maps.

    Args:
        indices_tensor: tensor shaped [B, C, H, W] (C = 6 in the existing pipeline).
        n_components: number of principal components to keep.
        sample_size: sampling budget. At most ``sample_size * H`` pixel rows are
            drawn without replacement; every row is used when the batch has fewer.
        seed: optional integer. When given, the pixel subset is drawn from a
            dedicated generator seeded with it, so repeated calls on the same
            input pick the same pixels. When None (default) the global torch RNG
            is used.

    Returns:
        The fitted ``PCA`` estimator.
    """
    B, C, H, W = indices_tensor.shape
    reshaped = indices_tensor.permute(0, 2, 3, 1).reshape(-1, C)  # [B*H*W, C]

    # A private generator keeps seeded sampling from disturbing the global RNG state.
    generator = None
    if seed is not None:
        generator = T.Generator().manual_seed(seed)

    # Sample randomly to limit fitting time
    picked = T.randperm(reshaped.shape[0], generator=generator)[:sample_size * H]
    sample = reshaped[picked.to(reshaped.device)]

    # NOTE(review): nothing here fixes the orientation (sign) of the fitted component;
    # confirm that code which thresholds the projection does not depend on it.
    pca = PCA(n_components=n_components)
    pca.fit(sample.detach().cpu().numpy())
    return pca

# 2️⃣ Apply PCA to full batch
def apply_pca_to_batch(indices_tensor, pca):
    """
    Project every pixel of a batch of index maps with a fitted PCA model.

    Args:
        indices_tensor: tensor shaped [B, C, H, W].
        pca: fitted object whose ``transform`` takes a [B*H*W, C] NumPy array and
            returns [B*H*W, K] values (a flat [B*H*W] result is treated as K = 1).

    Returns:
        float32 tensor shaped [B, K, H, W] on the same device as ``indices_tensor``;
        this is [B, 1, H, W] for a single-component model.
    """
    B, C, H, W = indices_tensor.shape
    reshaped = indices_tensor.permute(0, 2, 3, 1).reshape(-1, C)  # [B*H*W, C]

    projected = pca.transform(reshaped.detach().cpu().numpy())  # [B*H*W, K]

    # Keep the component axis instead of assuming K == 1, then move it next to the
    # batch axis so the result mirrors the input layout.
    out = T.tensor(projected, dtype=T.float32).reshape(B, H, W, -1)
    out = out.permute(0, 3, 1, 2).contiguous()  # [B, K, H, W]
    return out.to(indices_tensor.device)


In [5]:
def apply_multi_otsu(pca_tensor, n_classes=2):
    """
    Binarise each projected image with its own multi-Otsu threshold.

    Input: pca_tensor [B, 1, H, W]
    Output: list with one [H, W] uint8 array per image; 1 marks pixels strictly
            above the first multi-Otsu threshold, 0 marks everything else
    """
    per_image = pca_tensor.squeeze(1).cpu().numpy()  # [B, H, W]

    def binarise(plane):
        # Thresholds are computed per image; only the first one is used for the split.
        first_cut = threshold_multiotsu(plane, classes=n_classes)[0]
        return (plane > first_cut).astype(np.uint8)

    return [binarise(plane) for plane in per_image]


In [6]:
def refine_masks_with_morphops(mask_list, apply_opening=True, apply_closing=True, dilate=False):
    """
    Clean up binary masks with a fixed sequence of morphological operations.

    The enabled steps always run in the order opening -> closing -> dilation,
    each with the same 5x5 all-ones structuring element.

    Input: list of [H, W] uint8 masks (0 and 1)
    Output: list of refined [H, W] masks, again holding 0 and 1
    """
    structuring = np.ones((5, 5), np.uint8)

    # Assemble the enabled steps once; each callable maps a 0/255 image to a 0/255 image.
    steps = []
    if apply_opening:
        steps.append(lambda m: cv2.morphologyEx(m, cv2.MORPH_OPEN, structuring))
    if apply_closing:
        steps.append(lambda m: cv2.morphologyEx(m, cv2.MORPH_CLOSE, structuring))
    if dilate:
        steps.append(lambda m: cv2.dilate(m, structuring, iterations=1))

    cleaned = []
    for binary in mask_list:
        scaled = (binary * 255).astype(np.uint8)  # work on a 0/255 image
        for step in steps:
            scaled = step(scaled)
        cleaned.append(scaled // 255)  # back to 0/1

    return cleaned


In [7]:
def save_vegetation_masks(masks, paths, save_root="Vegetation_Masks"):
    """
    Write each mask as a 0/255 grayscale PNG under a per-split subfolder.

    The subfolder is chosen from the source image path: a path containing
    "train_new", "validation_new" or "test_new" (checked in that order) is
    written to "train", "val" or "test" respectively. The output file keeps the
    source image's base name with its extension replaced by ".png".

    Args:
        masks: iterable of [H, W] arrays; each is multiplied by 255 before
            saving, so values are expected to be 0 and 1.
        paths: iterable of source image paths, paired with ``masks`` by position.
        save_root: directory under which the split subfolders are created.

    Raises:
        ValueError: if a path contains none of the known split folder names.
        OSError: if OpenCV reports that a mask could not be written.
    """
    # (marker searched for in the input path, output subfolder), in lookup order.
    split_markers = (
        ("train_new", "train"),
        ("validation_new", "val"),
        ("test_new", "test"),
    )

    os.makedirs(save_root, exist_ok=True)

    for mask, img_path in zip(masks, paths):
        # Extract base filename
        base_filename = os.path.splitext(os.path.basename(img_path))[0] + ".png"

        # Determine subfolder name from input path
        subfolder = next(
            (folder for marker, folder in split_markers if marker in img_path),
            None,
        )
        if subfolder is None:
            raise ValueError(f"❌ Cannot determine subfolder for: {img_path}")

        # Create subfolder path
        save_dir = os.path.join(save_root, subfolder)
        os.makedirs(save_dir, exist_ok=True)

        # Convert mask to 0-255 grayscale
        grayscale_mask = (mask * 255).astype(np.uint8)

        # Save the mask. cv2.imwrite signals failure through its boolean return
        # value, so check it instead of silently losing the file.
        save_path = os.path.join(save_dir, base_filename)
        if not cv2.imwrite(save_path, grayscale_mask):
            raise OSError(f"Failed to write mask: {save_path}")


In [8]:
def run_vegetation_masking_pipeline(dataloader, set_name):
    """
    Run the full masking pipeline over one data split and report its timing.

    For every batch: compute the vegetation indices, fit a PCA on that batch,
    project the batch, threshold it, refine the masks and save them.

    Args:
        dataloader: iterable yielding ``(batch_imgs, batch_paths)`` pairs.
        set_name: one of ["train", "val", "test"]; used only in the progress
            bar label and log messages here.

    Returns:
        total_images (int), total_time (float in seconds). When the loader
        yields nothing, ``total_images`` is 0 and no per-image average is printed.
    """
    print(f"\n🚀 Starting vegetation masking for {set_name} set...")
    total_images = 0
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments during a long run.
    start_time = time.perf_counter()

    for batch_imgs, batch_paths in tqdm(dataloader, desc=f"🌱 Processing {set_name}"):
        total_images += len(batch_imgs)

        # Step 1: Compute vegetation indices
        veg_indices = compute_vegetation_indices(batch_imgs)

        # Step 2: PCA (fit on sample) - a fresh model is fitted for each batch
        pca_model = fit_pca_on_sample(veg_indices, n_components=1)

        # Step 3: Apply PCA
        pca_proj = apply_pca_to_batch(veg_indices, pca_model)

        # Step 4: Multi-Otsu
        binary_masks = apply_multi_otsu(pca_proj)

        # Step 5: Morphological refinement
        refined_masks = refine_masks_with_morphops(binary_masks)

        # Step 6: Save
        save_vegetation_masks(refined_masks, batch_paths)

    total_time = time.perf_counter() - start_time

    if total_images == 0:
        # An empty split would otherwise end in a ZeroDivisionError below.
        print(f"⚠️ No images found for {set_name} set; nothing was processed.\n")
        return total_images, total_time

    ms_per_image = (total_time / total_images) * 1000

    print(f"✅ Completed {total_images} images in {total_time:.2f} sec")
    print(f"⚡ Avg processing time: {ms_per_image:.2f} ms/image\n")

    return total_images, total_time


In [9]:
# Aggregate total timing across all splits
total_imgs_all = 0
total_time_all = 0

for loader, name in zip([train_loader, val_loader, test_loader], ["train", "val", "test"]):
    imgs, time_taken = run_vegetation_masking_pipeline(loader, name)
    total_imgs_all += imgs
    total_time_all += time_taken

# Final performance summary
print(f"🧠 Total: {total_imgs_all} images processed in {total_time_all:.2f} seconds")
if total_imgs_all:
    overall_ms_per_image = (total_time_all / total_imgs_all) * 1000
    print(f"🚀 Overall Speed: {overall_ms_per_image:.2f} ms/image")
else:
    # No per-image average exists for zero images; say so instead of dividing by zero.
    print("⚠️ No images were processed; check BASE_PATH and the split folder names.")



🚀 Starting vegetation masking for train set...


🌱 Processing train:   0%|          | 0/400 [00:00<?, ?it/s]

✅ Completed 1600 images in 65.75 sec
⚡ Avg processing time: 41.10 ms/image


🚀 Starting vegetation masking for val set...


🌱 Processing val:   0%|          | 0/88 [00:00<?, ?it/s]

✅ Completed 352 images in 12.62 sec
⚡ Avg processing time: 35.85 ms/image


🚀 Starting vegetation masking for test set...


🌱 Processing test:   0%|          | 0/300 [00:00<?, ?it/s]

✅ Completed 1200 images in 41.67 sec
⚡ Avg processing time: 34.73 ms/image

🧠 Total: 3152 images processed in 120.05 seconds
🚀 Overall Speed: 38.09 ms/image
