In [1]:
# Cell 1: Import libraries
import os
import cv2
import numpy as np
from pathlib import Path
from tqdm.notebook import tqdm  # For progress bars


In [2]:
# Cell 2: Function to create output directories
def create_output_dirs(output_root, subfolders=('train', 'val', 'test')):
    """Create the output root and one directory per dataset subset.

    Existing directories are left untouched, so the function is safe to
    call repeatedly.

    Args:
        output_root: Directory (str or path-like) under which the subset
            folders are created. It is created too if it does not exist.
        subfolders: Names of the subset folders to create below
            ``output_root``. Defaults to the train/val/test split.

    Returns:
        None. A confirmation line naming ``output_root`` is printed.
    """
    # Create the root explicitly so it exists even when `subfolders` is empty.
    os.makedirs(output_root, exist_ok=True)
    for subfolder in subfolders:
        os.makedirs(os.path.join(output_root, subfolder), exist_ok=True)
    print(f"Output directories created/verified at: {output_root}")

# Cell 3: Function to process masks
def process_masks(veg_mask_path, crop_mask_path, output_path):
    """Create a weed mask by removing crop pixels from a vegetation mask.

    Both inputs are read as single-channel images and binarised at 127
    (values above 127 become 255, everything else 0). Every pixel that is
    255 in the crop mask is set to 0 in a copy of the vegetation mask, and
    the result is written to ``output_path``.

    Args:
        veg_mask_path: Path (str or path-like) of the vegetation mask image.
        crop_mask_path: Path (str or path-like) of the crop mask image.
        output_path: Destination path (str or path-like) for the weed mask.

    Returns:
        True if the weed mask was written. False if either input could not
        be read, the two masks differ in size, or the write failed; a
        message is printed in each of these cases.
    """
    # Read masks
    veg_mask = cv2.imread(os.fspath(veg_mask_path), cv2.IMREAD_GRAYSCALE)
    crop_mask = cv2.imread(os.fspath(crop_mask_path), cv2.IMREAD_GRAYSCALE)

    # imread signals an unreadable file by returning None
    if veg_mask is None or crop_mask is None:
        print(f"Error reading masks: {veg_mask_path} or {crop_mask_path}")
        return False

    # The boolean indexing below requires identical shapes; report a
    # mismatch as a failed file instead of raising mid-batch.
    if veg_mask.shape != crop_mask.shape:
        print(
            f"Mask size mismatch: {veg_mask_path} {veg_mask.shape} "
            f"vs {crop_mask_path} {crop_mask.shape}"
        )
        return False

    # Ensure masks are binary (0 or 255)
    _, veg_mask = cv2.threshold(veg_mask, 127, 255, cv2.THRESH_BINARY)
    _, crop_mask = cv2.threshold(crop_mask, 127, 255, cv2.THRESH_BINARY)

    # Subtract crop mask from vegetation mask:
    # where the crop mask is white (255), the result is black (0)
    weed_mask = veg_mask.copy()
    weed_mask[crop_mask == 255] = 0

    # imwrite returns False when the image could not be saved
    if not cv2.imwrite(os.fspath(output_path), weed_mask):
        print(f"Error writing weed mask: {output_path}")
        return False
    return True


In [3]:
# Cell 4: Main processing
# Inputs and outputs are resolved from the current working directory
# (expected to be the directory the notebook resides in).
current_dir = Path.cwd()

veg_dir = current_dir / "Vegetation_Masks"
crop_dir = current_dir / "Crop_Masks"
output_dir = current_dir / "Weed_Masks"

# Make sure the output folders exist before anything is written
create_output_dirs(output_dir)

# File extensions treated as mask images (compared case-insensitively)
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

# Process each subset (train, val, test)
for subset in ['train', 'val', 'test']:
    veg_subset_dir = veg_dir / subset
    crop_subset_dir = crop_dir / subset
    output_subset_dir = output_dir / subset

    if not veg_subset_dir.exists() or not crop_subset_dir.exists():
        print(f"Directory not found: {veg_subset_dir} or {crop_subset_dir}")
        continue

    # os.listdir returns names in arbitrary order; sort so that runs are
    # reproducible and log messages appear in a predictable sequence.
    veg_files = sorted(
        f for f in os.listdir(veg_subset_dir)
        if f.lower().endswith(IMAGE_EXTENSIONS)
    )

    # Per-subset tally so that skipped or failed files are not overlooked
    written = missing = failed = 0

    for veg_file in tqdm(veg_files, desc=f"Processing {subset}"):
        # The crop mask is expected under the same file name
        crop_file_path = crop_subset_dir / veg_file
        if not crop_file_path.exists():
            print(f"Crop mask not found for: {veg_file}")
            missing += 1
            continue

        veg_file_path = veg_subset_dir / veg_file
        output_file_path = output_subset_dir / veg_file

        # process_masks returns False when it could not produce the mask
        success = process_masks(str(veg_file_path), str(crop_file_path), str(output_file_path))
        if success:
            written += 1
        else:
            failed += 1

    print(f"{subset}: {written} written, {missing} without crop mask, {failed} failed")


Output directories created/verified at: D:\AAU Internship\Code\Weed_Masks


Processing train:   0%|          | 0/1600 [00:00<?, ?it/s]

Processing val:   0%|          | 0/352 [00:00<?, ?it/s]

Processing test:   0%|          | 0/1200 [00:00<?, ?it/s]

In [4]:
# Sanity check: show the first few file names of one subset from each input
# folder. The subset is named explicitly ("test" is the one the processing
# loop ends on) so this cell does not depend on loop variables left over
# from the previous cell.
inspect_subset = "test"

# Sorted so the "first five" names of both folders are comparable
veg_names = sorted(os.listdir(veg_dir / inspect_subset))
crop_names = sorted(os.listdir(crop_dir / inspect_subset))

print("Vegetation files:", veg_names[:5])
print("Crop files:", crop_names[:5])


Vegetation files: ['1000_image.png', '1001_image.png', '1002_image.png', '1003_image.png', '1004_image.png']
Crop files: ['1000_image.png', '1001_image.png', '1002_image.png', '1003_image.png', '1004_image.png']
