# In [None]:
import cv2
import numpy as np
import tifffile
import os
from glob import glob

def read_tif_image(image_path):
    """
    Read a TIF image and convert it to a format suitable for processing.

    Args:
        image_path: Path of the TIF file, passed to ``tifffile.imread``.

    Returns:
        The loaded array after these conversions:
        - ``uint16`` data is reduced to ``uint8`` (high byte of each sample);
        - 2-D (grayscale) arrays are expanded to three identical channels;
        - arrays whose last axis has 3 or 4 entries are treated as RGB / RGBA
          and reordered to BGR for OpenCV (the alpha channel is dropped).
        Arrays with any other shape are returned without channel reordering.
    """
    # Read TIF image
    img = tifffile.imread(image_path)

    # If image is 16-bit, convert to 8-bit. The shift yields the same values
    # as dividing by 256 and truncating, without a temporary float array.
    if img.dtype == np.uint16:
        img = (img >> 8).astype(np.uint8)

    # Ensure we have BGR format for OpenCV.
    # NOTE(review): assumes colour data is laid out as (height, width,
    # channels) in RGB(A) order -- confirm for planar / multi-page TIFFs.
    if img.ndim == 2:
        # Grayscale: all three output channels are equal, so going straight
        # to BGR matches a GRAY->RGB->BGR round trip.
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    elif img.ndim == 3 and img.shape[2] == 3:
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    elif img.ndim == 3 and img.shape[2] == 4:
        # RGBA previously fell through unconverted, leaving a 4-channel
        # array in RGB order for callers that expect 3-channel BGR.
        img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)

    return img

def convert_to_yolo_format(image_path, output_dir):
    """
    Derive YOLO labels from the colored boxes drawn on a TIF image.

    The image is saved as ``<output_dir>/images/<name>.png`` and one line per
    detected contour is written to ``<output_dir>/labels/<name>.txt``.

    YOLO format: <class> <x_center> <y_center> <width> <height>
    The four coordinates are divided by the image width / height.
    """
    # Make sure both output sub-folders are present
    images_dir = os.path.join(output_dir, 'images')
    labels_dir = os.path.join(output_dir, 'labels')
    for folder in (images_dir, labels_dir):
        os.makedirs(folder, exist_ok=True)

    # Load the picture and remember its size for normalization
    picture = read_tif_image(image_path)
    img_h, img_w = picture.shape[:2]

    # File name up to (not including) its last dot
    file_name = os.path.basename(image_path)
    stem = file_name.rsplit('.', 1)[0]

    # Lower / upper BGR bounds per class; a class id is the entry's position
    color_classes = {
        'red': ([0, 0, 128], [80, 80, 255]),       # class 0
        'green': ([0, 128, 0], [80, 255, 80]),     # class 1
        'yellow': ([0, 128, 128], [80, 255, 255]),  # class 2
    }

    label_lines = []
    for class_id, (low, high) in enumerate(color_classes.values()):
        # Pixels whose three channels all lie inside this class's bounds
        in_range = cv2.inRange(picture, np.array(low), np.array(high))

        # Outermost contours of the matching regions
        outlines, _hierarchy = cv2.findContours(
            in_range, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        for outline in outlines:
            left, top, box_w, box_h = cv2.boundingRect(outline)

            # Center and size of the bounding rectangle, normalized
            fields = (
                (left + box_w/2) / img_w,
                (top + box_h/2) / img_h,
                box_w / img_w,
                box_h / img_h,
            )
            formatted = [f"{value:.6f}" for value in fields]
            label_lines.append(' '.join([str(class_id), *formatted]))

    # Image first, then its label file
    png_path = os.path.join(images_dir, f"{stem}.png")
    cv2.imwrite(png_path, picture)

    txt_path = os.path.join(labels_dir, f"{stem}.txt")
    with open(txt_path, 'w') as label_file:
        label_file.write('\n'.join(label_lines))
def process_folder(input_folder, output_dir):
    """
    Process all TIF files in a folder and its subfolders, then write data.yaml.

    Args:
        input_folder: Root folder searched recursively for files whose name
            ends in ``.tif`` or ``.tiff`` (any letter case).
        output_dir: Dataset root, created if missing. Each file found is
            passed to ``convert_to_yolo_format`` with this directory, and a
            ``data.yaml`` describing the dataset is written here.
    """
    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Compare the lower-cased name so '.TIF' and '.tiff' files are not
    # skipped (a literal '*.tif' pattern misses them on case-sensitive file
    # systems). Sorting makes the processing order independent of the
    # directory listing order.
    tif_extensions = ('.tif', '.tiff')
    candidates = glob(os.path.join(input_folder, '**', '*'), recursive=True)
    tif_files = sorted(
        path for path in candidates
        if path.lower().endswith(tif_extensions) and os.path.isfile(path)
    )

    # Process each file
    for tif_file in tif_files:
        print(f"Processing: {tif_file}")
        convert_to_yolo_format(tif_file, output_dir)

    # Create data.yaml (train and val both point at the same image folder)
    yaml_lines = [
        f"path: {output_dir}",
        "train: images  # all images",
        "val: images    # all images",
        "",
        "nc: 3  # number of classes",
        "names: ['red', 'green', 'yellow']  # class names",
        "",
        "# Class descriptions",
        "# 0: red boxes",
        "# 1: green boxes",
        "# 2: yellow boxes",
    ]

    # Explicit UTF-8 so a non-ASCII output path is written the same way on
    # every platform.
    with open(os.path.join(output_dir, 'data.yaml'), 'w', encoding='utf-8') as f:
        f.write('\n'.join(yaml_lines))

# Example usage: when this file is run as a script, convert the TIF files
# found under "HFDA15" and write the resulting dataset to "yolo_dataset".
if __name__ == "__main__":
    input_folder = "HFDA15"  # Your input folder containing TIF files
    output_dir = "yolo_dataset"  # Where to save the YOLO format dataset

    # Converts every TIF found and writes data.yaml into output_dir
    process_folder(input_folder, output_dir)