In [7]:
import cv2
import numpy as np
import tifffile
import os
from glob import glob

def read_tif_image(image_path):
    """
    Read a TIF image
    """
    img = tifffile.imread(image_path)
    if img.dtype == np.uint16:
        img = ((img / img.max()) * 255).astype(np.uint8)

    if len(img.shape) == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    elif len(img.shape) == 3 and img.shape[2] > 3:
        img = img[:, :, :3]

    return img

def extract_colored_boxes(img):
    """
    Extract the existing red, green, and yellow boxes
    Focus on single-pixel width lines
    """
    height, width = img.shape[:2]

    # Create individual channel masks
    b, g, r = cv2.split(img)

    # Define masks for each color (pure RGB values)
    red_mask = cv2.bitwise_and(cv2.compare(r, 253, cv2.CMP_GE),
                               cv2.bitwise_and(cv2.compare(g, 2, cv2.CMP_LE),
                                               cv2.compare(b, 2, cv2.CMP_LE)))

    green_mask = cv2.bitwise_and(cv2.compare(g, 253, cv2.CMP_GE),
                                 cv2.bitwise_and(cv2.compare(r, 2, cv2.CMP_LE),
                                                 cv2.compare(b, 2, cv2.CMP_LE)))

    yellow_mask = cv2.bitwise_and(cv2.compare(r, 253, cv2.CMP_GE),
                                  cv2.bitwise_and(cv2.compare(g, 253, cv2.CMP_GE),
                                                  cv2.compare(b, 2, cv2.CMP_LE)))

    boxes = {'red': [], 'green': [], 'yellow': []}
    debug_img = img.copy()

    # Process each color mask
    for color, mask in [('red', red_mask), ('green', green_mask), ('yellow', yellow_mask)]:
        # Find connected components
        num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(mask.astype(np.uint8), connectivity=8)

        for i in range(1, num_labels):  # Skip background (label 0)
            x = stats[i, cv2.CC_STAT_LEFT]
            y = stats[i, cv2.CC_STAT_TOP]
            w = stats[i, cv2.CC_STAT_WIDTH]
            h = stats[i, cv2.CC_STAT_HEIGHT]
            area = stats[i, cv2.CC_STAT_AREA]

            # Check if this forms a complete rectangle
            # We expect area to be approximately 2*(w+h)-4 for a single-pixel width rectangle
            expected_area = 2 * (w + h) - 4
            area_ratio = area / expected_area if expected_area > 0 else 0

            # Filter valid boxes
            if 0.8 < area_ratio < 1.2 and w > 5 and h > 5:
                boxes[color].append((x, y, w, h))

                # Draw on debug image
                color_bgr = (0,0,255) if color == 'red' else (0,255,0) if color == 'green' else (0,255,255)
                cv2.rectangle(debug_img, (x,y), (x+w,y+h), color_bgr, 1)

    return boxes, debug_img
import cv2
import numpy as np
from pathlib import Path
import tifffile

def extract_colored_boxes_hepar(image_path, output_path):
    """
    Extract bounding boxes from Hepar TIFF image with correct colors.
    """
    # Read the TIFF image
    img = tifffile.imread(str(image_path))

    # Print debug info
    print(f"Image shape: {img.shape}")
    print(f"Image dtype: {img.dtype}")

    # Convert to uint8 if needed
    if img.dtype in [np.float32, np.float64]:
        img = (img * 255).astype(np.uint8)
    elif img.dtype != np.uint8:
        img = img.astype(np.uint8)

    height, width = img.shape[:2]

    # Color ranges in RGB (matching original image)
    color_ranges = {
        'red': ([150, 30, 30], [255, 90, 90]),      # class 0
        'green': ([30, 150, 30], [90, 255, 90]),    # class 1
        'yellow': ([150, 150, 30], [255, 255, 90])  # class 2
    }

    yolo_annotations = []
    debug_img = img.copy()

    # Process each color
    for class_id, (color_name, (lower, upper)) in enumerate(color_ranges.items()):
        # Create mask
        try:
            mask = cv2.inRange(img, np.array(lower), np.array(upper))
        except cv2.error as e:
            print(f"Error creating mask for {color_name}: {str(e)}")
            continue

        # Find contours
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)

            # Filter small boxes
            if w < 8 or h < 8:
                continue

            # Convert to YOLO format
            x_center = (x + w/2) / width
            y_center = (y + h/2) / height
            w = w / width
            h = h / height

            yolo_annotations.append(f"{class_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}")

            # Draw on debug image with original colors
            color = {
                'red': (255, 0, 0),      # Pure red
                'green': (0, 255, 0),    # Pure green
                'yellow': (255, 255, 0)  # Pure yellow
            }[color_name]
            cv2.rectangle(debug_img, (x,y), (x+w,y+h), color, 2)

    # Save annotations
    output_file = Path(output_path) / f"{Path(image_path).stem}.txt"
    with open(output_file, 'w') as f:
        f.write('\n'.join(yolo_annotations))

    # Save debug visualization using RGB color space
    debug_path = Path(output_path) / "debug_visualizations"
    debug_path.mkdir(exist_ok=True)
    cv2.imwrite(str(debug_path / f"{Path(image_path).stem}_debug.png"), cv2.cvtColor(debug_img, cv2.COLOR_RGB2BGR))

    return len(yolo_annotations)

# Rest of the code remains the same...
def convert_to_yolo_format(image_path, output_dir):
    """
    Convert colored box annotations to YOLO format
    """
    os.makedirs(os.path.join(output_dir, 'images'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'labels'), exist_ok=True)

    img = read_tif_image(image_path)
    height, width = img.shape[:2]
    base_name = os.path.basename(image_path).rsplit('.', 1)[0]

    boxes, debug_img = extract_colored_boxes(img)

    # Convert to YOLO format
    yolo_annotations = []
    class_mapping = {'red': 0, 'green': 1, 'yellow': 2}

    for color, class_id in class_mapping.items():
        for x, y, w, h in boxes[color]:
            # Convert to YOLO format (normalized)
            x_center = (x + w/2) / width
            y_center = (y + h/2) / height
            w_normalized = w / width
            h_normalized = h / height

            yolo_annotations.append(f"{class_id} {x_center:.6f} {y_center:.6f} {w_normalized:.6f} {h_normalized:.6f}")

    # Save debug image
    debug_dir = os.path.join(output_dir, 'debug')
    os.makedirs(debug_dir, exist_ok=True)
    cv2.imwrite(os.path.join(debug_dir, f"{base_name}_debug.png"), debug_img)

    # Save original image and annotations
    cv2.imwrite(os.path.join(output_dir, 'images', f"{base_name}.png"), img)

    with open(os.path.join(output_dir, 'labels', f"{base_name}.txt"), 'w') as f:
        f.write('\n'.join(yolo_annotations))

    print(f"Processed {base_name}:")
    print(f"  Red boxes: {len(boxes['red'])}")
    print(f"  Green boxes: {len(boxes['green'])}")
    print(f"  Yellow boxes: {len(boxes['yellow'])}")

def process_folder(input_folder, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    tif_files = glob(os.path.join(input_folder, '**/*.tif'), recursive=True)
    print(f"Found {len(tif_files)} TIF files")

    for tif_file in tif_files:
        print(f"\nProcessing: {tif_file}")
        try:
            convert_to_yolo_format(tif_file, output_dir)
        except Exception as e:
            print(f"Error processing {tif_file}: {str(e)}")

    yaml_content = f"""
path: {output_dir}
train: images
val: images

nc: 3
names: ['red', 'green', 'yellow']
"""

    with open(os.path.join(output_dir, 'data.yaml'), 'w') as f:
        f.write(yaml_content.strip())

if __name__ == "__main__":
    input_folder = "HFDA15"
    output_dir = "yolo_dataset"
    process_folder(input_folder, output_dir)

Found 6 TIF files

Processing: HFDA15/Hepar - HFDA15 3 X40 - 1a.tif
Processed Hepar - HFDA15 3 X40 - 1a:
  Red boxes: 0
  Green boxes: 0
  Yellow boxes: 0

Processing: HFDA15/Hepar - HFDA15 2 X40 - 1b.tif
Processed Hepar - HFDA15 2 X40 - 1b:
  Red boxes: 0
  Green boxes: 0
  Yellow boxes: 0

Processing: HFDA15/Hepar - HFDA15 1 X40 - 1.tif
Processed Hepar - HFDA15 1 X40 - 1:
  Red boxes: 0
  Green boxes: 0
  Yellow boxes: 0

Processing: HFDA15/Hepar - HFDA15 3 X40 - 1b.tif
Processed Hepar - HFDA15 3 X40 - 1b:
  Red boxes: 0
  Green boxes: 0
  Yellow boxes: 0

Processing: HFDA15/Hepar - HFDA15 1 X40 - 2.tif
Processed Hepar - HFDA15 1 X40 - 2:
  Red boxes: 0
  Green boxes: 0
  Yellow boxes: 0

Processing: HFDA15/Hepar - HFDA15 2 X40 - 1a.tif
Processed Hepar - HFDA15 2 X40 - 1a:
  Red boxes: 0
  Green boxes: 0
  Yellow boxes: 0


In [14]:
import cv2
import numpy as np
import tifffile
import os
from pathlib import Path
from glob import glob

def read_tif_image(image_path):
    """
    Read a TIF image and normalize to uint8 if needed.
    """
    img = tifffile.imread(str(image_path))

    if img.dtype in [np.float32, np.float64]:
        img = (img * 255).astype(np.uint8)
    elif img.dtype != np.uint8:
        img = img.astype(np.uint8)

    if len(img.shape) == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    elif len(img.shape) == 3 and img.shape[2] > 3:
        img = img[:, :, :3]

    return img

def extract_colored_boxes(img, color_ranges):
    """
    Extract bounding boxes for specified color ranges and convert them to YOLO format.
    """
    height, width = img.shape[:2]
    yolo_annotations = []
    debug_img = img.copy()

    for class_id, (color_name, (lower, upper)) in enumerate(color_ranges.items()):
        # Create a binary mask for the color range
        mask = cv2.inRange(img, np.array(lower), np.array(upper))

        # Find contours
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)

            # Filter out small boxes
            if w < 8 or h < 8:
                continue

            # Convert to YOLO format (normalized)
            x_center = (x + w / 2) / width
            y_center = (y + h / 2) / height
            w_normalized = w / width
            h_normalized = h / height

            yolo_annotations.append(f"{class_id} {x_center:.6f} {y_center:.6f} {w_normalized:.6f} {h_normalized:.6f}")

            # Draw on debug image
            color = {
                'red': (255, 0, 0),
                'green': (0, 255, 0),
                'yellow': (255, 255, 0)
            }[color_name]
            cv2.rectangle(debug_img, (x, y), (x + w, y + h), color, 2)

    return yolo_annotations, debug_img

def convert_to_yolo_format(image_path, output_dir):
    """
    Convert bounding boxes for a single image to YOLO format.
    """
    img = read_tif_image(image_path)
    base_name = Path(image_path).stem
    output_image_dir = Path(output_dir) / "images"
    output_label_dir = Path(output_dir) / "labels"
    output_debug_dir = Path(output_dir) / "debug"

    output_image_dir.mkdir(parents=True, exist_ok=True)
    output_label_dir.mkdir(parents=True, exist_ok=True)
    output_debug_dir.mkdir(parents=True, exist_ok=True)

    # Define color ranges
    color_ranges = {
        'red': ([150, 30, 30], [255, 90, 90]),      # class 0
        'green': ([30, 150, 30], [90, 255, 90]),    # class 1
        'yellow': ([150, 150, 30], [255, 255, 90])  # class 2
    }

    # Extract YOLO annotations and debug image
    yolo_annotations, debug_img = extract_colored_boxes(img, color_ranges)

    # Save YOLO annotations
    with open(output_label_dir / f"{base_name}.txt", 'w') as f:
        f.write("\n".join(yolo_annotations))

    # Save the image and debug visualization
    cv2.imwrite(str(output_image_dir / f"{base_name}.png"), img)
    cv2.imwrite(str(output_debug_dir / f"{base_name}_debug.png"), cv2.cvtColor(debug_img, cv2.COLOR_RGB2BGR))

    return len(yolo_annotations)

def process_folder(input_folder, output_dir):
    """
    Process all TIF images in a folder and convert them to YOLO format.
    """
    tif_files = glob(os.path.join(input_folder, '**/*.tif'), recursive=True)
    print(f"Found {len(tif_files)} TIF files")

    for tif_file in tif_files:
        print(f"Processing: {tif_file}")
        try:
            annotations_count = convert_to_yolo_format(tif_file, output_dir)
            print(f"Generated {annotations_count} annotations for {tif_file}")
        except Exception as e:
            print(f"Error processing {tif_file}: {e}")

    # Create a YAML file for YOLO training
    yaml_content = f"""
path: {output_dir}
train: images
val: images

nc: 3
names: ['red', 'green', 'yellow']
"""
    with open(os.path.join(output_dir, 'data.yaml'), 'w') as f:
        f.write(yaml_content.strip())

if __name__ == "__main__":
    input_folder = "HFDA15"
    output_dir = "yolo_dataset"
    process_folder(input_folder, output_dir)


Found 6 TIF files
Processing: HFDA15/Hepar - HFDA15 1 X40 - 1.tif
Generated 81 annotations for HFDA15/Hepar - HFDA15 1 X40 - 1.tif
Processing: HFDA15/Hepar - HFDA15 1 X40 - 2.tif
Generated 103 annotations for HFDA15/Hepar - HFDA15 1 X40 - 2.tif
Processing: HFDA15/Hepar - HFDA15 2 X40 - 1a.tif
Generated 63 annotations for HFDA15/Hepar - HFDA15 2 X40 - 1a.tif
Processing: HFDA15/Hepar - HFDA15 2 X40 - 1b.tif
Error processing HFDA15/Hepar - HFDA15 2 X40 - 1b.tif: not a TIFF file b''
Processing: HFDA15/Hepar - HFDA15 3 X40 - 1a.tif
Error processing HFDA15/Hepar - HFDA15 3 X40 - 1a.tif: not a TIFF file b''
Processing: HFDA15/Hepar - HFDA15 3 X40 - 1b.tif
Error processing HFDA15/Hepar - HFDA15 3 X40 - 1b.tif: not a TIFF file b''


In [15]:
import cv2
import numpy as np
import os
from glob import glob
from pathlib import Path

def process_image(image_path, output_dir):
    """
    Process a single image to extract bounding boxes for red, green, and yellow,
    generate YOLO labels, and save debug images.
    """
    # Load the image
    image_cv = cv2.imread(image_path)

    # Convert to RGB (OpenCV loads images in BGR by default)
    image_rgb = cv2.cvtColor(image_cv, cv2.COLOR_BGR2RGB)

    # Define the color ranges for red, green, and yellow
    color_ranges = {
        "red": ([200, 0, 0], [255, 50, 50]),       # Lower and upper bounds for red
        "green": ([0, 200, 0], [50, 255, 50]),     # Lower and upper bounds for green
        "yellow": ([200, 200, 0], [255, 255, 50])  # Lower and upper bounds for yellow
    }

    # Convert the bounds to numpy arrays
    color_masks = {}
    for color, (lower, upper) in color_ranges.items():
        lower_bound = np.array(lower, dtype="uint8")
        upper_bound = np.array(upper, dtype="uint8")
        # Create masks for each color
        mask = cv2.inRange(image_rgb, lower_bound, upper_bound)
        color_masks[color] = mask

    # Count and extract the coordinates of bounding boxes for each color
    color_bounding_boxes = {}
    yolo_annotations = []
    debug_img = image_cv.copy()

    class_mapping = {"red": 0, "green": 1, "yellow": 2}
    height, width = image_rgb.shape[:2]

    for color, mask in color_masks.items():
        # Find contours for each color
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        bounding_boxes = [cv2.boundingRect(contour) for contour in contours]
        color_bounding_boxes[color] = bounding_boxes

        # Convert bounding boxes to YOLO format and annotate the debug image
        for bbox in bounding_boxes:
            x, y, w, h = bbox
            # YOLO format (normalized)
            x_center = (x + w / 2) / width
            y_center = (y + h / 2) / height
            w_norm = w / width
            h_norm = h / height
            yolo_annotations.append(f"{class_mapping[color]} {x_center:.6f} {y_center:.6f} {w_norm:.6f} {h_norm:.6f}")

            # Draw rectangle on debug image
            color_bgr = (0, 0, 255) if color == "red" else (0, 255, 0) if color == "green" else (0, 255, 255)
            cv2.rectangle(debug_img, (x, y), (x + w, y + h), color_bgr, 2)

    # Save YOLO labels
    base_name = Path(image_path).stem
    labels_dir = Path(output_dir) / "labels"
    labels_dir.mkdir(parents=True, exist_ok=True)
    with open(labels_dir / f"{base_name}.txt", "w") as f:
        f.write("\n".join(yolo_annotations))

    # Save debug image
    debug_dir = Path(output_dir) / "debug"
    debug_dir.mkdir(parents=True, exist_ok=True)
    cv2.imwrite(str(debug_dir / f"{base_name}_debug.png"), debug_img)

    print(f"Processed {image_path}: {len(yolo_annotations)} annotations")

def process_images(input_folder, output_dir):
    """
    Process all images in the input folder and generate YOLO labels and debug images.
    """
    image_paths = glob(os.path.join(input_folder, "*.*"))
    print(f"Found {len(image_paths)} images")

    for image_path in image_paths:
        try:
            process_image(image_path, output_dir)
        except Exception as e:
            print(f"Error processing {image_path}: {e}")

if __name__ == "__main__":
    input_folder = "HFDA15"  # Replace with the folder containing your images
    output_dir = "output_annotations"      # Replace with your desired output directory
    process_images(input_folder, output_dir)


Found 6 images
Processed HFDA15/Hepar - HFDA15 1 X40 - 1.tif: 83 annotations
Processed HFDA15/Hepar - HFDA15 1 X40 - 2.tif: 56 annotations
Processed HFDA15/Hepar - HFDA15 2 X40 - 1a.tif: 94 annotations
Error processing HFDA15/Hepar - HFDA15 2 X40 - 1b.tif: OpenCV(4.10.0) /io/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'

Error processing HFDA15/Hepar - HFDA15 3 X40 - 1a.tif: OpenCV(4.10.0) /io/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'

Error processing HFDA15/Hepar - HFDA15 3 X40 - 1b.tif: OpenCV(4.10.0) /io/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'



In [18]:
def refined_process_image(image_path, output_dir):
    """
    Process a single image with improved bounding box detection to generate YOLO labels
    and save debug images.
    """
    # Check if the file exists
    if not os.path.exists(image_path):
        print(f"Error: File not found - {image_path}")
        return

    # Load the image
    image_cv = cv2.imread(image_path)

    # Check if the image was loaded successfully
    if image_cv is None:
        print(f"Error: Failed to load image - {image_path}")
        return

    # Convert to RGB (OpenCV loads images in BGR by default)
    image_rgb = cv2.cvtColor(image_cv, cv2.COLOR_BGR2RGB)

    # Define the color ranges for red, green, and yellow with refined thresholds
    color_ranges = {
        "red": ([180, 0, 0], [255, 80, 80]),       # Adjusted lower and upper bounds for red
        "green": ([0, 180, 0], [80, 255, 80]),     # Adjusted lower and upper bounds for green
        "yellow": ([180, 180, 0], [255, 255, 80])  # Adjusted lower and upper bounds for yellow
    }

    # Convert the bounds to numpy arrays
    color_masks = {}
    for color, (lower, upper) in color_ranges.items():
        lower_bound = np.array(lower, dtype="uint8")
        upper_bound = np.array(upper, dtype="uint8")
        # Create masks for each color
        mask = cv2.inRange(image_rgb, lower_bound, upper_bound)
        color_masks[color] = mask

    # Extract the bounding boxes and generate YOLO labels
    color_bounding_boxes = {}
    yolo_annotations = []
    debug_img = image_cv.copy()

    class_mapping = {"red": 0, "green": 1, "yellow": 2}
    height, width = image_rgb.shape[:2]

    for color, mask in color_masks.items():
        # Find contours for each color
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        bounding_boxes = [cv2.boundingRect(contour) for contour in contours]
        color_bounding_boxes[color] = bounding_boxes

        # Convert bounding boxes to YOLO format and annotate the debug image
        for bbox in bounding_boxes:
            x, y, w, h = bbox
            # Filter out noise and very small boxes
            if w < 5 or h < 5:
                continue

            # YOLO format (normalized)
            x_center = (x + w / 2) / width
            y_center = (y + h / 2) / height
            w_norm = w / width
            h_norm = h / height
            yolo_annotations.append(f"{class_mapping[color]} {x_center:.6f} {y_center:.6f} {w_norm:.6f} {h_norm:.6f}")

            # Draw rectangle on debug image
            color_bgr = (0, 0, 255) if color == "red" else (0, 255, 0) if color == "green" else (0, 255, 255)
            cv2.rectangle(debug_img, (x, y), (x + w, y + h), color_bgr, 2)

    # Save YOLO labels
    base_name = Path(image_path).stem
    labels_dir = Path(output_dir) / "labels"
    labels_dir.mkdir(parents=True, exist_ok=True)
    with open(labels_dir / f"{base_name}.txt", "w") as f:
        f.write("\n".join(yolo_annotations))

    # Save debug image
    debug_dir = Path(output_dir) / "debug"
    debug_dir.mkdir(parents=True, exist_ok=True)
    cv2.imwrite(str(debug_dir / f"{base_name}_debug.png"), debug_img)

    print(f"Processed {image_path}: {len(yolo_annotations)} annotations")

# Rerun the function with error handling
refined_process_image("./HFDA15/Hepar - HFDA15 1 X40 - 1.tif", "output_dir")


Processed ./HFDA15/Hepar - HFDA15 1 X40 - 1.tif: 81 annotations
