In [5]:
import os
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import random

In [None]:
# # Paths to the directories
# images_dir = '/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/Synthetic/images_full'
# labels_dir = '/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/Synthetic/labels_full'
# output_images_dir = '/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/Synthetic/images_resized'
# output_labels_dir = '/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/Synthetic/labels_resized'

# # Resize factor (0.5 for 50% reduction)
# resize_factor = 0.5

# # Ensure output directories exist
# os.makedirs(output_images_dir, exist_ok=True)
# os.makedirs(output_labels_dir, exist_ok=True)

# def resize_image_and_bbox(image_path, label_path, output_image_path, output_label_path, resize_factor):
#     # Open the image
#     image = Image.open(image_path)
#     width, height = image.size
    
#     # Calculate new dimensions
#     new_width = int(width * resize_factor)
#     new_height = int(height * resize_factor)
    
#     # Resize the image
#     resized_image = image.resize((new_width, new_height), Image.LANCZOS)
#     resized_image.save(output_image_path)

#     # Simply copy the YOLO label file without modifying the annotations
#     with open(label_path, 'r') as label_file:
#         lines = label_file.readlines()
    
#     with open(output_label_path, 'w') as output_label_file:
#         output_label_file.writelines(lines)

# # Process each image and label pair in the directories
# for filename in os.listdir(images_dir):
#     if filename.endswith('.jpg') or filename.endswith('.jpeg'):  # Adjust extensions as needed
#         image_path = os.path.join(images_dir, filename)
#         label_filename = filename.replace('.jpg', '.txt').replace('.jpeg', '.txt')
#         label_path = os.path.join(labels_dir, label_filename)
        
#         output_image_path = os.path.join(output_images_dir, 'resized_' + filename)
#         output_label_path = os.path.join(output_labels_dir, 'resized_' + label_filename)
        
#         if os.path.exists(label_path):
#             resize_image_and_bbox(image_path, label_path, output_image_path, output_label_path, resize_factor)
#         else:
#             print(f"Label file not found for {filename}")


In [3]:
# def plot_image_with_annotations(image_path, label_path):
#     # Open the image
#     image = Image.open(image_path)
#     width, height = image.size
    
#     # Plot the image
#     fig, ax = plt.subplots(1)
#     ax.imshow(image)
    
#     # Read bounding box annotations
#     with open(label_path, 'r') as label_file:
#         lines = label_file.readlines()
    
#     for line in lines:
#         # Parse the YOLO format: class_id, x_center, y_center, bbox_width, bbox_height
#         class_id, x_center, y_center, bbox_width, bbox_height = map(float, line.strip().split())
        
#         # Convert YOLO bbox format (relative) to actual pixel values
#         x_center *= width
#         y_center *= height
#         bbox_width *= width
#         bbox_height *= height
        
#         # Calculate the top-left corner coordinates
#         x_min = x_center - (bbox_width / 2)
#         y_min = y_center - (bbox_height / 2)
        
#         # Create a rectangle patch and add it to the plot
#         rect = patches.Rectangle((x_min, y_min), bbox_width, bbox_height, linewidth=1, edgecolor='r', facecolor='none')
#         ax.add_patch(rect)
    
#     plt.show()

# # Choose a random image from the directory
# image_files = [f for f in os.listdir(output_images_dir) if f.endswith('.jpg') or f.endswith('.jpeg')]
# if image_files:
#     random_image = random.choice(image_files)
#     image_path = os.path.join(output_images_dir, random_image)
#     label_filename = random_image.replace('.jpg', '.txt').replace('.jpeg', '.txt')
#     label_path = os.path.join(output_labels_dir, label_filename)
    
#     # Check if the label file exists and plot
#     if os.path.exists(label_path):
#         plot_image_with_annotations(image_path, label_path)
#     else:
#         print(f"Label file not found for {random_image}")
# else:
#     print("No images found in the directory.")

# DIVIDE: split each full-size image (and its YOLO labels) into a grid of tiles

In [1]:
import os
import cv2
import numpy as np

def filter_bounding_boxes(labels, img_width, img_height, min_size=5, max_ratio=0.4):
    """
    Keep only the YOLO boxes whose pixel size falls inside an allowed range.

    A box survives when both its pixel width and its pixel height are at least
    ``min_size`` and at most ``max_ratio`` times the matching image dimension.

    Args:
        labels (list): Boxes as (class_id, x_center, y_center, width, height),
            where width and height are fractions of the image size.
        img_width (int): Image width in pixels.
        img_height (int): Image height in pixels.
        min_size (int): Smallest accepted box width/height, in pixels.
        max_ratio (float): Largest accepted box width/height, as a fraction of
            the image width/height respectively.

    Returns:
        list: The boxes that passed, in input order (the same objects, not copies).
    """
    widest_allowed = img_width * max_ratio
    tallest_allowed = img_height * max_ratio

    def _size_ok(box):
        # Unpack all five fields so a malformed row fails here, as before.
        _cls, _cx, _cy, rel_w, rel_h = box
        px_w = rel_w * img_width
        px_h = rel_h * img_height
        return (min_size <= px_w <= widest_allowed
                and min_size <= px_h <= tallest_allowed)

    return [box for box in labels if _size_ok(box)]

def _read_yolo_labels(label_path):
    """Read a YOLO label file into ``[class_id, x_center, y_center, w, h]`` rows.

    Returns an empty list when the file does not exist. Lines with fewer than
    five fields are ignored; fields beyond the fifth are dropped so every row
    has exactly five entries.
    """
    if not os.path.exists(label_path):
        return []
    rows = []
    with open(label_path, "r") as f:
        for line in f:
            parts = line.split()
            if len(parts) >= 5:
                # int(float(...)) also accepts class ids written as "0.0".
                rows.append([int(float(parts[0]))] + [float(v) for v in parts[1:5]])
    return rows


def _labels_for_tile(labels, width, height, x_start, y_start, x_end, y_end):
    """Re-express full-image YOLO labels relative to one tile.

    A box is assigned to the tile when its centre lies inside the tile (edges
    inclusive). The box is then clipped to the tile so the normalised result
    never extends past the tile borders.
    """
    tile_w = x_end - x_start
    tile_h = y_end - y_start
    tile_labels = []
    for class_id, x_center, y_center, w, h in labels:
        abs_x_center = x_center * width
        abs_y_center = y_center * height

        # Ownership test: the original (unclipped) centre must fall in this tile.
        if not (x_start <= abs_x_center <= x_end and y_start <= abs_y_center <= y_end):
            continue

        half_w = w * width / 2
        half_h = h * height / 2
        left = max(abs_x_center - half_w, x_start)
        right = min(abs_x_center + half_w, x_end)
        top = max(abs_y_center - half_h, y_start)
        bottom = min(abs_y_center + half_h, y_end)

        # Nothing left after clipping (zero-area or degenerate box).
        if right <= left or bottom <= top:
            continue

        tile_labels.append([
            class_id,
            ((left + right) / 2 - x_start) / tile_w,
            ((top + bottom) / 2 - y_start) / tile_h,
            (right - left) / tile_w,
            (bottom - top) / tile_h,
        ])
    return tile_labels


def process_images_and_labels(image_dir, label_dir, output_image_dir, output_label_dir, min_size=512):
    """
    Processes images and labels by dividing each image into a grid where each cell has a minimum size.
    Updates YOLO labels accordingly.

    Each image is split into ``max(1, width // min_size)`` columns and
    ``max(1, height // min_size)`` rows. The last column/row absorbs any
    remainder pixels so the tiles cover the whole image. Every tile is written
    as ``<stem>_<row>_<col>.jpg`` with a matching ``.txt`` label file (possibly
    empty). Boxes are assigned to the tile containing their centre and clipped
    to that tile. Files that ``cv2.imread`` cannot decode are skipped with a
    message instead of aborting the run.

    Args:
        image_dir (str): Path to the directory containing images.
        label_dir (str): Path to the directory containing YOLO labels.
        output_image_dir (str): Path to save the processed images.
        output_label_dir (str): Path to save the updated YOLO labels.
        min_size (int): Minimum width or height of each grid cell.
    """
    os.makedirs(output_image_dir, exist_ok=True)
    os.makedirs(output_label_dir, exist_ok=True)

    # Get a list of all image files
    image_files = [f for f in os.listdir(image_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

    processed = 0
    for image_file in image_files:
        image_path = os.path.join(image_dir, image_file)
        image = cv2.imread(image_path)
        if image is None:
            # imread signals failure by returning None rather than raising.
            print(f"Skipping unreadable image: {image_path}")
            continue
        height, width = image.shape[:2]

        # Determine the number of rows and columns based on min_size
        num_cols = max(1, width // min_size)
        num_rows = max(1, height // min_size)
        cell_width = width // num_cols
        cell_height = height // num_rows

        stem = os.path.splitext(image_file)[0]
        labels = _read_yolo_labels(os.path.join(label_dir, stem + ".txt"))

        for row in range(num_rows):
            y_start = row * cell_height
            # Last row takes the leftover pixels from the integer division.
            y_end = height if row == num_rows - 1 else y_start + cell_height
            for col in range(num_cols):
                x_start = col * cell_width
                # Last column takes the leftover pixels from the integer division.
                x_end = width if col == num_cols - 1 else x_start + cell_width

                cropped_image = image[y_start:y_end, x_start:x_end]
                updated_labels = _labels_for_tile(
                    labels, width, height, x_start, y_start, x_end, y_end
                )

                # updated_labels = filter_bounding_boxes(updated_labels, x_end - x_start, y_end - y_start)

                # Save the cropped image
                cropped_image_path = os.path.join(output_image_dir, f"{stem}_{row}_{col}.jpg")
                cv2.imwrite(cropped_image_path, cropped_image)

                # Save the updated labels
                cropped_label_path = os.path.join(output_label_dir, f"{stem}_{row}_{col}.txt")
                with open(cropped_label_path, "w") as f:
                    for label in updated_labels:
                        f.write(f"{label[0]} {label[1]:.6f} {label[2]:.6f} {label[3]:.6f} {label[4]:.6f}\n")

        processed += 1

    print(f"Processed {processed} images and saved results to {output_image_dir} and {output_label_dir}.")

In [5]:
# Input and output locations for the tiling run on the T4 dataset. The
# commented-out line under each assignment is the Synthetic-dataset equivalent.
# NOTE(review): the output directories here are nested inside the input
# directories (images_full/images_cropped, labels_full/labels_cropped), while
# the remove_empty_labels call in this notebook points at T4/images_cropped and
# T4/labels_cropped -- confirm which location is intended.
image_dir = "/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/images_full"
# image_dir = "/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/Synthetic/images_full"
label_dir = "/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/labels_full"
# label_dir = "/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/Synthetic/labels_full_filtered"
output_image_dir = "/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/images_full/images_cropped"
# output_image_dir = "/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/Synthetic/images_cropped"
output_label_dir = "/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/labels_full/labels_cropped"
# output_label_dir = "/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/Synthetic/labels_cropped"

# Split every image into a grid based on a 512 px minimum cell size and write
# one image + label file per cell.
process_images_and_labels(image_dir, label_dir, output_image_dir, output_label_dir, min_size=512)

Processed 58 images and saved results to /group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/images_full/images_cropped and /group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/labels_full/labels_cropped.


In [6]:
import os

def remove_empty_labels(image_folder: str, label_folder: str, image_ext: str = ".jpg"):
    """
    Removes empty text label files and their corresponding image files.

    A label file counts as empty when it is zero bytes long or contains only
    whitespace (for example a lone newline). For each such ``.txt`` file the
    label is deleted, and the image with the same stem plus ``image_ext`` is
    deleted too if it exists in ``image_folder``. Non-``.txt`` entries in
    ``label_folder`` are ignored.

    :param image_folder: Path to the folder containing image files.
    :param label_folder: Path to the folder containing label text files.
    :param image_ext: Image file extension (default: .jpg).
    """
    for label_file in os.listdir(label_folder):
        # Ensure it's a text file
        if not label_file.endswith(".txt"):
            continue

        label_path = os.path.join(label_folder, label_file)

        # Zero bytes is trivially empty; otherwise look for any non-whitespace
        # byte. Read as bytes so an odd encoding cannot raise here.
        if os.path.getsize(label_path) > 0:
            with open(label_path, "rb") as label_handle:
                if label_handle.read().strip():
                    continue

        # Construct corresponding image file path
        image_file = os.path.splitext(label_file)[0] + image_ext
        image_path = os.path.join(image_folder, image_file)

        # Remove the empty label file
        os.remove(label_path)
        print(f"Deleted empty label: {label_path}")

        # Remove the corresponding image if it exists
        if os.path.exists(image_path):
            os.remove(image_path)
            print(f"Deleted corresponding image: {image_path}")

# Example usage: delete empty label files, and the .jpg image sharing each
# label's stem, from the T4 cropped folders. This is destructive and cannot be undone.
# NOTE(review): these paths (T4/images_cropped, T4/labels_cropped) differ from
# the output directories configured for process_images_and_labels in this
# notebook (T4/images_full/images_cropped, T4/labels_full/labels_cropped) --
# confirm they point at the data you intend to prune.
remove_empty_labels("/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/images_cropped", "/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/labels_cropped")

Deleted empty label: /group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/labels_cropped/b20_t12_1658173678839934868_med_1_0.txt
Deleted corresponding image: /group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/images_cropped/b20_t12_1658173678839934868_med_1_0.jpg
Deleted empty label: /group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/labels_cropped/b18_t17_1658517727464934876_med_3_3.txt
Deleted corresponding image: /group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/images_cropped/b18_t17_1658517727464934876_med_3_3.jpg
Deleted empty label: /group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/labels_cropped/b16_t11_1658172936964934884_med_1_4.txt
Deleted corresponding image: /group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/images_cropped/b16_t11_1658172936964934884_med_1_4.jpg
Deleted empty label: /group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/labels_cropped/camA-1658783800964934873_1_0.txt
Deleted corresponding image: /group/jm

In [4]:
import os

def remove_labels_without_image(image_folder: str, label_folder: str, image_ext: str = ".jpg"):
    """
    Delete every ``.txt`` label whose matching image is absent.

    The matching image is looked up in ``image_folder`` under the label's stem
    with ``image_ext`` appended. Entries in ``label_folder`` that do not end in
    ``.txt`` are left alone, and no image file is ever removed.

    :param image_folder: Path to the folder containing image files.
    :param label_folder: Path to the folder containing label text files.
    :param image_ext: Image file extension (default: .jpg).
    """
    txt_names = [name for name in os.listdir(label_folder) if name.endswith(".txt")]

    for name in txt_names:
        stem, _ = os.path.splitext(name)
        image_path = os.path.join(image_folder, stem + image_ext)

        # Label has a partner image: keep it.
        if os.path.exists(image_path):
            continue

        label_path = os.path.join(label_folder, name)
        os.remove(label_path)
        print(f"Deleted label file '{label_path}' because corresponding image '{image_path}' does not exist.")

# Example usage: prune labels in T4/labels_full that have no same-stem image in
# T4/images_full. This deletes label files permanently.
# NOTE(review): only the single extension given by image_ext is probed, so a
# label whose image is stored as .png/.jpeg (or with different letter case)
# would be treated as orphaned and deleted -- confirm all images are .jpg.
remove_labels_without_image(
    "/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/images_full",
    "/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/labels_full",
    image_ext=".jpg"  # Adjust the extension if your images use a different one
)


Deleted label file '/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/labels_full/b12_t6_1658516463716534868_med.txt' because corresponding image '/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/images_full/b12_t6_1658516463716534868_med.jpg' does not exist.
Deleted label file '/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/labels_full/b13_t28_1658172537341534855_med.txt' because corresponding image '/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/images_full/b13_t28_1658172537341534855_med.jpg' does not exist.
Deleted label file '/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/labels_full/b20_t12_1658173678841534855_med.txt' because corresponding image '/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/images_full/b20_t12_1658173678841534855_med.jpg' does not exist.
Deleted label file '/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/labels_full/b10_t27_1658516131591534868_med.txt' because corresponding image '/g

In [6]:
import os

def rename_yolo_classes(folder_path):
    """
    Goes through all YOLO label text files in a folder and changes all class indices to 0.

    Every ``.txt`` file directly inside ``folder_path`` is rewritten in place:
    blank lines are dropped, the first whitespace-separated field of each
    remaining line is replaced by ``0``, fields are re-joined with single
    spaces, and each line ends with a newline. A file with no label lines is
    written back as a truly empty (zero-byte) file, so it is still recognised
    as an empty label afterwards.

    Args:
        folder_path (str): Path to the folder containing YOLO label files.
    """
    for filename in os.listdir(folder_path):
        if not filename.endswith(".txt"):  # Process only .txt files
            continue

        file_path = os.path.join(folder_path, filename)

        # Read and tokenise every line up front; the file is reopened for writing below.
        with open(file_path, "r") as file:
            rows = [line.split() for line in file]

        # Skip blank lines; force the class index (first field) to 0.
        new_lines = [" ".join(["0"] + fields[1:]) for fields in rows if fields]

        # One newline per label line; nothing at all when there are no labels.
        with open(file_path, "w") as file:
            file.writelines(line + "\n" for line in new_lines)

        print(f"Processed: {filename}")

# Example usage: collapse every class index to 0 in the T4 test-split labels.
# The label files are rewritten in place, so keep a copy if the original class
# ids are still needed.
folder_path = "/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/T4/splits/test/labels"  # Change this to your folder path
rename_yolo_classes(folder_path)

Processed: resized_camA-1658783952714934873_0_1.txt
Processed: resized_camA-1658783831339934857_0_2.txt
Processed: resized_b18_t25_1659121976839934870_med_3_3.txt
Processed: resized_b12_t3_1659120999839934870_med_2_0.txt
Processed: resized_camA-1658784198214934873_2_4.txt
Processed: resized_camA-1658783800964934873_3_0.txt
Processed: resized_camA-1658785564464934873_1_0.txt
Processed: resized_camA-1658786251964934873_2_3.txt
Processed: resized_camA-1658784198214934873_1_2.txt
Processed: resized_camA-1658783834589934873_0_4.txt
Processed: resized_b17_t25_1658173249339934868_med_3_4.txt
Processed: resized_camA-1658784198214934873_0_4.txt
Processed: resized_camA-1658784408464934873_0_3.txt
Processed: resized_camA-1658783800964934873_3_1.txt
Processed: resized_camA-1658784408464934873_2_2.txt
Processed: resized_b18_t17_1658517727466534852_med_3_4.txt
Processed: resized_b18_t17_1658517727466534852_med_3_3.txt
Processed: resized_camA-1658784198214934873_1_4.txt
Processed: resized_camA-165878

In [8]:
import os
import shutil
from tqdm import tqdm
from sklearn.model_selection import train_test_split

def split_yolo_dataset(images_dir, labels_dir, output_dir, train_size=0.7, val_size=0.15, test_size=0.15, random_seed=42):
    """
    Copy a YOLO dataset into train/val/test folders.

    All regular files in ``images_dir`` are sorted by name and split with
    ``train_test_split`` (first train vs. rest, then the rest into val vs.
    test), using ``random_seed`` for both splits. Each image is copied to
    ``<output_dir>/<split>/images`` and its same-stem ``.txt`` label to
    ``<output_dir>/<split>/labels``. Source files are left untouched.

    Images that have no label file are still copied; the missing labels are
    reported once at the end rather than aborting the copy midway.

    Args:
        images_dir (str): Directory containing the image files.
        labels_dir (str): Directory containing the YOLO ``.txt`` label files.
        output_dir (str): Root directory for the ``train``/``val``/``test`` folders.
        train_size (float): Fraction of images for training.
        val_size (float): Fraction of images for validation.
        test_size (float): Fraction of images for testing.
        random_seed (int): Seed passed to ``train_test_split`` as ``random_state``.

    Raises:
        AssertionError: If the three fractions do not sum to 1 (within
            floating-point tolerance).
    """
    import math

    # Exact == on floats rejects valid inputs such as 0.6 + 0.3 + 0.1.
    assert math.isclose(train_size + val_size + test_size, 1.0, rel_tol=1e-9), "Splits must sum to 1"

    images = sorted(f for f in os.listdir(images_dir) if os.path.isfile(os.path.join(images_dir, f)))

    train_imgs, temp_imgs = train_test_split(images, train_size=train_size, random_state=random_seed)
    # Rescale test_size so it is a fraction of the held-out remainder.
    val_imgs, test_imgs = train_test_split(temp_imgs, test_size=test_size/(test_size + val_size), random_state=random_seed)

    splits = {'train': train_imgs, 'val': val_imgs, 'test': test_imgs}
    missing_labels = []

    for split_name, split_imgs in splits.items():
        img_output_path = os.path.join(output_dir, split_name, 'images')
        lbl_output_path = os.path.join(output_dir, split_name, 'labels')

        os.makedirs(img_output_path, exist_ok=True)
        os.makedirs(lbl_output_path, exist_ok=True)

        for img_file in tqdm(split_imgs):
            label_file = os.path.splitext(img_file)[0] + '.txt'
            label_src = os.path.join(labels_dir, label_file)

            shutil.copy(os.path.join(images_dir, img_file), os.path.join(img_output_path, img_file))
            if os.path.exists(label_src):
                shutil.copy(label_src, os.path.join(lbl_output_path, label_file))
            else:
                missing_labels.append(img_file)

    if missing_labels:
        print(f"Warning: {len(missing_labels)} image(s) had no label file and were copied without one: {missing_labels}")

# Usage: 80/10/10 train/val/test split of the Synthetic cropped tiles.
# Files are copied (not moved) into splits/{train,val,test}/{images,labels},
# using the function's default random_seed.
split_yolo_dataset(
    images_dir='/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/Synthetic/images_cropped',
    labels_dir='/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/Synthetic/labels_cropped',
    output_dir='/group/jmearlesgrp/data/AGILE-Datasets/Flower-Detection/Synthetic/splits',
    train_size=0.8,
    val_size=0.1,
    test_size=0.1
)


100%|██████████| 2852/2852 [00:28<00:00, 101.79it/s]
100%|██████████| 356/356 [00:03<00:00, 109.81it/s]
100%|██████████| 357/357 [00:03<00:00, 95.80it/s] 
