In [None]:
# prompt: create colab code for the following look for xml annotations in "/content/drive/MyDrive/all/train/annotations_transformed" and "/content/drive/MyDrive/all/val/annotations_transformed". the corresponding images are in "/content/drive/MyDrive/all/train/annotations_transformed" and "/content/drive/MyDrive/all/val/annotations_transformed", respectively. look for annotations with empty bounding boxes. and where these are found, delete the bounding boxes. all the remaining xml files with valid bounding boxes should be copied to the new folders "/content/drive/MyDrive/all/train/annotations_transformed2" and "/content/drive/MyDrive/all/val/annotations_transformed2", respectively

import os
import xml.etree.ElementTree as ET
import shutil

def process_annotations(source_dir, dest_dir):
    """Write filtered copies of XML annotations, dropping objects with unusable boxes.

    Every ``*.xml`` file directly inside ``source_dir`` is parsed. An ``<object>``
    child of the root is removed when its ``<bndbox>`` is degenerate, i.e. when a
    coordinate tag is missing, empty or not a number, or when the box has no
    positive width and height (``xmax <= xmin`` or ``ymax <= ymin``). Objects
    that have no ``<bndbox>`` element at all are left untouched. A file is
    written to ``dest_dir`` under the same name only if at least one
    ``<object>`` remains. The source files themselves are never modified.

    Coordinates are read as floats, so integer text ("12") and float text
    ("12.0") are both accepted; a blank or malformed coordinate removes only
    the affected object instead of aborting the whole run.

    Args:
        source_dir: Folder containing the original XML annotation files.
        dest_dir: Folder that receives the filtered files; created if missing.

    Returns:
        None. Files that are not well-formed XML are reported with ``print``
        and skipped.
    """
    os.makedirs(dest_dir, exist_ok=True)

    def _is_degenerate(bndbox):
        """Return True if the box lacks a usable coordinate or has no area."""
        try:
            # findtext() yields None for a missing tag and '' for an empty one;
            # float() rejects both, which is exactly what we want here.
            xmin, ymin, xmax, ymax = (
                float(bndbox.findtext(tag))
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
        except (TypeError, ValueError):
            return True
        # Written as a positive test so that NaN coordinates also count as degenerate.
        return not (xmax > xmin and ymax > ymin)

    # Sorted only to make the processing order (and any log output) deterministic.
    for filename in sorted(os.listdir(source_dir)):
        if not filename.endswith(".xml"):
            continue

        filepath = os.path.join(source_dir, filename)
        try:
            tree = ET.parse(filepath)
        except ET.ParseError as e:
            print(f"Error parsing {filename}: {e}")
            continue

        root = tree.getroot()
        # findall() returns a list, so removing from root while looping is safe.
        for obj in root.findall('object'):
            bndbox = obj.find('bndbox')
            if bndbox is not None and _is_degenerate(bndbox):
                root.remove(obj)

        # Only write the file if there is at least one object left.
        if root.find('object') is not None:
            tree.write(os.path.join(dest_dir, filename))

# Annotation folders for the training split: originals and the filtered copy.
train_source_dir = "/content/drive/MyDrive/all/train/annotations_transformed"
train_dest_dir = "/content/drive/MyDrive/all/train/annotations_transformed2"

# Annotation folders for the validation split: originals and the filtered copy.
val_source_dir = "/content/drive/MyDrive/all/val/annotations_transformed"
val_dest_dir = "/content/drive/MyDrive/all/val/annotations_transformed2"

# Filter each split in turn (train first, then validation).
for _source_dir, _dest_dir in (
    (train_source_dir, train_dest_dir),
    (val_source_dir, val_dest_dir),
):
    process_annotations(_source_dir, _dest_dir)

In [None]:
# prompt: create colab code for the following look for xml annotations in "/content/drive/MyDrive/all/train/annotations_transformed" and "/content/drive/MyDrive/all/val/annotations_transformed". the corresponding images are in "/content/drive/MyDrive/all/train/images" and "/content/drive/MyDrive/all/val/images", respectively. look for annotations with empty bounding boxes. and where these are found, delete the bounding boxes. all the remaining xml files with valid bounding boxes should be copied to the new folders "/content/drive/MyDrive/all/train/annotations_transformed2" and "/content/drive/MyDrive/all/val/annotations_transformed2", respectively. at the same time the img files matching the valid annotations should be copied to  "/content/drive/MyDrive/all/train/images2" and "/content/drive/MyDrive/all/val/images2", respectively

import os
import xml.etree.ElementTree as ET
import shutil

def process_annotations(source_annotation_dir, source_image_dir, dest_annotation_dir, dest_image_dir):
    """Filter XML annotations and copy the surviving files together with their images.

    Every ``*.xml`` file directly inside ``source_annotation_dir`` is parsed.
    An ``<object>`` child of the root is removed when its ``<bndbox>`` is
    degenerate, i.e. when a coordinate tag is missing, empty or not a number,
    or when the box has no positive width and height (``xmax <= xmin`` or
    ``ymax <= ymin``). Objects that have no ``<bndbox>`` element at all are
    left untouched.

    If at least one ``<object>`` remains, the filtered XML is written to
    ``dest_annotation_dir`` under the same name and the image with the same
    base name and a ``.jpg`` extension is copied from ``source_image_dir`` to
    ``dest_image_dir``. A missing image only produces a warning; the annotation
    is still written. Source files are never modified.

    Coordinates are read as floats, so a file whose boxes use float text
    ("12.0") or contain a blank coordinate is filtered object by object instead
    of being skipped as a whole.

    Args:
        source_annotation_dir: Folder containing the original XML annotations.
        source_image_dir: Folder containing the matching ``.jpg`` images.
        dest_annotation_dir: Folder for the filtered XML files; created if missing.
        dest_image_dir: Folder for the copied images; created if missing.

    Returns:
        None. Per-file problems are reported with ``print`` and do not stop
        the run.
    """
    os.makedirs(dest_annotation_dir, exist_ok=True)
    os.makedirs(dest_image_dir, exist_ok=True)

    def _is_degenerate(bndbox):
        """Return True if the box lacks a usable coordinate or has no area."""
        try:
            # findtext() yields None for a missing tag and '' for an empty one;
            # float() rejects both, so such boxes are reported as degenerate.
            xmin, ymin, xmax, ymax = (
                float(bndbox.findtext(tag))
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
        except (TypeError, ValueError):
            return True
        # Written as a positive test so that NaN coordinates also count as degenerate.
        return not (xmax > xmin and ymax > ymin)

    # Sorted only to make the processing order (and the log output) deterministic.
    for filename in sorted(os.listdir(source_annotation_dir)):
        if not filename.endswith(".xml"):
            continue

        annotation_filepath = os.path.join(source_annotation_dir, filename)
        try:
            tree = ET.parse(annotation_filepath)
            root = tree.getroot()

            # findall() returns a list, so removing from root while looping is safe.
            for obj in root.findall('object'):
                bndbox = obj.find('bndbox')
                if bndbox is not None and _is_degenerate(bndbox):
                    root.remove(obj)

            # Nothing left to train on: skip both the annotation and its image.
            if root.find('object') is None:
                continue

            tree.write(os.path.join(dest_annotation_dir, filename))

            # Copy the corresponding image file (images are assumed to be JPGs).
            image_filename = os.path.splitext(filename)[0] + ".jpg"
            source_image_path = os.path.join(source_image_dir, image_filename)
            dest_image_path = os.path.join(dest_image_dir, image_filename)
            if os.path.exists(source_image_path):
                shutil.copy2(source_image_path, dest_image_path)
            else:
                print(f"Warning: Image file not found for {filename}: {source_image_path}")

        except ET.ParseError as e:
            print(f"Error parsing {filename}: {e}")
        except Exception as e:
            # Deliberate best-effort: one unreadable file must not stop the batch.
            print(f"An unexpected error occurred while processing {filename}: {e}")


# Training split: source annotations/images and the folders for the filtered copies.
train_source_annotation_dir = "/content/drive/MyDrive/all/train/annotations_transformed"
train_source_image_dir = "/content/drive/MyDrive/all/train/images"
train_dest_annotation_dir = "/content/drive/MyDrive/all/train/annotations_transformed2"
train_dest_image_dir = "/content/drive/MyDrive/all/train/images2"

# Validation split: source annotations/images and the folders for the filtered copies.
val_source_annotation_dir = "/content/drive/MyDrive/all/val/annotations_transformed"
val_source_image_dir = "/content/drive/MyDrive/all/val/images"
val_dest_annotation_dir = "/content/drive/MyDrive/all/val/annotations_transformed2"
val_dest_image_dir = "/content/drive/MyDrive/all/val/images2"

# Filter each split in turn (train first, then validation).
for _split_dirs in (
    (train_source_annotation_dir, train_source_image_dir, train_dest_annotation_dir, train_dest_image_dir),
    (val_source_annotation_dir, val_source_image_dir, val_dest_annotation_dir, val_dest_image_dir),
):
    process_annotations(*_split_dirs)

# from chatgpt

In [None]:
# prompt: mount drive

# Mount Google Drive at /content/drive so that the /content/drive/MyDrive/...
# paths used throughout this notebook resolve.
# NOTE(review): earlier cells already reference paths under /content/drive, so on a
# fresh runtime this cell presumably has to be executed before them - confirm.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os
import shutil
import xml.etree.ElementTree as ET

# Input folders: XML annotations and their images, per split.
train_annotation_dir = "/content/drive/MyDrive/all/train/annotations_transformed"
val_annotation_dir = "/content/drive/MyDrive/all/val/annotations_transformed"
train_image_dir = "/content/drive/MyDrive/all/train/images_transformed"
val_image_dir = "/content/drive/MyDrive/all/val/images_transformed"

# Output folders: filtered annotations and the images that go with them, per split.
train_annotation_output_dir = "/content/drive/MyDrive/all/train/annotations_transformed2"
val_annotation_output_dir = "/content/drive/MyDrive/all/val/annotations_transformed2"
train_image_output_dir = "/content/drive/MyDrive/all/train/images0"
val_image_output_dir = "/content/drive/MyDrive/all/val/images0"

# Make sure every output folder exists before anything is written into it.
for _output_dir in (
    train_annotation_output_dir,
    val_annotation_output_dir,
    train_image_output_dir,
    val_image_output_dir,
):
    os.makedirs(_output_dir, exist_ok=True)

def process_annotations(annotation_dir, image_dir, annotation_output_dir, image_output_dir):
    """
    Process XML annotations to remove entries with empty bounding boxes and copy valid ones.
    Corresponding images are also copied to the new directory.

    Every ``*.xml`` file directly inside ``annotation_dir`` is parsed. An
    ``<object>`` child of the root is kept only if it has a ``<bndbox>`` whose
    four coordinates (``xmin``, ``ymin``, ``xmax``, ``ymax``) are all present,
    numeric, and describe a box with positive width and height. Invalid objects
    are removed in place, so the order of the remaining elements is preserved.

    If at least one valid object remains, the filtered XML is written to
    ``annotation_output_dir`` under the same name and the image with the same
    base name and a ``.jpg`` extension is copied from ``image_dir`` to
    ``image_output_dir``. A missing image only produces a message; the
    annotation is still written. Source files are never modified.

    Args:
        annotation_dir: Folder containing the original XML annotations.
        image_dir: Folder containing the matching ``.jpg`` images.
        annotation_output_dir: Folder for the filtered XML files; created if missing.
        image_output_dir: Folder for the copied images; created if missing.

    Returns:
        None. Files that are not well-formed XML are reported with ``print``
        and skipped, so one bad file does not abort the whole run.
    """
    os.makedirs(annotation_output_dir, exist_ok=True)
    os.makedirs(image_output_dir, exist_ok=True)

    def _has_valid_box(obj):
        """Return True if obj carries a complete, numeric box with positive area."""
        bndbox = obj.find("bndbox")
        if bndbox is None:
            return False
        try:
            # findtext() yields None for a missing tag and '' for an empty one;
            # float() rejects both, so such boxes are reported as invalid.
            xmin, ymin, xmax, ymax = (
                float(bndbox.findtext(tag))
                for tag in ("xmin", "ymin", "xmax", "ymax")
            )
        except (TypeError, ValueError):
            return False
        return xmax > xmin and ymax > ymin

    for filename in os.listdir(annotation_dir):
        if not filename.endswith(".xml"):
            continue

        annotation_path = os.path.join(annotation_dir, filename)
        try:
            tree = ET.parse(annotation_path)
        except ET.ParseError as e:
            print(f"Error parsing {filename}: {e}")
            continue
        root = tree.getroot()

        # Split the objects into keepers and rejects without touching the tree yet.
        objects = root.findall("object")
        invalid_objects = [obj for obj in objects if not _has_valid_box(obj)]

        # No valid object at all (this also covers files without any object):
        # neither the annotation nor the image is copied.
        if len(invalid_objects) == len(objects):
            continue

        for obj in invalid_objects:
            root.remove(obj)

        # Save the modified XML file
        output_annotation_path = os.path.join(annotation_output_dir, filename)
        tree.write(output_annotation_path)

        # Copy the corresponding image. Only the extension is swapped; a ".xml"
        # appearing elsewhere in the file name must stay as it is.
        image_filename = os.path.splitext(filename)[0] + ".jpg"  # Assuming images are .jpg
        image_path = os.path.join(image_dir, image_filename)
        output_image_path = os.path.join(image_output_dir, image_filename)

        if os.path.exists(image_path):
            shutil.copy(image_path, output_image_path)
        else:
            print(f"Image {image_filename} not found for annotation {filename}.")

# Run the filter on the training split first, then on the validation split.
for _split_dirs in (
    (train_annotation_dir, train_image_dir, train_annotation_output_dir, train_image_output_dir),
    (val_annotation_dir, val_image_dir, val_annotation_output_dir, val_image_output_dir),
):
    process_annotations(*_split_dirs)

# Reached only when both splits were processed without an uncaught exception.
print("Processing complete. Valid annotations and images have been copied to new directories.")


Processing complete. Valid annotations and images have been copied to new directories.


In [None]:
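# prompt: count total images in "/content/drive/MyDrive/all/train/images2" and "/content/drive/MyDrive/all/val/images2", respectively
# NOTE: the code below counts the files in the "images0" folders, not the "images2" folders named in the prompt.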

import os

# Folders whose contents are counted, one per split.
train_image_dir = "/content/drive/MyDrive/all/train/images0"
val_image_dir = "/content/drive/MyDrive/all/val/images0"

# Count the regular files directly inside each folder (sub-folders are not counted).
# Each isfile() result is a bool, so summing them yields the number of files.
train_image_count = sum(
    os.path.isfile(os.path.join(train_image_dir, entry))
    for entry in os.listdir(train_image_dir)
)
val_image_count = sum(
    os.path.isfile(os.path.join(val_image_dir, entry))
    for entry in os.listdir(val_image_dir)
)

# Report the totals for both splits.
print(f"Total images in '{train_image_dir}': {train_image_count}")
print(f"Total images in '{val_image_dir}': {val_image_count}")

Total images in '/content/drive/MyDrive/all/train/images0': 979
Total images in '/content/drive/MyDrive/all/val/images0': 189
