In [1]:
# Colab-only helper used to expose Google Drive to this runtime.
from google.colab import drive
# Mount Drive at /content/drive; the dataset paths used later in this
# notebook live under /content/drive/MyDrive.
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import torch

# Report whether PyTorch can see a CUDA device and, if it can, describe it.
if not torch.cuda.is_available():
    print("CUDA is not available.")
else:
    print("CUDA is available!")
    print("Number of GPUs:", torch.cuda.device_count())
    # Look the active device index up once and reuse it for the name query.
    active_device = torch.cuda.current_device()
    print("Current GPU:", active_device)
    print("GPU Name:", torch.cuda.get_device_name(active_device))


CUDA is not available.


In [3]:
import os
import xml.etree.ElementTree as ET
from PIL import Image

# Dataset locations on the mounted Drive.
annotations_dir = "/content/drive/MyDrive/all/train/annotations_transformed2"
images_dir = "/content/drive/MyDrive/all/train/images_transformed"

# Smallest acceptable bounding-box width/height, in pixels.
MIN_BOX_SIZE = 10

# Collect the XML annotation files and the JPEG images (exact, case-sensitive
# extension match).
annotation_files = list(
    filter(lambda name: name.endswith('.xml'), os.listdir(annotations_dir))
)
image_files = list(
    filter(lambda name: name.endswith('.jpg'), os.listdir(images_dir))
)

# Map each image's base name (extension stripped) to its full file name so an
# annotation can be matched to its image by base name.
image_names = dict(
    zip((os.path.splitext(name)[0] for name in image_files), image_files)
)

# One result list per kind of problem; each holds annotation file names.
missing_images, invalid_annotations = [], []
small_boxes, out_of_bounds_boxes = [], []

# Validate each annotation file.
#
# Every annotation file is recorded at most once per result list:
#   - missing_images:      no entry in image_names shares its base name
#   - invalid_annotations: the XML does not parse, or a <bndbox> has a
#                          missing / empty / non-integer coordinate
#   - small_boxes:         at least one box is narrower or shorter than
#                          MIN_BOX_SIZE
#   - out_of_bounds_boxes: at least one box extends past the image edges
# A file can appear in both small_boxes and out_of_bounds_boxes, because
# every box is checked against both conditions.
for annotation_file in annotation_files:
    annotation_path = os.path.join(annotations_dir, annotation_file)

    # Check if corresponding image exists
    image_name = os.path.splitext(annotation_file)[0]
    if image_name not in image_names:
        missing_images.append(annotation_file)
        continue

    image_path = os.path.join(images_dir, image_names[image_name])

    # Parse the XML and pull out every box's integer coordinates up front, so
    # a malformed file is reported instead of aborting the whole run.
    try:
        root = ET.parse(annotation_path).getroot()

        boxes = []
        for obj in root.findall("object"):
            bndbox = obj.find("bndbox")
            if bndbox is None:
                continue
            # findtext() returns None for a missing tag, so int() raises
            # TypeError (tag missing) or ValueError (empty / non-integer text).
            boxes.append(
                tuple(
                    int(bndbox.findtext(tag))
                    for tag in ("xmin", "ymin", "xmax", "ymax")
                )
            )
    except (ET.ParseError, ValueError, TypeError):
        invalid_annotations.append(annotation_file)
        continue

    # Load the corresponding image to get dimensions.
    # Errors opening the image are not handled here and propagate.
    with Image.open(image_path) as img:
        img_width, img_height = img.size

    # Check if any box is too small
    if any(
        (xmax - xmin) < MIN_BOX_SIZE or (ymax - ymin) < MIN_BOX_SIZE
        for xmin, ymin, xmax, ymax in boxes
    ):
        small_boxes.append(annotation_file)

    # Check if any box falls outside image boundaries
    if any(
        xmin < 0 or ymin < 0 or xmax > img_width or ymax > img_height
        for xmin, ymin, xmax, ymax in boxes
    ):
        out_of_bounds_boxes.append(annotation_file)

# Summarise each category: an all-clear line when its list is empty, otherwise
# a header with the entry count followed by one file name per line.
if not missing_images:
    print("All annotations have corresponding images.")
else:
    print(f"Annotations missing corresponding images ({len(missing_images)}):",
          *missing_images, sep="\n")

if not invalid_annotations:
    print("All annotations are valid.")
else:
    print(f"Invalid annotation files ({len(invalid_annotations)}):",
          *invalid_annotations, sep="\n")

if not small_boxes:
    print("No small bounding boxes found.")
else:
    print(f"Annotations with bounding boxes that are too small ({len(small_boxes)}):",
          *small_boxes, sep="\n")

if not out_of_bounds_boxes:
    print("No out-of-bounds bounding boxes found.")
else:
    print(f"Annotations with bounding boxes outside image boundaries ({len(out_of_bounds_boxes)}):",
          *out_of_bounds_boxes, sep="\n")
