In [2]:
import xml.etree.ElementTree as ET
import os
import shutil

# --- Locations ---------------------------------------------------------------
# Source tree: subfolders holding the XML annotations and their PNG images.
input_folder = r"C:\Users\me513\Downloads\images"
# Destination for the generated YOLO label (.txt) files.
output_labels_folder = r'C:\Users\me513\Downloads\labels'
# Destination for the copied images.
output_images_folder = r'C:\Users\me513\Downloads\image'

# Create both destinations up front; exist_ok keeps re-runs from failing.
os.makedirs(output_labels_folder, exist_ok=True)
os.makedirs(output_images_folder, exist_ok=True)

# --- Image geometry ----------------------------------------------------------
# Pixel size used to normalise box coordinates (all images are assumed to
# share it; adjust if necessary).
image_width, image_height = 1700, 2200

# --- Classes -----------------------------------------------------------------
# Annotation class name -> YOLO class id (adjust as needed).
class_mapping = dict(Figure=1, Table=0)

# Function to convert XML to YOLO format
def convert_xml_to_yolo(xml_file, output_file, class_map=None, default_size=None):
    """
    Convert one Pascal-VOC-style XML annotation into a YOLO label file.

    Every <object> whose <name> is a known class is written to `output_file`
    as one line, ``class_id x_center y_center width height``, where the four
    box values are normalised by the image size and printed with 6 decimals.

    Parameters:
    - xml_file: Path to the XML annotation to read.
    - output_file: Path of the YOLO .txt file to create (overwritten if present).
    - class_map: Optional dict of class name -> class id. Defaults to the
      module-level `class_mapping`.
    - default_size: Optional (width, height) in pixels, used only when the XML
      has no usable <size> element. Defaults to the module-level
      (`image_width`, `image_height`).

    Objects with an unknown or missing class name, or without a <bndbox>,
    are skipped with a printed warning.
    """
    if class_map is None:
        class_map = class_mapping

    root = ET.parse(xml_file).getroot()

    # Prefer the image size recorded in the annotation itself: normalising by
    # a single hard-coded size silently produces wrong labels for any image
    # that has different dimensions.
    try:
        img_w = float(root.findtext('size/width', default='0'))
        img_h = float(root.findtext('size/height', default='0'))
    except ValueError:
        img_w = img_h = 0.0
    if img_w <= 0 or img_h <= 0:
        # No usable <size>: fall back to the caller-supplied / global default.
        if default_size is None:
            default_size = (image_width, image_height)
        img_w, img_h = default_size

    with open(output_file, 'w') as f:
        for obj in root.findall('object'):
            # A missing <name> becomes '' and is reported as an unknown class.
            class_name = (obj.findtext('name') or '').strip()
            if class_name not in class_map:
                print(f"Warning: Class '{class_name}' not found in class mapping. Skipping.")
                continue  # Skip if class name not recognized

            bbox = obj.find('bndbox')
            if bbox is None:
                print(f"Warning: Object '{class_name}' has no bounding box. Skipping.")
                continue

            class_id = class_map[class_name]
            xmin, ymin, xmax, ymax = (
                float(bbox.findtext(tag)) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )

            # Convert corner coordinates to YOLO's normalised centre/size form.
            x_center = ((xmin + xmax) / 2) / img_w
            y_center = ((ymin + ymax) / 2) / img_h
            width = (xmax - xmin) / img_w
            height = (ymax - ymin) / img_h

            # Write in YOLO format
            f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

# Number every annotation/image pair with ONE shared counter so that
# NNNNN.txt always describes NNNNN.png. Two independent counters drift apart
# as soon as a single file is missing its partner or the listing order of the
# two file types differs, which silently attaches labels to the wrong images.
pair_counter = 1

# Recursively traverse each subfolder in the input folder
for root_dir, sub_dirs, files in os.walk(input_folder):
    sub_dirs.sort()  # in-place sort makes os.walk visit subfolders in a reproducible order

    # Group this folder's files by stem, e.g. {"00001": {".xml": "00001.xml", ".png": "00001.png"}}
    pairs = {}
    for filename in files:
        stem, ext = os.path.splitext(filename)
        if ext in ('.xml', '.png'):
            pairs.setdefault(stem, {})[ext] = filename

    for stem in sorted(pairs):
        xml_name = pairs[stem].get('.xml')
        png_name = pairs[stem].get('.png')

        if xml_name is None:
            print(f"Warning: {png_name} in {root_dir} has no XML annotation; copying it without a label file.")
        elif png_name is None:
            print(f"Warning: {xml_name} in {root_dir} has no PNG image; writing its label without an image.")

        # Process XML files for YOLO labels
        if xml_name is not None:
            xml_file_path = os.path.join(root_dir, xml_name)
            output_txt_path = os.path.join(output_labels_folder, f"{pair_counter:05d}.txt")
            convert_xml_to_yolo(xml_file_path, output_txt_path)
            print(f"Converted {xml_name} to {output_txt_path}")

        # Process PNG files for images
        if png_name is not None:
            image_file_path = os.path.join(root_dir, png_name)
            output_image_path = os.path.join(output_images_folder, f"{pair_counter:05d}.png")
            shutil.copy(image_file_path, output_image_path)
            print(f"Copied {png_name} to {output_image_path}")

        # One number per stem, whether or not both halves were present.
        pair_counter += 1

print("All files processed successfully.")


Copied 00001.png to C:\Users\me513\Downloads\image\00001.png
Converted 00001.xml to C:\Users\me513\Downloads\labels\00001.txt
Copied 00002.png to C:\Users\me513\Downloads\image\00002.png
Converted 00002.xml to C:\Users\me513\Downloads\labels\00002.txt
Copied 00003.png to C:\Users\me513\Downloads\image\00003.png
Converted 00003.xml to C:\Users\me513\Downloads\labels\00003.txt
Copied 00004.png to C:\Users\me513\Downloads\image\00004.png
Converted 00004.xml to C:\Users\me513\Downloads\labels\00004.txt
Copied 00005.png to C:\Users\me513\Downloads\image\00005.png
Converted 00005.xml to C:\Users\me513\Downloads\labels\00005.txt
Copied 00006.png to C:\Users\me513\Downloads\image\00006.png
Converted 00006.xml to C:\Users\me513\Downloads\labels\00006.txt
Copied 00007.png to C:\Users\me513\Downloads\image\00007.png
Converted 00007.xml to C:\Users\me513\Downloads\labels\00007.txt
Copied 00008.png to C:\Users\me513\Downloads\image\00008.png
Converted 00008.xml to C:\Users\me513\Downloads\labels\00

In [None]:
import os
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import xml.etree.ElementTree as ET

# Function to parse XML annotations
def parse_xml(xml_path):
    """
    Parse an XML file to extract bounding box coordinates.

    Parameters:
    - xml_path: Path to the XML file.

    Returns:
    - List of bounding boxes, each in the format [xmin, ymin, xmax, ymax],
      as integer pixel values. Fractional coordinates (e.g. "20.6") are
      rounded to the nearest pixel.

    Objects without a <bndbox> element are skipped.
    """
    root = ET.parse(xml_path).getroot()
    boxes = []

    for obj in root.findall('object'):
        bbox = obj.find('bndbox')
        if bbox is None:
            continue  # nothing to draw for this object

        # Go through float() first: int("20.6") raises ValueError, yet
        # fractional coordinates are accepted by the YOLO conversion step.
        boxes.append([
            int(round(float(bbox.find(tag).text)))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        ])

    return boxes

# Function to display an image with annotation boxes
def show_image_with_boxes(image_path, boxes):
    """
    Render an image and outline each annotation box on top of it.

    Parameters:
    - image_path: Path to the image file.
    - boxes: List of bounding boxes, each in the format [xmin, ymin, xmax, ymax].
    """
    picture = Image.open(image_path)
    figure, axes = plt.subplots(1)
    axes.imshow(picture)

    for left, top, right, bottom in boxes:
        # Rectangle takes its anchor corner plus a width and a height.
        outline = patches.Rectangle(
            (left, top),
            right - left,
            bottom - top,
            linewidth=2,
            edgecolor='red',
            facecolor='none',
        )
        axes.add_patch(outline)

    plt.show()
    # Release the figure once it has been shown.
    plt.close(figure)


# Main function to loop over a directory
def process_all_images_with_annotations(folder_path, image_extension='.png', annotation_extension='.xml'):
    """
    Loop through all images and annotations in a folder and display each image with its boxes.

    Only files directly inside `folder_path` are considered (no recursion).
    Files are visited in sorted name order. An image's annotation is the file
    with the same name but with the trailing `image_extension` swapped for
    `annotation_extension`.

    Parameters:
    - folder_path: Path to the folder containing images and XML annotations.
    - image_extension: Extension for image files (e.g., '.jpg').
    - annotation_extension: Extension for annotation files (e.g., '.xml').
    """
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(image_extension):
            continue

        # Swap only the trailing extension. str.replace() would rewrite every
        # occurrence, turning "scan.png.v2.png" into "scan.xml.v2.xml".
        stem = filename[:len(filename) - len(image_extension)]
        image_path = os.path.join(folder_path, filename)
        xml_path = os.path.join(folder_path, stem + annotation_extension)

        if os.path.exists(xml_path):  # Check if annotation file exists
            boxes = parse_xml(xml_path)
            show_image_with_boxes(image_path, boxes)
            print(xml_path)
        else:
            print(f"No annotation file found for {filename}")

# Example usage: preview the annotated images found in this folder.
folder_path = r"C:\Users\me513\Downloads\images"
process_all_images_with_annotations(folder_path=folder_path)


In [5]:
import torch
# Ask PyTorch whether a CUDA device is usable; the result is the cell's displayed value.
torch.cuda.is_available()


True

In [None]:
from ultralytics import YOLO
import torchvision.ops as ops
# Load a pretrained YOLO model
model = YOLO("yolo11n.pt")

# No manual non-maximum suppression here: a call such as
# `ops.nms(boxes, scores, iou_thres)` needs prediction tensors, and none of
# those names exist at this point in the notebook (NameError on a fresh kernel).

# Fine-tune on the custom dataset described by data.yaml.
results = model.train(
    data=r"C:\Users\me513\Downloads\yolo\data.yaml",  # path to your YAML file
    batch=1,  # You can try increasing this depending on your GPU
    epochs=100,
    imgsz=640,  # Try a smaller image size like 640 for faster training
    device=0  # First CUDA GPU; pass device="cpu" to train on the CPU instead
)


In [2]:
import os
from collections import defaultdict

# Path to the directory with YOLO label files
labels_dir = r"C:\Users\me513\Downloads\yolo\train\labels"
class_counts = defaultdict(int)

# Loop through all .txt files in the labels directory
for label_file in os.listdir(labels_dir):
    if label_file.endswith('.txt'):
        # Read each line in the file and count class instances
        with open(os.path.join(labels_dir, label_file), 'r') as file:
            for line in file:
                fields = line.split()
                if not fields:
                    # Blank or whitespace-only line (e.g. a trailing newline):
                    # indexing fields[0] would raise IndexError.
                    continue
                class_id = int(fields[0])    # Extract the class ID (first element)
                class_counts[class_id] += 1  # Increment count for the class ID

# Print the number of instances for each class, in ascending class-id order
for class_id, count in sorted(class_counts.items()):
    print(f"Class {class_id}: {count} instances")


Class 1: 702 instances
Class 0: 1541 instances


In [None]:

# Evaluate the model on the test dataset
# NOTE(review): relies on `model` already existing in the kernel (it is created
# in the training cell); this cell fails on its own after a restart.
# NOTE(review): this data.yaml lives under ...\projects\, while the training
# cell points at ...\yolo\data.yaml — confirm both describe the same dataset.
# NOTE(review): no `split=` argument is passed; presumably Ultralytics then
# evaluates the `val` split rather than `test` — verify against the docs.
results = model.val(data=r"C:\Users\me513\Downloads\projects\data.yaml")

# Print evaluation results
print(results)