In [4]:
import matplotlib.pyplot as plt
import numpy as np
import os
import xml.etree.ElementTree as ET
from PIL import Image

In [8]:
# Crop every annotated object out of its source image, resize the crop to
# 100x100 and save it as a PNG under <output_folder>/<label>/.
# Each XML annotation is expected to carry <folder>, <filename> and one or more
# <object> elements with <name> and <bndbox> children (presumably Pascal
# VOC-style files -- confirm against the dataset).
folder_path = "./dataset/annotations"
output_folder = "./dataset/processed"

# Coordinates every <bndbox> must provide, in the order PIL's crop() expects.
bbox_keys = ("xmin", "ymin", "xmax", "ymax")

# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Iterate over each annotation file in the folder
for filename in os.listdir(folder_path):
    if not filename.endswith(".xml"):
        continue

    file_path = os.path.join(folder_path, filename)
    tree = ET.parse(file_path)
    root = tree.getroot()

    image_folder = None
    image_filename = None
    objects = []

    # Find image folder, filename, and objects (searched at any depth;
    # if a tag occurs more than once the last occurrence wins).
    for elem in root.iter():
        if elem.tag == "folder":
            image_folder = elem.text
        elif elem.tag == "filename":
            image_filename = elem.text
        elif elem.tag == "object":
            obj = {}
            for sub_elem in elem:
                if sub_elem.tag == "bndbox":
                    # int(float(...)) also accepts coordinates written as
                    # "12.0", which a plain int() rejects.
                    obj[sub_elem.tag] = {
                        bbox_elem.tag: int(float(bbox_elem.text))
                        for bbox_elem in sub_elem
                    }
                else:
                    obj[sub_elem.tag] = sub_elem.text
            objects.append(obj)

    if image_folder is None or image_filename is None:
        print(f"Skipping {filename}: missing <folder> or <filename>")
        continue

    # Construct full path to the image
    image_path = os.path.join('./dataset/', image_folder, image_filename)
    # splitext drops only the extension, so "img.001.jpg" -> "img.001"
    # instead of being truncated at the first dot.
    image_stem = os.path.splitext(image_filename)[0]
    # Number of crops already saved per label for this image.
    saved_per_label = {}

    # The context manager closes the image file once all crops are written.
    with Image.open(image_path) as image:
        # Crop and save each object
        for obj in objects:
            name = obj.get("name")
            bndbox = obj.get("bndbox")
            if not name or bndbox is None or any(key not in bndbox for key in bbox_keys):
                print(f"Skipping object in {filename}: missing name or bounding box")
                continue

            xmin, ymin, xmax, ymax = (bndbox[key] for key in bbox_keys)
            if xmax <= xmin or ymax <= ymin:
                print(f"Skipping object in {filename}: empty bounding box {bndbox}")
                continue

            cropped_image = image.crop((xmin, ymin, xmax, ymax))
            cropped_image = cropped_image.resize((100, 100))

            # Create a folder with the name if it doesn't exist
            output_obj_folder = os.path.join(output_folder, name)
            os.makedirs(output_obj_folder, exist_ok=True)

            # Several objects in one image can share a label. The first keeps
            # the plain "<stem>_<label>.png" name; repeats get a numeric
            # suffix so they do not overwrite each other.
            repeat = saved_per_label.get(name, 0)
            saved_per_label[name] = repeat + 1
            suffix = f"_{repeat}" if repeat else ""

            # Save the cropped image
            cropped_image.save(
                os.path.join(output_obj_folder, f"{image_stem}_{name}{suffix}.png")
            )
