In [2]:
import json
import cv2
import os
import requests
from urllib.parse import urlparse
from urllib.parse import urljoin
import os
import shutil
import numpy as np
import xml.etree.ElementTree as ET 

In [3]:
def load_xml(xml_file_path):
    """Parse an XML file and return its root element.

    Args:
        xml_file_path: Path of the XML file to parse.

    Returns:
        The root element of the parsed tree, or ``None`` when parsing fails
        for any reason (the error is reported on stdout instead of raised).
    """
    try:
        return ET.parse(xml_file_path).getroot()
    except Exception as err:
        # Best-effort loader: report the problem and let the caller decide
        # what to do with a missing document.
        print(f"Error loading XML file '{xml_file_path}': {err}")
    return None

In [4]:
def download_image(image_path, output_directory):
    """Copy a local image file into ``output_directory``.

    Despite the name, nothing is fetched over the network: the file at
    ``image_path`` is copied with ``shutil.copy``.

    Args:
        image_path: Path of the source image on the local filesystem.
        output_directory: Folder that receives the copy; created if absent.

    Returns:
        The path of the image inside ``output_directory``, or ``None`` when
        the source file does not exist or has zero size. An already existing
        file with the same name in ``output_directory`` is left untouched and
        its path is returned.
    """
    # The output folder is created up front, even if the source turns out to
    # be unusable.
    os.makedirs(output_directory, exist_ok=True)

    source_path = os.path.abspath(image_path)

    # Guard clause: bail out early on a missing or empty source file.
    if not (os.path.exists(source_path) and os.path.getsize(source_path) > 0):
        print(f"Image file '{source_path}' is missing or empty.")
        return None

    target_path = os.path.join(output_directory, os.path.basename(image_path))
    if not os.path.exists(target_path):
        shutil.copy(source_path, target_path)
    return target_path


In [5]:
def extract_and_save_labeled_regions(xml_data, image, cut_output_directory, folder_no_star, folder_with_star, labeled_output_directory, xml_directory=None):
    """Crop every annotated bounding box out of ``image`` and sort the crops by label.

    For each ``<object>`` element found in ``xml_data`` the region described by
    its ``<bndbox>`` is written to ``cut_output_directory`` as
    ``<image_name>_<n>.jpg`` (``n`` is the 1-based position of the object in
    the annotation) and then moved to ``folder_with_star`` when the object's
    ``<name>`` contains the character ``'1'``, otherwise to ``folder_no_star``.
    A copy of the unmodified image is also written to
    ``labeled_output_directory`` as ``original_<image_name>.jpg``.

    Objects whose bounding box is missing, malformed or empty, and crops that
    cannot be written, are reported on stdout and skipped instead of aborting
    the whole image.

    If the annotation contains no ``<object>`` elements at all, the file
    ``<image_name>.xml`` inside ``xml_directory`` is deleted.

    Args:
        xml_data: Root element of the annotation; must contain a ``<filename>``.
        image: Image array that supports ``image[y0:y1, x0:x1]`` slicing and
            ``.shape`` (as returned by ``cv2.imread``).
        cut_output_directory: Staging folder the crops are first written to.
        folder_no_star: Destination for crops whose label has no ``'1'``.
        folder_with_star: Destination for crops whose label contains ``'1'``.
        labeled_output_directory: Folder receiving the copy of the full image.
        xml_directory: Folder holding the annotation files. Optional for
            backward compatibility: when omitted, a module-level variable
            named ``xml_directory`` is used if one exists.

    NOTE(review): the label test is a substring match, so a label such as
    ``'10'`` is also routed to ``folder_with_star`` -- confirm this is intended.
    """
    objects = xml_data.findall(".//object")

    for directory in (cut_output_directory, folder_no_star, folder_with_star, labeled_output_directory):
        os.makedirs(directory, exist_ok=True)

    image_name = os.path.splitext(xml_data.findtext(".//filename"))[0]

    # Keep a copy of the source image next to the results (no boxes are drawn on it).
    original_image_path = os.path.join(labeled_output_directory, f'original_{image_name}.jpg')
    if not cv2.imwrite(original_image_path, image):
        print(f"Could not save the original image: {original_image_path}")

    image_height, image_width = image.shape[:2]

    # Numbering follows the object's position so filenames stay stable even
    # when some objects are skipped.
    for index, obj in enumerate(objects, start=1):
        # A missing <name> is treated as an empty label rather than crashing
        # the substring test below.
        label = obj.findtext("name", default="")
        bndbox = obj.find("bndbox")

        try:
            # int(float(...)) also accepts coordinates written as "12.0".
            xmin, ymin, xmax, ymax = (
                int(float(bndbox.findtext(tag)))
                for tag in ("xmin", "ymin", "xmax", "ymax")
            )
        except (AttributeError, TypeError, ValueError, OverflowError) as e:
            print(f"Skipping object {index} of '{image_name}': invalid bounding box ({e})")
            continue

        print(f"Label: {label}")
        print(f"Bounding Box: ({xmin}, {ymin}, {xmax}, {ymax})")

        # Clamp to the image: negative coordinates would otherwise be read as
        # "count from the end" indices and silently crop the wrong area.
        left, top = max(xmin, 0), max(ymin, 0)
        right, bottom = min(xmax, image_width), min(ymax, image_height)
        if right <= left or bottom <= top:
            print(f"Skipping object {index} of '{image_name}': bounding box is empty or outside the image")
            continue

        region = image[top:bottom, left:right]

        cut_filename = f'{image_name}_{index}.jpg'
        cut_filepath = os.path.join(cut_output_directory, cut_filename)

        print(f"Cut filename: {cut_filepath}")
        try:
            saved = cv2.imwrite(cut_filepath, region)
        except Exception as e:
            print(f"An error occurred while saving the cut region: {e}")
            continue
        if not saved:
            # imwrite reports most failures through its return value, not an exception.
            print(f"An error occurred while saving the cut region: could not write '{cut_filepath}'")
            continue

        # Route the crop by label; only reached when the file really exists,
        # so the move cannot fail on a missing source.
        destination_folder = folder_with_star if '1' in label else folder_no_star
        shutil.move(cut_filepath, os.path.join(destination_folder, cut_filename))

    # An annotation without any objects is considered useless and removed.
    if not objects:
        if xml_directory is None:
            # Backward compatibility: older callers relied on a module-level
            # ``xml_directory`` variable instead of passing the folder in.
            xml_directory = globals().get("xml_directory")
        if xml_directory is None:
            print(f"Cannot delete XML file with no labels: folder of {image_name}.xml is unknown")
            return
        try:
            os.remove(os.path.join(xml_directory, f'{image_name}.xml'))
            print(f"Deleted XML file with no labels: {image_name}.xml")
        except Exception as e:
            print(f"An error occurred while deleting the XML file: {e}")


In [6]:
if __name__ == '__main__':
    # Batch driver: crop the labeled regions of every Pascal VOC annotation in
    # ``xml_directory`` and sort the crops into the two class folders.
    #
    # NOTE: these are machine-specific absolute paths; adjust them before
    # running the notebook anywhere else. The variable names are kept at module
    # level on purpose: extract_and_save_labeled_regions looks up
    # ``xml_directory`` there when deleting annotations that have no objects.
    xml_directory = r'C:\Users\hamza\OneDrive\Desktop\mature and immature\Pascal VOC Format'
    # Receives an 'original_<name>.jpg' copy of every processed source image.
    labeled_output_directory = r'C:\Users\hamza\OneDrive\Desktop\mature and immature\cascaaaaa'
    # Staging folder: crops are written here and then moved to a class folder.
    cut_output_directory = r'C:\Users\hamza\OneDrive\Desktop\mature and immature\acsa'
    folder_no_star = r'C:\Users\hamza\OneDrive\Desktop\mature and immature\Immature'
    folder_with_star = r'C:\Users\hamza\OneDrive\Desktop\mature and immature\Mature'

    # sorted() makes the processing order (and therefore the log) reproducible;
    # os.listdir returns entries in arbitrary order.
    for filename in sorted(os.listdir(xml_directory)):
        if not filename.endswith('.xml'):
            continue

        xml_file_path = os.path.join(xml_directory, filename)
        xml_data = load_xml(xml_file_path)
        if xml_data is None:
            # load_xml has already printed the parse error.
            print(f"Skipped: {filename}")
            continue

        image_path = xml_data.findtext(".//path")
        if not image_path:
            # Passing None to cv2.imread raises, so handle the missing tag here.
            print(f"Skipped: {filename} (annotation has no <path> element)")
            continue

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            print(f"Skipped: {filename} (could not read image '{image_path}')")
            continue

        try:
            extract_and_save_labeled_regions(xml_data, image, cut_output_directory, folder_no_star, folder_with_star, labeled_output_directory)
        except OSError as e:
            # Covers PermissionError and other filesystem failures so that one
            # bad file does not abort the whole batch.
            print(f"{type(e).__name__}: {e}. Could not finish processing '{xml_file_path}'.")
            continue

        print(f"Processed: {filename}")

    print("Processing completed for all XML files.")

Label: 0
Bounding Box: (1087, 213, 1166, 302)
Cut filename: C:\Users\hamza\OneDrive\Desktop\mature and immature\acsa\1673327431124_1.jpg
Label: 0
Bounding Box: (1194, 279, 1258, 341)
Cut filename: C:\Users\hamza\OneDrive\Desktop\mature and immature\acsa\1673327431124_2.jpg
Label: 0
Bounding Box: (1106, 180, 1176, 239)
Cut filename: C:\Users\hamza\OneDrive\Desktop\mature and immature\acsa\1673327431124_3.jpg
Label: 0
Bounding Box: (1227, 236, 1263, 279)
Cut filename: C:\Users\hamza\OneDrive\Desktop\mature and immature\acsa\1673327431124_4.jpg
Label: 0
Bounding Box: (1743, 71, 1891, 215)
Cut filename: C:\Users\hamza\OneDrive\Desktop\mature and immature\acsa\1673327431124_5.jpg
Label: 1
Bounding Box: (1572, 1, 1751, 109)
Cut filename: C:\Users\hamza\OneDrive\Desktop\mature and immature\acsa\1673327431124_6.jpg
Label: 0
Bounding Box: (1511, 322, 1563, 376)
Cut filename: C:\Users\hamza\OneDrive\Desktop\mature and immature\acsa\1673327431124_7.jpg
Processed: 1673327431124.xml
Label: 0
Boundi