In [8]:
import glob
import os
import shutil
#import random
import xml.etree.ElementTree as ET

# Set the seed for reproducibility
#random.seed(42)


In [9]:
# XML processing
def process_xml_file(xml_file, image_width, image_height, selected_classes):
    """Convert the ``<space>`` entries of one annotation XML file to label lines.

    Each kept space becomes one string of the form
    ``"<class> <x_center> <y_center> <width> <height>"`` where the class is the
    integer value of the space's ``occupied`` attribute and the four geometry
    values are divided by the image dimensions and formatted with six decimals.

    Args:
        xml_file: Path (or file object) of the XML annotation file to parse.
        image_width: Image width in pixels, used to normalize x and width.
        image_height: Image height in pixels, used to normalize y and height.
        selected_classes: Container of class ids to keep; spaces whose
            ``occupied`` value is not in it are ignored.

    Returns:
        list[str]: One label line per kept space, in document order.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_file`` is not well-formed XML.
    """
    tree = ET.parse(xml_file)
    root = tree.getroot()

    normalized_data = []
    for space in root.findall('.//space'):
        try:
            object_class_id = int(space.get('occupied'))
        except (TypeError, ValueError):
            # TypeError: attribute absent (None); ValueError: non-numeric text.
            print(f"Warning: 'occupied' attribute missing or invalid in {xml_file}, space ID {space.get('id')}. Skipping this space.")
            continue

        # Process only the specified classes
        if object_class_id not in selected_classes:
            continue

        center = space.find('.//center')
        size = space.find('.//size')
        if center is None or size is None:
            print(f"Warning: 'center' or 'size' element missing in {xml_file}, space ID {space.get('id')}. Skipping this space.")
            continue

        try:
            x_pixels = int(center.get('x'))
            y_pixels = int(center.get('y'))
            w_pixels = int(size.get('w'))
            h_pixels = int(size.get('h'))
        except (TypeError, ValueError):
            # One malformed box should not abort the whole file.
            print(f"Warning: box coordinates missing or invalid in {xml_file}, space ID {space.get('id')}. Skipping this space.")
            continue

        x_center = x_pixels / image_width
        y_center = y_pixels / image_height
        width = w_pixels / image_width
        height = h_pixels / image_height

        normalized_data.append(f"{object_class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")

    return normalized_data

# Save as txt file
def save_to_txt(data, txt_file):
    """Write every string in ``data`` to ``txt_file``, one per line.

    The file is opened in write mode, so existing content is replaced; an
    empty ``data`` produces an empty file.
    """
    with open(txt_file, 'w') as out:
        out.writelines(entry + '\n' for entry in data)
            
# Convert the XML annotations under each date folder to TXT labels (restricted to selected_classes) and copy the matching images
def process_and_save_files(every_nth_image, date_list, base_path, image_folder, label_folder, selected_classes=(0, 1)):
    """Sample every n-th annotated image from the given date folders.

    For each sampled ``*.xml`` file that has a sibling ``.jpg`` image, the
    annotation is converted with ``process_xml_file``, written to
    ``label_folder`` as ``<name>.txt`` via ``save_to_txt``, and the image is
    copied into ``image_folder``.

    The sampling counter advances once per image/annotation pair (across all
    dates), so every folder in ``date_list`` contributes to the output.
    Previously the counter advanced once per *date*, which silently dropped
    whole date folders instead of thinning the images.

    Args:
        every_nth_image: Keep one out of this many image/annotation pairs
            (1 keeps everything). Must be non-zero.
        date_list: Folder paths relative to ``base_path`` to scan.
        base_path: Root directory that contains the date folders.
        image_folder: Existing directory that receives the copied images.
        label_folder: Existing directory that receives the TXT label files.
        selected_classes: Class ids forwarded to ``process_xml_file``.

    Note:
        The image dimensions are read from the module-level ``image_width``
        and ``image_height`` variables, which must be defined before calling.
    """
    counter = 0
    for date in date_list:
        date_path = os.path.join(base_path, date)
        # glob returns files in filesystem order; sort so the sample is reproducible.
        for xml_file in sorted(glob.glob(os.path.join(date_path, '*.xml'))):
            # splitext touches only the extension, unlike str.replace which
            # would also rewrite '.xml' appearing elsewhere in the path.
            image_file = os.path.splitext(xml_file)[0] + '.jpg'  # assuming JPG format
            if not os.path.exists(image_file):
                continue  # skip if corresponding image does not exist

            keep = counter % every_nth_image == 0
            counter += 1
            if not keep:
                continue

            # Process XML for the selected classes and save TXT
            normalized_data = process_xml_file(xml_file, image_width, image_height, selected_classes)
            txt_file_name = os.path.splitext(os.path.basename(xml_file))[0] + '.txt'
            txt_file_path = os.path.join(label_folder, txt_file_name)
            save_to_txt(normalized_data, txt_file_path)

            # Copy image file
            shutil.copy2(image_file, os.path.join(image_folder, os.path.basename(image_file)))


# Input / output locations
base_path = './data/PKLot/PKLot/'
processed_path = './datasets/ds3/'
train_path = os.path.join(processed_path, 'train')
val_path = os.path.join(processed_path, 'val')

# Pixel dimensions used to normalize the box coordinates
image_width = 1280  # replace with actual image width
image_height = 720  # replace with actual image height

# selected_classes is not passed below, so process_and_save_files uses its
# default (classes 0 and 1). Pass [0] or [1] explicitly to keep a single class.

# Make sure <split>/images and <split>/labels exist for both splits
for split_path in (train_path, val_path):
    for subfolder in ('images', 'labels'):
        os.makedirs(os.path.join(split_path, subfolder), exist_ok=True)

# Date folders (relative to base_path) assigned to each split
train_dates = [
    'PUCPR/Sunny/2012-10-15',
    'PUCPR/Rainy/2012-09-21',
    'PUCPR/Cloudy/2012-11-11',
    'UFPR04/Sunny/2013-01-29',
    'UFPR04/Rainy/2012-12-14',
    'UFPR04/Cloudy/2013-01-16',
]
val_dates = [
    'PUCPR/Sunny/2012-10-17',
    'PUCPR/Rainy/2012-11-10',
    'PUCPR/Cloudy/2012-11-08',
    'UFPR04/Sunny/2012-12-20',
    'UFPR04/Rainy/2012-12-11',
    'UFPR04/Cloudy/2013-01-15',
]

# Build the training split first, then the validation split; the first
# argument (3) is the every_nth_image sampling step.
for split_path, split_dates in ((train_path, train_dates), (val_path, val_dates)):
    process_and_save_files(
        3,
        split_dates,
        base_path,
        os.path.join(split_path, 'images'),
        os.path.join(split_path, 'labels'),
    )

print("Processing complete.")

Processing complete.
