In [1]:
import os
import glob
import shutil
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta

def get_evenly_distributed_xml_files(base_path, parking_lots, weather_conditions, max_files_per_day=5):
    """Select annotation files spread across each recording day.

    Walks ``base_path/<lot>/<weather>/<day>/`` for every lot and weather
    condition given, reads the time of day from each ``*.jpg`` file name
    (the ``HH_MM_SS`` fields that follow the first underscore), and picks
    the images closest to ``max_files_per_day`` target times spaced evenly
    from the earliest image of that day. The targets sit at
    ``i / max_files_per_day`` of the day's time span, so the latest image
    is not targeted exactly.

    Args:
        base_path: Root directory containing one sub-directory per lot.
        parking_lots: Iterable of lot directory names.
        weather_conditions: Iterable of weather directory names.
        max_files_per_day: Upper bound on files selected per day directory.
            A value of zero or less selects nothing.

    Returns:
        A ``(train_files, val_files)`` tuple of XML paths. Every third day
        that has at least one parseable image (counting from the first)
        goes to validation, the others to training. Only XML files that
        exist on disk are returned, and each one at most once per day.
    """
    selected_xml_files_train = []
    selected_xml_files_val = []

    # A non-positive quota selects nothing; returning here also avoids the
    # division by zero that max_files_per_day == 0 would otherwise cause.
    if max_files_per_day <= 0:
        return selected_xml_files_train, selected_xml_files_val

    day_counter = 0

    for lot in parking_lots:
        for weather in weather_conditions:
            day_paths = sorted(glob.glob(os.path.join(base_path, lot, weather, '*')))
            for day_path in day_paths:
                image_files = glob.glob(os.path.join(day_path, '*.jpg'))

                if not image_files:
                    continue

                timestamps = []
                for image_path in image_files:
                    filename = os.path.basename(image_path)
                    try:
                        timestamp_str = '_'.join(filename.split('_')[1:4]).split('.')[0]  # Get the 'HH_MM_SS' part
                        timestamp = datetime.strptime(timestamp_str, '%H_%M_%S')
                    except ValueError:
                        print(f"Warning: Unable to parse timestamp from {filename}. Skipping this file.")
                        continue
                    timestamps.append((timestamp, image_path))

                if not timestamps:
                    continue

                timestamps.sort()  # Sort by timestamp
                min_time, max_time = timestamps[0][0], timestamps[-1][0]
                time_diff = (max_time - min_time).total_seconds() / max_files_per_day

                # The whole day goes to one split so frames of the same day
                # never end up in both training and validation.
                if day_counter % 3 == 0:
                    destination = selected_xml_files_val
                else:
                    destination = selected_xml_files_train

                # With fewer distinct images than targets, several targets
                # resolve to the same image; remember what was already taken
                # so no file is returned twice.
                already_picked = set()
                for i in range(max_files_per_day):
                    target_time = min_time + timedelta(seconds=i * time_diff)
                    closest_image = min(timestamps, key=lambda item: abs(item[0] - target_time))[1]
                    if closest_image in already_picked:
                        continue
                    already_picked.add(closest_image)

                    # Swap only the extension; str.replace would also rewrite
                    # a '.jpg' appearing elsewhere in the path.
                    xml_file = os.path.splitext(closest_image)[0] + '.xml'
                    if os.path.exists(xml_file):
                        destination.append(xml_file)

                day_counter += 1

    return selected_xml_files_train, selected_xml_files_val


def process_xml_file(xml_file, image_width, image_height, selected_classes):
    """Convert the ``<space>`` entries of an annotation file to label lines.

    Each ``<space>`` element contributes one line of the form
    ``"<class> <x_center> <y_center> <width> <height>"``, where the class is
    the integer value of its ``occupied`` attribute and the four geometry
    values come from the nested ``<center x y>`` and ``<size w h>`` elements,
    divided by the image dimensions and formatted with six decimals.

    Args:
        xml_file: Path of the XML annotation file to parse.
        image_width: Pixel width used to normalise ``x`` and ``w``.
        image_height: Pixel height used to normalise ``y`` and ``h``.
        selected_classes: Container of class ids to keep; other spaces are
            dropped silently.

    Returns:
        A list of label strings, one per kept space, in document order.
        Spaces with a missing or non-integer ``occupied`` attribute, or with
        missing or non-integer geometry, are skipped with a printed warning.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    root = ET.parse(xml_file).getroot()

    normalized_data = []
    for space in root.findall('.//space'):
        try:
            object_class_id = int(space.get('occupied'))
        except (TypeError, ValueError):
            # TypeError: attribute absent (int(None)); ValueError: present
            # but not an integer literal.
            print(f"Warning: 'occupied' attribute missing or invalid in {xml_file}, space ID {space.get('id')}. Skipping this space.")
            continue

        # Process only the specified classes
        if object_class_id not in selected_classes:
            continue

        center = space.find('.//center')
        size = space.find('.//size')

        try:
            x_center = int(center.get('x')) / image_width
            y_center = int(center.get('y')) / image_height
            width = int(size.get('w')) / image_width
            height = int(size.get('h')) / image_height
        except (AttributeError, TypeError, ValueError):
            # AttributeError: <center>/<size> element absent; TypeError or
            # ValueError: coordinate attribute absent or not an integer.
            # One malformed space should not abort the whole file.
            print(f"Warning: geometry missing or invalid in {xml_file}, space ID {space.get('id')}. Skipping this space.")
            continue

        normalized_data.append(f"{object_class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")

    return normalized_data

# Save as txt file
def save_to_txt(data, txt_file):
    """Write every string in ``data`` to ``txt_file``, one per line.

    The file is created or truncated, and each entry is followed by a
    newline. An empty ``data`` produces an empty file.
    """
    with open(txt_file, 'w') as out:
        out.writelines(entry + '\n' for entry in data)

# Updated function to process and save files based on selected XML files
def process_and_save_selected_files(selected_xml_files, image_folder, label_folder, image_width, image_height, selected_classes=(0, 1)):
    """Write a label file and copy the image for each selected annotation.

    For every XML path whose sibling ``.jpg`` exists, the annotation is
    converted with ``process_xml_file``, saved as ``<stem>.txt`` in
    ``label_folder`` via ``save_to_txt``, and the image is copied into
    ``image_folder``. XML files without a matching image are skipped.

    Args:
        selected_xml_files: Iterable of XML annotation paths.
        image_folder: Existing directory that receives the copied images.
        label_folder: Existing directory that receives the ``.txt`` labels.
        image_width: Pixel width passed on for normalisation.
        image_height: Pixel height passed on for normalisation.
        selected_classes: Class ids to keep. Defaults to ``(0, 1)``; an
            immutable tuple is used so the default cannot be altered
            between calls.

    Returns:
        None.
    """
    # NOTE(review): outputs are named by basename only, so two inputs with
    # the same file name in different source folders overwrite each other.
    for xml_file in selected_xml_files:
        # Swap only the extension; str.replace would also rewrite a '.xml'
        # that happens to appear in a directory name.
        stem_path = os.path.splitext(xml_file)[0]
        image_file = stem_path + '.jpg'  # assuming JPG format
        if not os.path.exists(image_file):
            continue  # skip if corresponding image does not exist

        # Process XML for the selected classes and save TXT
        normalized_data = process_xml_file(xml_file, image_width, image_height, selected_classes)
        txt_file_path = os.path.join(label_folder, os.path.basename(stem_path) + '.txt')
        save_to_txt(normalized_data, txt_file_path)

        # Copy image file
        shutil.copy2(image_file, os.path.join(image_folder, os.path.basename(image_file)))

# Input/output locations and dataset settings
base_path = './data/PKLot/PKLot/'
processed_path = './datasets/ds7/'
train_path = os.path.join(processed_path, 'train')
val_path = os.path.join(processed_path, 'val')
image_width = 1280  # must match the real pixel width of the source images
image_height = 720  # must match the real pixel height of the source images
parking_lots = ['PUCPR', 'UFPR04', 'UFPR05']
weather_conditions = ['Cloudy', 'Rainy', 'Sunny']

# Make sure each split has its images/ and labels/ output folders
for split_root in (train_path, val_path):
    for subfolder in ('images', 'labels'):
        os.makedirs(os.path.join(split_root, subfolder), exist_ok=True)

# Choose the annotation files for each split (three per day)
selected_train_xml_files, selected_val_xml_files = get_evenly_distributed_xml_files(
    base_path,
    parking_lots,
    weather_conditions,
    max_files_per_day=3,
)

# Convert and copy the training split first, then the validation split
for split_root, split_xml_files in (
    (train_path, selected_train_xml_files),
    (val_path, selected_val_xml_files),
):
    process_and_save_selected_files(
        split_xml_files,
        os.path.join(split_root, 'images'),
        os.path.join(split_root, 'labels'),
        image_width,
        image_height,
    )

print("Processing complete.")


Processing complete.
