In [None]:
import os
import re
import shutil
import pickle
from tqdm import tqdm
from collections import defaultdict

def consolidate_porto_anomaly_images(traj_id_str):
    """
    Consolidate all images related to one trajectory into a fresh directory.

    Scans ``../data/poi_trajectory_images``, ``../data/road_structure_images``
    and ``../data/last_trajectory_images`` (relative to the current working
    directory) for files belonging to ``traj_id_str``, copies them into
    ``../consolidated_data/consolidated_<traj_id_str>`` under new names, and
    pickles the sorted list of new names to ``file_list.pkl`` in that directory.

    Renaming rules (<prefix> is "poi" or "road_structure"):
        <id>.png                 -> <prefix>_<id>.png
        <id>_segment_<n>.png     -> <n>_segment_<id>_<prefix>.png
        trajectory_<id>_<n>.png  -> last_trajectory_<id>_<n>.png

    The trajectory ID is matched literally: regex metacharacters in the ID
    (e.g. the dot in "107.0") are escaped, so files of a different trajectory
    whose ID merely resembles this one are never picked up.

    Args:
        traj_id_str: Trajectory ID string
    Returns:
        tuple: (target directory path, sorted list of copied file names).
            ``(None, [])`` if the target directory already exists or cannot be
            created; ``(target directory path, [])`` if nothing was copied.
    """
    # Base path configuration
    base_dir = '../data'
    target_consolidation_dir = f"../consolidated_data/consolidated_{traj_id_str}"

    # Compile the filename patterns once, with the ID escaped so that it is
    # treated as literal text rather than as a regular expression.
    escaped_id = re.escape(str(traj_id_str))
    segment_pattern = re.compile(rf"^{escaped_id}_segment_(\d+)\.png$")
    last_traj_pattern = re.compile(rf"^trajectory_{escaped_id}_(\d+)\.png$")
    main_image_name = f"{traj_id_str}.png"

    def target_name_for(original_filename, prefix):
        """Return the consolidated file name, or None if the file is unrelated."""
        if prefix == "last_trajectory":
            match = last_traj_pattern.match(original_filename)
            if match:
                return f"last_trajectory_{traj_id_str}_{match.group(1)}.png"
            return None

        match = segment_pattern.match(original_filename)
        if match:
            # Segment image: the segment index moves to the front of the name
            return f"{match.group(1)}_segment_{traj_id_str}_{prefix}.png"
        if original_filename == main_image_name:
            # Main (whole-trajectory) image
            return f"{prefix}_{traj_id_str}.png"
        return None

    # (name prefix, source directory) for every image source
    source_configurations = [
        ("poi", os.path.join(base_dir, "poi_trajectory_images")),
        ("road_structure", os.path.join(base_dir, "road_structure_images")),
        ("last_trajectory", os.path.join(base_dir, "last_trajectory_images")),
    ]

    # An existing target directory means this trajectory was already handled
    if os.path.exists(target_consolidation_dir):
        return None, []

    # Create target directory
    try:
        os.makedirs(target_consolidation_dir, exist_ok=True)
    except Exception as e:
        print(f"Error creating directory '{target_consolidation_dir}': {e}")
        return None, []

    copied_files = []  # One record per successfully copied file

    for file_source_prefix, source_dir_abs in source_configurations:
        if not os.path.isdir(source_dir_abs):
            print(f"Source directory does not exist: {source_dir_abs}")
            continue

        for original_filename in os.listdir(source_dir_abs):
            target_filename = target_name_for(original_filename, file_source_prefix)
            if target_filename is None:
                continue

            source_file_path = os.path.join(source_dir_abs, original_filename)
            target_file_path = os.path.join(target_consolidation_dir, target_filename)

            # Best effort: a failed copy is reported and skipped, not fatal
            try:
                shutil.copy2(source_file_path, target_file_path)
            except Exception as e:
                print(f"Error copying '{source_file_path}' to '{target_file_path}': {e}")
                continue

            copied_files.append({
                "original_path": source_file_path,
                "target_path": target_file_path,
                "filename": target_filename,
                "source": file_source_prefix
            })
            print(f"Copied '{original_filename}' to '{target_filename}'")

    # Sort filenames
    copied_files.sort(key=lambda x: x["filename"])
    file_list = [item["filename"] for item in copied_files]

    if not copied_files:
        print(f"Warning: No files found related to trajectory ID '{traj_id_str}'")
        return target_consolidation_dir, []

    # Save file_list.pkl
    pickle_file_path = os.path.join(target_consolidation_dir, 'file_list.pkl')
    try:
        with open(pickle_file_path, 'wb') as pickle_file:
            pickle.dump(file_list, pickle_file)
    except Exception as e:
        print(f"Error saving file list: {e}")

    # Count by the source recorded at copy time; guessing the type from the
    # file name would miscount whenever the ID itself contains e.g. "poi".
    file_types = defaultdict(int)
    for item in copied_files:
        file_types[item["source"]] += 1

    print(f"\nCopy statistics - Trajectory {traj_id_str}:")
    print(f"Total: {len(copied_files)} files")
    for file_type, count in file_types.items():
        print(f"- {file_type}: {count} files")

    return target_consolidation_dir, file_list

In [None]:
import os
import re
import shutil
import pickle
from tqdm import tqdm

def get_trajectory_ids_from_folder(folder_path):
    """
    Collect the distinct trajectory IDs that have segment images in a folder.

    Only file names of the form "<traj_id>_segment_<n>.png" contribute an ID
    (e.g. "107.0_segment_0.png" yields "107.0"). Hidden files (names starting
    with "." or "._") and non-PNG files are ignored.

    Args:
        folder_path: Folder path
    Returns:
        list: Unique trajectory IDs in no particular order; an empty list
            (after printing a message) if the folder does not exist
    """
    if not os.path.isdir(folder_path):
        print(f"Folder does not exist: {folder_path}")
        return []

    # Group 1 is the trajectory ID: it may not start with "." or "_" and may
    # not contain characters that are illegal in file names.
    segment_name_re = re.compile(r"^([^._][^/\\:*?\"<>|]*)_segment_\d+\.png$")

    def is_candidate(name):
        """True for visible PNG files; hidden/system files are filtered out."""
        hidden = name.startswith('.') or name.startswith('._')
        return not hidden and name.endswith('.png')

    candidate_matches = (
        segment_name_re.match(name)
        for name in os.listdir(folder_path)
        if is_candidate(name)
    )
    unique_ids = {found.group(1) for found in candidate_matches if found}

    return list(unique_ids)

In [None]:
# Locate the two segment-image folders and list the trajectory IDs found in each
base_dir = '../data'
poi_folder, road_folder = (
    os.path.join(base_dir, image_subdir)
    for image_subdir in ("poi_trajectory_images", "road_structure_images")
)
poi_traj_ids, road_traj_ids = (
    get_trajectory_ids_from_folder(image_folder)
    for image_folder in (poi_folder, road_folder)
)


In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

# Union of the IDs seen in either folder, with duplicates removed
all_traj_ids = list(set().union(poi_traj_ids, road_traj_ids))
print(f"Extracted {len(all_traj_ids)} unique trajectory IDs from folders")

# Parallel processing function
def process_trajectory(traj_id):
    """
    Consolidate the images of one trajectory and report whether files were copied.

    Any exception raised by the consolidation is printed and treated as a
    failure, so one bad trajectory does not abort the whole batch.

    Args:
        traj_id: Trajectory ID string
    Returns:
        tuple: (traj_id, True) if the consolidation returned a non-empty file
            list, otherwise (traj_id, False)
    """
    succeeded = False
    try:
        _, copied_names = consolidate_porto_anomaly_images(traj_id)
        succeeded = bool(copied_names)
    except Exception as e:
        print(f"Error occurred while processing trajectory ID {traj_id}: {e}")
    return traj_id, succeeded

# Fan the per-trajectory consolidation out over a thread pool and tally the successes
max_workers = 8  # Thread count; tune to the machine
success_count = 0

with ThreadPoolExecutor(max_workers=max_workers) as executor:
    # Map every submitted future back to the trajectory ID it is working on
    futures = {}
    for traj_id in all_traj_ids:
        futures[executor.submit(process_trajectory, traj_id)] = traj_id

    # Consume results in completion order so the progress bar advances steadily
    completed = tqdm(as_completed(futures), total=len(futures), desc="Processing trajectories")
    for future in completed:
        traj_id, success = future.result()
        success_count += 1 if success else 0

print(f"Processing completed. Successfully consolidated images for {success_count}/{len(all_traj_ids)} trajectories.")