In [1]:
import os
import re
import shutil
import pickle
from tqdm import tqdm

def consolidate_porto_anomaly_images(traj_id_str):
    """
    Consolidate all related anomaly detection images for the specified trajectory_id
    into a new subdirectory and rename files according to rules.
    Returns (target directory path, ordered file list)

    Args:
        traj_id_str: Trajectory ID string
    Returns:
        tuple: (target directory path, ordered file list)
    """
    # Base path configuration
    base_dir = '../data'
    target_consolidation_dir = f"../consolidated_data/consolidated_anomaly_{traj_id_str}"

    # Source directory configuration
    source_configurations = [
        {
            "name_prefix": "poi",
            "source_dir": os.path.join(base_dir, "poi_anomaly_images")
        },
        {
            "name_prefix": "road_structure",
            "source_dir": os.path.join(base_dir, "road_structure_anomaly_images")
        }
    ]

    # Create/clear target directory
    if os.path.exists(target_consolidation_dir):
        # try:
        #     shutil.rmtree(target_consolidation_dir)
        # except Exception as e:
        #     return None, []
        return None, []

    try:
        os.makedirs(target_consolidation_dir, exist_ok=True)
    except Exception as e:
        return None, []

    copied_files = []  # Store copied file information

    for config in source_configurations:
        source_dir_abs = config["source_dir"]
        file_source_prefix = config["name_prefix"]  # "poi_anomaly", "road_structure_anomaly"

        if not os.path.isdir(source_dir_abs):
            print(f"Source directory does not exist: {source_dir_abs}")
            continue

        for original_filename in os.listdir(source_dir_abs):
            target_filename = None  # Initialize target filename
            should_copy = False

            # Try to match segment image filename, e.g., "123_segment_0.png"
            segment_pattern = re.compile(rf"^{traj_id_str}_segment_(\d+)\.png$")
            match = segment_pattern.match(original_filename)

            if match:
                # Is segment image, rename to 0_segment_123_poi_anomaly.png format
                segment_index = match.group(1)  # Extract index number, e.g., "0"
                target_filename = f"{segment_index}_segment_{traj_id_str}_{file_source_prefix}.png"
                should_copy = True
            elif original_filename == f"{traj_id_str}.png":
                # Is main image
                target_filename = f"{file_source_prefix}_{traj_id_str}.png"
                should_copy = True

            if should_copy and target_filename:
                source_file_path = os.path.join(source_dir_abs, original_filename)
                target_file_path = os.path.join(target_consolidation_dir, target_filename)

                try:
                    shutil.copy2(source_file_path, target_file_path)
                    copied_files.append({
                        "original_path": source_file_path,
                        "target_path": target_file_path,
                        "filename": target_filename
                    })
                except Exception as e:
                    print(f"Error copying '{source_file_path}' to '{target_file_path}': {e}")

    # Sort filenames
    copied_files.sort(key=lambda x: x["filename"])
    file_list = [item["filename"] for item in copied_files]

    # Save file_list.pkl
    if copied_files:
        pickle_file_path = os.path.join(target_consolidation_dir, 'file_list.pkl')
        try:
            with open(pickle_file_path, 'wb') as pickle_file:
                pickle.dump(file_list, pickle_file)
        except Exception as e:
            print(f"Error saving file list: {e}")

        return target_consolidation_dir, file_list
    else:
        return target_consolidation_dir, []

In [2]:
import os
import re
import shutil
import pickle
from tqdm import tqdm

def get_trajectory_ids_from_folder(folder_path):
    """
    Extract all unique trajectory IDs from segment images in the specified folder,
    excluding system-generated hidden files

    Args:
        folder_path: Folder path
    Returns:
        list: List of unique trajectory IDs
    """
    if not os.path.isdir(folder_path):
        print(f"Folder does not exist: {folder_path}")
        return []

    traj_ids = set()

    # Only extract segment image filename format: "traj_id_segment_n.png", e.g., "107.0_segment_0.png"
    segment_pattern = re.compile(r"^([^._][^/\\:*?\"<>|]*)_segment_\d+\.png$")

    for filename in os.listdir(folder_path):
        # Skip hidden files and files starting with ._
        if filename.startswith('.') or filename.startswith('._'):
            continue

        if not filename.endswith('.png'):
            continue

        # Check if it's segment image format
        segment_match = segment_pattern.match(filename)
        if segment_match:
            traj_id = segment_match.group(1)  # e.g., extract "107.0" from "107.0_segment_0.png"
            traj_ids.add(traj_id)

    return list(traj_ids)

In [3]:
base_dir = '../data'
poi_folder = os.path.join(base_dir, "poi_anomaly_images")
road_folder = os.path.join(base_dir, "road_structure_anomaly_images")
poi_traj_ids = get_trajectory_ids_from_folder(poi_folder)
road_traj_ids = get_trajectory_ids_from_folder(road_folder)


In [4]:
poi_traj_ids

['0792712deb604e4f3c505c6733ab13a4',
 'b57022934c9faa08fa2d9582c2e1e682',
 '94b0ce8a8f996252b204032215d0df99',
 '8a6fdca8a7c7ea1e7774134250a690db',
 '84d6f04430c014a01e3810f84b16cb59',
 'ac91e655cc7f815199599a9b26ad2bfb',
 'ae8d7fd83a23a41735f2d683feb0693b',
 '5b7d91b25734fe447bc443b2ec9a5321',
 '47a68075d1ebb74604012e2d5509c8eb',
 '0a96a7507d03b432fab945398cd91430',
 '7f7f748a8d675f8246c9a7586d630ca5',
 'a2784607e0cc1eff9ec56dbf431b8cdc',
 '70d0ad89a50f8d438b7fb523dc8fb10f',
 'a6f07797b661b465c4c8c8629d163a7a',
 '1ec758d425af0b007066beb548fadefc',
 'b73b53fedc4ea302335f59caeca6905d',
 'eacae2d0c78299d29f6ccf1973802ac4',
 'f6a3ae4d719178af3658f6428c58553f',
 '2357ae06e756ebeb32d451b11b7876ba',
 '0327219ff10ad86dc50d51688ab5889f',
 'dc69fed97b9f71740b821ad9430c7336',
 '44d2cf7f0f45f6f3b618929d71ca8af7',
 '39a47a27ba80dff63f1e8b181c922a9f',
 'e8041493e8f53c5fff1b44105593bbb0',
 '6cdf203df8b3dafea39e5bc26846500a',
 'e75b6bcf1327c7bd0f93c8e408ac4718',
 'b52d59e5655414c3db53b187ae0bf1d5',
 

In [5]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

# Merged trajectory IDs
all_traj_ids = list(set(poi_traj_ids + road_traj_ids))
print(f"Extracted {len(all_traj_ids)} unique trajectory IDs from folders")

# Parallel processing function
def process_trajectory(traj_id):
    try:
        target_dir, file_list = consolidate_porto_anomaly_images(traj_id)
        if file_list:
            return traj_id, True  # Indicates success
    except Exception as e:
        print(f"Error occurred while processing trajectory ID {traj_id}: {e}")
    return traj_id, False  # Indicates failure

# Use multi-threading for parallel processing
success_count = 0
max_workers = 8  # Set number of threads, can be adjusted based on machine

with ThreadPoolExecutor(max_workers=max_workers) as executor:
    futures = {executor.submit(process_trajectory, traj_id): traj_id for traj_id in all_traj_ids}

    for future in tqdm(as_completed(futures), total=len(futures), desc="Processing trajectories"):
        traj_id, success = future.result()
        if success:
            success_count += 1

print(f"Processing completed. Successfully consolidated images for {success_count}/{len(all_traj_ids)} trajectories.")

从文件夹中提取到 100 个唯一轨迹ID


处理轨迹: 100%|██████████| 100/100 [00:00<00:00, 382.06it/s]

处理完成。成功整合了 100/100 个轨迹的图片。



