In [1]:
import os
import re
import shutil
import pickle
from tqdm import tqdm
from collections import defaultdict

def _consolidated_name(original_filename, source_prefix, traj_id_str,
                       segment_re, last_traj_re):
    """Return the renamed target file name, or None if the file is unrelated.

    Args:
        original_filename: File name as found in the source directory.
        source_prefix: Category of the source directory ("poi",
            "road_structure" or "last_trajectory").
        traj_id_str: Trajectory ID the caller is consolidating.
        segment_re: Compiled pattern for "<id>_segment_<n>.png".
        last_traj_re: Compiled pattern for "trajectory_<id>_<n>.png".
    """
    if source_prefix == "last_trajectory":
        # e.g. trajectory_<id>_17.png -> last_trajectory_<id>_17.png
        match = last_traj_re.match(original_filename)
        if match:
            return f"last_trajectory_{traj_id_str}_{match.group(1)}.png"
        return None

    # Segment image, e.g. <id>_segment_0.png -> 0_segment_<id>_<prefix>.png
    match = segment_re.match(original_filename)
    if match:
        return f"{match.group(1)}_segment_{traj_id_str}_{source_prefix}.png"

    # Main (whole-trajectory) image, e.g. <id>.png -> <prefix>_<id>.png
    if original_filename == f"{traj_id_str}.png":
        return f"{source_prefix}_{traj_id_str}.png"
    return None


def consolidate_porto_anomaly_images(traj_id_str, *, base_dir='../data/',
                                     target_root='../../TTE/consolidated_data'):
    """
    Copy every image belonging to one trajectory into a single new directory,
    renaming the files on the way, and write the sorted list of new names to
    ``file_list.pkl`` inside that directory.

    Source directories (all below ``base_dir``) and the renaming rules:
        poi_trajectory_images / road_structure_images:
            "<id>_segment_<n>.png" -> "<n>_segment_<id>_<prefix>.png"
            "<id>.png"             -> "<prefix>_<id>.png"
        last_trajectory_images:
            "trajectory_<id>_<n>.png" -> "last_trajectory_<id>_<n>.png"
    where <prefix> is "poi" or "road_structure".

    Args:
        traj_id_str: Trajectory ID as a string. It is matched literally; regex
            metacharacters in it (such as the "." in "107.0") have no special
            meaning.
        base_dir: Directory containing the three source image folders.
        target_root: Directory under which "consolidated_<id>" is created.

    Returns:
        tuple: (target directory path, sorted list of copied file names).
        (None, []) is returned when the target directory already exists (the
        trajectory is skipped) or cannot be created. When no matching file is
        found the (empty) target directory path is returned with an empty list.
    """
    # Built with "/" rather than os.path.join so the default path string is
    # exactly the one earlier versions of this function returned.
    target_consolidation_dir = f"{target_root}/consolidated_{traj_id_str}"

    # (category prefix, source sub-directory below base_dir)
    source_configurations = [
        ("poi", "poi_trajectory_images"),
        ("road_structure", "road_structure_images"),
        ("last_trajectory", "last_trajectory_images"),
    ]

    # An existing target directory means this trajectory is skipped.
    if os.path.exists(target_consolidation_dir):
        return None, []

    try:
        os.makedirs(target_consolidation_dir, exist_ok=True)
    except OSError as e:
        print(f"创建目录 '{target_consolidation_dir}' 时出错: {e}")
        return None, []

    # Escape the ID so it is matched literally, and compile the patterns once
    # instead of once per directory entry.
    escaped_id = re.escape(traj_id_str)
    segment_re = re.compile(rf"^{escaped_id}_segment_(\d+)\.png$")
    last_traj_re = re.compile(rf"^trajectory_{escaped_id}_(\d+)\.png$")

    copied_files = []  # one record per successfully copied file

    for source_prefix, sub_dir in source_configurations:
        source_dir_abs = os.path.join(base_dir, sub_dir)

        if not os.path.isdir(source_dir_abs):
            print(f"源目录不存在: {source_dir_abs}")
            continue

        for original_filename in os.listdir(source_dir_abs):
            target_filename = _consolidated_name(
                original_filename, source_prefix, traj_id_str,
                segment_re, last_traj_re,
            )
            if target_filename is None:
                continue

            source_file_path = os.path.join(source_dir_abs, original_filename)
            target_file_path = os.path.join(target_consolidation_dir, target_filename)

            # Best effort: a failed copy is reported and the loop carries on.
            try:
                shutil.copy2(source_file_path, target_file_path)
            except OSError as e:
                print(f"复制 '{source_file_path}' 到 '{target_file_path}' 时出错: {e}")
                continue

            copied_files.append({
                "original_path": source_file_path,
                "target_path": target_file_path,
                "filename": target_filename,
                "source": source_prefix,
            })
            print(f"已复制 '{original_filename}' 到 '{target_filename}'")

    if not copied_files:
        print(f"警告: 未找到与轨迹ID '{traj_id_str}' 相关的任何文件")
        return target_consolidation_dir, []

    # The returned / pickled list is ordered by the new file names.
    copied_files.sort(key=lambda item: item["filename"])
    file_list = [item["filename"] for item in copied_files]

    pickle_file_path = os.path.join(target_consolidation_dir, 'file_list.pkl')
    try:
        with open(pickle_file_path, 'wb') as pickle_file:
            pickle.dump(file_list, pickle_file)
    except (OSError, pickle.PicklingError) as e:
        print(f"保存文件列表时出错: {e}")

    # Per-category counts, taken from the recorded source category rather than
    # from substring searches in the file name.
    file_types = defaultdict(int)
    for item in copied_files:
        file_types[item["source"]] += 1

    print(f"\n复制统计信息 - 轨迹 {traj_id_str}:")
    print(f"总计: {len(copied_files)} 个文件")
    for file_type, count in file_types.items():
        print(f"- {file_type}: {count} 个文件")

    return target_consolidation_dir, file_list



In [2]:
import os
import re
import shutil
import pickle
from tqdm import tqdm

def get_trajectory_ids_from_folder(folder_path):
    """
    Collect the distinct trajectory IDs that appear in the segment-image file
    names of a folder, ignoring hidden files.

    Only names of the form "<traj_id>_segment_<n>.png" are considered, e.g.
    "107.0_segment_0.png" contributes the ID "107.0".

    Args:
        folder_path: Path of the folder to scan.

    Returns:
        list: The unique trajectory IDs, in no particular order. An empty list
        is returned (after printing a message) if the folder does not exist.
    """
    if not os.path.isdir(folder_path):
        print(f"文件夹不存在: {folder_path}")
        return []

    segment_name_re = re.compile(r"^([^._][^/\\:*?\"<>|]*)_segment_\d+\.png$")

    def extract_id(name):
        # Hidden files (this also covers the "._" resource-fork files) and
        # anything that is not a PNG are ignored outright.
        if name.startswith('.') or not name.endswith('.png'):
            return None
        found = segment_name_re.match(name)
        return found.group(1) if found else None

    candidates = (extract_id(name) for name in os.listdir(folder_path))
    return list({traj_id for traj_id in candidates if traj_id is not None})




In [3]:
base_dir = '../data'
# The two image folders that are scanned for segment images.
poi_folder, road_folder = (
    os.path.join(base_dir, folder_name)
    for folder_name in ("poi_trajectory_images", "road_structure_images")
)
# Extract the trajectory IDs present in each folder.
poi_traj_ids = get_trajectory_ids_from_folder(poi_folder)
road_traj_ids = get_trajectory_ids_from_folder(road_folder)


In [4]:
# Notebook display of the trajectory IDs extracted from the POI image folder.
poi_traj_ids

['988.0',
 '3059.0',
 '5027.0',
 '4379.0',
 '2069.0',
 '1364.0',
 '2245.0',
 '4268.0',
 '752.0',
 '975.0',
 '3404.0',
 '1857.0',
 '3068.0',
 '5014.0',
 '4271.0',
 '3633.0',
 '413.0',
 '3403.0',
 '1599.0',
 '3604.0']

In [5]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

# Trajectory IDs found in either folder, with duplicates removed.
all_traj_ids = [*set(poi_traj_ids + road_traj_ids)]

print(f"从文件夹中提取到 {len(all_traj_ids)} 个唯一轨迹ID")

# Worker executed for each trajectory on the thread pool.
def process_trajectory(traj_id):
    """Consolidate the images of one trajectory and report whether it succeeded.

    Args:
        traj_id: Trajectory ID passed on to consolidate_porto_anomaly_images.

    Returns:
        tuple: (traj_id, True) if the consolidation returned a non-empty file
        list; (traj_id, False) if the list was empty or an exception was
        raised (the exception is printed, not propagated).
    """
    succeeded = False
    try:
        _, copied_names = consolidate_porto_anomaly_images(traj_id)
    except Exception as e:
        print(f"处理轨迹ID {traj_id} 时发生错误: {e}")
    else:
        succeeded = bool(copied_names)
    return traj_id, succeeded

# Run the consolidation for every trajectory concurrently on a thread pool.
max_workers = 8  # number of worker threads; adjust to the machine
success_count = 0

with ThreadPoolExecutor(max_workers=max_workers) as executor:
    # Map each submitted future back to the trajectory ID it is working on.
    futures = {}
    for traj_id in all_traj_ids:
        futures[executor.submit(process_trajectory, traj_id)] = traj_id
    # as_completed yields futures as they finish, which drives the progress bar.
    for future in tqdm(as_completed(futures), total=len(futures), desc="处理轨迹"):
        traj_id, success = future.result()
        success_count += int(success)

print(f"处理完成。成功整合了 {success_count}/{len(all_traj_ids)} 个轨迹的图片。")

从文件夹中提取到 20 个唯一轨迹ID
已复制 '3059.0_segment_1.png' 到 '1_segment_3059.0_poi.png'
已复制 '4379.0_segment_4.png' 到 '4_segment_4379.0_poi.png'
已复制 '1364.0.png' 到 'poi_1364.0.png'
已复制 '988.0_segment_0.png' 到 '0_segment_988.0_poi.png'
已复制 '2069.0_segment_3.png' 到 '3_segment_2069.0_poi.png'
已复制 '5027.0.png' 到 'poi_5027.0.png'
已复制 '2245.0_segment_2.png' 到 '2_segment_2245.0_poi.png'
已复制 '4268.0_segment_0.png' 到 '0_segment_4268.0_poi.png'
已复制 '1364.0_segment_2.png' 到 '2_segment_1364.0_poi.png'
已复制 '4379.0_segment_1.png' 到 '1_segment_4379.0_poi.png'
已复制 '3059.0_segment_0.png' 到 '0_segment_3059.0_poi.png'
已复制 '5027.0_segment_1.png' 到 '1_segment_5027.0_poi.png'
已复制 '988.0_segment_1.png' 到 '1_segment_988.0_poi.png'
已复制 '2245.0_segment_3.png' 到 '3_segment_2245.0_poi.png'
已复制 '2069.0_segment_2.png' 到 '2_segment_2069.0_poi.png'
已复制 '4379.0_segment_0.png' 到 '0_segment_4379.0_poi.png'
已复制 '4268.0_segment_1.png' 到 '1_segment_4268.0_poi.png'
已复制 '1364.0_segment_3.png' 到 '3_segment_1364.0_poi.png'
已复制 '5027.0_segm

处理轨迹: 100%|██████████| 20/20 [00:00<00:00, 135.77it/s]

已复制 '2245.0_segment_0.png' 到 '0_segment_2245.0_poi.png'
已复制 '3059.0_segment_2.png' 到 '2_segment_3059.0_poi.png'
已复制 '5027.0_segment_3.png' 到 '3_segment_5027.0_poi.png'
已复制 '2069.0_segment_1.png' 到 '1_segment_2069.0_poi.png'
已复制 '988.0_segment_0.png' 到 '0_segment_988.0_road_structure.png'
已复制 '4268.0.png' 到 'poi_4268.0.png'
已复制 '988.0_segment_1.png' 到 '1_segment_988.0_road_structure.png'
已复制 '1364.0_segment_0.png' 到 '0_segment_1364.0_poi.png'
已复制 '5027.0.png' 到 'road_structure_5027.0.png'
已复制 '4268.0_segment_0.png' 到 '0_segment_4268.0_road_structure.png'
已复制 '988.0.png' 到 'road_structure_988.0.png'
已复制 '2245.0.png' 到 'poi_2245.0.png'
已复制 '3059.0_segment_3.png' 到 '3_segment_3059.0_poi.png'
已复制 '4379.0_segment_3.png' 到 '3_segment_4379.0_poi.png'
已复制 '5027.0_segment_1.png' 到 '1_segment_5027.0_road_structure.png'
已复制 '1364.0.png' 到 'road_structure_1364.0.png'
已复制 '4268.0_segment_1.png' 到 '1_segment_4268.0_road_structure.png'
已复制 'trajectory_988.0_17.png' 到 'last_trajectory_988.0_17.png'
已复制


