In [9]:
# import pickle
# import numpy as np
# import pandas as pd

# def find_consecutive_ranges(file_path):
#     # 1. 读取 .pkl 文件
#     with open(file_path, 'rb') as file:
#         data = pickle.load(file)

#     # 2. 将数据转换为 Pandas DataFrame
#     df = pd.DataFrame(data)

#     # 找到所有值为 1 的索引
#     indices = df[df.iloc[:, 0] == 1].index

#     # 将索引转换为数组
#     indices = np.array(indices)

#     # 找出连续的范围
#     ranges = []
#     start = indices[0]
#     prev = start

#     for i in range(1, len(indices)):
#         if indices[i] != prev + 1:
#             ranges.append((start, prev))
#             start = indices[i]
#         prev = indices[i]
#     ranges.append((start, prev))

#     # 计算每个范围的长度
#     lengths = [end - start + 1 for start, end in ranges]

#     # 创建结果 DataFrame
#     result = pd.DataFrame(ranges, columns=['start', 'end'])
#     result['length'] = lengths

#     return result

# # 使用示例
# file_path = 'data\\processed\\omi-1_test_label.pkl'
# result = find_consecutive_ranges(file_path)
# # print(result)
# result

.txt 和 .pkl 的范围均为左闭右闭 [start, end]

In [10]:
import os
import pickle
import numpy as np
import pandas as pd


def find_consecutive_ranges(file_path):
    """
    Locate the runs of consecutive rows whose first column equals 1.

    The pickle at ``file_path`` is loaded, wrapped in a ``pandas.DataFrame``
    and the index labels of every row whose first column equals 1 are grouped
    into maximal runs of consecutive labels.

    Parameters
    ----------
    file_path : str
        Path of the pickle file to read. Its content must be something
        ``pandas.DataFrame`` accepts.

    Returns
    -------
    pandas.DataFrame
        One row per run with columns ``start``, ``end`` (both inclusive) and
        ``length`` (``end - start + 1``). When no row equals 1, or the loaded
        data has no columns, an empty frame with those three columns is
        returned instead of raising ``IndexError``.
    """
    # NOTE: pickle.load can execute arbitrary code; only use on trusted files.
    with open(file_path, 'rb') as file:
        data = pickle.load(file)

    df = pd.DataFrame(data)

    def _empty_result():
        # Same columns as the normal result so callers can index it uniformly.
        no_values = np.array([], dtype=np.int64)
        return pd.DataFrame(
            {'start': no_values, 'end': no_values, 'length': no_values}
        )

    # An empty payload yields a frame without columns; nothing to scan.
    if df.shape[1] == 0:
        return _empty_result()

    # Index labels of all rows flagged with 1 in the first column.
    indices = np.asarray(df.index[df.iloc[:, 0] == 1])
    if indices.size == 0:
        return _empty_result()

    # A run ends wherever the step to the next flagged index is not exactly 1.
    breaks = np.flatnonzero(np.diff(indices) != 1)
    starts = indices[np.r_[0, breaks + 1]]
    ends = indices[np.r_[breaks, indices.size - 1]]

    result = pd.DataFrame({'start': starts, 'end': ends})
    result['length'] = result['end'] - result['start'] + 1

    return result

def parse_txt_file(txt_file_path):
    """
    Parse a .txt label file and extract its (start, end) ranges.

    Each non-blank line is expected to begin with ``start-end``; anything
    after the first ``:`` is ignored. Lines that carry only ``start-end``
    without a ``:`` are accepted as well.

    Parameters
    ----------
    txt_file_path : str
        Path of the text file to read.

    Returns
    -------
    list[tuple[int, int]]
        The ranges in file order, exactly as written in the file.

    Raises
    ------
    ValueError
        If the part before ``:`` is not two integers separated by one ``-``.
    """
    ranges = []
    with open(txt_file_path, 'r') as file:
        for line in file:
            line = line.strip()
            if not line:
                # Skip blank lines.
                continue
            # partition never fails when ':' is absent (unlike unpacking split).
            range_part, _, _ = line.partition(':')
            start, end = map(int, range_part.split('-'))
            ranges.append((start, end))
    return ranges

def compare_results(txt_file_path, pkl_file_path):
    """
    Compare the ranges listed in a .txt file with those found in a .pkl file.

    Nothing is returned. When the two range lists differ, both file paths and
    both lists are printed, followed by a separator line; when they match,
    the function is silent.
    """
    # Ranges as written in the text file.
    expected = parse_txt_file(txt_file_path)

    # Ranges recovered from the pickled labels, as a list of (start, end) tuples.
    detected_frame = find_consecutive_ranges(pkl_file_path)
    detected = detected_frame[['start', 'end']].to_records(index=False).tolist()

    # Matching lists need no report.
    if expected == detected:
        return

    print("结果不一致！对比文件：", txt_file_path, pkl_file_path)
    print("txt 文件范围:", expected)
    print("pkl 文件范围:", detected)
    print("-" * 100)

# Example usage: directory with the .txt range files and directory with the .pkl files.
# Built with os.path.join so the paths also resolve on non-Windows systems;
# on Windows the resulting strings are identical to the former literals.
interpretation_label_dir = os.path.join('data', 'interpretation_label')
processed_dir = os.path.join('data', 'processed')

In [11]:

# Walk every omi-*.txt file in the interpretation_label directory and compare it
# with its matching .pkl file. sorted() keeps the report order reproducible,
# since os.listdir returns entries in arbitrary order.
for txt_file_name in sorted(os.listdir(interpretation_label_dir)):
    if txt_file_name.startswith('omi-') and txt_file_name.endswith('.txt'):
        txt_file_path = os.path.join(interpretation_label_dir, txt_file_name)

        # Strip only the trailing extension; split('.')[0] would cut the name
        # at its first dot.
        base_name = os.path.splitext(txt_file_name)[0]
        pkl_file_name = f"{base_name}_test_label.pkl"
        pkl_file_path = os.path.join(processed_dir, pkl_file_name)

        # Compare only when the matching .pkl file exists; otherwise report it.
        if os.path.exists(pkl_file_path):
            compare_results(txt_file_path, pkl_file_path)
        else:
            print(f"未找到对应的 .pkl 文件: {pkl_file_name}")

结果不一致！对比文件： data\interpretation_label\omi-11.txt data\processed\omi-11_test_label.pkl
txt 文件范围: [(738, 755), (757, 781), (782, 815), (1366, 1375), (2301, 2314), (2365, 2381), (2464, 2485), (2588, 2610), (2650, 2675), (3012, 3072)]
pkl 文件范围: [(738, 755), (757, 815), (1366, 1375), (2301, 2314), (2365, 2381), (2464, 2485), (2588, 2610), (2650, 2675), (3012, 3072)]
----------------------------------------------------------------------------------------------------


In [12]:
# Walk every machine-*.txt file in the interpretation_label directory and compare
# it with its matching .pkl file. sorted() keeps the report order reproducible,
# since os.listdir returns entries in arbitrary order.
for txt_file_name in sorted(os.listdir(interpretation_label_dir)):
    if txt_file_name.startswith('machine-') and txt_file_name.endswith('.txt'):
        txt_file_path = os.path.join(interpretation_label_dir, txt_file_name)

        # Strip only the trailing extension; split('.')[0] would cut the name
        # at its first dot.
        base_name = os.path.splitext(txt_file_name)[0]
        pkl_file_name = f"{base_name}_test_label.pkl"
        pkl_file_path = os.path.join(processed_dir, pkl_file_name)

        # Compare only when the matching .pkl file exists; otherwise report it.
        if os.path.exists(pkl_file_path):
            compare_results(txt_file_path, pkl_file_path)
        else:
            print(f"未找到对应的 .pkl 文件: {pkl_file_name}")

结果不一致！对比文件： data\interpretation_label\machine-1-1.txt data\processed\machine-1-1_test_label.pkl
txt 文件范围: [(15849, 16395), (16963, 17517), (18071, 18528), (19367, 20088), (20786, 21195), (24679, 24682), (26114, 26116), (27554, 27556)]
pkl 文件范围: [(15849, 16394), (16963, 17516), (18071, 18527), (19367, 20087), (20786, 21194), (24679, 24681), (26114, 26115), (27554, 27555)]
----------------------------------------------------------------------------------------------------
结果不一致！对比文件： data\interpretation_label\machine-1-6.txt data\processed\machine-1-6_test_label.pkl
txt 文件范围: [(246, 252), (653, 658), (2092, 2100), (2884, 2888), (3534, 3539), (4647, 5045), (5167, 5172), (5708, 5713), (5873, 5885), (6022, 6027), (6412, 6419), (7851, 7856), (9291, 9298), (10731, 10736), (11467, 11471), (12171, 12176), (13069, 13073), (13277, 13280), (13613, 13619), (14603, 14607), (15052, 15055), (15397, 15401), (15802, 15805), (16491, 16499), (16718, 16721), (16972, 16976), (17931, 17939), (18600, 21761), 