In [9]:
import json
import os

def split_json_by_timestamp(input_file, output_dir, window_size=5, step_size=1,
                            max_timestamp=40, label_offset=10):
    """
    Split a packet JSON file into overlapping sliding time windows.

    Window ``i`` covers the half-open interval
    ``[i * step_size, i * step_size + window_size)`` of each record's
    ``frame.time_relative`` value and is labelled ``label_offset + i``.
    Because the windows overlap, one record is usually written to several
    output files. Only non-empty windows produce a file.

    Output files are named by inserting the window label after the first two
    ``_``-separated parts of the input file's stem, e.g. ``b3_p5_0_2.json``
    -> ``b3_p5_10_0_2.json``. Stems with fewer than three parts get the label
    appended; stems with more than four parts keep all of their trailing parts.

    :param input_file: path of the source JSON file (a JSON array of records)
    :param output_dir: directory that receives the per-window files; created
        if it does not exist
    :param window_size: length of each window, in the timestamp's units
    :param step_size: distance between the starts of consecutive windows
    :param max_timestamp: upper bound used to derive the number of windows
    :param label_offset: number added to the window index to form its label
    :raises ValueError: if ``window_size`` or ``step_size`` is not positive
    :return: None. Unreadable or malformed input files are reported on stdout
        and skipped instead of raising.
    """
    if window_size <= 0 or step_size <= 0:
        raise ValueError("window_size and step_size must be positive")

    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError, so a
        # corrupt file is skipped rather than aborting a whole batch run.
        print(f"Error processing file {input_file}: {e}")
        return  # skip this file

    if not isinstance(data, list):
        # Iterating a dict/str would yield keys/characters, not packet records.
        print(f"Error processing file {input_file}: top-level JSON value is not a list")
        return

    # Number of windows that fit below max_timestamp (never negative).
    num_windows = max(0, int((max_timestamp - window_size) // step_size) + 1)

    # Precompute (label, start, end) once instead of per record.
    windows = [
        (label_offset + i, i * step_size, i * step_size + window_size)
        for i in range(num_windows)
    ]
    partitions = {label: [] for label, _, _ in windows}

    out_of_range = 0
    for item in data:
        try:
            timestamp = float(item["_source"]["layers"]["frame"]["frame.time_relative"])
        except (KeyError, TypeError, ValueError):
            # Missing keys, non-dict records and non-numeric timestamps are
            # all reported and skipped.
            print("Invalid data structure:", item)
            continue

        matched = False
        for label, start, end in windows:
            if start <= timestamp < end:
                partitions[label].append(item)
                matched = True
            elif start > timestamp:
                # Window starts only increase, so no later window can match.
                break
        if not matched:
            out_of_range += 1

    if out_of_range:
        print(f"Skipped {out_of_range} records outside all time windows.")

    # Split the file stem into its '_'-separated parts; the window label is
    # inserted after the first two parts and the rest is kept as a suffix.
    base_name = os.path.basename(input_file)
    name_parts = base_name.split('.json')[0].split('_')
    prefix_parts = name_parts[:2]
    suffix_parts = name_parts[2:]

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Write one file per non-empty window.
    for part, content in partitions.items():
        if content:
            output_file = "_".join(prefix_parts + [str(part)] + suffix_parts) + ".json"
            output_path = os.path.join(output_dir, output_file)
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(content, f, indent=4)
            print(f"Saved {output_file} with {len(content)} records.")

In [10]:
# 主函数：批量处理文件
def process_all_files(input_dir, output_dir):
    """
    Split every ``.json`` file found directly inside ``input_dir``.

    Each matching file is passed to ``split_json_by_timestamp`` together with
    ``output_dir``. Files are handled in sorted name order so that the
    processing order and log output are the same on every run.

    :param input_dir: directory that is scanned (non-recursively) for
        entries whose names end in ``.json``
    :param output_dir: directory that receives the split files; created if it
        does not exist
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for file_name in sorted(os.listdir(input_dir)):
        if file_name.endswith('.json'):
            input_file = os.path.join(input_dir, file_name)
            print(f"Processing file: {input_file}")
            split_json_by_timestamp(input_file, output_dir)

In [11]:
# Input and output directory paths
input_dir = '/home/nesl/202_project_hxy_cbl/202_packet_json_new'
output_dir = '/home/nesl/202_project_hxy_cbl/202_packet_json_new_3500'

# Run the split over every JSON file in input_dir
process_all_files(input_dir, output_dir)

Processing file: /home/nesl/202_project_hxy_cbl/202_packet_json_new/b3_p5_0_2.json
Saved b3_p5_10_0_2.json with 2029 records.
Saved b3_p5_11_0_2.json with 1705 records.
Saved b3_p5_12_0_2.json with 1779 records.
Saved b3_p5_13_0_2.json with 1706 records.
Saved b3_p5_14_0_2.json with 1710 records.
Saved b3_p5_15_0_2.json with 1621 records.
Saved b3_p5_16_0_2.json with 1783 records.
Saved b3_p5_17_0_2.json with 1655 records.
Saved b3_p5_18_0_2.json with 1748 records.
Saved b3_p5_19_0_2.json with 1778 records.
Saved b3_p5_20_0_2.json with 1883 records.
Saved b3_p5_21_0_2.json with 1732 records.
Saved b3_p5_22_0_2.json with 1804 records.
Saved b3_p5_23_0_2.json with 1680 records.
Saved b3_p5_24_0_2.json with 1716 records.
Saved b3_p5_25_0_2.json with 1573 records.
Saved b3_p5_26_0_2.json with 1552 records.
Saved b3_p5_27_0_2.json with 1638 records.
Saved b3_p5_28_0_2.json with 1754 records.
Saved b3_p5_29_0_2.json with 1717 records.
Saved b3_p5_30_0_2.json with 1770 records.
Saved b3_p5_31