# 1. 将文件的脉冲和标记提取出来：raw data

In [10]:
import json
import os
import numpy as np
from datetime import datetime

# ================= Configuration =================
# Directory scanned for the input *.json recordings.
RAW_DIR = r"./raw"
# Directory that receives the generated dataset.
OUTPUT_DIR = r"./data"
# Full path of the .npy file written by process_raw_data().
OUTPUT_FILE = os.path.join(OUTPUT_DIR, 'raw_dataset_standardized.npy')

# Create the output directory up front; a no-op when it already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)
# =================================================

def _parse_invalid_channels(invalid_chord, channel_ids=range(1, 12)):
    """Return the channel numbers that ``invalid_chord`` marks as invalid.

    Accepted forms: an ``int`` (a single channel), or a string that is empty,
    ``"no"``, ``"all"`` (optionally followed by ``;``) or a ``;``-separated
    list of numbers. Full-width semicolons, surrounding whitespace and letter
    case are tolerated. Anything else yields an empty list.
    """
    # bool is a subclass of int, but True/False is never a channel number.
    if isinstance(invalid_chord, bool):
        return []
    if isinstance(invalid_chord, int):
        return [invalid_chord]
    if not isinstance(invalid_chord, str):
        return []

    text = invalid_chord.replace('；', ';').strip().lower()
    if text.rstrip(';').strip() == "all":
        return list(channel_ids)
    if text in ("", "no"):
        return []
    return [int(tok) for tok in text.split(';') if tok.strip().isdigit()]


def _zscore(values):
    """Z-score ``values``; a constant sequence is only mean-centred (all zeros)."""
    mean_val = np.mean(values)
    std_val = np.std(values)
    if std_val == 0:
        return values - mean_val
    # The small epsilon keeps the division well-behaved for tiny deviations.
    return (values - mean_val) / (std_val + 1e-6)


def process_raw_data(raw_dir=None, output_file=None):
    """Build the standardized raw-signal dataset from the JSON recordings.

    For every ``*.json`` file in ``raw_dir`` the function finds the first
    sample whose ``POINT_N_time`` value is non-negative, slices each channel
    ``POINT_N1`` .. ``POINT_N11`` from that index onward, z-scores the slice
    and labels it ``0`` when the channel number is listed in
    ``invalid_chord`` and ``1`` otherwise.

    The result is stored with ``np.save`` as a pickled dict::

        {"sequences": 1-D object array whose items are float32 arrays,
         "labels":    int64 array}

    and can be read back with ``np.load(path, allow_pickle=True).item()``.

    Args:
        raw_dir: Directory to scan. Defaults to the module-level ``RAW_DIR``.
        output_file: Destination ``.npy`` path. Defaults to the module-level
            ``OUTPUT_FILE``.

    Returns:
        None. Progress and a final summary are printed.
    """
    if raw_dir is None:
        raw_dir = RAW_DIR
    if output_file is None:
        output_file = OUTPUT_FILE

    start_time = datetime.now()
    print(f"开始处理原始数据: {start_time}")

    all_sequences = []  # one float32 array per (file, channel)
    all_labels = []     # matching 0/1 labels

    # Sort so that the sample order is reproducible across machines;
    # os.listdir returns entries in arbitrary order.
    files = sorted(f for f in os.listdir(raw_dir) if f.endswith('.json'))
    print(f"找到 {len(files)} 个 JSON 文件")

    for idx, filename in enumerate(files):
        file_path = os.path.join(raw_dir, filename)

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"读取失败 {filename}: {e}")
            continue

        if not isinstance(data, dict):
            print(f"读取失败 {filename}: 顶层结构不是 JSON 对象")
            continue

        # 1. Locate the first sample with t >= 0.
        X_time = np.array(data.get("POINT_N_time", []))
        if len(X_time) == 0:
            continue
        non_negative = np.flatnonzero(X_time >= 0)
        if non_negative.size == 0:
            continue
        first_valid_index = int(non_negative[0])

        # 2. Decode which channels are flagged invalid.
        invalid_ids = _parse_invalid_channels(data.get("invalid_chord", ""))

        # 3. Extract and standardize each of the 11 channels.
        for ch_i in range(1, 12):
            key = f'POINT_N{ch_i}'
            if key not in data:
                continue

            raw_seq = np.array(data[key])
            if len(raw_seq) <= first_valid_index:
                continue  # nothing left after dropping the t < 0 part

            normalized_seq = _zscore(raw_seq[first_valid_index:])

            all_sequences.append(normalized_seq.astype(np.float32))
            all_labels.append(0 if ch_i in invalid_ids else 1)

        if (idx + 1) % 10 == 0:
            print(f"已处理 {idx + 1}/{len(files)} 个文件...")

    print("正在保存数据...")
    # Fill a pre-allocated 1-D object array element by element.
    # np.array(list_of_arrays, dtype=object) would silently produce a 2-D
    # object array whenever every sequence happens to have the same length.
    sequences = np.empty(len(all_sequences), dtype=object)
    for pos, seq in enumerate(all_sequences):
        sequences[pos] = seq

    data_dict = {
        "sequences": sequences,
        "labels": np.array(all_labels, dtype=np.int64)
    }
    os.makedirs(os.path.dirname(output_file) or ".", exist_ok=True)
    np.save(output_file, data_dict, allow_pickle=True)

    end_time = datetime.now()
    print(f"处理完成。共保存 {len(all_labels)} 个样本。")
    print(f"保存路径: {output_file}")
    print(f"总耗时: {end_time - start_time}")

# Run the extraction when this cell/script is executed directly.
if __name__ == "__main__":
    process_raw_data()

开始处理原始数据: 2025-12-20 00:05:06.055400
找到 433 个 JSON 文件
已处理 10/433 个文件...
已处理 20/433 个文件...
已处理 30/433 个文件...
已处理 40/433 个文件...
已处理 50/433 个文件...
已处理 60/433 个文件...
已处理 70/433 个文件...
已处理 80/433 个文件...
已处理 90/433 个文件...
已处理 100/433 个文件...
已处理 110/433 个文件...
已处理 120/433 个文件...
已处理 130/433 个文件...
已处理 140/433 个文件...
已处理 150/433 个文件...
已处理 160/433 个文件...
已处理 170/433 个文件...
已处理 180/433 个文件...
已处理 190/433 个文件...
已处理 200/433 个文件...
已处理 210/433 个文件...
已处理 220/433 个文件...
已处理 230/433 个文件...
已处理 240/433 个文件...
已处理 250/433 个文件...
已处理 260/433 个文件...
已处理 270/433 个文件...
已处理 280/433 个文件...
已处理 290/433 个文件...
已处理 300/433 个文件...
已处理 310/433 个文件...
已处理 320/433 个文件...
已处理 330/433 个文件...
已处理 340/433 个文件...
已处理 350/433 个文件...
已处理 360/433 个文件...
已处理 370/433 个文件...
已处理 380/433 个文件...
已处理 390/433 个文件...
已处理 400/433 个文件...
已处理 410/433 个文件...
已处理 420/433 个文件...
已处理 430/433 个文件...
正在保存数据...
处理完成。共保存 4763 个样本。
保存路径: ./data/raw_dataset_standardized.npy
总耗时: 0:01:28.882942


# 2. 将文件的脉冲和标记提取出来：基于Koopman线性化手段

In [1]:
import json
import os
import numpy as np
from datetime import datetime

# Input directory holding the raw *.json recordings
directory_path = r"./raw"
# Output text file (one comma-separated line per sample)
result_path = r'./data/data_koopman_sequence.txt' 

start_time = datetime.now()
print("开始时间：", start_time)

# Make sure the output directory exists
os.makedirs(os.path.dirname(result_path), exist_ok=True)

# =======================================================
# Collect the list of JSON files up front so the total is known
# =======================================================
all_json_files = [f for f in os.listdir(directory_path) if f.endswith('.json')]
total_files_count = len(all_json_files)
print(f"--> 目录中检测到 JSON 文件总数: {total_files_count}")

# =======================================================
# Running count of samples written to the result file
# =======================================================
total_samples_generated = 0


def koopman(X, time_step=None):
    """Return the local maxima of a sliding-window linear-fit residual.

    Steps:

    1. Build the 11-row Hankel (delay-embedding) matrix
       ``hankel[k, l] = X[k + l]`` with ``len(X) - 10`` columns.
    2. Take its thin SVD and form ``v = diag(S[:r]) @ Vh[:r]`` (``r <= 11``).
    3. Differentiate every row of ``v`` with ``np.gradient`` (second-order
       edges) using ``time_step`` as the sample spacing.
    4. For each window of 120 consecutive columns, fit ``dv/dt ~ A @ v`` with
       the pseudo-inverse and record the mean row norm of the residual.
    5. Keep each residual value that is the first maximum of the 81-sample
       window centred on it.

    Args:
        X: 1-D sequence of numeric samples.
        time_step: Sample spacing for the derivative. When omitted, the
            module-level ``dt_global`` set by the main loop is used, which
            keeps existing ``koopman(series)`` calls working.

    Returns:
        list: The local-maximum residual values (possibly empty). An empty
        list is also returned when ``X`` has 130 samples or fewer, or when
        the SVD does not converge.
    """
    singular_values_count = 11
    step = 120
    window_size = 40

    series = np.asarray(X, dtype=float)
    embedding_dimension = len(series) - 10

    # Bail out before allocating anything: with no complete window there is
    # nothing to compute, and np.zeros/np.gradient would raise on tiny inputs.
    limit = embedding_dimension - step
    if limit <= 0:
        return []

    spacing = dt_global if time_step is None else time_step

    # Vectorised Hankel construction (delay = 1, always 11 rows).
    num_rows = len(series) - (embedding_dimension - 1)
    index_grid = np.arange(num_rows)[:, None] + np.arange(embedding_dimension)[None, :]
    hankel = series[index_grid]

    try:
        _, S, Vh = np.linalg.svd(hankel, full_matrices=False)
    except np.linalg.LinAlgError:
        return []

    rank = min(singular_values_count, len(S))
    v = S[:rank, None] * Vh[:rank, :]
    dv_dt = np.gradient(v, spacing, axis=1, edge_order=2)

    row_norms_list = []
    for start in range(limit):
        end = start + step
        v_t = dv_dt[:, start:end]
        vv = v[:, start:end]
        try:
            A = v_t @ np.linalg.pinv(vv)
        except np.linalg.LinAlgError:
            # pinv relies on an SVD that may fail to converge; skip the window.
            continue
        residual = v_t - A @ vv
        row_norms_list.append(np.mean(np.linalg.norm(residual, axis=1)))

    row_norms_list = np.asarray(row_norms_list)
    local_maxima_values = []
    # The range is empty when fewer than 2 * window_size + 1 values exist.
    for centre in range(window_size, len(row_norms_list) - window_size):
        window = row_norms_list[centre - window_size:centre + window_size + 1]
        if np.argmax(window) == window_size:
            local_maxima_values.append(window[window_size])

    return local_maxima_values


# =======================================================
# 主循环 (修改为使用 enumerate 遍历文件列表)
# =======================================================
def _invalid_channels(invalid_chord):
    """Return the channel numbers flagged invalid by ``invalid_chord``.

    Accepts an int, or a string that is empty, "no", "all" (optionally
    followed by ';') or a ';'-separated list of numbers. Full-width
    semicolons, surrounding whitespace and letter case are tolerated.
    """
    if isinstance(invalid_chord, bool):  # bool is an int subclass, never a channel id
        return []
    if isinstance(invalid_chord, int):
        return [invalid_chord]
    if not isinstance(invalid_chord, str):
        return []
    text = invalid_chord.replace('；', ';').strip().lower()
    if text.rstrip(';').strip() == "all":
        return list(range(1, 12))
    if text in ("", "no"):
        return []
    return [int(part) for part in text.split(';') if part.strip().isdigit()]


# Keeps the final summary valid even when no JSON file was found.
idx = 0

# Open the result file once, in write mode. The previous per-sample append
# duplicated every row whenever the cell was re-run.
with open(result_path, 'w', encoding='utf-8') as result_file:
    for idx, filename in enumerate(all_json_files, 1):  # idx counts from 1

        file_start_time = datetime.now()
        print(f"[{idx}/{total_files_count}] 正在处理: {filename} ...")

        data_path = os.path.join(directory_path, filename)

        try:
            with open(data_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except Exception as e:
            print(f"  [Error] 读取文件 {filename} 失败: {e}")
            continue

        if not isinstance(data, dict):
            print(f"  [Skip] 跳过 {filename}: 无有效时间数据")
            continue

        X_time = np.array(data.get("POINT_N_time", []))
        first_valid_index = next(
            (index for index, value in enumerate(X_time) if value >= 0), None
        )
        if first_valid_index is None:
            print(f"  [Skip] 跳过 {filename}: 无有效时间数据")
            continue

        X_time_valid = X_time[first_valid_index:]
        # koopman() reads this module-level value as its sample spacing, so it
        # must be assigned before the per-channel calls below.
        if len(X_time_valid) > 1:
            dt_global = X_time_valid[1] - X_time_valid[0]
        else:
            dt_global = 1.0

        invalid_chord_index = _invalid_channels(data.get("invalid_chord", ""))

        samples_in_this_file = 0  # samples written for the current file

        for i in range(1, 12):
            key = f'POINT_N{i}'
            label = 0 if i in invalid_chord_index else 1

            if key not in data or len(data[key]) <= first_valid_index:
                continue

            raw_series = data[key][first_valid_index:]
            try:
                koopman_sequence = koopman(raw_series)
            except Exception as e:
                # Best effort: one bad channel must not abort the whole run.
                print(f"  [Warning] 通道 {i} 处理出错: {e}")
                continue

            if len(koopman_sequence) > 0:
                sequence_str = ",".join(map(str, koopman_sequence))
                result_file.write(f"{sequence_str},{label}\n")

                total_samples_generated += 1
                samples_in_this_file += 1

        # Push this file's rows to disk so an interrupted run keeps them.
        result_file.flush()

        file_end_time = datetime.now()
        print(f"  -> 完成。耗时: {file_end_time - file_start_time}, 本文件产出样本: {samples_in_this_file}")

# =======================================================
# Final summary
# =======================================================
end_time = datetime.now()
total_duration = end_time - start_time

print("\n" + "="*50)
print("程序运行结束")
print(f"结束时间: {end_time}")
print(f"总运行时间: {total_duration}")
print(f"处理文件总数: {idx}/{total_files_count}")
print(f"生成样本总数: {total_samples_generated}")
print(f"结果已保存至: {result_path}")
print("="*50)

开始时间： 2025-12-21 12:40:30.647976
--> 目录中检测到 JSON 文件总数: 433
[1/433] 正在处理: POINTdata_137557.json ...
  -> 完成。耗时: 0:00:11.545538, 本文件产出样本: 11
[2/433] 正在处理: POINTdata_78016.json ...
  -> 完成。耗时: 0:00:10.032780, 本文件产出样本: 11
[3/433] 正在处理: POINTdata_137691.json ...
  -> 完成。耗时: 0:00:10.134841, 本文件产出样本: 11
[4/433] 正在处理: POINTdata_100018.json ...
  -> 完成。耗时: 0:00:09.922386, 本文件产出样本: 11
[5/433] 正在处理: POINTdata_106965.json ...
  -> 完成。耗时: 0:00:13.767173, 本文件产出样本: 11
[6/433] 正在处理: POINTdata_106318.json ...
  -> 完成。耗时: 0:00:11.464081, 本文件产出样本: 11
[7/433] 正在处理: POINTdata_106461.json ...
  -> 完成。耗时: 0:00:11.256428, 本文件产出样本: 11
[8/433] 正在处理: POINTdata_77971.json ...
  -> 完成。耗时: 0:00:10.093045, 本文件产出样本: 11
[9/433] 正在处理: POINTdata_106477.json ...
  -> 完成。耗时: 0:00:13.755806, 本文件产出样本: 11
[10/433] 正在处理: POINTdata_77967.json ...
  -> 完成。耗时: 0:00:10.156382, 本文件产出样本: 11
[11/433] 正在处理: POINTdata_79580.json ...
  -> 完成。耗时: 0:00:09.851890, 本文件产出样本: 11
[12/433] 正在处理: POINTdata_106498.json ...
  -> 完成。耗时: 0:00:13.79

# 3. 基于Koopman并只提取几个特征

In [None]:
import json
import os
import numpy as np
from datetime import datetime
from scipy.stats import skew, kurtosis
from sklearn.linear_model import LinearRegression

# Input directory holding the raw *.json recordings
directory_path = r"./raw"
# (disabled) alternative per-channel feature dump:
# features_path = r'D:\\Ising\\Ising-CiShuJu\\马金源新代码-20250407\\代码\\result\\result_koopman_3.txt'
# Output text file: one line of summary statistics plus label per channel
result_path = r'./data/data_koopman_6.txt'


start_time = datetime.now()
print("开始时间：", start_time)

def koopman(X, time_step=None):
    """Return the local maxima of a sliding-window linear-fit residual.

    Steps:

    1. Build the 11-row Hankel (delay-embedding) matrix
       ``hankel[k, l] = X[k + l]`` with ``len(X) - 10`` columns.
    2. Take its thin SVD and form ``v = diag(S[:r]) @ Vh[:r]`` (``r <= 11``).
    3. Differentiate every row of ``v`` with ``np.gradient`` (second-order
       edges) using ``time_step`` as the sample spacing.
    4. For each window of 120 consecutive columns, fit ``dv/dt ~ A @ v`` with
       the pseudo-inverse and record the mean row norm of the residual.
    5. Keep each residual value that is the first maximum of the 81-sample
       window centred on it.

    The embedding size is derived from ``len(X)`` rather than from the
    module-level ``num``, so a channel whose length differs from the time
    axis can no longer index out of bounds.

    Args:
        X: 1-D sequence of numeric samples.
        time_step: Sample spacing for the derivative. When omitted, the
            module-level ``dt`` set by the main loop is used, which keeps
            existing ``koopman(series)`` calls working.

    Returns:
        list: The local-maximum residual values (possibly empty). An empty
        list is also returned when ``X`` has 130 samples or fewer, or when
        the SVD does not converge.
    """
    singular_values_count = 11
    step = 120
    window_size = 40

    series = np.asarray(X, dtype=float)
    embedding_dimension = len(series) - 10

    # Bail out before allocating anything: with no complete window there is
    # nothing to compute, and np.zeros/np.gradient would raise on tiny inputs.
    limit = embedding_dimension - step
    if limit <= 0:
        return []

    spacing = dt if time_step is None else time_step

    # Vectorised Hankel construction (delay = 1, always 11 rows).
    num_rows = len(series) - (embedding_dimension - 1)
    index_grid = np.arange(num_rows)[:, None] + np.arange(embedding_dimension)[None, :]
    hankel = series[index_grid]

    try:
        # Thin SVD: only the leading rows of Vh are used, so the full
        # (n-10) x (n-10) right factor is never needed.
        _, S, Vh = np.linalg.svd(hankel, full_matrices=False)
    except np.linalg.LinAlgError:
        return []

    rank = min(singular_values_count, len(S))
    v = S[:rank, None] * Vh[:rank, :]
    dv_dt = np.gradient(v, spacing, axis=1, edge_order=2)

    row_norms_list = []
    for start in range(limit):
        end = start + step
        v_t = dv_dt[:, start:end]
        vv = v[:, start:end]
        try:
            A = v_t @ np.linalg.pinv(vv)
        except np.linalg.LinAlgError:
            # pinv relies on an SVD that may fail to converge; skip the window.
            continue
        residual = v_t - A @ vv
        row_norms_list.append(np.mean(np.linalg.norm(residual, axis=1)))

    row_norms_list = np.asarray(row_norms_list)
    local_maxima_values = []
    # The range is empty when fewer than 2 * window_size + 1 values exist.
    for centre in range(window_size, len(row_norms_list) - window_size):
        window = row_norms_list[centre - window_size:centre + window_size + 1]
        if np.argmax(window) == window_size:
            local_maxima_values.append(window[window_size])

    return local_maxima_values


def _invalid_channels(invalid_chord):
    """Return the channel numbers flagged invalid by ``invalid_chord``.

    Accepts an int, or a string that is empty, "no", "all" (optionally
    followed by ';') or a ';'-separated list of numbers. Full-width
    semicolons, surrounding whitespace and letter case are tolerated.
    """
    if isinstance(invalid_chord, bool):  # bool is an int subclass, never a channel id
        return []
    if isinstance(invalid_chord, int):
        return [invalid_chord]
    if not isinstance(invalid_chord, str):
        return []
    text = invalid_chord.replace('；', ';').strip().lower()
    if text.rstrip(';').strip() == "all":
        return list(range(1, 12))
    if text in ("", "no"):
        return []
    return [int(part) for part in text.split(';') if part.strip().isdigit()]


# The output directory may not exist yet on a fresh checkout.
os.makedirs(os.path.dirname(result_path), exist_ok=True)

# Open the result file once, in write mode. The previous per-channel append
# duplicated every row whenever the cell was re-run.
with open(result_path, 'w', encoding='utf-8') as result_file:
    # Sorted for a reproducible row order; os.listdir order is arbitrary.
    for filename in sorted(os.listdir(directory_path)):
        file_time = datetime.now()
        if not filename.endswith('.json'):
            continue

        print("一个文件开始：", file_time)
        data_path = os.path.join(directory_path, filename)
        try:
            with open(data_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except Exception as e:
            print(f"  [Error] 读取文件失败: {e}")
            continue

        if not isinstance(data, dict):
            continue

        X_time = np.array(data.get("POINT_N_time", []))
        first_valid_index = next(
            (index for index, value in enumerate(X_time) if value >= 0), None
        )
        if first_valid_index is None:
            continue  # no sample with t >= 0

        X_time = X_time[first_valid_index:]
        # koopman() reads num, t and dt from module scope, so they must be
        # assigned before the per-channel calls below.
        num = len(X_time)
        if num < 2:
            continue  # a spacing cannot be derived from a single sample
        t = np.linspace(X_time[0], X_time[-1], num)
        dt = t[1] - t[0]

        invalid_chord_index = _invalid_channels(data.get("invalid_chord", ""))

        for i in range(1, 12):
            key = f'POINT_N{i}'
            label = 0 if i in invalid_chord_index else 1

            if key not in data or len(data[key]) <= first_valid_index:
                continue

            try:
                koopman_data = koopman(data[key][first_valid_index:])
            except Exception as e:
                # Best effort: one bad channel must not abort the whole run.
                print(f"  [Warning] 通道 {i} 处理出错: {e}")
                continue

            if len(koopman_data) == 0:
                continue  # np.max / np.min would raise on an empty sequence

            row = [
                np.mean(koopman_data),
                np.std(koopman_data),
                np.min(koopman_data),
                np.max(koopman_data),
                skew(koopman_data),
                kurtosis(koopman_data),
                label,
            ]
            # str() per value (not str(list)) so NumPy scalars are written as
            # plain numbers instead of their repr.
            result_file.write(','.join(str(x) for x in row) + '\n')

        # Push this file's rows to disk so an interrupted run keeps them.
        result_file.flush()

        filend_time = datetime.now()
        print("一个文件结束：", filend_time)
        print("一个文件的运行时间为：", filend_time - file_time)

end_time = datetime.now()
print("程序结束：", end_time)
print("程序运行时间", end_time - start_time)

In [2]:
import json
import os
import numpy as np
from datetime import datetime
from scipy.stats import skew, kurtosis
# from sklearn.linear_model import LinearRegression # 未使用，暂时注释掉以保持整洁

# Input directory holding the raw *.json recordings
directory_path = r"./raw"
# features_path = r'...' # (disabled) kept from the original code
# Output text file: one line of summary statistics plus label per channel
result_path = r'./data/data_koopman_6.txt'

start_time = datetime.now()
print("开始时间：", start_time)

# Make sure the output directory exists
os.makedirs(os.path.dirname(result_path), exist_ok=True)

# =======================================================
# Collect the list of JSON files up front so progress can be reported
# =======================================================
all_files = [f for f in os.listdir(directory_path) if f.endswith('.json')]
total_files_count = len(all_files)
print(f"目录中检测到 JSON 文件总数: {total_files_count}")

# =======================================================
# Running count of samples written to the result file
# =======================================================
total_samples_generated = 0


def koopman(X, time_step=None):
    """Return the local maxima of a sliding-window linear-fit residual.

    Steps:

    1. Build the 11-row Hankel (delay-embedding) matrix
       ``hankel[k, l] = X[k + l]`` with ``len(X) - 10`` columns.
    2. Take its thin SVD and form ``v = diag(S[:r]) @ Vh[:r]`` (``r <= 11``).
    3. Differentiate every row of ``v`` with ``np.gradient`` (second-order
       edges) using ``time_step`` as the sample spacing.
    4. For each window of 120 consecutive columns, fit ``dv/dt ~ A @ v`` with
       the pseudo-inverse and record the mean row norm of the residual.
    5. Keep each residual value that is the first maximum of the 81-sample
       window centred on it.

    Args:
        X: 1-D sequence of numeric samples.
        time_step: Sample spacing for the derivative. When omitted, the
            module-level ``dt`` set by the main loop is used, which keeps
            existing ``koopman(series)`` calls working.

    Returns:
        list: The local-maximum residual values (possibly empty). An empty
        list is also returned when ``X`` has 130 samples or fewer, or when
        the SVD does not converge.
    """
    singular_values_count = 11
    step = 120
    window_size = 40

    series = np.asarray(X, dtype=float)
    embedding_dimension = len(series) - 10

    # Bail out before allocating anything: with no complete window there is
    # nothing to compute, and np.zeros/np.gradient would raise on tiny inputs.
    limit = embedding_dimension - step
    if limit <= 0:
        return []

    spacing = dt if time_step is None else time_step

    # Vectorised Hankel construction (delay = 1, always 11 rows).
    num_rows = len(series) - (embedding_dimension - 1)
    index_grid = np.arange(num_rows)[:, None] + np.arange(embedding_dimension)[None, :]
    hankel = series[index_grid]

    try:
        _, S, Vh = np.linalg.svd(hankel, full_matrices=False)
    except np.linalg.LinAlgError:
        return []

    rank = min(singular_values_count, len(S))
    v = S[:rank, None] * Vh[:rank, :]
    dv_dt = np.gradient(v, spacing, axis=1, edge_order=2)

    row_norms_list = []
    for start in range(limit):
        end = start + step
        v_t = dv_dt[:, start:end]
        vv = v[:, start:end]
        try:
            A = v_t @ np.linalg.pinv(vv)
        except np.linalg.LinAlgError:
            # pinv relies on an SVD that may fail to converge; skip the window.
            continue
        residual = v_t - A @ vv
        row_norms_list.append(np.mean(np.linalg.norm(residual, axis=1)))

    row_norms_list = np.asarray(row_norms_list)
    local_maxima_values = []
    # The range is empty when fewer than 2 * window_size + 1 values exist.
    for centre in range(window_size, len(row_norms_list) - window_size):
        window = row_norms_list[centre - window_size:centre + window_size + 1]
        if np.argmax(window) == window_size:
            local_maxima_values.append(window[window_size])

    return local_maxima_values


# =======================================================
# 主循环 (使用 enumerate 显示进度)
# =======================================================
def _invalid_channels(invalid_chord):
    """Return the channel numbers flagged invalid by ``invalid_chord``.

    Accepts an int, or a string that is empty, "no", "all" (optionally
    followed by ';') or a ';'-separated list of numbers. Full-width
    semicolons, surrounding whitespace and letter case are tolerated.
    """
    if isinstance(invalid_chord, bool):  # bool is an int subclass, never a channel id
        return []
    if isinstance(invalid_chord, int):
        return [invalid_chord]
    if not isinstance(invalid_chord, str):
        return []
    text = invalid_chord.replace('；', ';').strip().lower()
    if text.rstrip(';').strip() == "all":
        return list(range(1, 12))
    if text in ("", "no"):
        return []
    return [int(part) for part in text.split(';') if part.strip().isdigit()]


# Open the result file once, in write mode. The previous per-channel append
# duplicated every row whenever the cell was re-run.
with open(result_path, 'w', encoding='utf-8') as result_file:
    for idx, filename in enumerate(all_files, 1):  # idx counts from 1

        file_time = datetime.now()
        print(f"[{idx}/{total_files_count}] 正在处理文件：{filename} (开始时间: {file_time})")

        data_path = os.path.join(directory_path, filename)

        try:
            with open(data_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except Exception as e:
            print(f"  [Error] 读取文件失败: {e}")
            continue

        if not isinstance(data, dict):
            continue

        X_time = np.array(data.get("POINT_N_time", []))
        if len(X_time) == 0:
            continue

        first_valid_index = next(
            (index for index, value in enumerate(X_time) if value >= 0), None
        )
        if first_valid_index is None:
            continue  # no sample with t >= 0

        X_time = X_time[first_valid_index:]
        num = len(X_time)
        if num < 2:
            continue  # a spacing cannot be derived from a single sample

        # koopman() reads dt from module scope, so it must be assigned
        # before the per-channel calls below.
        t = np.linspace(X_time[0], X_time[-1], num)
        dt = t[1] - t[0]

        invalid_chord_index = _invalid_channels(data.get("invalid_chord", ""))

        samples_in_this_file = 0  # samples written for the current file

        for i in range(1, 12):
            key = f'POINT_N{i}'
            label = 0 if i in invalid_chord_index else 1

            if key not in data or len(data[key]) <= first_valid_index:
                continue

            try:
                raw_data = data[key][first_valid_index:]
                # Too short for the delay embedding to be meaningful.
                if len(raw_data) < 20:
                    continue

                koopman_data = koopman(raw_data)
                if len(koopman_data) == 0:
                    continue

                row = [
                    np.mean(koopman_data),
                    np.std(koopman_data),
                    np.min(koopman_data),
                    np.max(koopman_data),
                    skew(koopman_data),
                    kurtosis(koopman_data),
                    label,
                ]
            except Exception as e:
                # Best effort: one bad channel must not abort the whole run.
                print(f"  [Warning] 通道 {i} 处理出错: {e}")
                continue

            result_file.write(','.join(str(x) for x in row) + '\n')
            total_samples_generated += 1
            samples_in_this_file += 1

        # Push this file's rows to disk so an interrupted run keeps them.
        result_file.flush()

        filend_time = datetime.now()
        print(f"  -> 文件结束。耗时：{filend_time - file_time}。本文件产出样本数：{samples_in_this_file}")

# =======================================================
# Final summary
# =======================================================
end_time = datetime.now()
total_duration = end_time - start_time

print("\n" + "="*50)
print("程序运行结束")
print(f"结束时间: {end_time}")
print(f"总运行时间: {total_duration}")
print(f"处理文件总数: {total_files_count}")
print(f"生成样本总数: {total_samples_generated}")
print(f"结果保存路径: {result_path}")
print("="*50)

开始时间： 2025-12-21 15:03:13.429808
目录中检测到 JSON 文件总数: 433
[1/433] 正在处理文件：POINTdata_137557.json (开始时间: 2025-12-21 15:03:13.432742)
  -> 文件结束。耗时：0:00:11.427501。本文件产出样本数：11
[2/433] 正在处理文件：POINTdata_78016.json (开始时间: 2025-12-21 15:03:24.860312)
  -> 文件结束。耗时：0:00:09.823003。本文件产出样本数：11
[3/433] 正在处理文件：POINTdata_137691.json (开始时间: 2025-12-21 15:03:34.683378)
  -> 文件结束。耗时：0:00:09.913073。本文件产出样本数：11
[4/433] 正在处理文件：POINTdata_100018.json (开始时间: 2025-12-21 15:03:44.596518)
  -> 文件结束。耗时：0:00:10.116039。本文件产出样本数：11
[5/433] 正在处理文件：POINTdata_106965.json (开始时间: 2025-12-21 15:03:54.712624)
  -> 文件结束。耗时：0:00:13.592872。本文件产出样本数：11
[6/433] 正在处理文件：POINTdata_106318.json (开始时间: 2025-12-21 15:04:08.305561)
  -> 文件结束。耗时：0:00:11.188843。本文件产出样本数：11
[7/433] 正在处理文件：POINTdata_106461.json (开始时间: 2025-12-21 15:04:19.494474)
  -> 文件结束。耗时：0:00:11.153164。本文件产出样本数：11
[8/433] 正在处理文件：POINTdata_77971.json (开始时间: 2025-12-21 15:04:30.647699)
  -> 文件结束。耗时：0:00:09.798153。本文件产出样本数：11
[9/433] 正在处理文件：POINTdata_106477.json (开始时间: 2025-12