In [5]:
import os
import numpy as np
import scipy.io as sio

# Configuration parameters
n_trials = 20  # maximum number of trials per subject (trial indices 0..n_trials-1 are looked up)
sampling_rate = 125  # sampling rate (Hz); used to convert sample counts to seconds
n_sub = 40  # total number of subjects (subject indices 0..n_sub-1 are scanned)

# Input and output directories
input_dir = r'Z:/qingzhu/AutoICA_Processed_EEG/AMIGOS'  # input directory holding the sub_<id> folders
output_dir = r'Z:/qingzhu/AutoICA_Processed_EEG/AMIGOS/Processed_MAT'  # output directory for the .mat files
os.makedirs(output_dir, exist_ok=True)  # create the output directory if it does not exist yet

def get_trial_shape(sub_id, trial_id):
    """
    Find the array shape to use for a trial that is missing for one subject.

    The other subjects are scanned in ascending index order, and the shape of
    the first existing file for the same trial index is returned.

    Args:
        sub_id: Index of the subject whose trial is missing; this subject is
            skipped during the search.
        trial_id: Index of the trial whose shape is needed.

    Returns:
        The shape tuple of the reference trial array.

    Raises:
        ValueError: If no other subject has a file for this trial index.
    """
    for other_sub_id in range(n_sub):
        if other_sub_id == sub_id:  # never use the current subject as its own reference
            continue
        trial_path = os.path.join(
            input_dir, f'sub_{other_sub_id}', f'eeg_sub_{other_sub_id}_trial_{trial_id}.npy'
        )
        if os.path.exists(trial_path):
            # Only the shape is needed, so memory-map the file instead of
            # reading the full sample data from disk.
            return np.load(trial_path, mmap_mode='r').shape
    # No subject provides this trial, so there is nothing to infer a shape from.
    raise ValueError(f"No valid trial data found for trial {trial_id}.")

def process_subject(sub_id):
    """
    Merge all trials of one subject along the time axis and write them to a .mat file.

    For every trial index in range(n_trials) the matching .npy file is loaded
    from the subject's folder. A trial whose file does not exist is replaced by
    standard-normal noise with the shape returned by get_trial_shape, and its
    index is recorded.

    The saved .mat file contains:
        merged_data_all_cleaned: all trials stacked horizontally, as double.
        merged_n_samples_one: a 1 x n_trials row with each trial's duration in
            seconds (second-axis length divided by sampling_rate), as double.
        missing_trials: int32 indices of the trials that were replaced by noise.

    Args:
        sub_id: Index of the subject to process.
    """
    subject_folder = os.path.join(input_dir, f'sub_{sub_id}')
    trial_arrays = []     # one array per trial, in trial order
    trial_durations = []  # duration of each trial in seconds
    absent_trials = []    # indices of trials that had no file on disk

    for trial_id in range(n_trials):
        npy_file = os.path.join(subject_folder, f'eeg_sub_{sub_id}_trial_{trial_id}.npy')

        if not os.path.exists(npy_file):
            # No recording for this trial: substitute Gaussian noise shaped like
            # the same trial from another subject.
            print(f"Missing trial detected: sub_{sub_id} trial_{trial_id}, replacing with noise.")
            noise_shape = get_trial_shape(sub_id, trial_id)
            samples = np.random.normal(loc=0, scale=1, size=noise_shape)
            absent_trials.append(trial_id)
        else:
            samples = np.load(npy_file)  # expected layout per original note: [n_channel, n_timepoints]

        trial_arrays.append(samples)
        # Second-axis length over the sampling rate gives the duration in seconds.
        trial_durations.append(samples.shape[1] / sampling_rate)

    # Stack the trials side by side (column-wise) into one long recording.
    eeg_matrix = np.hstack(trial_arrays)
    durations_row = np.array(trial_durations).reshape(1, -1)  # shape: [1, n_trials]

    # Assemble the payload and write the .mat file.
    output_file = os.path.join(output_dir, f'sub_{sub_id}_processed.mat')
    payload = {
        'merged_data_all_cleaned': eeg_matrix.astype(np.double),
        'merged_n_samples_one': durations_row.astype(np.double),
        'missing_trials': np.array(absent_trials, dtype=np.int32),
    }
    sio.savemat(output_file, payload)
    print(f"Processed sub_{sub_id} saved to {output_file}")

# Main loop: process every subject whose input folder exists
for sub_id in range(n_sub):
    sub_dir = os.path.join(input_dir, f'sub_{sub_id}')
    if not os.path.exists(sub_dir):
        # Nothing to process for this subject; report it and move on.
        print(f"Directory for sub_{sub_id} not found. Skipping.")
        continue
    process_subject(sub_id)


Processed sub_0 saved to Z:/qingzhu/AutoICA_Processed_EEG/AMIGOS/Processed_MAT\sub_0_processed.mat
Processed sub_1 saved to Z:/qingzhu/AutoICA_Processed_EEG/AMIGOS/Processed_MAT\sub_1_processed.mat
Processed sub_2 saved to Z:/qingzhu/AutoICA_Processed_EEG/AMIGOS/Processed_MAT\sub_2_processed.mat
Processed sub_3 saved to Z:/qingzhu/AutoICA_Processed_EEG/AMIGOS/Processed_MAT\sub_3_processed.mat
Processed sub_4 saved to Z:/qingzhu/AutoICA_Processed_EEG/AMIGOS/Processed_MAT\sub_4_processed.mat
Processed sub_5 saved to Z:/qingzhu/AutoICA_Processed_EEG/AMIGOS/Processed_MAT\sub_5_processed.mat
Processed sub_6 saved to Z:/qingzhu/AutoICA_Processed_EEG/AMIGOS/Processed_MAT\sub_6_processed.mat
Missing trial detected: sub_7 trial_16, replacing with noise.
Missing trial detected: sub_7 trial_17, replacing with noise.
Missing trial detected: sub_7 trial_18, replacing with noise.
Missing trial detected: sub_7 trial_19, replacing with noise.
Processed sub_7 saved to Z:/qingzhu/AutoICA_Processed_EEG/A