In [1]:
import os
import random
from matplotlib import axis
import scipy.io as sio
import numpy as np
import math
import mne
import nolds
import joblib
import torch
from torch.utils.data import TensorDataset
from sklearn.model_selection import KFold, train_test_split
import model as dl  # Ensure this module contains necessary utility functions
import logging
# Configure the root logger: emit INFO and above, each record prefixed with
# a timestamp and the level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

In [2]:
def ensure_dir(directory):
    """Create ``directory`` (and any missing parents) if it does not exist.

    Calling this on a directory that already exists is a no-op, so the
    function is safe to call repeatedly.

    Args:
        directory: Path of the directory to create.

    Raises:
        FileExistsError: If ``directory`` exists but is not a directory.
        OSError: If the directory cannot be created.
    """
    # exist_ok=True avoids the check-then-create race of
    # `if not exists: makedirs`, where another process can create the
    # directory between the two calls.
    os.makedirs(directory, exist_ok=True)


In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # use the GPU when CUDA is available, otherwise the CPU
seed = 34
# NOTE(review): seed_everything lives in the project-local `model` module;
# presumably it seeds the Python/NumPy/torch RNGs — confirm there.
dl.seed_everything(seed)

# EEG data parameters
# Number of samples per epoch; passed as `duration` to importAndCropData.
duration = 1250


In [4]:
def importAndCropData(file_paths, duration, labels, data_type='preprocessed', n_channels=19):
    """Load EEG recordings and cut each one into fixed-length epochs.

    For every file the first ``n_channels`` rows of the signal matrix are
    kept, the trailing samples that do not fill a whole epoch are dropped,
    and the remainder is split into consecutive, non-overlapping epochs of
    ``duration`` samples. Files that cannot be read or that fail validation
    are logged and skipped (best-effort loading).

    Args:
        file_paths: Sequence of paths to the recordings.
        duration: Epoch length in samples (must be >= 1).
        labels: Sequence with one label per entry of ``file_paths``; the
            label of a file is repeated for each of its epochs.
        data_type: ``'raw'`` reads EDF files through ``mne``; any other
            value reads a MATLAB ``.mat`` file expected to contain the keys
            ``'data'`` and ``'srate_function'``.
        n_channels: Number of leading channels to keep from each recording.

    Returns:
        Tuple ``(EEG, label, srate)``:
        ``EEG`` is an array of shape ``(total_epochs, n_channels, duration)``,
        ``label`` is an array of length ``total_epochs``, and ``srate`` is
        the sampling rate read from the last successfully processed file.

    Raises:
        ValueError: If ``duration`` is smaller than 1 or if no file yielded
            any epoch.
    """
    if duration < 1:
        raise ValueError(f"duration must be a positive number of samples, got {duration}")

    EEG_list = []
    label = []
    srate = None
    for i, file in enumerate(file_paths):
        try:
            if data_type == 'raw':
                raw = mne.io.read_raw_edf(file, preload=True, encoding='latin1', verbose='Warning')
                # No band-limiting filter is applied here (an optional
                # raw.filter(l_freq=None, h_freq=70) step is left disabled).
                data = raw.get_data()[:n_channels]
                file_srate = raw.info.get('sfreq')
            else:
                raw = sio.loadmat(file, uint16_codec='latin1')
                data = raw.get('data')
                if data is None:
                    raise ValueError(f"'data' array not found in {file}")
                data = data[:n_channels]
                file_srate = raw.get('srate_function', [None])[0]
                if file_srate is None:
                    raise ValueError(f"Sampling rate not found in {file}")

            # Reject recordings with too few channels explicitly: reshaping
            # them to n_channels rows would either fail or silently mix
            # samples from different channels.
            if data.shape[0] < n_channels:
                raise ValueError(
                    f"expected at least {n_channels} channels, found {data.shape[0]}"
                )

            epochs = data.shape[1] // duration
            if epochs < 1:
                # Shorter than a single epoch: nothing usable in this file.
                logging.warning(
                    f"Skipping file {file}: {data.shape[1]} samples is shorter than one epoch ({duration})"
                )
                continue

            # (channels, epochs * duration) -> (epochs, channels, duration)
            data_new = (
                data[:, :epochs * duration]
                .reshape(n_channels, epochs, duration)
                .transpose(1, 0, 2)
            )

            # Commit data, labels and sampling rate together, only after
            # every step above succeeded, so they can never get out of sync.
            EEG_list.append(data_new)
            label += [labels[i]] * epochs
            srate = file_srate

            logging.info(f"Processed file {file}: {epochs} epochs")
        except Exception as e:
            # Best-effort loading: report the problem and move on.
            logging.error(f"Error processing file {file}: {e}")
            continue

    if not EEG_list:
        raise ValueError("No data was loaded. Please check the file paths and formats.")

    EEG = np.concatenate(EEG_list)
    label = np.array(label)
    # Label convention used by the loading cell of this notebook:
    # 0 = cognitively normal, 1 = cognitively impaired (MCI).
    logging.info(f"Total epochs: {EEG.shape[0]}, Normal: {np.sum(label == 0)}, "
            f"MCI: {np.sum(label == 1)}")
    return EEG, label, srate

In [5]:
import warnings

# Suppress RuntimeWarning for the rest of the session.
# NOTE(review): this hides every RuntimeWarning, not only those raised while
# reading the EDF files — consider narrowing it with a `module=`/`message=`
# filter once the offending warning is identified.
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Dataset folders: one sub-folder per group.
base_dir = '糖尿病认知障碍与对照脑电数据'
normal_dir = os.path.join(base_dir, '认知正常')
impaired_dir = os.path.join(base_dir, '认知障碍')

# Collect the EDF recordings of each group. os.listdir returns entries in
# arbitrary order, so sort them: the sample order (and therefore every
# index-based split made later) is then the same on every run and machine.
normal_files = sorted(
    os.path.join(normal_dir, f) for f in os.listdir(normal_dir) if f.endswith('.edf')
)
impaired_files = sorted(
    os.path.join(impaired_dir, f) for f in os.listdir(impaired_dir) if f.endswith('.edf')
)

all_files = normal_files + impaired_files

# One integer class label per file: 0 = cognitively normal, 1 = impaired.
label_single = np.concatenate(
    [np.zeros(len(normal_files), dtype=int), np.ones(len(impaired_files), dtype=int)],
    axis=0,
)
original_data, labels, srate = importAndCropData(all_files, duration, label_single, data_type='raw')

2024-11-05 11:55:51,521 - INFO - Processed file 糖尿病认知障碍与对照脑电数据/认知正常/张连荣.edf: 192 epochs
2024-11-05 11:55:51,742 - INFO - Processed file 糖尿病认知障碍与对照脑电数据/认知正常/王淑艳.edf: 238 epochs
2024-11-05 11:55:51,924 - INFO - Processed file 糖尿病认知障碍与对照脑电数据/认知正常/沈玉琴.edf: 192 epochs
2024-11-05 11:55:52,114 - INFO - Processed file 糖尿病认知障碍与对照脑电数据/认知正常/闫素军.edf: 207 epochs
2024-11-05 11:55:52,335 - INFO - Processed file 糖尿病认知障碍与对照脑电数据/认知正常/杨忠丽.edf: 240 epochs
2024-11-05 11:55:52,515 - INFO - Processed file 糖尿病认知障碍与对照脑电数据/认知正常/由立芹.edf: 193 epochs
2024-11-05 11:55:52,704 - INFO - Processed file 糖尿病认知障碍与对照脑电数据/认知正常/戴照同.edf: 204 epochs
2024-11-05 11:55:52,888 - INFO - Processed file 糖尿病认知障碍与对照脑电数据/认知正常/龚守利.edf: 194 epochs
2024-11-05 11:55:53,011 - INFO - Processed file 糖尿病认知障碍与对照脑电数据/认知正常/王秀芝.edf: 132 epochs
2024-11-05 11:55:53,197 - INFO - Processed file 糖尿病认知障碍与对照脑电数据/认知正常/王殿来.edf: 207 epochs
2024-11-05 11:55:53,407 - INFO - Processed file 糖尿病认知障碍与对照脑电数据/认知正常/朱荣.edf: 227 epochs
2024-11-05 11:55:53,627 - INFO - 

In [None]:
import numpy as np
import nolds

def multiscale_entropy(data, scales=20):
    """Compute the multiscale entropy of a 1-D time series.

    For each scale ``tau`` in ``1..scales`` the series is coarse-grained by
    averaging consecutive, non-overlapping windows of ``tau`` samples
    (a trailing partial window is dropped), and the sample entropy of the
    coarse-grained series is computed with ``nolds.sampen``.

    Args:
        data: 1-D array-like time series of any length.
        scales: Number of scales to evaluate.

    Returns:
        List of ``scales`` values, one per scale. A scale whose
        coarse-grained series has two or fewer points yields ``np.nan``.
    """
    series = np.asarray(data)
    mse_values = []
    for tau in range(1, scales + 1):
        n_windows = len(series) // tau
        if n_windows > 2:  # sample entropy needs more than two points
            # Vectorised coarse-graining: view the usable prefix as
            # (n_windows, tau) and average each row.
            coarse_grained_series = (
                series[:n_windows * tau].reshape(n_windows, tau).mean(axis=1)
            )
            mse_values.append(nolds.sampen(coarse_grained_series))
        else:
            # Too few points at this scale: record NaN as a placeholder.
            mse_values.append(np.nan)
    return mse_values

# Multiscale-entropy features: one vector of N_SCALES values per
# (epoch, channel) pair.
N_SCALES = 20

# Take the loop bounds from the loaded array instead of hard-coding them, so
# the cell works for whatever number of epochs/channels was actually loaded.
n_samples, n_channels = original_data.shape[:2]
compressed_data = np.zeros((n_samples, n_channels, N_SCALES))

for sample in range(n_samples):
    for channel in range(n_channels):
        # Time series of a single channel of a single epoch.
        time_series = original_data[sample, channel, :]
        compressed_data[sample, channel, :] = multiscale_entropy(time_series, scales=N_SCALES)
    # Report progress per batch of epochs rather than once per channel,
    # which would flood the notebook output.
    if (sample + 1) % 100 == 0 or sample + 1 == n_samples:
        logging.info(f"Multiscale entropy computed for {sample + 1}/{n_samples} epochs")

print("计算完成的多尺度熵数据形状：", compressed_data.shape)


In [None]:
import torch
import numpy as np
import nolds

def multiscale_entropy(data, scales=20):
    """Compute the multiscale entropy of a 1-D series held in a torch tensor.

    Note that this redefines (and therefore shadows) the NumPy function of
    the same name defined in the previous cell.

    For each scale ``tau`` in ``1..scales`` the series is coarse-grained by
    averaging consecutive, non-overlapping windows of ``tau`` samples
    (a trailing partial window is dropped). The coarse-grained series is
    copied to the CPU and its sample entropy is computed with
    ``nolds.sampen``.

    Args:
        data: 1-D floating-point tensor (or array-like) on any device.
        scales: Number of scales to evaluate.

    Returns:
        List of ``scales`` values, one per scale. A scale whose
        coarse-grained series has two or fewer points yields ``np.nan``.
    """
    series = torch.as_tensor(data)
    n_points = series.shape[0]
    mse_values = []
    for tau in range(1, scales + 1):
        n_windows = n_points // tau
        if n_windows > 2:
            # One reshape + mean per scale instead of one tiny tensor op per
            # window.
            coarse_grained_series = (
                series[:n_windows * tau].reshape(n_windows, tau).mean(dim=1)
            )
            # nolds works on NumPy arrays, so bring the result to the CPU.
            mse_values.append(nolds.sampen(coarse_grained_series.detach().cpu().numpy()))
        else:
            # Covers series shorter than tau as well, where stacking an
            # empty list of window means would raise.
            mse_values.append(np.nan)
    return mse_values

# Multiscale-entropy features computed from a torch copy of the data.
N_SCALES = 20

# Work on a separate tensor so `original_data` keeps its original type;
# overwriting it would break later cells that index it with NumPy index
# arrays and would make this cell non-idempotent. `device` falls back to the
# CPU when CUDA is not available.
eeg_tensor = torch.as_tensor(original_data, dtype=torch.float32, device=device)

# Take the loop bounds from the data instead of hard-coding them.
n_samples, n_channels = eeg_tensor.shape[:2]

# The entropy values are produced on the CPU (nolds), so collect them
# directly in a NumPy array rather than round-tripping through the GPU.
compressed_data = np.zeros((n_samples, n_channels, N_SCALES), dtype=np.float32)

for sample in range(n_samples):
    for channel in range(n_channels):
        # Time series of a single channel of a single epoch.
        time_series = eeg_tensor[sample, channel, :]
        compressed_data[sample, channel, :] = multiscale_entropy(time_series, scales=N_SCALES)
    # Report progress per batch of epochs rather than once per channel.
    if (sample + 1) % 100 == 0 or sample + 1 == n_samples:
        logging.info(f"Multiscale entropy computed for {sample + 1}/{n_samples} epochs")

print("计算完成的多尺度熵数据形状：", compressed_data.shape)


In [6]:
total_folds = 10
seed = 34  # random seed for the train/validation split
output_root = "EEGData/250hz"

# NOTE(review): the result of Split_Sets is not used below; the folds are
# produced by KFold. Kept in case other cells rely on these names.
train_indices, test_indices = dl.Split_Sets(total_folds, original_data)

# Ensure output directories exist
ensure_dir(f"{output_root}/TrainData")
ensure_dir(f"{output_root}/ValidData")
ensure_dir(f"{output_root}/TestData")

# If an earlier cell left `original_data` as a torch tensor, bring it back
# to NumPy so that index-array selection and train_test_split work.
if torch.is_tensor(original_data):
    fold_source = original_data.detach().cpu().numpy()
else:
    fold_source = original_data

# Use the configured fold count rather than a second hard-coded 10.
# NOTE(review): the split is made per epoch without shuffling or grouping.
# Epochs are ordered by class and by recording, so a test fold can contain a
# single class, and the random validation split mixes epochs of the same
# recording into train and validation. Consider a subject-grouped split.
kf = KFold(n_splits=total_folds)

for fold, (train_index, test_index) in enumerate(kf.split(fold_source)):
    train_data, test_data = fold_source[train_index], fold_source[test_index]
    train_labels, test_labels = labels[train_index], labels[test_index]

    # Carve a stratified 10% validation set out of the training portion.
    train_data_split, valid_data_split, train_labels_split, valid_labels_split = train_test_split(
        train_data, train_labels, test_size=0.1, random_state=seed, stratify=train_labels
    )

    # Convert every split to tensors and wrap it in a TensorDataset.
    train_dataset = TensorDataset(
        torch.tensor(train_data_split, dtype=torch.float32),
        torch.tensor(train_labels_split, dtype=torch.long),
    )
    valid_dataset = TensorDataset(
        torch.tensor(valid_data_split, dtype=torch.float32),
        torch.tensor(valid_labels_split, dtype=torch.long),
    )
    test_dataset = TensorDataset(
        torch.tensor(test_data, dtype=torch.float32),
        torch.tensor(test_labels, dtype=torch.long),
    )

    # Persist the three datasets of this fold. These are pickled
    # TensorDataset objects, so they must be loaded with a full
    # (non weights-only) torch.load.
    torch.save(train_dataset, f"{output_root}/TrainData/train_data_{fold + 1}_fold_with_seed_{seed}.pth")
    torch.save(valid_dataset, f"{output_root}/ValidData/valid_data_{fold + 1}_fold_with_seed_{seed}.pth")
    torch.save(test_dataset, f"{output_root}/TestData/test_data_{fold + 1}_fold_with_seed_{seed}.pth")
    logging.info(f"Fold {fold + 1} data saved successfully.")


2024-11-05 11:56:36,668 - INFO - Fold 1 data saved successfully.
2024-11-05 11:56:38,527 - INFO - Fold 2 data saved successfully.
2024-11-05 11:56:40,370 - INFO - Fold 3 data saved successfully.
2024-11-05 11:56:42,208 - INFO - Fold 4 data saved successfully.
2024-11-05 11:56:44,069 - INFO - Fold 5 data saved successfully.
2024-11-05 11:56:46,089 - INFO - Fold 6 data saved successfully.
2024-11-05 11:56:48,134 - INFO - Fold 7 data saved successfully.
2024-11-05 11:56:50,237 - INFO - Fold 8 data saved successfully.
2024-11-05 11:56:52,359 - INFO - Fold 9 data saved successfully.
2024-11-05 11:56:54,423 - INFO - Fold 10 data saved successfully.


In [None]:
total_folds = 10
seed = 34  # random seed for the train/validation split
# NOTE(review): "Augemnted" looks like a typo of "Augmented". The directory
# name is left unchanged because other code may load from this exact path.
output_root = "EEG_Augemnted_Data"

# NOTE(review): `SampleEn_EEG` is not defined anywhere in the visible part of
# this notebook; presumably it is the entropy feature array
# (`compressed_data`) — confirm and define it explicitly before running.
# Split_Sets is called on the same array that is split below (the original
# referenced `EEG_crop`, which is not defined either); its result is unused.
train_indices, test_indices = dl.Split_Sets(total_folds, SampleEn_EEG)

# Ensure output directories exist
ensure_dir(f"{output_root}/TrainData")
ensure_dir(f"{output_root}/ValidData")
ensure_dir(f"{output_root}/TestData")

# Use the configured fold count rather than a second hard-coded 10.
# NOTE(review): the split is made per epoch without shuffling or grouping;
# consider a subject-grouped split to avoid leakage between the sets.
kf = KFold(n_splits=total_folds)

for fold, (train_index, test_index) in enumerate(kf.split(SampleEn_EEG)):
    train_data, test_data = SampleEn_EEG[train_index], SampleEn_EEG[test_index]
    train_labels, test_labels = labels[train_index], labels[test_index]

    # Carve a stratified 10% validation set out of the training portion.
    train_data_split, valid_data_split, train_labels_split, valid_labels_split = train_test_split(
        train_data, train_labels, test_size=0.1, random_state=seed, stratify=train_labels
    )

    # Convert every split to tensors and wrap it in a TensorDataset.
    train_dataset = TensorDataset(
        torch.tensor(train_data_split, dtype=torch.float32),
        torch.tensor(train_labels_split, dtype=torch.long),
    )
    valid_dataset = TensorDataset(
        torch.tensor(valid_data_split, dtype=torch.float32),
        torch.tensor(valid_labels_split, dtype=torch.long),
    )
    test_dataset = TensorDataset(
        torch.tensor(test_data, dtype=torch.float32),
        torch.tensor(test_labels, dtype=torch.long),
    )

    # Persist the three datasets of this fold. The test file now uses a
    # single ".pth" extension, matching the train/validation files.
    torch.save(train_dataset, f"{output_root}/TrainData/train_data_{fold + 1}_fold_with_seed_{seed}.pth")
    torch.save(valid_dataset, f"{output_root}/ValidData/valid_data_{fold + 1}_fold_with_seed_{seed}.pth")
    torch.save(test_dataset, f"{output_root}/TestData/test_data_{fold + 1}_fold_with_seed_{seed}.pth")
    logging.info(f"Fold {fold + 1} data saved successfully.")
