In [50]:
import os
import random
from matplotlib import axis
import scipy.io as sio
import numpy as np
import pandas as pd 
from mne.preprocessing import ICA, create_eog_epochs, create_ecg_epochs
import joblib
import torch
from torch.utils.data import TensorDataset
from sklearn.model_selection import KFold, train_test_split
import model as dl  # Ensure this module contains necessary utility functions
import logging
from mne.preprocessing import ICA
from sklearn.metrics import confusion_matrix
# Configure the root logger: emit INFO and above, each record prefixed with
# a timestamp and the level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

In [51]:
def ensure_dir(directory):
    """Create ``directory`` (including missing parents) if it does not exist.

    Calling this on a directory that already exists is a no-op.

    Args:
        directory: Path of the directory to create.

    Raises:
        FileExistsError: If ``directory`` exists but is not a directory
            (for example a regular file occupies the path).
    """
    # exist_ok=True removes the check-then-create race of the
    # `if not exists: makedirs` pattern and still fails loudly when the path
    # is occupied by something that is not a directory.
    os.makedirs(directory, exist_ok=True)
def calculate_metrics(y_true, y_pred):
    """Compute accuracy, sensitivity and specificity for binary 0/1 labels.

    Label ``1`` is treated as the positive class and label ``0`` as the
    negative class.

    Args:
        y_true: Ground-truth labels (values 0 or 1).
        y_pred: Predicted labels (values 0 or 1).

    Returns:
        Tuple ``(accuracy, sensitivity, specificity)``. A metric whose
        denominator is zero (e.g. sensitivity when there are no positive
        samples) is returned as ``nan``.
    """
    # Pin the label order so the matrix is always 2x2. Without `labels`, a
    # split that contains a single class yields a 1x1 matrix and the
    # four-way unpacking below raises ValueError.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    TN, FP, FN, TP = cm.ravel()

    def _ratio(numerator, denominator):
        # Make the "undefined" case explicit instead of relying on a
        # NumPy divide-by-zero warning.
        return numerator / denominator if denominator else float("nan")

    accuracy = _ratio(TP + TN, TP + TN + FP + FN)
    sensitivity = _ratio(TP, TP + FN)
    specificity = _ratio(TN, TN + FP)

    return accuracy, sensitivity, specificity


In [52]:
def importAndCropData(file_paths, duration, labels,partition):
    """Load one CSV per sample and stack the selected rows into a 3-D array.

    Each file is read with ``pandas.read_csv``. The first column is dropped
    and only the rows listed in ``partition`` are kept, giving one
    ``(len(partition), n_columns - 1)`` slice per file. Files that fail to
    load are logged and skipped.

    Args:
        file_paths: Iterable of CSV paths, one per sample.
        duration: Accepted for interface compatibility but not used; despite
            the function name, no cropping is performed here.
        labels: Labels aligned with ``file_paths`` (0 = normal, 1 = impaired,
            as assigned by the calling cell). Only used for the summary log.
        partition: Sequence of row indices to keep from every file.

    Returns:
        numpy.ndarray of shape
        ``(n_loaded_files, len(partition), n_columns - 1)``.

    Raises:
        ValueError: If no file could be loaded.
    """
    EEG_list = []
    failed_files = []
    for file in file_paths:
        try:
            raw = pd.read_csv(file)
            # Drop the first column, then keep only the requested rows.
            data = np.array(raw)[:, 1:]
            data = data[partition]
            channels = len(partition)
            feature = data.shape[1]
            EEG_list.append(data.reshape(1, channels, feature))
            logging.info(f"Processed file {file} done")
        except Exception as e:
            logging.error(f"Error processing file {file}: {e}")
            failed_files.append(file)

    if not EEG_list:
        raise ValueError("No data was loaded. Please check the file paths and formats.")
    if failed_files:
        # Skipped files shrink the output but not `labels`, so row i of the
        # result no longer corresponds to labels[i]. Surface that loudly.
        logging.warning(
            f"{len(failed_files)} file(s) were skipped; the returned array has fewer "
            f"rows than `labels` and is no longer aligned with it."
        )
    EEG = np.concatenate(EEG_list)

    # Coerce so the element-wise comparison also works for plain lists.
    label_arr = np.asarray(labels)
    # Label convention used by the caller: 0 = normal, 1 = impaired (MCI).
    logging.info(f"Total epochs: {EEG.shape[0]}, Normal: {np.sum(label_arr == 0)}, "
            f"MCI: {np.sum(label_arr == 1)}")
    return EEG

In [53]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Use the GPU when CUDA is available, otherwise fall back to the CPU
seed = 34
# NOTE(review): presumably seeds random / numpy / torch so later shuffles and
# splits are repeatable — confirm against `seed_everything` in model.py.
dl.seed_everything(seed)
# EEG data parameters
# `duration` is passed to importAndCropData, which currently does not use it.
duration = 2500

from itertools import combinations

# Base brain-region definitions: region name -> list of indices. These indices
# are later used to select rows from each loaded file (presumably one row per
# EEG channel — TODO confirm against the CSV layout).
regions = {
    "prefrontal": [0, 1, 2, 3, 10, 11, 16],
    "central": [4, 5, 17],
    "temporal": [12, 13, 14, 15],
    "parietal": [6, 7, 18],
    "occipital": [8, 9]
}

# Build merged index lists for every multi-region combination
def generate_combinations(regions, sizes):
    """Merge region index lists for every combination of the given sizes.

    Args:
        regions: Mapping of region name -> iterable of indices.
        sizes: Iterable of combination sizes (number of regions per group).

    Returns:
        Dict mapping ``"nameA_nameB_..."`` (names joined with ``_`` in the
        mapping's iteration order) to the sorted, de-duplicated union of the
        member regions' indices.
    """
    names = list(regions)
    merged = {}

    for group_size in sizes:
        for group in combinations(names, group_size):
            # Union of all member indices, without duplicates
            index_pool = set()
            for name in group:
                index_pool.update(regions[name])
            merged["_".join(group)] = sorted(index_pool)

    return merged

# Generate every combination of one to five brain regions. This rebinds
# `regions` from the base definitions to the combined mapping (single regions
# are still present because size 1 is included).
regions = generate_combinations(regions, sizes=[1,2,3,4,5])
# Name of the region combination to process; it is used as a key of `regions`
# and as part of the output directory name.
partition = "prefrontal_occipital"

In [54]:
import os
import warnings
# Suppress RuntimeWarning messages for the rest of the session.
# NOTE(review): this filter is process-wide and also hides numerical warnings
# (e.g. divide-by-zero) — confirm that is intended.
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Dataset layout: <base_dir>/<class dir>/<subject dir>/<segment>.csv
base_dir = '糖尿病数据分段'
# No trailing separator in the names: os.path.join inserts the platform's own,
# so the same code resolves on Windows and on POSIX systems.
normal_dir = os.path.join(base_dir, '认知正常')
impaired_dir = os.path.join(base_dir, '认知障碍')

# Collect the per-subject folders, then every CSV inside each of them.
# os.listdir returns entries in arbitrary order, so both levels are sorted to
# make the pre-shuffle order (and therefore the seeded shuffle) reproducible.
# Non-directory entries at the subject level are skipped instead of crashing
# the nested listdir.
normal_files_names = sorted(
    os.path.join(normal_dir, f) for f in os.listdir(normal_dir)
    if os.path.isdir(os.path.join(normal_dir, f))
)
normal_files = [os.path.join(x, f) for x in normal_files_names for f in sorted(os.listdir(x)) if f.endswith('.csv')]
impaired_files_names = sorted(
    os.path.join(impaired_dir, f) for f in os.listdir(impaired_dir)
    if os.path.isdir(os.path.join(impaired_dir, f))
)
impaired_files = [os.path.join(x, f) for x in impaired_files_names for f in sorted(os.listdir(x)) if f.endswith('.csv')]

all_files = normal_files + impaired_files
if not all_files:
    raise ValueError(f"No .csv files found under {normal_dir!r} or {impaired_dir!r}.")
# Label convention: 0 = cognitively normal, 1 = cognitively impaired.
labels = np.concatenate([np.zeros(len(normal_files)),np.ones(len(impaired_files))],axis=0)


# Shuffle files and labels together so each label stays with its file.
# NOTE(review): uses the global `random` state, presumably seeded by
# dl.seed_everything in the configuration cell — run that cell first.
combined = list(zip(all_files, labels))
random.shuffle(combined)
all_files[:], labels[:] = zip(*combined)


all_data = importAndCropData(all_files, duration,labels,regions[partition])
# prefrontal_data = importAndCropData(all_files, duration,labels,regions["prefrontal"])
# central_data = importAndCropData(all_files, duration,labels,regions["central"])
# temporal_data = importAndCropData(all_files, duration,labels,regions["temporal"])
# parietal_data = importAndCropData(all_files, duration,labels,regions["parietal"])
# occipital_data = importAndCropData(all_files, duration,labels,regions["occipital"])

2025-01-08 17:18:33,991 - INFO - Processed file 糖尿病数据分段\认知正常\王殿来\王殿来_130.csv done
2025-01-08 17:18:34,011 - INFO - Processed file 糖尿病数据分段\认知障碍\张立志\张立志_109.csv done
2025-01-08 17:18:34,051 - INFO - Processed file 糖尿病数据分段\认知正常\余洪涛\余洪涛_179.csv done
2025-01-08 17:18:34,071 - INFO - Processed file 糖尿病数据分段\认知正常\张金艳\张金艳_260.csv done
2025-01-08 17:18:34,091 - INFO - Processed file 糖尿病数据分段\认知障碍\曹玉华\曹玉华_97.csv done
2025-01-08 17:18:34,108 - INFO - Processed file 糖尿病数据分段\认知正常\果春胜\果春胜_89.csv done
2025-01-08 17:18:34,125 - INFO - Processed file 糖尿病数据分段\认知障碍\王翠兰\王翠兰_137.csv done
2025-01-08 17:18:34,140 - INFO - Processed file 糖尿病数据分段\认知障碍\王秋平\王秋平_94.csv done
2025-01-08 17:18:34,160 - INFO - Processed file 糖尿病数据分段\认知正常\王淑艳\王淑艳_181.csv done
2025-01-08 17:18:34,178 - INFO - Processed file 糖尿病数据分段\认知障碍\陈艳杰\陈艳杰_198.csv done
2025-01-08 17:18:34,195 - INFO - Processed file 糖尿病数据分段\认知正常\张秀云\张秀云_125.csv done
2025-01-08 17:18:34,211 - INFO - Processed file 糖尿病数据分段\认知正常\张秀云\张秀云_184.csv done
2025-01-08 17:18:34

In [55]:
# from sklearn.svm import SVC
# from sklearn.preprocessing import StandardScaler
# from sklearn.model_selection import train_test_split
# X_data = occipital_data
# X_flattened = X_data.reshape(X_data.shape[0], -1)
# # X_flattened = np.nan_to_num(X_flattened, nan=0.0)
# y = labels  # 使用相同的标签

# # 数据集划分
# X_train, X_test, y_train, y_test = train_test_split(X_flattened, y, test_size=0.3, random_state=1)

# # 数据标准化
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

# # 创建SVM分类器
# svm_classifier = SVC(kernel='linear')  # 使用线性核
# svm_classifier.fit(X_train, y_train)
# y_pred = svm_classifier.predict(X_test)
# acc, sens, spec = calculate_metrics(y_test, y_pred)

In [56]:
# import matplotlib.pyplot as plt
# from sklearn.metrics import confusion_matrix
# from sklearn.svm import SVC
# from sklearn.preprocessing import StandardScaler
# from sklearn.neural_network import MLPClassifier
# from sklearn.linear_model import LinearRegression
# from sklearn.linear_model import LogisticRegression
# from sklearn.datasets import make_classification
# from sklearn.model_selection import train_test_split
# import numpy as np

# # 计算各项指标
# def calculate_metrics(y_true, y_pred):
#     cm = confusion_matrix(y_true, y_pred)
#     TN, FP, FN, TP = cm.ravel()
    
#     accuracy = (TP + TN) / (TP + TN + FP + FN)
#     sensitivity = TP / (TP + FN)
#     specificity = TN / (TN + FP)
    
#     return accuracy, sensitivity, specificity

# # 绘制三幅柱状图
# def plot_bar_charts(accuracy, sensitivity, specificity, regions):
#     x = np.arange(len(regions))  # 各脑区的索引位置
    
#     # 创建图形对象
#     fig, ax = plt.subplots(1, 3, figsize=(18, 6))
    
#     # 绘制准确性柱状图
#     ax[0].bar(x, accuracy, color='blue', alpha=0.7)
#     ax[0].set_title('Accuracy')
#     ax[0].set_xlabel('Region')
#     ax[0].set_ylabel('Accuracy')
#     ax[0].set_xticks(x)
#     ax[0].set_xticklabels(regions)
#     ax[0].grid(True)
    
#     # 绘制敏感性柱状图
#     ax[1].bar(x, sensitivity, color='green', alpha=0.7)
#     ax[1].set_title('Sensitivity')
#     ax[1].set_xlabel('Region')
#     ax[1].set_ylabel('Sensitivity')
#     ax[1].set_xticks(x)
#     ax[1].set_xticklabels(regions)
#     ax[1].grid(True)
    
#     # 绘制特异性柱状图
#     ax[2].bar(x, specificity, color='red', alpha=0.7)
#     ax[2].set_title('Specificity')
#     ax[2].set_xlabel('Region')
#     ax[2].set_ylabel('Specificity')
#     ax[2].set_xticks(x)
#     ax[2].set_xticklabels(regions)
#     ax[2].grid(True)
    
#     # 调整布局
#     plt.tight_layout()
#     plt.show()

# # 假设你已经加载了数据
# regions = ["all", "prefrontal", "central", "temporal", "parietal", "occipital"]

# # 存储每个脑区模型的指标
# accuracy = []
# sensitivity = []
# specificity = []

# # 模型训练与评估
# for region in regions:
#     # 根据区域选择数据
#     if region == "all":
#         X_data = all_data
#     elif region == "prefrontal":
#         X_data = prefrontal_data
#     elif region == "central":
#         X_data = central_data
#     elif region == "temporal":
#         X_data = temporal_data
#     elif region == "parietal":
#         X_data = parietal_data
#     elif region == "occipital":
#         X_data = occipital_data
    
#     # 展平数据
#     X_flattened = X_data.reshape(X_data.shape[0], -1)
#     # X_flattened = np.nan_to_num(X_flattened, nan=0.0)
#     y = labels  # 使用相同的标签

#     # 数据集划分
#     X_train, X_test, y_train, y_test = train_test_split(X_flattened, y, test_size=0.3, random_state=1)
    
#     # 数据标准化
#     scaler = StandardScaler()
#     X_train = scaler.fit_transform(X_train)
#     X_test = scaler.transform(X_test)
    
#     # 创建SVM分类器
#     # svm_classifier = SVC(kernel='linear')  # 使用线性核
#     # svm_classifier.fit(X_train, y_train)
#     # y_pred = svm_classifier.predict(X_test)


#     # MLP
#     # mlp = MLPClassifier(hidden_layer_sizes=(10, 10), max_iter=300, alpha=1e-4,
#     #                 solver='adam', verbose=10, random_state=999,
#     #                 learning_rate_init=.001)
#     # mlp.fit(X_train, y_train)
#     # y_pred = mlp.predict(X_test) 

#     mlr = LogisticRegression()
#     mlr.fit(X_train, y_train)
#     y_pred = mlr.predict(X_test)
    
#     # 计算指标
#     acc, sens, spec = calculate_metrics(y_test, y_pred)
    
#     # 存储指标
#     accuracy.append(acc)
#     sensitivity.append(sens)
#     specificity.append(spec)

# # 绘制柱状图
# plot_bar_charts(accuracy, sensitivity, specificity, regions)


In [57]:
# Number of cross-validation folds; used for both the split and the save loop.
n_folds = 10

# The loader skips unreadable files, which would leave `all_data` shorter than
# `labels` and silently pair samples with the wrong labels. Fail early instead.
if len(labels) != all_data.shape[0]:
    raise ValueError(
        f"Sample/label mismatch: {all_data.shape[0]} samples but {len(labels)} labels."
    )

train_indices, test_indices = dl.Split_Sets(n_folds, all_data)

# Ensure output directories exist
output_root = os.path.join("EEGDataTNBRaw", str(partition))
train_dir = os.path.join(output_root, "TrainData")
valid_dir = os.path.join(output_root, "ValidData")
test_dir = os.path.join(output_root, "TestData")
for split_dir in (train_dir, valid_dir, test_dir):
    ensure_dir(split_dir)

for fold in range(n_folds):
    try:
        # Split into training and test sets
        train_idx = train_indices[fold]
        test_idx = test_indices[fold]

        train_data = all_data[train_idx,:, : ]
        train_labels = labels[train_idx]
        test_data = all_data[test_idx,:, : ]
        test_labels = labels[test_idx]

        # Hold out 10% of the training portion for validation, keeping the
        # class ratio of the training labels (stratified).
        train_data_split, valid_data_split, train_labels_split, valid_labels_split = train_test_split(
            train_data, train_labels, test_size=0.1, random_state=seed, stratify=train_labels
        )

        # Convert to PyTorch tensors: float inputs, integer (long) labels
        train_tensor = torch.from_numpy(train_data_split).float()
        train_labels_tensor = torch.from_numpy(train_labels_split).long()

        valid_tensor = torch.from_numpy(valid_data_split).float()
        valid_labels_tensor = torch.from_numpy(valid_labels_split).long()

        test_tensor = torch.from_numpy(test_data).float()
        test_labels_tensor = torch.from_numpy(test_labels).long()

        # Create TensorDatasets
        train_dataset = TensorDataset(train_tensor, train_labels_tensor)
        valid_dataset = TensorDataset(valid_tensor, valid_labels_tensor)
        test_dataset = TensorDataset(test_tensor, test_labels_tensor)

        # Save datasets (same file names as before, one file per split and fold)
        torch.save(train_dataset, os.path.join(train_dir, f"train_data_{fold + 1}_fold_with_seed_{seed}.pth"))
        torch.save(valid_dataset, os.path.join(valid_dir, f"valid_data_{fold + 1}_fold_with_seed_{seed}.pth"))
        torch.save(test_dataset, os.path.join(test_dir, f"test_data_{fold + 1}_fold_with_seed_{seed}.pth"))

        logging.info(f"Fold {fold + 1} data saved successfully.")
    except Exception as e:
        # Keep processing the remaining folds, but record the full traceback
        # so a failed fold can actually be diagnosed.
        logging.exception(f"Error processing fold {fold + 1}: {e}")


2025-01-08 17:23:51,296 - INFO - Fold 1 data saved successfully.
2025-01-08 17:23:52,829 - INFO - Fold 2 data saved successfully.
2025-01-08 17:23:54,283 - INFO - Fold 3 data saved successfully.
2025-01-08 17:23:55,708 - INFO - Fold 4 data saved successfully.
2025-01-08 17:23:57,316 - INFO - Fold 5 data saved successfully.
2025-01-08 17:23:58,924 - INFO - Fold 6 data saved successfully.
2025-01-08 17:24:00,530 - INFO - Fold 7 data saved successfully.
2025-01-08 17:24:02,103 - INFO - Fold 8 data saved successfully.
2025-01-08 17:24:03,705 - INFO - Fold 9 data saved successfully.
2025-01-08 17:24:05,293 - INFO - Fold 10 data saved successfully.
