batchsize = 256
512 : 96.85
256 : 98.2
128 : 98.15
64 : 97.95
32 : 98.05

In [1]:
predicted_list = []
labels_list = []

In [2]:
# config.py

config = {
  "data": {
    "high_dim_path": 'data/MRI_0328.csv',
    "low_dim_path": 'data/ALL.csv',
    "labels_path": 'data/LABEL.csv',
    "brain_region_adjacency_path" : 'data/region_adjacency.csv',
    "batch_size": 64,
    "shuffle": False,
    "test_size":0.01,
    "val_size":0.2,
    "random_state":12345
  },
  "model": {
    "type": "Baseline_GCN", # 设置模型跑哪个
    "way_adjmatrix" : "zero", # 邻接矩阵如何构造
    "high_dim_input_size": 498 ,  
    "low_dim_input_size":17,
    "embedding_dim":128,
    "output_dim":2,
    "hidden_channels":128,
    "num_heads":4 ,
    "num_features":3 # 每个脑区的特征数
      
  },
  "train": {
    "repeat_times" :1, # 简单完成交叉验证的任务
    "epochs": 10,
    "learning_rate": 0.005,
    "device": "cuda:1"
  },
  "earlystopping":{
    "is_on":False,
    "patience":5,
    "delta":0.01
  }
}

In [3]:
# 集成版本

import torch
import torch.nn as nn
import torch.optim as optim
# import json
from data.data_loader import load_and_align_data, create_data_loader
from models.model import *
from utils import *  # 假设你有评估和早停的辅助函数
from torch_geometric.data import Data          
import torch.optim.lr_scheduler as lr_scheduler
from torch_geometric.utils import dense_to_sparse
#from config import config
import random
import numpy as np
import pandas as pd

# 设置训练设备
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")

# 结果的接收器
random.seed(99)
result = []
repeat_times = config['train']['repeat_times']
random_state = config['data']['random_state']

if repeat_times != 1:
    random_state = [random.randint(0, 10000) for _ in range(config['train']['repeat_times'])]
    print(f'共分割样本{repeat_times}次，随机数种子为：{random_state}')
elif repeat_times == 1:
    random_state = [config['data']['random_state']]
    print(f'仅进行{repeat_times}次分割样本，随机数种子为：{random_state}')
    
brain_adj_matrix = read_brain_region_adjacency(config["data"]["brain_region_adjacency_path"])
brain_edge_index,_ = dense_to_sparse(brain_adj_matrix)

for seed in random_state:
    # 加载数据并创建数据集
    train_dataset, val_dataset, _ = load_and_align_data(high_dim_path = config['data']['high_dim_path'], 
                                                      low_dim_path = config['data']['low_dim_path'],
                                                      labels_path = config['data']['labels_path'],
                                                      test_size = config['data']['test_size'],
                                                      val_size = config['data']['val_size'],
                                                      random_state = seed)
    
    # 创建数据加载器
    train_loader = create_data_loader(train_dataset, batch_size=config['data']['batch_size'], shuffle=config['data']['shuffle'])
    val_loader = create_data_loader(val_dataset, batch_size=config['data']['batch_size'], shuffle=config['data']['shuffle'])
    
    if config['model']['type'] == 'Baseline_GCN':
        print(f"Model:{config['model']['type']}")
        model = Baseline_GCN(high_dim_input_size=config["model"]["high_dim_input_size"],  # 适当调整这些参数，这里可以写成config
                     low_dim_input_size=config["model"]["low_dim_input_size"],
                     embedding_dim=config["model"]["embedding_dim"],
                     output_dim=config["model"]["output_dim"],  # 根据您的任务调整
                     hidden_channels=config["model"]["hidden_channels"]).to(device)
    elif config['model']['type'] == 'Baseline_MLP':
        print(f"Model:{config['model']['type']}")
        model = Baseline_MLP(high_dim_input_size=config["model"]["high_dim_input_size"],  # 适当调整这些参数，这里可以写成config
                     low_dim_input_size=config["model"]["low_dim_input_size"],
                     embedding_dim=config["model"]["embedding_dim"],
                     output_dim=config["model"]["output_dim"],  # 根据您的任务调整
                     hidden_channels=config["model"]["hidden_channels"]).to(device)
    elif config['model']['type'] == 'high_low_MLP':
        print(f"Model:{config['model']['type']}")
        model = high_low_MLP(high_dim_input_size=config["model"]["high_dim_input_size"],  # 适当调整这些参数，这里可以写成config
                     low_dim_input_size=config["model"]["low_dim_input_size"],
                     embedding_dim=config["model"]["embedding_dim"],
                     output_dim=config["model"]["output_dim"],  # 根据您的任务调整
                     hidden_channels=config["model"]["hidden_channels"]).to(device)
    elif config['model']['type'] == 'only_high_MLP':
        print(f"Model:{config['model']['type']}")
        model = only_high_MLP(high_dim_input_size=config["model"]["high_dim_input_size"],  # 适当调整这些参数，这里可以写成config
                     low_dim_input_size=config["model"]["low_dim_input_size"],
                     embedding_dim=config["model"]["embedding_dim"],
                     output_dim=config["model"]["output_dim"],  # 根据您的任务调整
                     hidden_channels=config["model"]["hidden_channels"]).to(device)
    elif config['model']['type'] == 'only_low_MLP':
        print(f"Model:{config['model']['type']}")
        model = only_high_MLP(high_dim_input_size=config["model"]["high_dim_input_size"],  # 适当调整这些参数，这里可以写成config
                     low_dim_input_size=config["model"]["low_dim_input_size"],
                     embedding_dim=config["model"]["embedding_dim"],
                     output_dim=config["model"]["output_dim"],  # 根据您的任务调整
                     hidden_channels=config["model"]["hidden_channels"]).to(device)
    elif config['model']['type'] == 'Only_high_GCN':
        print(f"Model:{config['model']['type']}")
        model = Only_high_GCN(high_dim_input_size=config["model"]["high_dim_input_size"],  # 适当调整这些参数，这里可以写成config
                     low_dim_input_size=config["model"]["low_dim_input_size"],
                     embedding_dim=config["model"]["embedding_dim"],
                     output_dim=config["model"]["output_dim"],  # 根据您的任务调整
                     hidden_channels=config["model"]["hidden_channels"]).to(device)
    elif config['model']['type'] == 'test_high':
        print(f"Model:{config['model']['type']}")
        model = test_high(high_dim_input_size=config["model"]["high_dim_input_size"],  # 适当调整这些参数，这里可以写成config
                     low_dim_input_size=config["model"]["low_dim_input_size"],
                     embedding_dim=config["model"]["embedding_dim"],
                     output_dim=config["model"]["output_dim"],  # 根据您的任务调整
                     hidden_channels=config["model"]["hidden_channels"]).to(device)
    elif config['model']['type'] == 'test_high_correlation':
        print(f"Model:{config['model']['type']}")
        model = test_high_correlation(high_dim_input_size=config["model"]["high_dim_input_size"],  # 适当调整这些参数，这里可以写成config
                     low_dim_input_size=config["model"]["low_dim_input_size"],
                     embedding_dim=config["model"]["embedding_dim"],
                     output_dim=config["model"]["output_dim"],  # 根据您的任务调整
                     hidden_channels=config["model"]["hidden_channels"]).to(device)
    elif config['model']['type'] == 'BrainGNN':
        print(f"Model:{config['model']['type']}")
        model = BrainGNN(high_dim_input_size=config["model"]["high_dim_input_size"],  # 适当调整这些参数，这里可以写成config
                     low_dim_input_size=config["model"]["low_dim_input_size"],
                     embedding_dim=config["model"]["embedding_dim"],
                     output_dim=config["model"]["output_dim"],  # 根据您的任务调整
                     hidden_channels=config["model"]["hidden_channels"],
                     num_features=config["model"]["num_features"]
                        ).to(device)
    else:
        print(f"Not found Model:{config['model']['type']}")
    
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=config['train']['learning_rate'])
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.3, patience=5)
    
    # 初始化早停对象
    early_stopping = EarlyStopping(patience=config["earlystopping"]["patience"], delta=config["earlystopping"]["delta"])
    
    best_val_loss = float('inf')
    
    for epoch in range(config['train']['epochs']):
        model.train()
        total_loss = 0
        correct = 0
        total = 0
        for high_dim_features, low_dim_features, labels in train_loader: 
            batch_size = high_dim_features.size(0)  # 获取当前批次的大小
            
            # 为当前批次生成全连接的邻接矩阵
            if config["model"]["way_adjmatrix"] == 'zero':
                adj_matrix = torch.zeros((batch_size, batch_size))
            elif config["model"]["way_adjmatrix"] == 'only_three':
                adj_matrix = build_adj_matrix_only_three(high_dim_features,low_dim_features,sigma=1)
            elif config["model"]["way_adjmatrix"] == 'similarity_add_three':
                adj_matrix = build_adj_matrix_similarity_add_three(high_dim_features,low_dim_features,sigma=1)
            elif config["model"]["way_adjmatrix"] == 'sex':
                adj_matrix = build_adj_matrix_sex(high_dim_features,low_dim_features,sigma=1)
            elif config["model"]["way_adjmatrix"] == 'apoe':
                adj_matrix = build_adj_matrix_apoe(high_dim_features,low_dim_features,sigma=1)
            elif config["model"]["way_adjmatrix"] == 'mmse':
                adj_matrix = build_adj_matrix_mmse(high_dim_features,low_dim_features,sigma=1)
            edge_index, _ = dense_to_sparse(adj_matrix)
            
            # 准备数据
            high_dim_features = high_dim_features.to(device)
            low_dim_features = low_dim_features.float().to(device)
            labels = labels.to(device)
            
            high_dim_cov_matrix = cov_builder(high_dim_features , labels).to(device)

            brain_edge_index = brain_edge_index.to(device)
    
            # 前向传播
            outputs = model(high_dim_features, low_dim_features, edge_index)#, high_dim_cov_matrix) #edge_index, high_dim_cov_matrix)
            # outputs = model(high_dim_features, low_dim_features, edge_index, high_dim_cov_matrix)
            loss = criterion(outputs, labels)
    
            # 反向传播和优化
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
        train_loss = total_loss / len(train_loader)
        train_acc = 100 * correct / total
    
        # 计算验证集上的损失
        model.eval()
        val_total_loss = 0
        val_correct = 0
        val_total = 0
        val_acc_list = []
    
        with torch.no_grad():
            for high_dim_features, low_dim_features, labels in val_loader: 
                batch_size = high_dim_features.size(0)  # 获取当前批次的大小
                
                # 为当前批次生成全连接的邻接矩阵
                if config["model"]["way_adjmatrix"] == 'zero':
                    adj_matrix = torch.zeros((batch_size, batch_size))
                elif config["model"]["way_adjmatrix"] == 'only_three':
                    adj_matrix = build_adj_matrix_only_three(high_dim_features,low_dim_features,sigma=1)
                elif config["model"]["way_adjmatrix"] == 'similarity_add_three':
                    adj_matrix = build_adj_matrix_similarity_add_three(high_dim_features,low_dim_features,sigma=1)
                elif config["model"]["way_adjmatrix"] == 'sex':
                    adj_matrix = build_adj_matrix_sex(high_dim_features,low_dim_features,sigma=1)
                elif config["model"]["way_adjmatrix"] == 'apoe':
                    adj_matrix = build_adj_matrix_apoe(high_dim_features,low_dim_features,sigma=1)
                elif config["model"]["way_adjmatrix"] == 'mmse':
                    adj_matrix = build_adj_matrix_mmse(high_dim_features,low_dim_features,sigma=1)
                edge_index, _ = dense_to_sparse(adj_matrix)
                    
                high_dim_features = high_dim_features.to(device)
                low_dim_features = low_dim_features.float().to(device)
                labels = labels.to(device)
                edge_index = edge_index.to(device)
                
                high_dim_cov_matrix = cov_builder(high_dim_features , labels).to(device)
                
                outputs = model(high_dim_features, low_dim_features, edge_index)#,high_dim_cov_matrix)
                #outputs = model(high_dim_features, low_dim_features, edge_index,high_dim_cov_matrix)

                loss = criterion(outputs, labels)
                val_total_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()
                
        val_loss =  val_total_loss/len(val_loader)
        val_acc = 100 * val_correct / val_total
        scheduler.step(val_loss)
        val_acc_list.append(val_acc)
        print(f'Epoch {epoch+1}, Train Loss: {train_loss:.6f}, Train Acc: {train_acc:.6f}, Val Loss: {val_loss:.6f}, Val Acc: {val_acc:.6f}')
        # print(f'Wrong_sample:{val_total-val_correct},Index:{[index for index, (item1, item2) in enumerate(zip(predicted, labels)) if item1 != item2]}')
        # print(predicted)
        
        if config["earlystopping"]["is_on"]:
            early_stopping(val_loss,model)
            if  early_stopping.early_stop:
                print("Early stopping")
                break
    predicted_list = predicted_list + predicted.tolist()
    labels_list = labels_list + labels.tolist()
                
    result.append(max(val_acc_list))
print(f'Finished! \n Acc:{np.mean(result),max(result)}, \n list:{result}')

仅进行1次分割样本，随机数种子为：[12345]
Model:Baseline_GCN
Epoch 1, Train Loss: 0.300169, Train Acc: 85.686275, Val Loss: 0.525677, Val Acc: 78.125000
Epoch 2, Train Loss: 0.145822, Train Acc: 94.901961, Val Loss: 0.514913, Val Acc: 85.937500
Epoch 3, Train Loss: 0.102226, Train Acc: 95.882353, Val Loss: 0.329462, Val Acc: 89.062500
Epoch 4, Train Loss: 0.053207, Train Acc: 97.647059, Val Loss: 0.365220, Val Acc: 89.843750
Epoch 5, Train Loss: 0.037812, Train Acc: 99.019608, Val Loss: 0.386690, Val Acc: 89.062500
Epoch 6, Train Loss: 0.021932, Train Acc: 99.607843, Val Loss: 0.348850, Val Acc: 89.843750
Epoch 7, Train Loss: 0.021718, Train Acc: 99.607843, Val Loss: 0.359640, Val Acc: 90.625000
Epoch 8, Train Loss: 0.016049, Train Acc: 99.607843, Val Loss: 0.484167, Val Acc: 90.625000
Epoch 9, Train Loss: 0.009881, Train Acc: 99.803922, Val Loss: 0.580955, Val Acc: 88.281250
Epoch 10, Train Loss: 0.006969, Train Acc: 100.000000, Val Loss: 0.574343, Val Acc: 89.843750
Finished! 
 Acc:(89.84375, 89.8437

In [4]:
len(predicted_list) #2000
predicted_list.count(0)
predicted_list.count(1)
labels_list.count(0)
sum([1 for label, predicted in zip(labels_list, predicted_list) if label == predicted])/len(predicted_list)

0.890625

In [5]:
predicted = torch.tensor(predicted_list)
labels = torch.tensor(labels_list)

# 确定类别总数
num_classes = torch.max(torch.cat((predicted, labels))) + 1

# 初始化混淆矩阵
confusion_matrix = torch.zeros(num_classes, num_classes)

# 填充混淆矩阵
for t, p in zip(labels.view(-1), predicted.view(-1)):
    confusion_matrix[t.long(), p.long()] += 1

confusion_matrix

tensor([[25.,  5.],
        [ 2., 32.]])

In [6]:
# 计算性能指标
TP = int(confusion_matrix[0][0])
FP = int(confusion_matrix[0][1])
FN = int(confusion_matrix[1][0])
TN = int(confusion_matrix[1][1])

# 精确率 Precision
precision = TP / (TP + FP)
# 召回率 Recall
recall = TP / (TP + FN)
# F1分数
F1 = 2 * (precision * recall) / (precision + recall)

# 计算总体准确率
accuracy = (TP + TN) / (TP + TN + FN + FP)

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision per class: {precision:.4f}')
print(f'Recall per class: {recall:.4f}')
print(f'F1 Score per class: {F1:.4f}')

Accuracy: 0.8906
Precision per class: 0.8333
Recall per class: 0.9259
F1 Score per class: 0.8772
