In [1]:
# Delete .files 
import os

def delete_dot_files(root_dir):
    """
    递归删除 root_dir 及其子目录下所有以 '.' 开头的文件（隐藏文件）。
    """
    for dirpath, dirnames, filenames in os.walk(root_dir):
        for fname in filenames:
            if fname.startswith('.'):
                full_path = os.path.join(dirpath, fname)
                try:
                    os.remove(full_path)
                    print(f"Deleted: {full_path}")
                except Exception as e:
                    print(f"Failed to delete {full_path}: {e}")

if __name__ == "__main__":
    # 将下面路径修改为你想要清理的根目录
    target_directory = "/workspace/nnUNet_raw/Dataset005_BraTS2023_PED"
    delete_dot_files(target_directory)


Deleted: /workspace/nnUNet_raw/Dataset005_BraTS2023_PED/._dataset.json
Deleted: /workspace/nnUNet_raw/Dataset005_BraTS2023_PED/imagesTr/._BraTS-PED-00002-000_0000.nii.gz
Deleted: /workspace/nnUNet_raw/Dataset005_BraTS2023_PED/imagesTr/._BraTS-PED-00002-000_0001.nii.gz
Deleted: /workspace/nnUNet_raw/Dataset005_BraTS2023_PED/imagesTr/._BraTS-PED-00002-000_0002.nii.gz
Deleted: /workspace/nnUNet_raw/Dataset005_BraTS2023_PED/imagesTr/._BraTS-PED-00002-000_0003.nii.gz
Deleted: /workspace/nnUNet_raw/Dataset005_BraTS2023_PED/imagesTr/._BraTS-PED-00003-000_0000.nii.gz
Deleted: /workspace/nnUNet_raw/Dataset005_BraTS2023_PED/imagesTr/._BraTS-PED-00003-000_0001.nii.gz
Deleted: /workspace/nnUNet_raw/Dataset005_BraTS2023_PED/imagesTr/._BraTS-PED-00003-000_0002.nii.gz
Deleted: /workspace/nnUNet_raw/Dataset005_BraTS2023_PED/imagesTr/._BraTS-PED-00003-000_0003.nii.gz
Deleted: /workspace/nnUNet_raw/Dataset005_BraTS2023_PED/imagesTr/._BraTS-PED-00004-000_0000.nii.gz
Deleted: /workspace/nnUNet_raw/Dataset

In [None]:
#5 PED
# Update dataset.json
import os
import json

# === 用户需要修改的部分 ===
# 指向 nnUNet_raw 下的 Dataset 文件夹
dataset_root = "/workspace/nnUNet_raw/Dataset005_BraTS2023_PED"

# dataset.json 中的 “name”和 “description”
dataset_name = "Dataset005_BraTS2023_PED"
description = "nnU-Net dataset 6: BraTS2023_MET"

# 通道映射（必须是字符串键："0","1","2","3" → 对应模态名称）
channel_names = {
    "0": "T1C",
    "1": "T1N",
    "2": "T2F",
    "3": "T2W"
}

# 文件后缀
file_ending = ".nii.gz"

# “labels” 字段必须包含 "background": 0，且其他类别从 1 开始连续
labels_mapping = {
    "background": 0,
    "1": 1,
    "2": 2,
    "3": 3
}
# ==================================

# 拼接子文件夹路径
imagesTr_dir = os.path.join(dataset_root, "imagesTr")
labelsTr_dir = os.path.join(dataset_root, "labelsTr")
imagesTs_dir = os.path.join(dataset_root, "imagesTs")

# ------------------------------------------------------------------------------
# 1. 构造 training 列表：跳过隐藏文件，确保每个 case 的所有通道图像和对应 label 都存在
# ------------------------------------------------------------------------------
training_list = []
seen_cases = set()

for fname in os.listdir(imagesTr_dir):
    # 跳过以 '.' 开头的隐藏文件（如 .DS_Store 或 ._xxx）
    if fname.startswith("."):
        continue
    # 只匹配通道 0 文件名："<case_id>_0000.nii.gz"
    if not fname.endswith(f"_0000{file_ending}"):
        continue

    case_id = fname.replace(f"_0000{file_ending}", "")
    if case_id.startswith(".") or case_id in seen_cases:
        continue
    seen_cases.add(case_id)

    # 检查对应的 label 是否存在
    label_path = os.path.join(labelsTr_dir, case_id + file_ending)
    if not os.path.isfile(label_path):
        print(f"跳过（缺少 label 文件）：{case_id}")
        continue

    # 检查所有通道图像是否都存在
    image_files = []
    ok = True
    for ch_key in sorted(channel_names, key=lambda x: int(x)):
        img_name = f"{case_id}_{int(ch_key):04d}{file_ending}"
        img_path = os.path.join(imagesTr_dir, img_name)
        if not os.path.isfile(img_path):
            print(f"跳过（缺少通道图像）：{case_id}，缺少 {img_name}")
            ok = False
            break
        image_files.append(img_name)
    if not ok:
        continue

    training_list.append({
        "image": image_files,
        "label": case_id + file_ending
    })

# ------------------------------------------------------------------------------
# 2. 构造 test 列表（若 imagesTs 存在），同样跳过隐藏文件并确保每个 case 的所有通道都存在
# ------------------------------------------------------------------------------
test_list = []
if os.path.isdir(imagesTs_dir):
    seen_test = set()
    for fname in os.listdir(imagesTs_dir):
        if fname.startswith("."):
            continue
        if not fname.endswith(f"_0000{file_ending}"):
            continue

        case_id = fname.replace(f"_0000{file_ending}", "")
        if case_id.startswith(".") or case_id in seen_test:
            continue
        seen_test.add(case_id)

        image_files = []
        ok = True
        for ch_key in sorted(channel_names, key=lambda x: int(x)):
            img_name = f"{case_id}_{int(ch_key):04d}{file_ending}"
            img_path = os.path.join(imagesTs_dir, img_name)
            if not os.path.isfile(img_path):
                print(f"跳过测试集（缺少通道图像）：{case_id}，缺少 {img_name}")
                ok = False
                break
            image_files.append(img_name)
        if not ok:
            continue

        test_list.append({ "image": image_files })

# ------------------------------------------------------------------------------
# 3. 合成 dataset.json 内容
# ------------------------------------------------------------------------------
dataset_json = {
    "name": dataset_name,
    "description": description,
    "tensorImageSize": "4D",
    "channel_names": channel_names,
    "file_ending": file_ending,
    "labels": labels_mapping,
    "numTraining": len(training_list),
    "numTest": len(test_list),
    "training": training_list
}
if test_list:
    dataset_json["test"] = test_list

# ------------------------------------------------------------------------------
# 4. 写入到 dataset.json（覆盖原文件）
# ------------------------------------------------------------------------------
out_path = os.path.join(dataset_root, "dataset.json")
with open(out_path, "w") as f:
    json.dump(dataset_json, f, indent=4, ensure_ascii=False)

print(f"✓ 已生成并保存：{out_path}")
print(f"共计 {len(training_list)} 个训练 case，{len(test_list)} 个测试 case。")



In [2]:
#!/usr/bin/env python3
# nnunet_v2_inference_api_batch_3d_with_bootstrap_metrics.py

import os
import torch
import numpy as np
import nibabel as nib
from glob import glob
from nnunetv2.inference.predict_from_raw_data import nnUNetPredictor
from contextlib import redirect_stdout
from scipy.ndimage import distance_transform_edt, binary_erosion

# ---------- 必填路径与参数 ----------
TASK_NAME        = "Dataset005_BraTS2023_PED"       # 你的数据集文件夹名（与 nnUNet_raw/nnUNet_preprocessed 一致）
FOLD_INDEX       = 0                                # 要使用的折数：0-4
MODEL_TYPE       = "3d_fullres"                     # 这里改为 3d_fullres（也可是 3d_lowres，如果你训练了低分辨率模型）
NNUNET_RAW_ROOT  = "/workspace/nnUNet_raw"           # nnUNet_raw 根目录
NNUNET_PREP_ROOT = "/workspace/nnUNet_preprocessed"  # nnUNet_preprocessed 根目录
NNUNET_RESULTS   = "/workspace/nnUNet_results"       # nnUNet_results 根目录
# ----------------------------------------

# --- 在导入 nnUNetPredictor 之前，先设置好环境变量 ----
os.environ["nnUNet_raw"]          = NNUNET_RAW_ROOT
os.environ["nnUNet_preprocessed"] = NNUNET_PREP_ROOT
os.environ["nnUNet_results"]      = NNUNET_RESULTS
# -----------------------------------------------------

# 1) 构造关键路径
trainer_dir = os.path.join(
    NNUNET_RESULTS,
    TASK_NAME,
    f"nnUNetTrainer__nnUNetPlans__{MODEL_TYPE}"
)

# imagesTs 与 labelsTs 目录保持不变，用来放待推理的 3D NIfTI 文件和（可选）真值标签
imagesTs = os.path.join(NNUNET_RAW_ROOT, TASK_NAME, "imagesTs")
labelsTs = os.path.join(NNUNET_RAW_ROOT, TASK_NAME, "labelsTs")

# 预测输出目录：我们把预测结果放在训练结果的子目录 predictions/fold_<FOLD_INDEX> 中
out_dir  = os.path.join(trainer_dir, "predictions", f"fold_{FOLD_INDEX}")
os.makedirs(out_dir, exist_ok=True)

# 2) 构建 3D 模型的 nnUNetPredictor 并加载已训练好的权重
predictor = nnUNetPredictor(
    tile_step_size=0.5,
    use_gaussian=True,
    use_mirroring=True,
    perform_everything_on_device=True,
    device=torch.device("cuda", 0),  # 指定 GPU 0；如需多卡，可设成 device_ids 列表
    verbose=False,               # 关闭 nnUNet 自带的详细日志
    verbose_preprocessing=False, # 预处理阶段也不打印日志
)

# 初始化 predictor，并加载 checkpoint（这里使用 "checkpoint_best.pth" 或者 "checkpoint_final.pth"）
predictor.initialize_from_trained_model_folder(
    trainer_dir,
    use_folds=(FOLD_INDEX,),
    checkpoint_name="checkpoint_best.pth"  # 或者 "checkpoint_final.pth"
)

# 3) 批量推理 imagesTs 下所有 .nii.gz 文件，并屏蔽 predictor 内部 print
print(f"\n>>> 开始批量推理（共 {len(os.listdir(imagesTs))} 个病例）…")
with open(os.devnull, "w") as devnull:
    # redirect_stdout 会把 predictor 内部所有 print 输出重定向到 /dev/null
    with redirect_stdout(devnull):
        predictor.predict_from_files(
            imagesTs,            # 传入整个 imagesTs 文件夹，nnU-Net 会自动遍历所有 .nii.gz
            out_dir,
            save_probabilities=False,  # 不保存概率图，若想保存请设为 True
            overwrite=True             # 若已经存在同名预测，则覆盖
        )
print(">>> 批量推理完成\n")

# 4) 如果存在 labelsTs，就做指标计算并使用 1000 次自助采样（bootstrap）
if os.path.isdir(labelsTs):
    gt_paths   = sorted(glob(os.path.join(labelsTs, "*.nii.gz")))
    pred_paths = sorted(glob(os.path.join(out_dir,    "*.nii.gz")))

    n_cases = len(gt_paths)
    if n_cases == 0:
        print("没有找到任何 .nii.gz 文件，跳过定量评估")
        exit(0)

    # 每个病例的平均指标列表
    dice_all_cases = []
    hd95_all_cases = []
    sens_all_cases = []
    spec_all_cases = []

    # 先逐病例计算平均指标
    for gt_path, pred_path in zip(gt_paths, pred_paths):
        gt_img = nib.load(gt_path).get_fdata().astype(np.int32)
        pr_img = nib.load(pred_path).get_fdata().astype(np.int32)

        classes = np.unique(gt_img)
        classes = classes[classes != 0]

        dice_per_class = []
        hd95_per_class = []
        sens_per_class = []
        spec_per_class = []

        total_voxels = gt_img.size

        for cls in classes:
            gt_mask = (gt_img == cls)
            pr_mask = (pr_img == cls)

            # 如果 GT 与预测都为空，则跳过
            if gt_mask.sum() == 0 and pr_mask.sum() == 0:
                continue

            # 计算 Dice
            inter = np.logical_and(gt_mask, pr_mask).sum()
            union = gt_mask.sum() + pr_mask.sum()
            dice = 2 * inter / (union + 1e-6)
            dice_per_class.append(dice)

            # 计算 Sensitivity（召回率）： TP / (TP + FN)
            TP = inter
            FN = gt_mask.sum() - TP
            sensitivity = TP / (TP + FN + 1e-6)
            sens_per_class.append(sensitivity)

            # 计算 Specificity： TN / (TN + FP)
            FP = pr_mask.sum() - TP
            TN = total_voxels - (TP + FP + FN)
            specificity = TN / (TN + FP + 1e-6)
            spec_per_class.append(specificity)

            # 计算 HD95（95th percentile Hausdorff Distance）
            if gt_mask.sum() > 0 and pr_mask.sum() > 0:
                gt_eroded = binary_erosion(gt_mask)
                gt_surface = gt_mask & (~gt_eroded)
                pr_eroded = binary_erosion(pr_mask)
                pr_surface = pr_mask & (~pr_eroded)

                dt_gt = distance_transform_edt(~gt_mask)
                dt_pr = distance_transform_edt(~pr_mask)

                dist_gt_to_pr = dt_pr[gt_surface]
                dist_pr_to_gt = dt_gt[pr_surface]

                all_surface_distances = np.concatenate((dist_gt_to_pr, dist_pr_to_gt))
                if all_surface_distances.size > 0:
                    hd95 = np.percentile(all_surface_distances, 95)
                else:
                    hd95 = 0.0
            else:
                hd95 = 0.0

            hd95_per_class.append(hd95)

        # 如果某个例子没有前景类别，则指标设为 0
        if len(dice_per_class) == 0:
            avg_dice = 0.0
            avg_hd95 = 0.0
            avg_sens = 0.0
            avg_spec = 0.0
        else:
            avg_dice = np.mean(dice_per_class)
            avg_hd95 = np.mean(hd95_per_class) if len(hd95_per_class) > 0 else 0.0
            avg_sens = np.mean(sens_per_class)
            avg_spec = np.mean(spec_per_class)

        dice_all_cases.append(avg_dice)
        hd95_all_cases.append(avg_hd95)
        sens_all_cases.append(avg_sens)
        spec_all_cases.append(avg_spec)

    dice_all_cases = np.array(dice_all_cases)
    hd95_all_cases = np.array(hd95_all_cases)
    sens_all_cases = np.array(sens_all_cases)
    spec_all_cases = np.array(spec_all_cases)

    # 4a) 原始平均 ± STD（基于样本直接计算）
    mean_dice_raw = np.mean(dice_all_cases)
    std_dice_raw  = np.std(dice_all_cases)
    mean_hd95_raw = np.mean(hd95_all_cases)
    std_hd95_raw  = np.std(hd95_all_cases)
    mean_sens_raw = np.mean(sens_all_cases)
    std_sens_raw  = np.std(sens_all_cases)
    mean_spec_raw = np.mean(spec_all_cases)
    std_spec_raw  = np.std(spec_all_cases)

    print(f"Fold {FOLD_INDEX} 原始指标（样本直接计算）：")
    print(f"  Dice:        {mean_dice_raw:.4f} ± {std_dice_raw:.4f}")
    print(f"  HD95:        {mean_hd95_raw:.4f} ± {std_hd95_raw:.4f}")
    print(f"  Sensitivity: {mean_sens_raw:.4f} ± {std_sens_raw:.4f}")
    print(f"  Specificity: {mean_spec_raw:.4f} ± {std_spec_raw:.4f}\n")

    # 4b) 使用 1000 次 Bootstrapping 计算指标的分布
    n_bootstrap = 1000
    bootstrap_means = {
        "dice": np.zeros(n_bootstrap, dtype=np.float32),
        "hd95": np.zeros(n_bootstrap, dtype=np.float32),
        "sens": np.zeros(n_bootstrap, dtype=np.float32),
        "spec": np.zeros(n_bootstrap, dtype=np.float32),
    }

    rng = np.random.default_rng(seed=42)
    for i in range(n_bootstrap):
        # 从 0..n_cases-1 中有放回地抽样
        idxs = rng.integers(low=0, high=n_cases, size=n_cases)
        bootstrap_means["dice"][i] = np.mean(dice_all_cases[idxs])
        bootstrap_means["hd95"][i] = np.mean(hd95_all_cases[idxs])
        bootstrap_means["sens"][i] = np.mean(sens_all_cases[idxs])
        bootstrap_means["spec"][i] = np.mean(spec_all_cases[idxs])

    # 计算 Bootstrap 平均值和标准差
    mean_dice_bs = np.mean(bootstrap_means["dice"])
    std_dice_bs  = np.std(bootstrap_means["dice"])
    mean_hd95_bs = np.mean(bootstrap_means["hd95"])
    std_hd95_bs  = np.std(bootstrap_means["hd95"])
    mean_sens_bs = np.mean(bootstrap_means["sens"])
    std_sens_bs  = np.std(bootstrap_means["sens"])
    mean_spec_bs = np.mean(bootstrap_means["spec"])
    std_spec_bs  = np.std(bootstrap_means["spec"])

    print(f"Fold {FOLD_INDEX} Bootstrap 指标（1000 次抽样后的均值 ± STD）：")
    print(f"  Dice:        {mean_dice_bs:.4f} ± {std_dice_bs:.4f}")
    print(f"  HD95:        {mean_hd95_bs:.4f} ± {std_hd95_bs:.4f}")
    print(f"  Sensitivity: {mean_sens_bs:.4f} ± {std_sens_bs:.4f}")
    print(f"  Specificity: {mean_spec_bs:.4f} ± {std_spec_bs:.4f}\n")

else:
    print("labelsTs 不存在，跳过定量评估")


nnUNet_raw is not defined and nnU-Net can only be used on data for which preprocessed files are already present on your system. nnU-Net cannot be used for experiment planning and preprocessing like this. If this is not intended, please read documentation/setting_up_paths.md for information on how to set this up properly.
nnUNet_preprocessed is not defined and nnU-Net can not be used for preprocessing or training. If this is not intended, please read documentation/setting_up_paths.md for information on how to set this up.
nnUNet_results is not defined and nnU-Net cannot be used for training or inference. If this is not intended behavior, please read documentation/setting_up_paths.md for information on how to set this up.

>>> 开始批量推理（共 76 个病例）…


100%|██████████| 8/8 [00:02<00:00,  2.99it/s]
100%|██████████| 8/8 [00:01<00:00,  5.64it/s]
100%|██████████| 8/8 [00:01<00:00,  5.64it/s]
100%|██████████| 8/8 [00:01<00:00,  5.64it/s]
100%|██████████| 4/4 [00:00<00:00,  5.99it/s]
100%|██████████| 8/8 [00:01<00:00,  5.64it/s]
100%|██████████| 8/8 [00:01<00:00,  5.64it/s]
100%|██████████| 8/8 [00:01<00:00,  5.64it/s]
100%|██████████| 8/8 [00:01<00:00,  5.64it/s]
100%|██████████| 8/8 [00:01<00:00,  5.64it/s]
100%|██████████| 8/8 [00:01<00:00,  5.64it/s]
100%|██████████| 12/12 [00:02<00:00,  5.53it/s]
100%|██████████| 8/8 [00:01<00:00,  5.64it/s]
100%|██████████| 8/8 [00:01<00:00,  5.64it/s]
100%|██████████| 8/8 [00:01<00:00,  5.64it/s]
100%|██████████| 8/8 [00:01<00:00,  5.64it/s]
100%|██████████| 8/8 [00:01<00:00,  5.64it/s]
100%|██████████| 8/8 [00:01<00:00,  5.64it/s]
100%|██████████| 8/8 [00:01<00:00,  5.64it/s]


>>> 批量推理完成

Fold 0 原始指标（样本直接计算）：
  Dice:        0.5939 ± 0.1926
  HD95:        11.3898 ± 12.8291
  Sensitivity: 0.6263 ± 0.1966
  Specificity: 0.9996 ± 0.0004

Fold 0 Bootstrap 指标（1000 次抽样后的均值 ± STD）：
  Dice:        0.5936 ± 0.0435
  HD95:        11.4318 ± 3.0490
  Sensitivity: 0.6256 ± 0.0452
  Specificity: 0.9996 ± 0.0001



In [None]:
#!/usr/bin/env python3
# nnunet_v2_inference_api_batch_3d_with_bootstrap_metrics.py

import os
import torch
import numpy as np
import nibabel as nib
from glob import glob
from nnunetv2.inference.predict_from_raw_data import nnUNetPredictor
from contextlib import redirect_stdout
from scipy.ndimage import distance_transform_edt, binary_erosion

# ---------- 必填路径与参数 ----------
TASK_NAME        = "Dataset005_BraTS2023_PED"       # 你的数据集文件夹名（与 nnUNet_raw/nnUNet_preprocessed 一致）
FOLD_INDEX       = 0                                # 要使用的折数：0-4
MODEL_TYPE       = "2d"                     # 这里改为 3d_fullres（也可是 3d_lowres，如果你训练了低分辨率模型）
NNUNET_RAW_ROOT  = "/workspace/nnUNet_raw"           # nnUNet_raw 根目录
NNUNET_PREP_ROOT = "/workspace/nnUNet_preprocessed"  # nnUNet_preprocessed 根目录
NNUNET_RESULTS   = "/workspace/nnUNet_results"       # nnUNet_results 根目录
# ----------------------------------------

# --- 在导入 nnUNetPredictor 之前，先设置好环境变量 ----
os.environ["nnUNet_raw"]          = NNUNET_RAW_ROOT
os.environ["nnUNet_preprocessed"] = NNUNET_PREP_ROOT
os.environ["nnUNet_results"]      = NNUNET_RESULTS
# -----------------------------------------------------

# 1) 构造关键路径
trainer_dir = os.path.join(
    NNUNET_RESULTS,
    TASK_NAME,
    f"nnUNetTrainer__nnUNetPlans__{MODEL_TYPE}"
)

# imagesTs 与 labelsTs 目录保持不变，用来放待推理的 3D NIfTI 文件和（可选）真值标签
imagesTs = os.path.join(NNUNET_RAW_ROOT, TASK_NAME, "imagesTs")
labelsTs = os.path.join(NNUNET_RAW_ROOT, TASK_NAME, "labelsTs")

# 预测输出目录：我们把预测结果放在训练结果的子目录 predictions/fold_<FOLD_INDEX> 中
out_dir  = os.path.join(trainer_dir, "predictions", f"fold_{FOLD_INDEX}")
os.makedirs(out_dir, exist_ok=True)

# 2) 构建 3D 模型的 nnUNetPredictor 并加载已训练好的权重
predictor = nnUNetPredictor(
    tile_step_size=0.5,
    use_gaussian=True,
    use_mirroring=True,
    perform_everything_on_device=True,
    device=torch.device("cuda", 0),  # 指定 GPU 0；如需多卡，可设成 device_ids 列表
    verbose=False,               # 关闭 nnUNet 自带的详细日志
    verbose_preprocessing=False, # 预处理阶段也不打印日志
)

# 初始化 predictor，并加载 checkpoint（这里使用 "checkpoint_best.pth" 或者 "checkpoint_final.pth"）
predictor.initialize_from_trained_model_folder(
    trainer_dir,
    use_folds=(FOLD_INDEX,),
    checkpoint_name="checkpoint_best.pth"  # 或者 "checkpoint_final.pth"
)

# 3) 批量推理 imagesTs 下所有 .nii.gz 文件，并屏蔽 predictor 内部 print
print(f"\n>>> 开始批量推理（共 {len(os.listdir(imagesTs))} 个病例）…")
with open(os.devnull, "w") as devnull:
    # redirect_stdout 会把 predictor 内部所有 print 输出重定向到 /dev/null
    with redirect_stdout(devnull):
        predictor.predict_from_files(
            imagesTs,            # 传入整个 imagesTs 文件夹，nnU-Net 会自动遍历所有 .nii.gz
            out_dir,
            save_probabilities=False,  # 不保存概率图，若想保存请设为 True
            overwrite=True             # 若已经存在同名预测，则覆盖
        )
print(">>> 批量推理完成\n")

# 4) 如果存在 labelsTs，就做指标计算并使用 1000 次自助采样（bootstrap）
if os.path.isdir(labelsTs):
    gt_paths   = sorted(glob(os.path.join(labelsTs, "*.nii.gz")))
    pred_paths = sorted(glob(os.path.join(out_dir,    "*.nii.gz")))

    n_cases = len(gt_paths)
    if n_cases == 0:
        print("没有找到任何 .nii.gz 文件，跳过定量评估")
        exit(0)

    # 每个病例的平均指标列表
    dice_all_cases = []
    hd95_all_cases = []
    sens_all_cases = []
    spec_all_cases = []

    # 先逐病例计算平均指标
    for gt_path, pred_path in zip(gt_paths, pred_paths):
        gt_img = nib.load(gt_path).get_fdata().astype(np.int32)
        pr_img = nib.load(pred_path).get_fdata().astype(np.int32)

        classes = np.unique(gt_img)
        classes = classes[classes != 0]

        dice_per_class = []
        hd95_per_class = []
        sens_per_class = []
        spec_per_class = []

        total_voxels = gt_img.size

        for cls in classes:
            gt_mask = (gt_img == cls)
            pr_mask = (pr_img == cls)

            # 如果 GT 与预测都为空，则跳过
            if gt_mask.sum() == 0 and pr_mask.sum() == 0:
                continue

            # 计算 Dice
            inter = np.logical_and(gt_mask, pr_mask).sum()
            union = gt_mask.sum() + pr_mask.sum()
            dice = 2 * inter / (union + 1e-6)
            dice_per_class.append(dice)

            # 计算 Sensitivity（召回率）： TP / (TP + FN)
            TP = inter
            FN = gt_mask.sum() - TP
            sensitivity = TP / (TP + FN + 1e-6)
            sens_per_class.append(sensitivity)

            # 计算 Specificity： TN / (TN + FP)
            FP = pr_mask.sum() - TP
            TN = total_voxels - (TP + FP + FN)
            specificity = TN / (TN + FP + 1e-6)
            spec_per_class.append(specificity)

            # 计算 HD95（95th percentile Hausdorff Distance）
            if gt_mask.sum() > 0 and pr_mask.sum() > 0:
                gt_eroded = binary_erosion(gt_mask)
                gt_surface = gt_mask & (~gt_eroded)
                pr_eroded = binary_erosion(pr_mask)
                pr_surface = pr_mask & (~pr_eroded)

                dt_gt = distance_transform_edt(~gt_mask)
                dt_pr = distance_transform_edt(~pr_mask)

                dist_gt_to_pr = dt_pr[gt_surface]
                dist_pr_to_gt = dt_gt[pr_surface]

                all_surface_distances = np.concatenate((dist_gt_to_pr, dist_pr_to_gt))
                if all_surface_distances.size > 0:
                    hd95 = np.percentile(all_surface_distances, 95)
                else:
                    hd95 = 0.0
            else:
                hd95 = 0.0

            hd95_per_class.append(hd95)

        # 如果某个例子没有前景类别，则指标设为 0
        if len(dice_per_class) == 0:
            avg_dice = 0.0
            avg_hd95 = 0.0
            avg_sens = 0.0
            avg_spec = 0.0
        else:
            avg_dice = np.mean(dice_per_class)
            avg_hd95 = np.mean(hd95_per_class) if len(hd95_per_class) > 0 else 0.0
            avg_sens = np.mean(sens_per_class)
            avg_spec = np.mean(spec_per_class)

        dice_all_cases.append(avg_dice)
        hd95_all_cases.append(avg_hd95)
        sens_all_cases.append(avg_sens)
        spec_all_cases.append(avg_spec)

    dice_all_cases = np.array(dice_all_cases)
    hd95_all_cases = np.array(hd95_all_cases)
    sens_all_cases = np.array(sens_all_cases)
    spec_all_cases = np.array(spec_all_cases)

    # 4a) 原始平均 ± STD（基于样本直接计算）
    mean_dice_raw = np.mean(dice_all_cases)
    std_dice_raw  = np.std(dice_all_cases)
    mean_hd95_raw = np.mean(hd95_all_cases)
    std_hd95_raw  = np.std(hd95_all_cases)
    mean_sens_raw = np.mean(sens_all_cases)
    std_sens_raw  = np.std(sens_all_cases)
    mean_spec_raw = np.mean(spec_all_cases)
    std_spec_raw  = np.std(spec_all_cases)

    print(f"Fold {FOLD_INDEX} 原始指标（样本直接计算）：")
    print(f"  Dice:        {mean_dice_raw:.4f} ± {std_dice_raw:.4f}")
    print(f"  HD95:        {mean_hd95_raw:.4f} ± {std_hd95_raw:.4f}")
    print(f"  Sensitivity: {mean_sens_raw:.4f} ± {std_sens_raw:.4f}")
    print(f"  Specificity: {mean_spec_raw:.4f} ± {std_spec_raw:.4f}\n")

    # 4b) 使用 1000 次 Bootstrapping 计算指标的分布
    n_bootstrap = 1000
    bootstrap_means = {
        "dice": np.zeros(n_bootstrap, dtype=np.float32),
        "hd95": np.zeros(n_bootstrap, dtype=np.float32),
        "sens": np.zeros(n_bootstrap, dtype=np.float32),
        "spec": np.zeros(n_bootstrap, dtype=np.float32),
    }

    rng = np.random.default_rng(seed=42)
    for i in range(n_bootstrap):
        # 从 0..n_cases-1 中有放回地抽样
        idxs = rng.integers(low=0, high=n_cases, size=n_cases)
        bootstrap_means["dice"][i] = np.mean(dice_all_cases[idxs])
        bootstrap_means["hd95"][i] = np.mean(hd95_all_cases[idxs])
        bootstrap_means["sens"][i] = np.mean(sens_all_cases[idxs])
        bootstrap_means["spec"][i] = np.mean(spec_all_cases[idxs])

    # 计算 Bootstrap 平均值和标准差
    mean_dice_bs = np.mean(bootstrap_means["dice"])
    std_dice_bs  = np.std(bootstrap_means["dice"])
    mean_hd95_bs = np.mean(bootstrap_means["hd95"])
    std_hd95_bs  = np.std(bootstrap_means["hd95"])
    mean_sens_bs = np.mean(bootstrap_means["sens"])
    std_sens_bs  = np.std(bootstrap_means["sens"])
    mean_spec_bs = np.mean(bootstrap_means["spec"])
    std_spec_bs  = np.std(bootstrap_means["spec"])

    print(f"Fold {FOLD_INDEX} Bootstrap 指标（1000 次抽样后的均值 ± STD）：")
    print(f"  Dice:        {mean_dice_bs:.4f} ± {std_dice_bs:.4f}")
    print(f"  HD95:        {mean_hd95_bs:.4f} ± {std_hd95_bs:.4f}")
    print(f"  Sensitivity: {mean_sens_bs:.4f} ± {std_sens_bs:.4f}")
    print(f"  Specificity: {mean_spec_bs:.4f} ± {std_spec_bs:.4f}\n")

else:
    print("labelsTs 不存在，跳过定量评估")
