# In [ ]:
import pydicom
import SimpleITK as sitk
import numpy as np
import os
import json
from skimage.draw import polygon

# Path configuration.
# The paths are assembled with os.path.join instead of hard-coded backslashes:
# sequences such as "\d", "\m" and "\o" are invalid string escapes (Python
# warns about them) and a backslash is not a path separator outside Windows.
dicom_dir = os.path.join("data", "dcm")
rtss_path = os.path.join(
    "data",
    "meta",
    "RS1.3.6.1.4.1.2452.6.2667341226.1244394865.4006930326.1855796959.dcm",
)
output_dir = os.path.join("data", "output")
# Create the image and label folders up front; existing folders are kept.
os.makedirs(os.path.join(output_dir, "imagesTr"), exist_ok=True)
os.makedirs(os.path.join(output_dir, "labelsTr"), exist_ok=True)

# Read the DICOM series and convert it to NIfTI.
# The file list comes from GDCM, which orders the slices of a series for
# reading. Sorting by file name is not reliable: "10.dcm" sorts before
# "2.dcm", which would scramble the volume.
# NOTE(review): with no series ID given, the first series found in the folder
# is used - confirm the folder holds a single series.
dicom_files = list(sitk.ImageSeriesReader.GetGDCMSeriesFileNames(dicom_dir))
if not dicom_files:
    # Fail early with a clear message instead of an opaque reader error.
    raise FileNotFoundError("No DICOM series found in {!r}".format(dicom_dir))
reader = sitk.ImageSeriesReader()
reader.SetFileNames(dicom_files)
image = reader.Execute()
sitk.WriteImage(image, os.path.join(output_dir, "imagesTr", "patient001_0000.nii.gz"))

# Load the RT Structure Set and map each ROI number to its ROI name
rtss = pydicom.dcmread(rtss_path)
roi_sequence = rtss.StructureSetROISequence
roi_names = dict(
    (roi.ROINumber, roi.ROIName)
    for roi in roi_sequence
)

# Build a single mask volume that contains every ROI
def rtstruct_to_combined_mask(rtss, image, roi_names):
    """Rasterize all ROIs of an RT Structure Set into one label volume.

    Each ROI in ``roi_names`` receives the integer label equal to its 1-based
    position in the mapping (0 is background). Every contour is converted
    from physical coordinates to voxel indices and filled as a polygon on the
    slice given by its first point. Where filled regions overlap, the one
    written last wins.

    Args:
        rtss: RT Structure Set dataset exposing ``ROIContourSequence``.
        image: Reference image; its voxel array defines the mask shape and
            its ``TransformPhysicalPointToIndex`` maps contour points to
            voxel indices.
        roi_names: Mapping of ROI number to ROI name. The iteration order
            determines the label values.

    Returns:
        Array with the same shape as the image array (z, y, x). The dtype is
        ``uint8``, widened to ``uint16`` when there are more than 255 ROIs so
        label values cannot overflow.
    """
    # Mask has the same size as the image array
    image_array = sitk.GetArrayFromImage(image)  # z, y, x
    label_dtype = np.uint8 if len(roi_names) <= 255 else np.uint16
    combined_mask = np.zeros(image_array.shape, dtype=label_dtype)
    slice_count = combined_mask.shape[0]
    slice_shape = combined_mask.shape[1:]

    # Index the ROI contours once; the first entry per ROI number is kept
    contours_by_roi = {}
    for roi_contour in rtss.ROIContourSequence:
        contours_by_roi.setdefault(int(roi_contour.ReferencedROINumber), roi_contour)

    # Background is 0, ROIs are numbered from 1 in mapping order
    for label_value, roi_number in enumerate(roi_names, 1):
        roi_contour = contours_by_roi.get(int(roi_number))
        if roi_contour is None:
            continue

        # An ROI without drawn contours may carry no ContourSequence at all
        for contour in getattr(roi_contour, "ContourSequence", []):
            points = np.array(contour.ContourData, dtype=float).reshape(-1, 3)
            if points.size == 0:
                continue

            # Convert physical coordinates to voxel indices (x, y, z)
            indices = [image.TransformPhysicalPointToIndex(p.tolist()) for p in points]
            x_coords = [idx[0] for idx in indices]
            y_coords = [idx[1] for idx in indices]
            z_slice = indices[0][2]  # assumes the whole contour lies on one slice

            # Only the slice index needs an explicit check (a negative value
            # would silently index from the end). In-plane overshoot is
            # clipped by polygon() through its shape argument, so contours
            # that partly leave the image are still drawn.
            if not 0 <= z_slice < slice_count:
                continue

            # Fill the polygon area on its slice
            rr, cc = polygon(y_coords, x_coords, shape=slice_shape)
            combined_mask[z_slice, rr, cc] = label_value

    return combined_mask

# Build the combined mask and store it as the label volume
combined_mask = rtstruct_to_combined_mask(rtss, image, roi_names)
mask_img = sitk.GetImageFromArray(combined_mask)
# Take over the image's spatial information so mask and image stay aligned
mask_img.CopyInformation(image)
sitk.WriteImage(
    mask_img,
    os.path.join(output_dir, "labelsTr", "patient001.nii.gz"),
)

# Build the label mapping: background is 0, ROIs are numbered from 1 in the
# iteration order of roi_names.
# NOTE(review): ROIs sharing a name (or named "background") collapse into one
# entry here - confirm ROI names are unique.
label_mapping = {"background": 0}
for i, roi_name in enumerate(roi_names.values(), 1):
    label_mapping[roi_name] = i

# Count the label volumes on disk so numTraining matches the dataset content
file_ending = ".nii.gz"
num_training = sum(
    1
    for name in os.listdir(os.path.join(output_dir, "labelsTr"))
    if name.endswith(file_ending)
)

# Write dataset.json
dataset_json = {
    "channel_names": {"0": "CT"},  # adjust to the imaging modality
    "labels": label_mapping,
    "numTraining": num_training,  # required field of the nnU-Net v2 dataset format
    "file_ending": file_ending,
    "overwrite_image_reader_writer": "NibabelIO"
}
# json.dump keeps its default ASCII escaping, so the file content does not
# depend on the encoding of whoever reads it; the explicit encoding only
# makes the write itself platform independent.
with open(os.path.join(output_dir, "dataset.json"), "w", encoding="utf-8") as f:
    json.dump(dataset_json, f, indent=4)

print("数据集生成完成！")