# Processing 3D lesion data into the nnUNet dataset format
Reference for the expected folder layout and file naming: https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/dataset_format.md

In [None]:
import os
from pathlib import Path
import nibabel as nib
import shutil
import glob
from tqdm import tqdm
import SimpleITK as sitk


# Root of the nnUNet raw dataset that every cell below writes into.
data_folder = Path("/media/liushifeng/KINGSTON/nnUNet_raw/Dataset001_3dlesion")
# Training images and their segmentation labels live in sibling sub-folders of the dataset root.
train_images, train_labels = (data_folder.joinpath(sub) for sub in ("imagesTr", "labelsTr"))

In [None]:
# ULS / Deeplesion 3D dataset
# Copies every NIfTI image and label into the nnUNet training folders under a
# dataset prefix, then drops images that ended up without a label.
uls_folder = Path("/media/liushifeng/KINGSTON/ULS Jan 2025/ULS23/novel_data/ULS23_DeepLesion3D/")
image_folder = uls_folder / "images"
label_folder = uls_folder / "labels"

file_type = ".nii.gz"
prefix = "ULSDL3D"

# check image shapes are 3D + channel
# Only NIfTI files are inspected, so stray directory entries (hidden files,
# notes, ...) are never handed to nib.load.
for i in os.listdir(image_folder):
    if i.endswith(file_type) and len(nib.load(image_folder / i).shape) != 4:
        print(i, "need 4 dims")

# copy images, then labels, over and add prefix
for source_folder, target_folder in ((image_folder, train_images), (label_folder, train_labels)):
    for f in os.listdir(source_folder):
        if f.endswith(file_type):
            shutil.copy2(source_folder / f, target_folder / f"{prefix}_{f}")

# remove images that do not have labels
# Image names are compared without the `_0000` channel suffix, so re-running
# this cell after the rename cell does not delete already-renamed images.
# NOTE(review): assumes no case name itself ends in `_0000` - confirm.
label_files = os.listdir(train_labels)
label_names = set(label_files)  # set membership instead of a list scan per image
channel_suffix = "_0000" + file_type
to_remove = [
    x for x in os.listdir(train_images)
    if x.replace(channel_suffix, file_type) not in label_names
]
print("removing:", to_remove)
for p in to_remove:
    os.remove(train_images / p)

In [None]:
# internal AutoPET dataset
# use lymphoma and melanoma dataset Bs as training data


def _copy_ct_images(source_folder, prefix, destination=train_images):
    """Copy every ``CT.nii.gz`` found below *source_folder* into *destination*.

    Each copy is named ``{prefix}_{case}_CT.nii.gz``, where ``case`` is the
    stem of the directory two levels above the CT file.
    """
    for ct_path in tqdm(glob.glob(str(source_folder) + "/**/CT.nii.gz", recursive=True)):
        ct_path = Path(ct_path)
        name = ct_path.parents[1].stem
        shutil.copy2(ct_path, destination / f"{prefix}_{name}_{ct_path.name}")


def _convert_seg_labels(source_folder, prefix, destination=train_labels):
    """Convert every ``.nrrd`` file in *source_folder* to a NIfTI label in *destination*.

    The output is named ``{prefix}_PETCT_{case}_CT.nii.gz``, the same pattern
    the CT copies use when their case folder is called ``PETCT_{case}``.
    """
    for f in tqdm(os.listdir(source_folder)):
        if f.endswith(".nrrd"):
            # Drops the first four characters of the stem and everything after
            # the first remaining dot.
            # NOTE(review): presumably strips a `SEG_` prefix - confirm against the files.
            name = Path(f).stem[4:].split(".")[0]
            output_name = f"{prefix}_PETCT_{name}_CT.nii.gz"
            # str() keeps this working on SimpleITK versions whose
            # ReadImage/WriteImage do not accept pathlib.Path.
            sitk.WriteImage(sitk.ReadImage(str(source_folder / f)), str(destination / output_name))


autopet_root = Path("/home/liushifeng/Desktop/AutoPET dataset")
# (prefix, folder with CT scans, folder with segmentations), processed in order.
autopet_train_sets = (
    ("AutoPET-Lymphoma-B", "dataset B_lymphoma", "SEG_Lymphoma_B_Rifki-selected"),
    ("AutoPET-Melanoma-B", "dataset B_melanoma", "SEG_Melanoma_B_Rifki"),
)
for prefix, ct_dir, seg_dir in autopet_train_sets:
    _copy_ct_images(autopet_root / ct_dir, prefix)
    _convert_seg_labels(autopet_root / seg_dir, prefix)

# # use lymphoma and melanoma dataset Cs as test data
# # (TEST_FOLDER_PATH has to be defined before enabling this)
# autopet_test_sets = (
#     ("AutoPET-Lymphoma-C", "dataset C_lymphoma"),
#     ("AutoPET-Melanoma-C", "dataset C_melanoma"),
# )
# for prefix, ct_dir in autopet_test_sets:
#     _copy_ct_images(autopet_root / ct_dir, prefix, TEST_FOLDER_PATH)

In [None]:
# Cross-check the training folders: list the images without a label and the
# labels without an image (both lists keep the os.listdir order).
image_files = os.listdir(train_images)
label_files = os.listdir(train_labels)

# Images may already carry the `_0000` channel suffix that labels never have.
# Comparing suffix-free names keeps this cell correct when it is re-run after
# the rename cell; comparing raw names would mark the whole dataset as unmatched.
# NOTE(review): assumes no case name itself ends in `_0000` - confirm.
channel_suffix = "_0000.nii.gz"
image_cases = {x.replace(channel_suffix, ".nii.gz") for x in image_files}
label_cases = set(label_files)  # set membership instead of a list scan per file

images_with_no_labels = [
    x for x in image_files
    if x.replace(channel_suffix, ".nii.gz") not in label_cases
]
labels_with_no_images = [x for x in label_files if x not in image_cases]

In [None]:
# Delete the unmatched files collected above: first the images that have no
# label, then the labels that have no image.
print("removing:", images_with_no_labels, labels_with_no_images)
for directory, orphans in (
    (train_images, images_with_no_labels),
    (train_labels, labels_with_no_images),
):
    for p in orphans:
        (directory / p).unlink()

In [None]:
# Add _0000 to all training images to indicate channel
# Files that already carry the suffix are skipped, so re-running the cell does
# not produce `_0000_0000`. Only the trailing extension is rewritten, and
# entries that are not `.nii.gz` files are left alone.
extension = ".nii.gz"
channel_suffix = "_0000" + extension
for f in os.listdir(train_images):
    if not f.endswith(extension) or f.endswith(channel_suffix):
        continue
    os.rename(train_images / f, train_images / (f[: -len(extension)] + channel_suffix))

In [None]:
# Resample every label onto the grid of its image and overwrite the label file
# in place. The matching image is the label's file name with `_0000` inserted
# before the extension.
for f in tqdm(os.listdir(train_labels)):
    label_path = train_labels / f
    img_path = train_images / f.replace('.nii.gz', '_0000.nii.gz')

    img = sitk.ReadImage(img_path)
    seg = sitk.ReadImage(label_path)

    # Default-constructed transform with nearest-neighbour interpolation, so no
    # new label values are interpolated; 0.0 is the default pixel value.
    # NOTE(review): the output pixel type is taken from the image, not from the
    # segmentation - confirm that is intended for label files.
    no_op_transform = sitk.Transform()
    seg_img = sitk.Resample(
        seg,
        img,
        no_op_transform,
        sitk.sitkNearestNeighbor,
        0.0,
        img.GetPixelID(),
    )

    print(f"resampled {f}")
    sitk.WriteImage(seg_img, label_path)

## Visualize data