In [1]:
import os

# Root folders: where the input data is read from and where padded output goes.
DATA_PATH = "Resampling_Data"
OUTPUT_PATH = "Padding_Data"

# Per-split input folders, derived from the input root.
TRAIN_DATA_PATH, VAL_DATA_PATH = (
    os.path.join(DATA_PATH, split) for split in ("train", "val")
)

# Create the output root and one sub-folder per split (no-op if they already exist).
os.makedirs(OUTPUT_PATH, exist_ok=True)
os.makedirs(os.path.join(OUTPUT_PATH, "train"), exist_ok=True)
os.makedirs(os.path.join(OUTPUT_PATH, "val"), exist_ok=True)

In [34]:
import numpy as np
import nibabel as nib
import json

def pad_to_multiple(array, multiple=32, mode='constant', value=0):
    """
    Pad `array` so that the size of every axis is a multiple of `multiple`.

    The padding of each axis is split as evenly as possible between both
    sides; when the amount is odd, the extra element goes after the data.
    Note that ALL axes are padded, not only spatial ones.

    Parameters
    ----------
    array : np.ndarray
        Array to pad.
    multiple : int
        Positive integer every axis length is rounded up to a multiple of.
    mode : str or callable
        Padding mode forwarded to `np.pad`.
    value : scalar
        Fill value, used for `mode='constant'`.

    Returns
    -------
    padded : np.ndarray
        The padded array.
    pad_width : list of (pad_before, pad_after)
        One tuple per axis, in axis order.

    Raises
    ------
    ValueError
        If `multiple` is smaller than 1.
    """
    if multiple < 1:
        raise ValueError(f"multiple must be a positive integer, got {multiple!r}")

    pad_width = []
    for dim in array.shape:
        # Distance from `dim` up to the next multiple (0 if already aligned).
        total_pad = -dim % multiple
        pad_before = total_pad // 2
        pad_after = total_pad - pad_before
        pad_width.append((pad_before, pad_after))

    # np.pad rejects `constant_values` for every built-in mode except
    # 'constant', so only forward it where it is accepted. Callable modes
    # still receive it as a keyword argument, as before.
    if isinstance(mode, str) and mode != 'constant':
        extra_kwargs = {}
    else:
        extra_kwargs = {'constant_values': value}

    padded = np.pad(array, pad_width, mode=mode, **extra_kwargs)
    return padded, pad_width

def update_metadata_with_padding(metadata, pad_width):
    """
    Store `pad_width` under the 'padding' key of `metadata`.

    The dictionary is modified in place (an existing 'padding' entry is
    overwritten) and the same object is returned for convenience.
    """
    metadata.update(padding=pad_width)
    return metadata

def process_and_pad(dir_name, image_path, mask_path, metadata_path, out_path, padding_multiple=32):
    """
    Pad one image/mask pair and write it, with updated metadata, to `out_path`.

    Parameters
    ----------
    dir_name : str
        Name of the case directory. Not used by this function; kept so that
        existing callers keep working.
    image_path, mask_path : str
        Paths of the `.npy` arrays to pad.
    metadata_path : str
        Path of the JSON metadata file. Its content is extended with a
        'padding' entry and written next to the padded arrays.
    out_path : str
        Output directory (created if missing). Receives "GED4.npy",
        "mask_GED4.npy" and "metadata.json".
    padding_multiple : int
        Every array axis is zero-padded up to a multiple of this value.

    Raises
    ------
    ValueError
        If image and mask do not have the same shape.
    OSError
        If an output file cannot be written (e.g. the disk is full). Files
        this call started writing are removed before the error propagates,
        so no truncated output is left behind.
    """
    # Load data
    image = np.load(image_path)
    mask = np.load(mask_path)
    with open(metadata_path, 'r') as f:
        metadata = json.load(f)

    # Compare the shapes themselves: comparing pad widths is not sufficient,
    # because different shapes can need identical padding (e.g. 30 and 62
    # both need 2 elements to reach a multiple of 32).
    if image.shape != mask.shape:
        raise ValueError(
            f"Image and mask shapes must match, got {image.shape} and {mask.shape}"
        )

    # Padding (identical shapes guarantee identical pad widths)
    padded_image, pad_width = pad_to_multiple(image, multiple=padding_multiple, mode='constant', value=0)
    padded_mask, _ = pad_to_multiple(mask, multiple=padding_multiple, mode='constant', value=0)

    # Update metadata
    updated_metadata = update_metadata_with_padding(metadata, pad_width)

    # Save; the output directory is only created once the inputs are known to be valid.
    os.makedirs(out_path, exist_ok=True)
    started = []  # files this call has begun writing, for cleanup on failure
    try:
        for filename, data in (("GED4.npy", padded_image), ("mask_GED4.npy", padded_mask)):
            target = os.path.join(out_path, filename)
            started.append(target)
            np.save(target, data)

        metadata_target = os.path.join(out_path, "metadata.json")
        started.append(metadata_target)
        with open(metadata_target, "w") as f:
            json.dump(updated_metadata, f, indent=4)
    except OSError:
        # A failed write leaves a truncated file; remove it so it cannot be
        # mistaken for valid output later.
        for target in started:
            if os.path.exists(target):
                os.remove(target)
        raise

    print(f"✅ Padded data saved to {out_path}")


In [35]:
# Pad every case directory of both dataset splits. The splits differ only in
# their input folder and in the name of the output sub-folder.
for split_name, split_path in (("train", TRAIN_DATA_PATH), ("val", VAL_DATA_PATH)):
    # sorted() gives a reproducible processing order; os.listdir order is arbitrary.
    for dir_name in sorted(os.listdir(split_path)):
        case_dir = os.path.join(split_path, dir_name)
        if not os.path.isdir(case_dir):
            continue  # skip stray files lying next to the case folders

        process_and_pad(
            dir_name,
            os.path.join(case_dir, "GED4.npy"),
            os.path.join(case_dir, "mask_GED4.npy"),
            os.path.join(case_dir, "metadata.json"),
            os.path.join(OUTPUT_PATH, split_name, dir_name),
            padding_multiple=32,
        )

    
    

✅ Padded data saved to Padding_Data/train/0487-B1-S2


OSError: Not enough free space to write 51118080 bytes after offset 128