In [6]:

def pad_stack_z_with_2d_padding(
    folder_path,
    padded_folder_path=None,
    xy_size=960,
    target_depth=300,
    pad_value=255
):
    """
    Pads 2D slices in a folder to (xy_size, xy_size) and stacks them into a 3D volume.
    Optionally saves padded images to disk if `padded_folder_path` is given.

    Returns:
    --------
    np.ndarray: padded stack of shape (target_depth, xy_size, xy_size, 3)
    """

    def padd_picture(image_path, size):
        img = Image.open(image_path).convert('RGB')
        new_img = Image.new('RGB', (size, size), (255, 255, 255))
        new_img.paste(img, (0, 0))
        return new_img

    def numeric_sort_key(filename):
        numbers = re.findall(r'\d+', filename)
        return int(numbers[0]) if numbers else -1

    # Get sorted image filenames
    image_files = sorted(
        [f for f in os.listdir(folder_path) if f.endswith(('.jpg', '.png'))],
        key=numeric_sort_key
    )

    padded_images = []

    for file in image_files:
        source_path = os.path.join(folder_path, file)
        padded_img = padd_picture(source_path, xy_size)

        # Save if a destination path is given
        if padded_folder_path:
            dest_path = os.path.join(padded_folder_path, file)
            os.makedirs(os.path.dirname(dest_path), exist_ok=True)
            padded_img.save(dest_path)

        padded_images.append(np.array(padded_img))

    stack = np.stack(padded_images, axis=0)  # (Z, xy_size, xy_size, 3)
    current_depth = stack.shape[0]

    if current_depth > target_depth:
        raise ValueError(f"Stack depth {current_depth} exceeds target {target_depth}")

    # Pad in Z-axis
    pad_slices = target_depth - current_depth
    if pad_slices > 0:
        padding = np.full(
            (pad_slices, xy_size, xy_size, 3),
            fill_value=pad_value,
            dtype=stack.dtype
        )
        stack = np.concatenate([stack, padding], axis=0)

    return stack

In [7]:
padded_stack = pad_stack_z_with_2d_padding(
    folder_path='../data/pictures_raw/train/tomo_fd41c4',
    padded_folder_path='../data/pictures_process/train_process/tomo_fd41c4',
    xy_size=960,
    #target_depth=800,
    #pad_value=255  # white background
)

print(padded_stack.shape)

(300, 960, 960, 3)


In [28]:

def load_stack_and_label(tomo_folder, label, n_slices=300, img_size=(960, 960)):
    """
    Load n_slices images from tomo_folder, stack them, and return with label.
    Designed to be used inside a tf.py_function to work with tf.data.Dataset.

    Args:
        tomo_folder (tf.Tensor): Path to tomo folder as tf.string tensor.
        label (tf.Tensor): Label tensor.
        n_slices (int): Number of slices to load.
        img_size (tuple): Target image size (H, W).

    Returns:
        tuple: (stacked volume, label), where volume shape = (n_slices, H, W, 3)
    """

    def _load(tomo_folder_path, label_value):
        # tomo_folder_path is a byte string here, decode it
        folder = tomo_folder_path.numpy().decode(),
        print(folder)
        print(type(folder))
        slice_files = sorted([f for f in os.listdir(folder) if f.endswith('.jpg')])

        slice_files = slice_files[:n_slices]

        slices = []
        for fname in slice_files:
            path = os.path.join(folder, fname)
            img = Image.open(path).convert('RGB').resize(img_size)
            arr = np.array(img, dtype=np.float32) / 255.0
            slices.append(arr)

        # Pad if fewer slices than n_slices
        if len(slices) < n_slices:
            n_pad = n_slices - len(slices)
            pad_img = np.ones((img_size[0], img_size[1], 3), dtype=np.float32)
            slices.extend([pad_img] * n_pad)

        stack = np.stack(slices, axis=0).astype(np.float32)  # shape: (n_slices, H, W, 3)

        return stack, np.float32(label_value)

    volume, labels = tf.py_function(_load, [tomo_folder, label], [tf.float32, tf.float32])

    # Set static shape info so TensorFlow knows the output shape
    volume.set_shape((n_slices, img_size[0], img_size[1], 3))
    labels.set_shape(())

    return volume, labels


In [13]:
def selection_images_labels(df, num_slices=[300], num_motors=[0,1]):

   # Step 3: Load parent directory and finding all subdirs
    dir_mean_image = '../data/pictures_process/train_process/'
    parent_dir = Path(dir_mean_image)
    subdirs = [str(p) for p in parent_dir.iterdir() if p.is_dir()]
    names = [p.name for p in parent_dir.iterdir() if p.is_dir()]

   # Step 1: Filter tomos
    #tomo_ids = 'tomo_0a8f05', 'tomo_0a180f', 'tomo_0c3a99', 'tomo_0c3d78'
    tomo_ids = select_tomo_ids(df, number_of_slices=num_slices, number_of_motors=num_motors)
    df_select = df[df['tomo_id'].isin(names)].copy()


    # Step 4: Match subdirectories and labels
    filtered_image_paths = []
    labels = []

    for _, row in df_select.iterrows():
        tomo_id = row['tomo_id']
        matched = [p for p in subdirs if tomo_id in os.path.basename(p)]

        if matched:
            filtered_image_paths.append(matched[0])  # If multiple, take the first
            labels.append(row['Number_of_motors'])
        else:
            print(f"⚠️ No image found for tomo_id: {tomo_id}")

    print(f"Matched {len(filtered_image_paths)} image-label pairs")

    labels = np.array(labels, dtype=np.float32)
    return filtered_image_paths, labels


In [24]:
def batches_stack_images_ram(
    filtered_image_paths,
    labels,
    shuffle=True,
    batch_size=2,
    split=True,
    val_fraction=0.2,
    test_fraction=0.2,
    seed=42,
    xy_size=960,
    target_depth=800
):
    dataset_size = len(filtered_image_paths)
    data = list(zip(filtered_image_paths, labels))
    if shuffle:
        rng = np.random.default_rng(seed)
        rng.shuffle(data)

    filtered_image_paths, labels = zip(*data)
    filtered_image_paths = list(filtered_image_paths)
    labels = list(labels)

    if split:
        val_size = 2#int(val_fraction * dataset_size)
        test_size = 2#int(test_fraction * dataset_size)
        train_size = dataset_size - val_size - test_size

        test_paths = filtered_image_paths[:test_size]
        test_labels = labels[:test_size]
        val_paths = filtered_image_paths[test_size:test_size + val_size]
        val_labels = labels[test_size:test_size + val_size]
        train_paths = filtered_image_paths[test_size + val_size:]
        train_labels = labels[test_size + val_size:]

        train_ds = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
        val_ds = tf.data.Dataset.from_tensor_slices((val_paths, val_labels))
        test_ds = tf.data.Dataset.from_tensor_slices((test_paths, test_labels))

        train_ds = train_ds.map(lambda x, y: load_stack_and_label(x, y, n_slices=target_depth, img_size=(xy_size, xy_size)),
            num_parallel_calls=tf.data.AUTOTUNE).batch(batch_size)

        val_ds = val_ds.map(lambda x, y: load_stack_and_label(x, y, n_slices=target_depth, img_size=(xy_size, xy_size)),
            num_parallel_calls=tf.data.AUTOTUNE).batch(batch_size)

        test_ds = test_ds.map(lambda x, y: load_stack_and_label(x, y, n_slices=target_depth, img_size=(xy_size, xy_size)),
            num_parallel_calls=tf.data.AUTOTUNE).batch(batch_size)

        return train_ds, val_ds, test_ds, test_paths, test_labels

    else:
        dataset = tf.data.Dataset.from_tensor_slices((filtered_image_paths, labels))
        dataset = dataset.map(lambda x, y: load_stack_and_label(x, y,n_slices=target_depth, img_size=(xy_size, xy_size))).batch(batch_size)
        return dataset, filtered_image_paths, labels


In [20]:
path_train_csv= '../data/csv_raw/train_labels.csv'
path_image= '../data/pictures_process/train_process'
df = pd.read_csv(path_train_csv).copy()
df.head(10)

Unnamed: 0.1,Unnamed: 0,row_id,tomo_id,Motor_axis_0,Motor_axis_1,Motor_axis_2,Array_shape_axis_0,Array_shape_axis_1,Array_shape_axis_2,Voxel_spacing,Number_of_motors
0,0,16,tomo_049310,-1.0,-1.0,-1.0,500,924,956,19.7,0
1,1,30,tomo_098751,-1.0,-1.0,-1.0,500,924,956,16.1,0
2,2,54,tomo_136c8d,-1.0,-1.0,-1.0,500,924,956,19.7,0
3,3,59,tomo_146de2,-1.0,-1.0,-1.0,500,924,956,16.1,0
4,4,84,tomo_1dc5f9,-1.0,-1.0,-1.0,500,924,956,19.7,0
5,5,121,tomo_28f9c1,-1.0,-1.0,-1.0,500,924,956,16.1,0
6,6,173,tomo_39b15b,-1.0,-1.0,-1.0,500,924,956,16.1,0
7,7,181,tomo_3b8291,-1.0,-1.0,-1.0,500,924,956,19.7,0
8,8,193,tomo_40b215,-1.0,-1.0,-1.0,500,924,956,16.1,0
9,9,219,tomo_4baff0,-1.0,-1.0,-1.0,500,924,956,16.1,0


In [32]:
filtered_image_paths,labels = selection_images_labels(df, num_slices=[300], num_motors=[0,1])
filtered_image_paths

Matched 10 image-label pairs


['../data/pictures_process/train_process/tomo_2dd6bd',
 '../data/pictures_process/train_process/tomo_3264bc',
 '../data/pictures_process/train_process/tomo_54e1a7',
 '../data/pictures_process/train_process/tomo_974fd4',
 '../data/pictures_process/train_process/tomo_b8595d',
 '../data/pictures_process/train_process/tomo_c3619a',
 '../data/pictures_process/train_process/tomo_d6e3c7',
 '../data/pictures_process/train_process/tomo_db2a10',
 '../data/pictures_process/train_process/tomo_e96200',
 '../data/pictures_process/train_process/tomo_fd41c4']

In [30]:
train_ds, val_ds, test_ds, test_paths, test_labels = batches_stack_images_ram(filtered_image_paths,
    labels,
    shuffle=True,
    batch_size=2,
    split=True,
    val_fraction=0.2,
    test_fraction=0.2,
    seed=42,
    xy_size=960,
    target_depth=800)


In [31]:
for volume, label in train_ds.take(1):
    print("Volume batch shape:", volume.shape)
    print("Label batch shape:", label.shape)

('../data/pictures_process/train_process/tomo_54e1a7',)('../data/pictures_process/train_process/tomo_974fd4',)
<class 'tuple'>
('../data/pictures_process/train_process/tomo_3264bc',)
<class 'tuple'>

<class 'tuple'>
('../data/pictures_process/train_process/tomo_fd41c4',)
<class 'tuple'>
('../data/pictures_process/train_process/tomo_e96200',)
<class 'tuple'>
('../data/pictures_process/train_process/tomo_b8595d',)
<class 'tuple'>


2025-06-06 16:45:57.732543: W tensorflow/core/framework/op_kernel.cc:1768] INVALID_ARGUMENT: TypeError: listdir: path should be string, bytes, os.PathLike, integer or None, not tuple
Traceback (most recent call last):

  File "/Users/benedettaveronesi/.pyenv/versions/3.10.6/envs/flagelleux/lib/python3.10/site-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
    return func(device, token, args)

  File "/Users/benedettaveronesi/.pyenv/versions/3.10.6/envs/flagelleux/lib/python3.10/site-packages/tensorflow/python/ops/script_ops.py", line 147, in __call__
    outputs = self._call(device, args)

  File "/Users/benedettaveronesi/.pyenv/versions/3.10.6/envs/flagelleux/lib/python3.10/site-packages/tensorflow/python/ops/script_ops.py", line 154, in _call
    ret = self._func(*args)

  File "/Users/benedettaveronesi/.pyenv/versions/3.10.6/envs/flagelleux/lib/python3.10/site-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
    return func(*args, **kwarg

InvalidArgumentError: {{function_node __wrapped__IteratorGetNext_output_types_2_device_/job:localhost/replica:0/task:0/device:CPU:0}} TypeError: listdir: path should be string, bytes, os.PathLike, integer or None, not tuple
Traceback (most recent call last):

  File "/Users/benedettaveronesi/.pyenv/versions/3.10.6/envs/flagelleux/lib/python3.10/site-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
    return func(device, token, args)

  File "/Users/benedettaveronesi/.pyenv/versions/3.10.6/envs/flagelleux/lib/python3.10/site-packages/tensorflow/python/ops/script_ops.py", line 147, in __call__
    outputs = self._call(device, args)

  File "/Users/benedettaveronesi/.pyenv/versions/3.10.6/envs/flagelleux/lib/python3.10/site-packages/tensorflow/python/ops/script_ops.py", line 154, in _call
    ret = self._func(*args)

  File "/Users/benedettaveronesi/.pyenv/versions/3.10.6/envs/flagelleux/lib/python3.10/site-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
    return func(*args, **kwargs)

  File "/var/folders/nj/wg4pl11n6hb1zcpwyf6khsxr0000gn/T/__autograph_generated_file6_h6tttz.py", line 32, in _load
    slice_files = ag__.converted_call(ag__.ld(sorted), ([ag__.ld(f) for f in ag__.converted_call(ag__.ld(os).listdir, (ag__.ld(folder),), None, fscope_1) if ag__.converted_call(ag__.ld(f).endswith, ('.jpg',), None, fscope_1)],), None, fscope_1)

  File "/Users/benedettaveronesi/.pyenv/versions/3.10.6/envs/flagelleux/lib/python3.10/site-packages/tensorflow/python/autograph/impl/api.py", line 335, in converted_call
    return _call_unconverted(f, args, kwargs, options, False)

  File "/Users/benedettaveronesi/.pyenv/versions/3.10.6/envs/flagelleux/lib/python3.10/site-packages/tensorflow/python/autograph/impl/api.py", line 459, in _call_unconverted
    return f(*args)

TypeError: listdir: path should be string, bytes, os.PathLike, integer or None, not tuple


	 [[{{node EagerPyFunc}}]] [Op:IteratorGetNext]