In [1]:
import os
import h5py
import numpy as np

In [7]:
# Parameters
directory = "../data/h5_temp"  # Path to the directory containing HDF5 files
batch_size = 32  # Number of samples to read from each file
num_files = 5  # Number of HDF5 files to combine; becomes axis 1 of the result
output_shape = (batch_size, num_files, 10, 10)  # Desired concatenated shape

# List all HDF5 files in the directory.
# os.listdir returns entries in arbitrary order, so sort the paths to make
# both the file selection and the order along axis 1 reproducible across runs.
h5_files = sorted(
    os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.h5')
)

# Ensure there are enough files to fill axis 1
if len(h5_files) < num_files:
    raise ValueError(f"Expected at least {num_files} files, but found {len(h5_files)}")

# Initialize a list to hold one batch per file
batches = []

# Iterate through the first `num_files` HDF5 files (in sorted order)
for h5_file in h5_files[:num_files]:
    with h5py.File(h5_file, 'r') as h5:
        # Read the first `batch_size` samples from the 'cells' dataset.
        # Slicing copies the data into memory, so it stays valid after the
        # file is closed.
        data = h5['cells'][:batch_size]  # Expected shape: (32, 1, 10, 10)
    print(data.shape)

    # A file holding fewer than `batch_size` samples would otherwise surface
    # as an opaque stacking error (or a silently smaller batch); fail early
    # and name the offending file instead.
    if data.shape[0] != batch_size:
        raise ValueError(
            f"{h5_file}: expected {batch_size} samples in 'cells', got {data.shape[0]}"
        )
    batches.append(data)

# Stack the batches along a new second axis, then drop the per-file singleton
# axis that is pushed to position 2 by the stack.
result = np.stack(batches, axis=1)  # Shape: (32, 5, 1, 10, 10)
result = np.squeeze(result, axis=2)  # Shape: (32, 5, 10, 10)

# Verify the result against the declared target shape
if result.shape != output_shape:
    raise ValueError(f"Expected result shape {output_shape}, but got {result.shape}")

print(f"Concatenated result shape: {result.shape}")

(32, 1, 10, 10)
(32, 1, 10, 10)
(32, 1, 10, 10)
(32, 1, 10, 10)
(32, 1, 10, 10)
Concatenated result shape: (32, 5, 10, 10)
