## Chunking Upenn specimen

Split an HDF5 volume into overlapping blocks and save each block as a .nii.gz (or .tiff) file for nnUNet inference.

<!-- ![img] (chunking) -->
<img src="chunking.png" width="500">


In [1]:
# Import python library
import numpy as np
import h5py as h5
from skimage import exposure
import os
import nibabel as nib
import tifffile as tf

Define helper functions: 

`generate_niigz`: takes a 3D NumPy image, rescales it to 8 bit, and writes it as a .nii.gz file to the destination path.

`writetiff`: takes a 3D NumPy image and writes it, plane by plane, as a .tiff file to the destination path.

`chunk`: loops over the channels, computes the coordinates of each overlapping block, and saves every block as either .nii.gz or .tiff.

In [3]:
def generate_niigz(img, output_path):
    """
    Rescale a 3D image to 8 bit and save it as a NIfTI file.

    Params:
    - img: 3D numpy array to export
    - output_path: destination file name (e.g. ending in .nii.gz)

    The input range [min, 99.99th percentile] is mapped onto the uint8 range,
    so the brightest 0.01 % of voxels saturate instead of compressing the
    contrast of everything else.
    """

    # Intensity window used for the 8 bit mapping
    lower = np.min(img)
    upper = np.percentile(img, 99.99)

    img_8bit = exposure.rescale_intensity(img,
                                          in_range=(lower, upper),
                                          out_range=np.uint8)

    # Wrap the array in a NIfTI object (no affine supplied) and write it out
    nifti = nib.Nifti1Image(img_8bit.astype(np.uint8), None)
    nifti.to_filename(output_path)

def writetiff(img, output_path):
    """
    Save a 3D image as a multi-page TIFF, one page per entry along axis 0.

    Params:
    - img: 3D numpy array; no intensity rescaling is applied here, so the
      input dtype is written as is (16 bit when the caller passes uint16)
    - output_path: destination file name
    """

    print("Saving TIFF...")
    with tf.TiffWriter(output_path) as writer:
        # Write plane by plane; contiguous=True is passed for every page
        for plane in img:
            writer.write(plane, contiguous=True)


def _block_name_prefix(h5path):
    """Return the specimen prefix used at the start of every block filename."""
    try:
        # Primary naming scheme: the text between "-23_" and "_well" in the path
        return h5path.split("-23_")[1].split("_well")[0]
    except IndexError:
        # Path does not contain "-23_": fall back to the parent folder name,
        # or to the file stem when the path has no parent folder
        parent = os.path.basename(os.path.dirname(h5path))
        return parent or os.path.splitext(os.path.basename(h5path))[0]


def chunk(h5path, overlap = 0.25, blocksize = 1024, ftype = ".nii.gz", res="0", nlvl = ""):
    """
    Split an HDF5 volume into overlapping blocks and save each block to disk.

    Blocks are cut along axes 1 and 2 of f['t00000'][<channel>][res]['cells']
    and written to an "nnunet" folder next to the HDF5 file, one file per
    block and channel, named "<prefix>_blk_<i>_<j>_<channel id><ftype>".

    Params:
    - h5path: path to the HDF5 file
    - overlap: fraction of blocksize shared by neighbouring blocks,
      0 <= overlap < 1 (can be 0)
    - blocksize: edge length of a block along axes 1 and 2
    - ftype: ".nii.gz" saves 8 bit NIfTI via generate_niigz; any other value
      saves TIFF via writetiff
    - res: resolution key, use "0"
    - nlvl: how many levels to keep from the top (first nlvl entries of
      axis 0); "" or None keeps all levels

    Raises:
    - ValueError: if overlap is outside [0, 1) or blocksize is not positive
    """
    # A stride of zero or less would give a division by zero or nonsense block counts
    if not 0 <= overlap < 1:
        raise ValueError(f"overlap must satisfy 0 <= overlap < 1, got {overlap}")
    if blocksize <= 0:
        raise ValueError(f"blocksize must be positive, got {blocksize}")

    # os.path.join keeps the output relative when h5path has no directory part
    # (plain concatenation with os.sep would point at the filesystem root)
    savepath = os.path.join(os.path.dirname(h5path), "nnunet")
    os.makedirs(savepath, exist_ok=True)

    # (HDF5 channel key, channel id used in the output filename)
    chan = [("s01", "0001"),
            ("s00", "0000")]

    # Resolve the filename prefix once, before any data is read
    prefix = _block_name_prefix(h5path)

    # Slice applied along axis 0
    if nlvl is None or nlvl == "":
        level_slice = slice(None)
    else:
        level_slice = slice(None, nlvl)

    # Open the file once for the whole run instead of once per block
    with h5.File(h5path, 'r') as f:
        # Shape is taken from the first channel; the other channel is assumed
        # to have the same shape -- TODO confirm
        img_shape = f['t00000'][chan[0][0]][res]['cells'].shape

        # NOTE(review): floor division means any region past the last full
        # block along axes 1/2 is not exported, and a volume smaller than
        # blocksize yields no blocks at all. Confirm this is intended.
        x_num_blocks = int((img_shape[1] - overlap * blocksize) // ((1 - overlap) * blocksize))
        y_num_blocks = int((img_shape[2] - overlap * blocksize) // ((1 - overlap) * blocksize))

        # Loop through each block
        for ch in chan:
            cells = f['t00000'][ch[0]][res]['cells']
            for i in range(x_num_blocks):
                for j in range(y_num_blocks):
                    # Calculate start and end indices for x and y dimensions with overlap
                    x_start = int(i * blocksize * (1-overlap))
                    x_end = int(x_start + blocksize)
                    y_start = int(j * blocksize * (1-overlap))
                    y_end = int(y_start + blocksize)

                    img = cells[level_slice, x_start:x_end, y_start:y_end].astype(np.uint16)

                    # Prepare filename for the block
                    fname = prefix + f"_blk_{i}_{j}_" + ch[1] + ftype
                    fpath = os.path.join(savepath, fname)

                    if ftype == ".nii.gz":
                        # NIfTI output gets the level axis moved from first to last
                        img = np.moveaxis(img, 0, 2)
                        generate_niigz(img, fpath)
                    else:
                        writetiff(img, fpath)

                    # Print the block indices for verification
                    print(x_start, x_end, y_start, y_end)



Run the chunking (set `h5_path` to your HDF5 file first):

In [None]:
h5_path = "" #Enter your path

# Fail early with a clear message instead of running chunk() on the empty placeholder
if not h5_path:
    raise ValueError("h5_path is empty: set it to the HDF5 file to chunk.")

chunk(h5_path, 
      overlap = 0.25, 
      blocksize = 1024, 
      ftype = ".nii.gz", 
      res="0", 
      nlvl = "")
print("chunking: ", h5_path)