## Converter Module

This notebook shows a module that converts images readable by Bio-Formats into a TIFF file that contains the OME-XML metadata as well as the pixel data as a 6D array.

In [1]:
import javabridge as jv
import bioformats
import skimage
import numpy as np
from lxml import etree as etl
from xml.dom import minidom
from skimage.external import tifffile
import sys

Helper Functions

In [2]:
# Helper Functions from sebi06 github, that wrap bioformats

# Lookup table: Bio-Formats pixel-type code (the integer returned by the
# reader's getPixelType()) -> NumPy dtype used to allocate the image array.
# The position of each dtype in the tuple is its pixel-type code.
BF2NP_DTYPE = dict(enumerate((
    np.int8,     # 0
    np.uint8,    # 1
    np.int16,    # 2
    np.uint16,   # 3
    np.int32,    # 4
    np.uint32,   # 5
    np.float32,  # 6
    np.double,   # 7
)))

def get_dimension_only(imagefile, imageID=0):
    """Read the series count and image dimensions without loading pixel data.

    Parameters
    ----------
    imagefile : str
        Path of the image file; passed to ``bioformats.get_image_reader``.
    imageID : int, optional
        Index of the image whose ``Pixels`` sizes (C, T, Z, X, Y) are
        reported. Defaults to 0.

    Returns
    -------
    sizes : list
        ``[totalseries, SizeT, SizeZ, SizeC, SizeY, SizeX]``. Y comes before X
        so that the list can be used directly as a NumPy shape
        (rows = Y, columns = X).
    javametadata : jv.JWrapper
        Wrapper around the reader's metadata store.
    totalseries : int
        Number of series reported by the reader (1 if it cannot be queried).
    imageIDs : list of int
        ``0 .. imagecount - 1`` as reported by the metadata store.
    series_dimensions : list of tuple
        ``(dimx, dimy)`` for every series.
    multires : bool
        True when the series do not all share the same XY size.
    """
    rdr = bioformats.get_image_reader(None, path=imagefile)
    try:
        # for "whatever" reason the number of total series can only be accessed here ...
        try:
            # builtin int: the np.int alias was removed from NumPy (1.24),
            # which previously made this silently fall back to 1
            totalseries = int(rdr.rdr.getSeriesCount())
        except Exception:
            totalseries = 1  # in case there is only ONE series

        # cycle through all the series and collect their XY dimensions
        series_dimensions = []
        for sc in range(totalseries):
            rdr.rdr.setSeries(sc)
            series_dimensions.append((rdr.rdr.getSizeX(), rdr.rdr.getSizeY()))

        # more than one distinct XY size means the series differ in resolution;
        # computed once after the loop so it is defined even for zero series
        multires = len(set(series_dimensions)) > 1

        # rdr.rdr is the actual BioFormats reader. rdr handles its lifetime
        javametadata = jv.JWrapper(rdr.rdr.getMetadataStore())
        imageIDs = list(range(javametadata.getImageCount()))

        # get dimensions for CTZXY
        pixels = get_metadata_store(imagefile).image(imageID).Pixels
        SizeC = pixels.SizeC
        SizeT = pixels.SizeT
        SizeZ = pixels.SizeZ
        SizeX = pixels.SizeX
        SizeY = pixels.SizeY

        print('Series: ', totalseries)
        print('Size T: ', SizeT)
        print('Size Z: ', SizeZ)
        print('Size C: ', SizeC)
        print('Size X: ', SizeX)
        print('Size Y: ', SizeY)

        # usually the x-axis of an image is from left --> right and y from top --> bottom
        # in order to be compatible with numpy arrays XY are switched:
        # for numpy arrays the second-to-last axis (rows, top --> down) is the
        # Y-axis of the image and the last axis (columns) is the X-axis
        sizes = [totalseries, SizeT, SizeZ, SizeC, SizeY, SizeX]
    finally:
        # close the reader even if querying the metadata failed
        rdr.close()

    return sizes, javametadata, totalseries, imageIDs, series_dimensions, multires


def get_metadata_store(imagefile):
    """Return the metadata of ``imagefile`` as a ``bioformats.OMEXML`` object.

    The OME-XML is fetched (UTF-8 encoded) through ``get_OMEXML`` and handed
    straight to the ``bioformats.OMEXML`` constructor.
    """
    return bioformats.OMEXML(get_OMEXML(imagefile))


def get_image6d(imagefile, sizes, pyramid='single', pylevel=0):
    """Read the image data into a 6D NumPy array.

    The array has the dimension order ``[Series, T, Z, C, Y, X]``.

    Parameters
    ----------
    imagefile : str
        Path of the image file; passed to ``bioformats.ImageReader``.
    sizes : sequence of int
        ``[Series, T, Z, C, Y, X]`` sizes used to allocate the array. The
        sequence is copied; the caller's object is never modified.
    pyramid : {'single', 'all'}, optional
        ``'single'`` reads only the series ``pylevel`` and returns an array
        whose series axis has length 1. ``'all'`` reads series
        ``0 .. sizes[0] - 1``.
    pylevel : int, optional
        Series index to read when ``pyramid == 'single'``.

    Returns
    -------
    img6d : numpy.ndarray
        The image data. Planes that could not be read stay zero-filled.
    readstate : str
        ``'OK'`` when every plane was read, ``'NOK'`` otherwise.

    Raises
    ------
    ValueError
        If ``pyramid`` is neither ``'single'`` nor ``'all'``.

    Notes
    -----
    Every plane is stored into a ``sizes[4] x sizes[5]`` slot. A plane whose
    shape does not fit is reported as a read problem and sets
    ``readstate`` to ``'NOK'``.
    """
    if pyramid not in ('single', 'all'):
        raise ValueError(
            "pyramid must be 'single' or 'all', got {!r}".format(pyramid))

    # work on a copy so that the caller's list is left untouched
    sizes = list(sizes)

    rdr = bioformats.ImageReader(imagefile, perform_init=True)
    readstate = 'OK'

    try:
        if pyramid == 'single':
            print('Reading single pyramid level ...')
            sizes[0] = 1
            series_ids = [pylevel]
        else:
            print('Reading all pyramid levels ...')
            series_ids = range(sizes[0])

        img6d = np.zeros(sizes, dtype=BF2NP_DTYPE[rdr.rdr.getPixelType()])

        # main loop to read the images from the data file.
        # `slot` is the position on the array's series axis; it differs from
        # seriesID in 'single' mode, where series `pylevel` goes into slot 0.
        for slot, seriesID in enumerate(series_ids):
            for timepoint in range(sizes[1]):
                for zplane in range(sizes[2]):
                    for channel in range(sizes[3]):
                        try:
                            img6d[slot, timepoint, zplane, channel, :, :] = rdr.read(
                                series=seriesID, c=channel, z=zplane, t=timepoint,
                                rescale=False)
                        except Exception as error:
                            # best effort: report the plane and keep reading
                            print('Problem reading data into Numpy Array for Series', seriesID, error)
                            readstate = 'NOK'
    finally:
        # release the reader even if allocation or reading failed
        rdr.close()

    return img6d, readstate


def get_OMEXML(imagefile):
    """Return the OME-XML metadata of ``imagefile`` encoded as UTF-8.

    The metadata string comes from ``bioformats.get_omexml_metadata`` and is
    returned as the result of its ``encode('utf-8')`` call.
    """
    return bioformats.get_omexml_metadata(imagefile).encode('utf-8')


def prettify(rough_string):
    """Pretty-print an XML document.

    ``rough_string`` is parsed with ``minidom`` and serialized again with an
    indentation of two spaces per nesting level.
    """
    document = minidom.parseString(rough_string)
    pretty_xml = document.toprettyxml(indent="  ")
    return pretty_xml

Import CZI data (can be any format supported by Bio-Formats)

In [3]:
# Start Java VM
jv.start_vm(class_path=bioformats.JARS)

In [4]:
#path = "T=5_Z=3_CH=2_CZT_All_CH_per_Slice.czi"
path = r'c:\Users\m1srh\OneDrive - Carl Zeiss AG\Projects\Apeer\ZenCore_Workflows\ParticleAnalysis\Filtertest1_POL.czi'
sizes, jmd, totalseries, imageIDs, series_dimensions, multires = get_dimension_only(path)

Series:  4
Size T:  1
Size Z:  1
Size C:  1
Size X:  11286
Size Y:  11313


In [5]:
print('MultiRes : ', multires)

# to read only the highest resolution modify sizes
#sizes[0] = 1
#print(type(sizes))
#MetaInfo['Sizes'][1]

MultiRes :  True


In [6]:
# Get array with order [Series, T, Z, C, Y, X]
array6D, readstate = get_image6d(path, sizes, pyramid='single', pylevel=0)
#array6D, readstate = get_image6d_pylevel(path, sizes, pylevel=0)
np.shape(array6D)

Reading single pyramid level ...


(4, 1, 1, 1, 11313, 11286)

In [None]:
# Get Omexml
omexml = get_OMEXML(path)
omexmlString = prettify(omexml.decode("utf-8"))

In [None]:
# Swap the <MetadataOnly/> element for <TiffData/> in the pretty-printed OME-XML
# (presumably so the XML announces that the pixel data live in the TIFF -- TODO confirm).
# NOTE: this is a plain string match that includes the newline and the exact
# six-space indentation; if the element is nested differently nothing is replaced.
omexmlString_mod = omexmlString.replace('\n      <MetadataOnly/>', '\n      <TiffData/>')

Export array and omexml to tif

In [None]:
outputname = 'T=5_Z=3_CH=2_CZT_All_CH_per_Slice.ome.tiff'

In [None]:
# array6D is ordered [Series, T, Z, C, Y, X] (rows = Y before columns = X),
# so the axes label has to end in 'YX', not 'XY'.
tifffile.imsave(outputname, array6D, description=omexmlString_mod, metadata={'axes': 'STZCYX'})

In [None]:
# Write the pretty-printed OME-XML to a file.
# The context manager closes the file even if the write fails. UTF-8 is set
# explicitly because the XML declaration names no encoding (so readers assume
# UTF-8), while the platform default encoding may be something else.
with open("ome.xml", "w", encoding="utf-8") as text_file:
    text_file.write(omexmlString_mod)

Test output

In [None]:
test = tifffile.imread(outputname)
np.shape(test)

In [None]:
jv.kill_vm()