# Convert tiff files to hdf5 file (for ilastik analysis)

In [6]:
# import necessary libraries

import tifffile
import glob
import h5py
import os
import numpy as np

In [7]:
def load_tiff_sequence ( imdir, imgtype='tiff', range=None ):
    """
    load tiff sequence stored in the same directory
    e.g. 
    vol = load_tiff_sequence (imgdir, '.png', range=[])
    """

    imlist = glob.glob( imdir + '*.' + imgtype )
    imlist.sort() # sort numerically
    
    if range is not None:
        imlist = imlist[ range[0]:range[1]]
        
    #get image properties by reading the first image
    im = tifffile.imread(imlist[0])
    imsize_x = im.shape[1]
    imsize_y = im.shape[0]
    imsize_z = len( imlist )
    imsize = ( imsize_z, imsize_y, imsize_x )
    imtype = im.dtype
    
    stack = np.zeros( imsize, dtype=imtype )
    for (i, impath) in enumerate(imlist):
        im = tifffile.imread( impath )
        stack[i,:,:] = im
        
    return stack

In [8]:
def write_as_hdf5( stack, h5name, destname, 
                   chunks_enabled=True, chunksize=None,
                   attributes=None ):
    """
    e.g.
    write_as_hdf5(vol, 'test.hdf5', 'resolution_0', True, (100,100,100))
    """
    if chunks_enabled:
        if chunksize is None:
            chunks = True
        else:
            chunks = chunksize
    else:
        chunks = None
        
    with h5py.File( h5name, 'w', driver='stdio' ) as hf:
        data = hf.create_dataset (destname,
                                  chunks=chunks,
                                  data=stack )
        if attributes is not None:
            for key, value in attributes.items():
                data.attrs[key] = value

In [19]:
# Data Download
# !wget https://www.dropbox.com/s/fuj2ndxzoijgy2n/190604_%23144_lung_raw_tiff.zip
!wget https://www.dropbox.com/s/3n88u2pvh7i60zw/190604_P_%23144_lung_ctrl_x125_639_Probabilities.h5
# !wget https://www.dropbox.com/s/ftt9ebiayn5fi8i/190604_P_%23144_lung_ctrl_x125_639.hdf5

# ! unzip 190604_#144_lung_raw_tiff_10slices.zip

--2022-08-10 05:20:35--  https://www.dropbox.com/s/3n88u2pvh7i60zw/190604_P_%23144_lung_ctrl_x125_639_Probabilities.h5
Resolving www.dropbox.com (www.dropbox.com)... 162.125.4.18, 2620:100:601d:18::a27d:512
Connecting to www.dropbox.com (www.dropbox.com)|162.125.4.18|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /s/raw/3n88u2pvh7i60zw/190604_P_%23144_lung_ctrl_x125_639_Probabilities.h5 [following]
--2022-08-10 05:20:35--  https://www.dropbox.com/s/raw/3n88u2pvh7i60zw/190604_P_%23144_lung_ctrl_x125_639_Probabilities.h5
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc0571f10e6e6c06042d6bab71fd.dl.dropboxusercontent.com/cd/0/inline/Bqtu79dU2graITP4hVf-8w3KLT4LHDn6Nbyo9_le1s7_ZrlORC5KIIHByT0dLAgQm4zD9d-DJdKWatjvAHZ1T-w0m5CnBAJYZmjeLnZZ9V8HSMzuv6DTRzOqUbJY8CBLZlZmbaGDl_b_gJsyyvWW3ox8muvsJwQ6Q98e9ogcF-pySA/file# [following]
--2022-08-10 05:20:36--  https://uc0571f10e6e6c0604

In [11]:
#Choose Tiff file folder
os.chdir("/content/190604_#144_lung_raw_tiff_10slices")
print(os.getcwd())

#Read Tiff file
imgdir = "/content/190604_#144_lung_raw_tiff_10slices//"
img = load_tiff_sequence( imgdir, imgtype='tiff')

print(img.shape)

#Save as hdf5
filename = "/content/190604_P_#144_lung_ctrl_x125_639_10slices.hdf5"
dname = "content"

write_as_hdf5( img, filename, dname, chunks_enabled=True, chunksize=(10,100,100) )

/content/190604_#144_lung_raw_tiff_10slices
(10, 2160, 2560)


# probability threshold (after ilastik analysis)

In [12]:
# import necessary libraries
import tifffile
import glob
import h5py
import os
import numpy as np

In [13]:
def load_tiff_sequence ( imdir, imgtype='tiff', range=None ):
    """
    load tiff sequence stored in the same directory
    e.g. 
    vol = load_tiff_sequence (imgdir, '.png', range=[])
    """

    imlist = glob.glob( imdir + '*.' + imgtype )
    imlist.sort() # sort numerically
    
    if range is not None:
        imlist = imlist[ range[0]:range[1]]
        
    #get image properties by reading the first image
    im = tifffile.imread(imlist[0])
    imsize_x = im.shape[1]
    imsize_y = im.shape[0]
    imsize_z = len( imlist )
    imsize = ( imsize_z, imsize_y, imsize_x )
    imtype = im.dtype
    
    stack = np.zeros( imsize, dtype=imtype )
    for (i, impath) in enumerate(imlist):
        im = tifffile.imread( impath )
        stack[i,:,:] = im
        
    return stack

In [14]:
def write_as_hdf5( stack, h5name, destname, 
                   chunks_enabled=True, chunksize=None,
                   attributes=None ):
    """
    e.g.
    write_as_hdf5(vol, 'test.hdf5', 'resolution_0', True, (100,100,100))
    """
    if chunks_enabled:
        if chunksize is None:
            chunks = True
        else:
            chunks = chunksize
    else:
        chunks = None
        
    with h5py.File( h5name, 'w', driver='stdio' ) as hf:
        data = hf.create_dataset (destname,
                                  chunks=chunks,
                                  data=stack )
        if attributes is not None:
            for key, value in attributes.items():
                data.attrs[key] = value

In [28]:
h5name = "/content/190604_P_#144_lung_ctrl_x125_639_Probabilities_10slices.h5"
hf = h5py.File( h5name, "r" )

In [32]:
data = hf["expmat"]
print (data.shape)
l1_prob = data[:,:,:,0] # probability of label1

(10, 2160, 2560, 2)


In [33]:
# maks a binary mask
binary24 = (l1_prob > 24)
print (binary24.sum()*8.25*8.25*10)
# make binary into uint8
binary24 = (255*binary24).astype( 'uint16' )
# export as tiff
filename = "/content/190604_P_#144_lung_ctrl_x125_639_bin10A.tiff"
tifffile.imsave( filename, binary24 )

165508942.5


In [34]:
# maks a binary mask
binary50 = (l1_prob > 50)
print (binary50.sum()*8.25*8.25*10)
# make binary into uint8
binary50 = (255*binary50).astype( 'uint16' )
# export as tiff
filename = "/content/190604_P_#144_lung_ctrl_x125_639_bin20B.tiff"
tifffile.imsave( filename, binary50 )

133056061.875


In [35]:
# maks a binary mask
binary75 = (l1_prob > 75)
print (binary75.sum()*8.25*8.25*10)
# make binary into uint8
binary75 = (255*binary75).astype( 'uint16' )
# export as tiff
filename = "/content/190604_P_#144_lung_ctrl_x125_639_bin30C.tiff"
tifffile.imsave( filename, binary75 )

119030422.5


In [36]:
# maks a binary mask
binary101 = (l1_prob > 101)
print (binary101.sum()*8.25*8.25*10)
# make binary into uint8
binary101 = (255*binary101).astype( 'uint16' )
# export as tiff
filename = "/content/190604_P_#144_lung_ctrl_x125_639_bin40D.tiff"
tifffile.imsave( filename, binary101 )

103172540.625


In [37]:
# maks a binary mask
binary126 = (l1_prob > 126)
print (binary126.sum()*8.25*8.25*10)
# make binary into uint8
binary126 = (255*binary126).astype( 'uint16' )
# export as tiff
filename = "/content/190604_P_#144_lung_ctrl_x125_639_bin50E.tiff"
tifffile.imsave( filename, binary126 )

85788016.875


In [38]:
# maks a binary mask
binary152 = (l1_prob > 152)
print (binary152.sum()*8.25*8.25*10)
# make binary into uint8
binary152 = (255*binary152).astype( 'uint16' )
# export as tiff
filename = "/content/190604_P_#144_lung_ctrl_x125_639_bin60F.tiff"
tifffile.imsave( filename, binary152 )

72314364.375


In [39]:
# maks a binary mask
binary178 = (l1_prob > 178)
print (binary178.sum()*8.25*8.25*10)
# make binary into uint8
binary178 = (255*binary178).astype( 'uint16' )
# export as tiff
filename = "/content/190604_P_#144_lung_ctrl_x125_639_bin70G.tiff"
tifffile.imsave( filename, binary178 )

59966465.625


In [40]:
# maks a binary mask
binary203 = (l1_prob > 203)
print (binary203.sum()*8.25*8.25*10)
# make binary into uint8
binary203 = (255*binary203).astype( 'uint16' )
# export as tiff
filename = "/content/190604_P_#144_lung_ctrl_x125_639_bin80H.tiff"
tifffile.imsave( filename, binary203 )

49488243.75


In [41]:
# maks a binary mask
binary229 = (l1_prob > 229)
print (binary229.sum()*8.25*8.25*10)
# make binary into uint8
binary229 = (255*binary229).astype( 'uint16' )
# export as tiff
filename = "/content/190604_P_#144_lung_ctrl_x125_639_bin90I.tiff"
tifffile.imsave( filename, binary229 )

33390781.875


# Count all signal

In [1]:
# import necessary libraries
# from skimage.external import tifffile # Error, use tifffile library
import tifffile
from scipy.ndimage import label
from matplotlib import pyplot as plt
import pandas as pd
import scipy.ndimage as ndi
import glob
import h5py
import os
import numpy as np

In [2]:
def load_tiff_sequence ( imdir, imgtype='tiff', range=None ):
    """
    load tiff sequence stored in the same directory
    e.g. 
    vol = load_tiff_sequence (imgdir, '.png', range=[])
    """

    imlist = glob.glob( imdir + '*.' + imgtype )
    imlist.sort() # sort numerically
    
    if range is not None:
        imlist = imlist[ range[0]:range[1]]
        
    #get image properties by reading the first image
    im = tifffile.imread(imlist[0])
    imsize_x = im.shape[1]
    imsize_y = im.shape[0]
    imsize_z = len( imlist )
    imsize = ( imsize_z, imsize_y, imsize_x )
    imtype = im.dtype
    
    stack = np.zeros( imsize, dtype=imtype )
    for (i, impath) in enumerate(imlist):
        im = tifffile.imread( impath )
        stack[i,:,:] = im
        
    return stack

In [3]:
def write_as_hdf5( stack, h5name, destname, 
                   chunks_enabled=True, chunksize=None,
                   attributes=None ):
    """
    e.g.
    write_as_hdf5(vol, 'test.hdf5', 'resolution_0', True, (100,100,100))
    """
    if chunks_enabled:
        if chunksize is None:
            chunks = True
        else:
            chunks = chunksize
    else:
        chunks = None
        
    with h5py.File( h5name, 'w', driver='stdio' ) as hf:
        data = hf.create_dataset (destname,
                                  chunks=chunks,
                                  data=stack )
        if attributes is not None:
            for key, value in attributes.items():
                data.attrs[key] = value

In [4]:
def ask_hdf5_size( h5name, dsetname=None ):
    
    # obtain file handle
    hf = h5py.File( h5name, 'r' )
    
    if dsetname is None:
        # get the name of the 0th dataset
        dsetname = list( hf.keys() )[0]
        dset = hf[ dsetname ]
    else:
        # get dataset
        dset = hf[ dsetname ]
    
    # print size
    print( "Data set size:", dset.shape )
    
    # close handle
    hf.close()

In [5]:
def load_hdf5( h5name, dsetname=None, multichannel=True ):
    
    # obtain file handle
    hf = h5py.File( h5name, 'r' )
    
    if dsetname is None:
        # get the name of the 0th dataset
        dsetname = list( hf.keys() )[0]
        dset = hf[ dsetname ]
    else:
        # get dataset
        dset = hf[ dsetname ]
    
    if multichannel:
        # load data as numpy array
        data = dset[ :, :, :, 0] # 0th channel = cells
        #data = dset[ :, :, :, 0] # 0th channel = cells
    else:
        data = dset[ :, :, :] # 0th channel = cells
        #data = dset[ :, :, :] # 0th channel = cells

    # close handle
    hf.close()
    
    return data

In [48]:
def calculate_prob_hdf5(file_list, threshold):
    
    # load probabiltiy image
    prob = load_hdf5( file, "expmat", multichannel=True )
    print (prob.shape)
    
    ### Binarize probability image
    thresh = threshold * 255
    binary = ( prob > thresh )
    print ("Total volume of detected signals:", binary.sum()*8.25*8.25*10)
    
    # this defines "connectivity" between voxels
    # structure = ndi.generate_binary_structure( 3, 3 )
    
    # this defines "connectivity" between voxels
    structure = np.array( [[[0,0,0],
                            [0,0,0],
                           [0,0,0]],
                           [[0,0,0],
                            [0,0,0],
                            [0,0,0]],
                           [[0,0,0],
                            [0,0,0],
                            [0,0,0]]])
        
    # Label isolated objects
    objects, num_objects = label( binary, structure )
    print( "Number of detected objects:", objects.max() )
        
    # make binary into uint16
    binary16 = (255*binary).astype( 'uint16' )
    
    # export as tiff
    #basename = os.path.basename(file)
    #filename = rootdir[:-5] + "tiff/" + basename[:-4] + ".tif"
    #tifffile.imsave( filename, binary16 )
    
    ### Find center of mass
    ids = np.arange( 1, num_objects+1 )
    coms = ndi.center_of_mass( binary, objects, ids )
    
    # convert to numpy array
    coms = np.array( coms )
    
    # Compute volume of each object
    unique, counts = np.unique( objects, return_counts=True )
    # remove 0
    unique = unique[1:]
    counts = counts[1:]
    
    # create empty dataframe
    df = pd.DataFrame()
    
    # colum "ID"
    df['ID'] = unique
    
    # column "X", "Y", "Z"
    df['X'] = coms[ :, 2 ]
    df['Y'] = coms[ :, 1 ]
    df['Z'] = coms[ :, 0 ]
    
    # colum "volume"
    df["volume"] = counts
    
    # save as csv
    basename = os.path.basename(file)
    csvdir = rootdir + "csv"
    if not os.path.exists(csvdir):
      os.mkdir(csvdir)
    filename = rootdir + "csv/" + basename[:-26] + f"_p{int(threshold*100)}_all_639.csv"
    df.to_csv( filename, index=False, float_format='%.2f' )

In [20]:
# Define root diretory
rootdir = "/content/"

In [21]:
# get files which ends with 'probability'
file_list = glob.glob( rootdir + "*_Probabilities_10slices.h5" )
print( file_list )

['/content/190604_P_#144_lung_ctrl_x125_639_Probabilities_10slices.h5']


In [22]:
file = file_list[0]
prob = load_hdf5( file, "expmat", multichannel=False )

In [49]:
# loop through all files and thresholds
thresholds = [0.1, 0.3, 0.5, 0.7, 0.9]
for file in file_list:
    for thresh in thresholds:
      ask_hdf5_size( file, dsetname=None )
      raw = load_hdf5(file, multichannel=False)
      print (file.rsplit("/")[-1][:-3])
      calculate_prob_hdf5(file, thresh)

Data set size: (10, 2160, 2560, 2)
190604_P_#144_lung_ctrl_x125_639_Probabilities_10slices
(10, 2160, 2560)
Total volume of detected signals: 160266768.75
Number of detected objects: 235470
Data set size: (10, 2160, 2560, 2)
190604_P_#144_lung_ctrl_x125_639_Probabilities_10slices
(10, 2160, 2560)
Total volume of detected signals: 117569120.625
Number of detected objects: 172737
Data set size: (10, 2160, 2560, 2)
190604_P_#144_lung_ctrl_x125_639_Probabilities_10slices
(10, 2160, 2560)
Total volume of detected signals: 84283835.625
Number of detected objects: 123833
Data set size: (10, 2160, 2560, 2)
190604_P_#144_lung_ctrl_x125_639_Probabilities_10slices
(10, 2160, 2560)
Total volume of detected signals: 59966465.625
Number of detected objects: 88105
Data set size: (10, 2160, 2560, 2)
190604_P_#144_lung_ctrl_x125_639_Probabilities_10slices
(10, 2160, 2560)
Total volume of detected signals: 33390781.875
Number of detected objects: 49059
