### 2D frame dataset generation for CIDRE-based retrospective flat/dark-field model estimation

Run the cells below to extract a suitable dataset of 2D images from the 3D microscopy image stacks included in *source*.
Frames exported to *outdir* will satisfy the following inclusion criteria:
* fraction of pixels brighter than the background level *bg_lvl* exceeds *bg_rel_thr*
* mean intensity of the non-background pixels, relative to the maximum value of the image data type, exceeds *mean_rel_thr*

In [None]:
import numpy as np
import random
import tifffile as tiff

from os import listdir, mkdir, path, walk


##### I/O image paths

In [None]:
# Input / output locations (placeholders: adapt both before running).
#   source: root folder that is searched recursively for TIFF z-stacks
#   outdir: folder receiving the exported 2D frames (created if missing)
source = "path/to/input"
outdir = "path/to/output"


##### Inclusion criteria

In [3]:
# Frame selection thresholds used by the export loop:
#   bg_lvl       - pixels with intensity <= bg_lvl are treated as background
#   bg_rel_thr   - a frame is kept only if its fraction of pixels above bg_lvl
#                  is strictly greater than this value
#   mean_rel_thr - a frame is kept only if the mean intensity of its
#                  non-background pixels is strictly greater than this fraction
#                  of the maximum value of the stack's integer data type
bg_lvl = 0
bg_rel_thr = 0.5
mean_rel_thr = 0.005

# Upper bound, in bytes, on the cumulative (uncompressed) pixel data exported
# during one run of the export loop.
max_dset_size = 50e9


##### Evaluate I/O directory content

In [4]:
# Determine the index of the next frame to export. Frames are written as
# "<index>.tiff", so numbering continues after the files already present in
# outdir; a missing outdir is created and numbering starts at 1.
if path.isdir(outdir):
    num_slc = 1 + sum(
        1 for name in listdir(outdir) if path.isfile(path.join(outdir, name))
    )
else:
    mkdir(outdir)
    num_slc = 1

# Collect the paths of all files found recursively under source.
# NOTE: the directory variable is deliberately not called `dir`; at notebook
# top level that name would shadow the dir() builtin for all later cells.
stack_lst = []
for dirpath, _, fnames in walk(source):
    stack_lst.extend(path.join(dirpath, fname) for fname in fnames)

# Randomize the processing order so that a size-capped export does not always
# draw its frames from the same leading stacks.
random.shuffle(stack_lst)


##### Export 2D frames imposing the required inclusion criteria

In [None]:
"""
TODO: generalize again to RGB and grayscale image stacks!!!
"""

ds_size = 0                 # bytes of pixel data exported by this run
num_out = 1                 # 1-based index of the stack being processed
num_stk = len(stack_lst)

for f in stack_lst:
    # Stop as soon as the size cap is reached; otherwise every remaining stack
    # would still be read from disk without any frame being exported.
    if ds_size >= max_dset_size:
        break

    prc_progress = 100 * (num_out / num_stk)
    print('\nProcessing image stack {0}/{1}\t\t{2:0.1f}%'.format(num_out, num_stk, prc_progress), end='\r')

    # read image stack
    img = tiff.imread(f)

    # Largest value representable by the stack's data type.
    # NOTE: np.iinfo only accepts integer dtypes and raises for float stacks.
    img_max = np.iinfo(img.dtype).max

    # loop over z-slices (first array axis)
    for z in range(img.shape[0]):

        # size cap reached while processing this stack
        if ds_size >= max_dset_size:
            break

        slc = img[z, :, :]

        # One foreground mask drives both criteria, so the coverage test and
        # the mean-intensity test agree on what counts as background.
        fg_mask = slc > bg_lvl
        num_fg = np.count_nonzero(fg_mask)

        # check criteria and save "valid" z-slice to TIFF
        # (num_fg > 0 keeps np.mean from being evaluated on an empty selection)
        if (num_fg > 0
                and num_fg / np.size(slc) > bg_rel_thr
                and np.mean(slc[fg_mask]) > mean_rel_thr * img_max):

            tiff.imwrite(path.join(outdir, f'{num_slc}.tiff'), slc)

            # update total dataset size and slice counter
            ds_size += slc.itemsize * slc.size
            num_slc += 1

    # increase processed stack counter
    num_out += 1

# print exported dataset information
# num_slc holds the index of the NEXT frame to be written, so the number of
# frames in the dataset is num_slc - 1; the size covers this run's exports only.
print(f"\n\nCIDRE dataset size: {num_slc - 1} images\t({1e-9*ds_size:2.1f}GB)")
