In [None]:
from pprint import pprint 

import os
from glob import glob
from skimage import io, img_as_ubyte
from skimage.filters import threshold_otsu, gaussian
from skimage.morphology import binary_dilation, binary_closing

from scipy.ndimage import binary_fill_holes

from skimage.measure import label, regionprops
##regionprops_table is only available in v0.16 and after
import tifffile

import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Report the Python version of the runtime that executes this notebook.
!python --version

# Project root directory (Colab form field): edit the value to match your environment.
root_path = "/media/kondo/Ext4_for_Colab/tomizawa/paper/" #@param {type:"string"}
#@markdown  - Set the path of your root directory. It must contain the image dataset directory ("/data/data_original/") and the "/utils/" directory.
#@markdown  - The image dataset is available from a database (URL in the README on GitHub).
#@markdown  - The "utils" library is available from the same GitHub repository as this Jupyter notebook.

# Make the root directory the working directory: the data and results paths
# used in the following cells are written relative to it.
%cd $root_path 
!pwd
!ls

Python 3.8.10
[Errno 2] No such file or directory: '/media/kondo/Ext4_for_Colab/tomizawa/paper/'
/content
/content
sample_data


In [None]:
# Strain directory names to process ('Tak' uses its own sub-directory naming rule).
strain_list = ['Aus', 'Tak', 'RIL5']
# Day sub-directory names expected below every strain directory.
day_list = ['0d','1d','2d','3d','4d','7d']

# Directory holding the original images, relative to the current working directory.
data_path = './data/data_original/'

In [None]:
## Note that Tak is processed by a different function, 
## because of its different naming rule
def make_output_directories_Aus_RIL5(results_path, day_list, strain):
    '''
    Create the output directory tree for a strain that follows the Aus/RIL5 naming rule.

    The resulting layout is
    ``<results_path>/<strain>/<day>/<strain>_<day>_<sex>/`` for every day in
    ``day_list`` and every sex in ('M', 'F').

    Directories that already exist are left untouched, and missing parent
    directories of ``results_path`` are created as well, so the function can be
    called repeatedly and on a fresh checkout.

    :param results_path: root output directory (with or without a trailing '/')
    :param day_list: iterable of day directory names, e.g. ['0d', '1d']
    :param strain: strain name, e.g. 'Aus' or 'RIL5'
    '''
    strain_dir = os.path.join(results_path, strain)
    # Created explicitly so the strain directory exists even when day_list is empty.
    os.makedirs(strain_dir, exist_ok=True)

    for day in day_list:
        for sex in ('M', 'F'):
            # makedirs also creates the intermediate <day> directory.
            sample_dir = os.path.join(strain_dir, day, strain + '_' + day + '_' + sex)
            os.makedirs(sample_dir, exist_ok=True)



def make_output_directories_Tak(results_path, day_list, strain='Tak', strain_sex_list=['Tak-1', 'Tak-2']):
    '''
    Create the output directory tree for the Tak strain.

    The resulting layout is
    ``<results_path>/<strain>/<day>/<line>_<day>_<sex>/`` where ``<line>`` is an
    entry of ``strain_sex_list`` and ``<sex>`` is 'M' for 'Tak-1' and 'F' for
    'Tak-2'.

    Directories that already exist are left untouched, and missing parent
    directories of ``results_path`` are created as well.

    :param results_path: root output directory (with or without a trailing '/')
    :param day_list: iterable of day directory names, e.g. ['0d', '1d']
    :param strain: name of the strain directory
    :param strain_sex_list: Tak line names; only 'Tak-1' and 'Tak-2' are known
    :raises ValueError: if ``strain_sex_list`` contains an unknown line name
    '''
    # Sex suffix appended to the directory name of each Tak line.
    sex_of_line = {'Tak-1': 'M', 'Tak-2': 'F'}

    # Validate before touching the file system so a bad name does not leave a
    # half-built tree behind (the default list is only read, never modified).
    strain_sex_list = list(strain_sex_list)
    unknown = [line for line in strain_sex_list if line not in sex_of_line]
    if unknown:
        raise ValueError(
            'unknown Tak line(s) %r; expected one of %r' % (unknown, sorted(sex_of_line)))

    strain_dir = os.path.join(results_path, strain)
    os.makedirs(strain_dir, exist_ok=True)

    for day in day_list:
        day_dir = os.path.join(strain_dir, day)
        # Created explicitly so the day directory exists even when strain_sex_list is empty.
        os.makedirs(day_dir, exist_ok=True)
        for line in strain_sex_list:
            sample_dir = os.path.join(day_dir, line + '_' + day + '_' + sex_of_line[line])
            os.makedirs(sample_dir, exist_ok=True)


def make_output_directories(results_path, day_list, strain_list):
    '''
    Build the output directory tree of every strain in ``strain_list``.

    Wrapper that forwards 'Tak' to make_output_directories_Tak (it has its own
    naming rule) and every other strain to make_output_directories_Aus_RIL5.
    '''
    for strain_name in strain_list:
        if strain_name != 'Tak':
            make_output_directories_Aus_RIL5(results_path, day_list, strain_name)
            continue
        make_output_directories_Tak(results_path, day_list)
            

            
def get_output_fnames_for_a_strain(data_path, results_path, strain, day=None):
    '''
    Collect the input .tif files of one strain and the matching output file names.

    Input files are searched at ``<data_path>/<strain>/<day>/<sample dir>/*.tif``.
    Each output name is the input path re-rooted from ``data_path`` to
    ``results_path``, so the sub-directory layout below the root is preserved.

    :param data_path: root directory of the input images
    :param results_path: root directory of the output images
    :param strain: strain directory name, e.g. 'Aus'
    :param day: day directory name (a string such as '7d') to restrict the search
                to a single day; None (default) processes all days
    :return: (fnames_in, fnames_out), two lists of equal length sorted by input name
    '''
    day_pattern = day if day else '*'
    fnames_in = sorted(glob(os.path.join(data_path, strain, day_pattern, '*', '*.tif')))

    # Re-root each path instead of textually replacing the input directory name:
    # a replace() ignores the parent of results_path and would also rewrite any
    # other path component or file name that happens to contain that name.
    fnames_out = [os.path.join(results_path, os.path.relpath(fname, data_path))
                  for fname in fnames_in]
    return fnames_in, fnames_out

def get_output_fnames(data_path, results_path, strain_list, day=None):
    '''
    Gather the input and output file names of every strain in ``strain_list``.

    Wrapper around get_output_fnames_for_a_strain.

    output :: dict keyed by strain name, e.g. dict['Aus'] = (fnames_in_list, fnames_out_list)
    '''
    def _name_pair(strain_name):
        # Unpack and re-pack so the stored value is always a 2-tuple.
        inputs, outputs = get_output_fnames_for_a_strain(
            data_path, results_path, strain_name, day)
        return inputs, outputs

    return {strain_name: _name_pair(strain_name) for strain_name in strain_list}
        

In [None]:
def fill_scale_bar(img, fill=255):
    """
    Overwrite two bottom-right regions of an image with a constant value.

    The image is modified in place and also returned.

    :param img: np.array indexed as (row, column, ...)
    :param fill: value written into the regions (white: 255, black: 0)
    :return img: the same array object, after filling
    """
    # (first row, first column) of each region; both extend to the bottom-right corner.
    corners = (
        (1000, 1200),  # explanation
        (1100, 900),   # scale bar
    )
    for first_row, first_col in corners:
        img[first_row:, first_col:] = fill
    return img



def make_binary(img):
    """
    Build a binary mask from the blue channel of an image.

    Pixels whose blurred blue value lies below the Otsu threshold become
    foreground; the foreground is then dilated and its holes are filled.

    :param img: img(YXC), 8-bit RGB, np.array
    :return mask: binary(0,1), np.array
    """
    blue_channel = img[:, :, 2]

    # Strong blur (skimage's default would be sigma=1).
    smoothed = gaussian(blue_channel, sigma=10)

    # Foreground is the side darker than the Otsu threshold (boolean array).
    foreground = smoothed < threshold_otsu(smoothed)

    # Dilate with a 5x5 all-ones element, then close the enclosed holes.
    square_5x5 = np.full((5, 5), 1)
    filled = binary_fill_holes(binary_dilation(foreground, square_5x5))

    # bool -> 0/1 integers
    return filled.astype(int)



def get_largest_label(mask):
    """
    Keep only the connected region with the largest area.

    Used to isolate the gemmaling label and drop small noisy labels.

    :param mask: binary (0,1), np.array
    :return mask: binary (0,1), np.array. The input array itself is returned
                  when it holds at most one labelled region (this includes an
                  empty mask); otherwise a new integer array is returned that
                  is 1 inside the largest region and 0 elsewhere.
    """
    label_image = label(mask)
    regions = regionprops(label_image)  # one entry per label

    # Nothing to filter out: no region at all, or exactly one.
    if len(regions) <= 1:
        return mask

    # max() keeps the first of several equally large regions, which matches a
    # scan that only switches on a strictly larger area.
    largest = max(regions, key=lambda region: region.area)

    # Select by the region's own label value rather than by its list position.
    return (label_image == largest.label).astype(int)





In [None]:
def make_files_no_scale_bar(fnames_dict, save=False):
    """
    Blank the scale-bar area of every image, then save or display the result.

    :param fnames_dict: dict mapping a strain name to the pair
                        (input file names, output file names)
    :param save: True to write each processed image to its output file name as
                 an ImageJ-compatible TIFF (the output directories must already
                 exist); False to plot each processed image instead
    """
    for strain, (input_fnames, output_fnames) in fnames_dict.items():
        print(strain)
        print(len(input_fnames))
        if not input_fnames:
            # No image was found for this strain (e.g. wrong data path);
            # indexing the first element below would raise IndexError.
            continue
        print(input_fnames[0], output_fnames[0])

        for in_fname, out_fname in zip(input_fnames, output_fnames):
            img = io.imread(in_fname)
            img = fill_scale_bar(img)

            if save:
                with tifffile.TiffWriter(out_fname, imagej=True) as tif:
                    # tifffile deprecated TiffWriter.save in favour of
                    # TiffWriter.write; use whichever this version provides.
                    write_image = getattr(tif, 'write', None) or tif.save
                    # ImageJ assumes TZCYXS order
                    write_image(img_as_ubyte(
                        img[np.newaxis, np.newaxis, np.newaxis, :, :, :]))
            else:
                fig, ax = plt.subplots(1, 1, figsize=(5, 5))
                ax.imshow(img)
                plt.show()
                # Release the figure so a long loop does not accumulate open figures.
                plt.close(fig)



def make_binary_files(fnames_dict, save=False):
    """
    Compute the binary mask of every image, then save or display it.

    For each image the mask comes from make_binary, its bottom-right corner is
    cleared, and only the largest-area label is kept (get_largest_label).

    :param fnames_dict: dict mapping a strain name to the pair
                        (input file names, output file names)
    :param save: True to write each mask (scaled to 0/255) to its output file
                 name as an ImageJ-compatible TIFF (the output directories must
                 already exist); False to plot the image next to its mask
    """
    for strain, (input_fnames, output_fnames) in fnames_dict.items():
        print(strain)
        print(len(input_fnames))
        if not input_fnames:
            # No image was found for this strain (e.g. wrong data path);
            # indexing the first element below would raise IndexError.
            continue
        print(input_fnames[0], output_fnames[0])

        for in_fname, out_fname in zip(input_fnames, output_fnames):
            img = io.imread(in_fname)

            mask = make_binary(img)
            # Erase scale bar.
            # NOTE(review): fill_scale_bar additionally blanks img[1100:, 900:];
            # confirm that clearing only this corner is intended.
            mask[1000:, 1200:] = 0
            mask = get_largest_label(mask)  # keep the label with the largest area

            if save:
                mask_save = mask * 255  # 8-bit value range
                with tifffile.TiffWriter(out_fname, imagej=True) as tif:
                    # tifffile deprecated TiffWriter.save in favour of
                    # TiffWriter.write; use whichever this version provides.
                    write_image = getattr(tif, 'write', None) or tif.save
                    # ImageJ assumes TZCYXS order
                    write_image(img_as_ubyte(
                        mask_save[np.newaxis, np.newaxis, np.newaxis, :, :, np.newaxis]))
            else:  # without save, plot img
                fig, ax = plt.subplots(1, 2, figsize=(10, 5))
                ax[0].imshow(img)
                ax[1].imshow(mask)
                plt.show()
                # Release the figure so a long loop does not accumulate open figures.
                plt.close(fig)



def make_black_back_files(fnames_dict, save=False):
    """
    Black out everything outside the mask in every image, then save or display it.

    For each image the mask comes from make_binary, its bottom-right corner is
    cleared, and only the largest-area label is kept (get_largest_label). The
    image is then multiplied by that 0/1 mask.

    :param fnames_dict: dict mapping a strain name to the pair
                        (input file names, output file names)
    :param save: True to write each masked image to its output file name as an
                 ImageJ-compatible TIFF (the output directories must already
                 exist); False to plot the image next to its masked version
    """
    for strain, (input_fnames, output_fnames) in fnames_dict.items():
        print(strain)
        print(len(input_fnames))
        if not input_fnames:
            # No image was found for this strain (e.g. wrong data path);
            # indexing the first element below would raise IndexError.
            continue
        print(input_fnames[0], output_fnames[0])

        for in_fname, out_fname in zip(input_fnames, output_fnames):
            img = io.imread(in_fname)

            mask = make_binary(img)
            # Erase scale bar.
            # NOTE(review): fill_scale_bar additionally blanks img[1100:, 900:];
            # confirm that clearing only this corner is intended.
            mask[1000:, 1200:] = 0
            mask = get_largest_label(mask)  # keep the label with the largest area

            # Add a trailing channel axis so the mask broadcasts over the image
            # channels, then zero (black) every pixel outside the mask.
            mask_bb = img * mask[:, :, np.newaxis]

            if save:
                with tifffile.TiffWriter(out_fname, imagej=True) as tif:
                    # tifffile deprecated TiffWriter.save in favour of
                    # TiffWriter.write; use whichever this version provides.
                    write_image = getattr(tif, 'write', None) or tif.save
                    # ImageJ assumes TZCYXS order
                    write_image(img_as_ubyte(
                        mask_bb[np.newaxis, np.newaxis, np.newaxis, :, :, :]))
            else:  # without save, plot img
                fig, ax = plt.subplots(1, 2, figsize=(10, 5))
                ax[0].imshow(img)
                ax[1].imshow(mask_bb)
                plt.show()
                # Release the figure so a long loop does not accumulate open figures.
                plt.close(fig)
    

def make_blur_files(fnames_dict, save=False):
    """
    Compute a blurred version of the binary mask of every image, then save or display it.

    For each image the mask comes from make_binary, its bottom-right corner is
    cleared, and only the largest-area label is kept (get_largest_label). The
    mask is then Gaussian-blurred (sigma=60) and min-max normalised to [0, 1].

    :param fnames_dict: dict mapping a strain name to the pair
                        (input file names, output file names)
    :param save: True to write each blurred mask (scaled to 0..255) to its
                 output file name as an ImageJ-compatible TIFF (the output
                 directories must already exist); False to plot the image next
                 to its blurred mask
    """
    for strain, (input_fnames, output_fnames) in fnames_dict.items():
        print(strain)
        print(len(input_fnames))
        if not input_fnames:
            # No image was found for this strain (e.g. wrong data path);
            # indexing the first element below would raise IndexError.
            continue
        print(input_fnames[0], output_fnames[0])

        for in_fname, out_fname in zip(input_fnames, output_fnames):
            img = io.imread(in_fname)

            mask = make_binary(img)
            # Erase scale bar.
            # NOTE(review): fill_scale_bar additionally blanks img[1100:, 900:];
            # confirm that clearing only this corner is intended.
            mask[1000:, 1200:] = 0
            mask = get_largest_label(mask)  # keep the label with the largest area

            # Blur, then min-max normalise to [0, 1].
            mask = gaussian(mask, sigma=60)
            lowest, highest = np.min(mask), np.max(mask)
            if highest > lowest:
                mask = (mask - lowest) / (highest - lowest)
            else:
                # A flat mask has no range to stretch; dividing by zero would
                # fill it with NaN, so map it to all zeros instead.
                mask = np.zeros_like(mask, dtype=float)

            if save:
                mask_save = (mask * 255).astype(int)  # 8-bit value range
                with tifffile.TiffWriter(out_fname, imagej=True) as tif:
                    # tifffile deprecated TiffWriter.save in favour of
                    # TiffWriter.write; use whichever this version provides.
                    write_image = getattr(tif, 'write', None) or tif.save
                    # ImageJ assumes TZCYXS order
                    write_image(img_as_ubyte(
                        mask_save[np.newaxis, np.newaxis, np.newaxis, :, :, np.newaxis]))
            else:  # without save, plot
                fig, ax = plt.subplots(1, 2, figsize=(10, 5))
                ax[0].imshow(img)
                ax[1].imshow(mask)
                plt.show()
                # Release the figure so a long loop does not accumulate open figures.
                plt.close(fig)


    

In [None]:
## Original images with the scale-bar area filled with white
results_path = './data/data_original_noScaleBar/'

## Create the output directory tree, then pair every input image with its output file name.
make_output_directories(results_path, day_list, strain_list) ## only needed when save=True
fnames_dict = get_output_fnames(data_path, results_path, strain_list)

## Process every strain and write the results as TIFF files.
make_files_no_scale_bar(fnames_dict, save=True)


In [None]:
### Binary masks
results_path = './data/data_binary/'

## Create the output directory tree, then pair every input image with its output file name.
make_output_directories(results_path, day_list, strain_list) ## only needed when save=True
fnames_dict = get_output_fnames(data_path, results_path, strain_list)

## Process every strain and write the masks as TIFF files.
make_binary_files(fnames_dict, save=True)


In [None]:
### Images with the background (everything outside the mask) filled with black
results_path = './data/data_black-back/'
print(results_path)

## Create the output directory tree, then pair every input image with its output file name.
make_output_directories(results_path, day_list, strain_list) ## only needed when save=True
fnames_dict = get_output_fnames(data_path, results_path, strain_list)

## Process every strain and write the masked images as TIFF files.
make_black_back_files(fnames_dict, save=True)



In [None]:
### Binary masks followed by a Gaussian blur
results_path = './data/data_blur/'
print(results_path)

## Pair every input image with its output file name, then create the output directory tree.
fnames_dict = get_output_fnames(data_path, results_path, strain_list)
make_output_directories(results_path, day_list, strain_list) ## only needed when save=True

## Process every strain and write the blurred masks as TIFF files.
make_blur_files(fnames_dict, save=True)