# Batch process segmentation made with Ilastik

This notebook performs the same steps as notebook 1A, but in a more streamlined way, allowing for batch processing without visual inspection.

The processing functions have been moved to the process_colonies.py script.

In [1]:
#next two lines make sure that Matplotlib plots are shown properly in Jupyter Notebook
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

#main data analysis packages
import numpy as np
import seaborn as sns
import pandas as pd
import dask.array as da

#path handling
import pathlib
import h5py

#dask cache: create a cache for dask computations and register it globally
from dask.cache import Cache
cache = Cache(4e9)  # Leverage 4 gigabytes of memory
cache.register()    # Turn cache on globally

import process_colonies as pc

## Set Paths and Settings

In [13]:
#root folder of the processed experiment on the shared drive
#all project paths below are derived from it, so switching dataset requires a single edit
data_root = pathlib.Path("/Volumes/ScientificData/Users/Giulia(botgiu00)/Collaborations/Ashley/2023-04-11-agar-pad-processed")

#set path to registered file
path_reg_im = data_root / "Registration_max_frames"

#set path to Ilastik output file (stored in the same folder as the registered images)
path_seg_im = data_root / "Registration_max_frames"

# #to work from a local copy instead, uncomment the two overrides below
# #set path to registered file
# path_reg_im = pathlib.Path("/Users/simonvanvliet/TempData/2023-04-11-agar-pad-processed/Registration_max_frames/")

# #set path to Ilastik output file
# path_seg_im = pathlib.Path("/Users/simonvanvliet/TempData/2023-04-11-agar-pad-processed/Registration_max_frames/")

#set path to temporarily store label images (use local machine folder for speed)
temp_data_path = pathlib.Path.home() / 'TempData'
temp_data_path.mkdir(exist_ok=True)

#set path to output csv files
#NOTE: parent folders are deliberately not created; if the shared drive is not mounted this fails loudly
path_data_files = data_root / 'DataFiles'
path_data_files.mkdir(exist_ok=True)

#set filenames
exp_name = "20230411"

#specify properties to extract
prop_list = ['label',
            'area', 'centroid',
            'axis_major_length', 'axis_minor_length']

#specify processing settings (passed as a whole to pc.process_pos)
settings = {
    'calc_edge_dist'    : True, #set to True to calculate distance between colony edges, more accurate than center to center distance, but very slow
    'prop_list'         : prop_list,
    #specify the order of the strains in the Ilastik layers
    'idx_SA1'   : 0, # SA1 is GFP
    'idx_SA2'   : 1, # SA2 is RFP
    'idx_BG'    : 2,
    'idx_PA'    : 3,
    #specify the strain names (can be ignored because names are extracted in the 2nd notebook)
    #'SA1'       : 'enter strain name here',
    #'SA2'       : 'enter strain name here',
    #sigma for gaussian filter; a dict can hold only one 'sigma' entry, so this single value
    #applies to both the pseudomonas and the staph parameters below
    'sigma'             : 1,
    #specify the segmentation processing parameters for pseudomonas
    'threshold_PA'      : 0.5, # threshold for segmentation
    'closing_radius_PA' : 7, # radius for closing operation
    'min_cell_area_PA'  : 20, # minimum area for a cell to be considered
    'max_hole_area_PA'  : 100, # maximum area for a hole to be filled
    #specify the segmentation processing parameters for staph
    'threshold_SA'      : 0.5, # threshold for segmentation
    'closing_radius_SA' : 5, # radius for closing operation
    'min_cell_area_SA'  : 20, # minimum area for a cell to be considered
    'max_hole_area_SA'  : 100, # maximum area for a hole to be filled
    #store path metadata
    'temp_path'         : temp_data_path,
    'path_reg_im'       : path_reg_im,
    'path_seg_im'       : path_seg_im,
    'path_data_files'   : path_data_files,
    'exp_name'          : exp_name
}


#read the metadata csv of this experiment (first column is used as index)
#and attach the resulting table to the settings
metadata_path = settings['path_data_files'].joinpath(f'agarpad_{exp_name}.csv')
pos_metadata = pd.read_csv(metadata_path, index_col=0)
settings.update({'pos_metadata': pos_metadata})

## Loop positions
First make sure that all positions are found

In [14]:
#collect the file names of all Ilastik probability maps, in sorted path order
prob_files = sorted(path_seg_im.glob('*_Probabilities.h5'))
pos_list = [h5_file.name for h5_file in prob_files]

#list them so that missing positions can be spotted by eye
for pos in pos_list:
    print(pos)

20230411_reg_p000-images_Probabilities.h5
20230411_reg_p001-images_Probabilities.h5
20230411_reg_p002-images_Probabilities.h5
20230411_reg_p003-images_Probabilities.h5
20230411_reg_p004-images_Probabilities.h5
20230411_reg_p005-images_Probabilities.h5
20230411_reg_p006-images_Probabilities.h5
20230411_reg_p007-images_Probabilities.h5
20230411_reg_p008-images_Probabilities.h5
20230411_reg_p009-images_Probabilities.h5
20230411_reg_p010-images_Probabilities.h5
20230411_reg_p011-images_Probabilities.h5
20230411_reg_p012-images_Probabilities.h5
20230411_reg_p013-images_Probabilities.h5
20230411_reg_p014-images_Probabilities.h5
20230411_reg_p015-images_Probabilities.h5
20230411_reg_p016-images_Probabilities.h5
20230411_reg_p017-images_Probabilities.h5
20230411_reg_p018-images_Probabilities.h5
20230411_reg_p019-images_Probabilities.h5
20230411_reg_p020-images_Probabilities.h5
20230411_reg_p021-images_Probabilities.h5
20230411_reg_p022-images_Probabilities.h5
20230411_reg_p023-images_Probabilities.h5

Now we loop over all positions; this will take a while.

In [15]:
df_all = []      #dataframes returned by pc.process_pos, one per successfully processed position
failed_pos = []  #positions that raised an error, kept so they can be inspected / re-run afterwards

#make sure the experiment sub-folder exists
#(presumably pc.process_pos stores its per-position csv files here - confirm in process_colonies.py)
csv_dir_pos = settings['path_data_files'] / settings['exp_name']
csv_dir_pos.mkdir(exist_ok=True)

#segment, track and process all positions
for pos in pos_list:
    try:
        #segment colony and store label image
        df = pc.process_pos(pos, settings, store_2_disk=True, clean_disk=True)
    except Exception as err:
        #catch Exception instead of using a bare except, so that interrupting the kernel
        #still stops the loop; report the reason so that the failure can be diagnosed
        failed_pos.append(pos)
        print("Error processing position {}: {!r}".format(pos, err))
    else:
        df_all.append(df)

#pd.concat cannot combine an empty list, fail with a clear message instead
if not df_all:
    raise RuntimeError("None of the positions could be processed, no data to save")

#combine all positions and save to a single csv file
df_combined = pd.concat(df_all).reset_index(drop=True)
#the settings key is 'exp_name' ('expname' does not exist and raised a KeyError after the full loop had run)
csv_name = settings['path_data_files'] / f"{settings['exp_name']}_all_data.csv"
df_combined.to_csv(csv_name)

Segementing & processing position 0
Segementing & processing position 1
Segementing & processing position 2
