# Parameter sweep for the puncta analysis

## Requirements
- A folder with images that should be analyzed.  All z-layers for a specific sample must be combined into a single file. To combine z-layers and channels, run [run_images_to_stack.ipynb](run_images_to_stack.ipynb). 

- To compute puncta statistics per cell, cell segmentation should be provided as an additional channel. To segment cells/nuclei, run [run_cell_segmentation.ipynb](run_cell_segmentation.ipynb).

<hr style="height:2px;">

## Config

<hr style="height:2px;">

### The following code imports and declares functions used for the processing:

In [1]:
#################################
#  Don't modify the code below  #
#################################

import json
import os
import itertools
import pandas as pd 
import copy
from am_utils.utils import combine_statistics
from punctatools.lib.segment import segment_puncta_batch
from punctatools.lib.quantify import quantify_batch
from punctatools.lib.utils import load_parameters

2022-01-24 15:12:44,465 [INFO] WRITING LOG OUTPUT TO /home/amedyukh/.cellpose/run.log


### Specify the parameter file with the default parameter values


In [2]:
# JSON file holding the default parameter values; it is read with json.load further down.
parameter_file = '/research/sharedresources/cbi/public/data/example_data/punctatools/parameters.json'

### Specify the output directory for the parameter sweep

In [3]:
# Root directory of the sweep; every parameter combination gets its own sub-directory under it.
output_dir = '/research/sharedresources/cbi/public/data/_outputs/punctatools/parameter_sweep'

### Script path for puncta analysis

In [4]:
# Script invoked as `python <script> -p <parameters.json>` for every parameter combination.
script_name_puncta = '/research/sharedresources/cbi/public/scripts/run_puncta_analysis.py'

### Do you want to run the analysis now or submit to the cluster?

Specify `True` for running now, `False` for submitting to the cluster. To submit to the cluster, copy all commands to the cluster prompt. See more details on the computational cluster [here](https://wiki.stjude.org/display/RC/HPC+Basics+Bootcamp).

In [5]:
# True: run every combination right away via os.system; False: only print the bsub commands.
run_now = False

### Specify parameters for the cluster execution


`n_cores`: number of cores to use for parallel analysis

`memory`: memory limit per core in Gigabytes

In [6]:
# Passed to bsub as `-n`; when submitting to the cluster it is also stored as the `n_jobs` parameter.
n_cores = 20
# Inserted into the bsub resource request as `rusage[mem=<memory>G]`.
memory = 5

# Value assigned to PATH (`export PATH=...`) at the start of every submitted job command.
python_path = "'/research/sharedresources/cbi/public/conda_envs/punctatools/bin/:$PATH'"


### The following code loads the parameters 

In [7]:
#################################
#  Don't modify the code below  #
#################################

# Load the default parameter values. JSON text is UTF-8 by specification, so
# the encoding is stated explicitly instead of being taken from the locale.
with open(parameter_file, encoding='utf-8') as f:
    kwargs = json.load(f)

# All results of the sweep go under the (resolved) sweep output directory.
kwargs['puncta_analysis_dir'] = os.path.realpath(output_dir)
# Last expression of the cell: displays the loaded parameters in the notebook.
kwargs

{'converted_data_dir': '/research/sharedresources/cbi/public/data/example_data/punctatools/stacks',
 'cell_segmentation_dir': '/research/sharedresources/cbi/public/data/_outputs/punctatools/cells',
 'cells_channel': 0,
 'diameter': 120,
 'model_type': 'cyto',
 'do_3D': True,
 'remove_small_mode': '2D',
 'remove_small_diam_fraction': 0.5,
 'flow_threshold': 0.4,
 'cellprob_threshold': 0,
 'gpu': True,
 'puncta_analysis_dir': '/research/sharedresources/cbi/public/data/_outputs/punctatools/parameter_sweep',
 'puncta_segm_dir': 'puncta',
 'puncta_stat_dir': 'puncta_stats',
 'cell_stat_dir': 'cell_stats',
 'puncta_channels': [1, 2],
 'cell_segmentation': True,
 'minsize_um': 0.2,
 'maxsize_um': 2,
 'num_sigma': 5,
 'overlap': 1,
 'threshold_detection': [0.001, 0.0001],
 'threshold_background': 3,
 'global_background': False,
 'global_background_percentile': 95,
 'background_percentile': 50,
 'threshold_segmentation': 50,
 'segmentation_mode': 1,
 'remove_out_of_cell': False,
 'maxrad_um': N

### Please specify the parameter values you would like to sweep over

In [8]:
# Every variable named after a key of the default parameters is swept: list all values to try.
# All combinations of the listed values are generated with itertools.product.
threshold_detection = [0.003, 0.002]
threshold_segmentation = [0.001]
threshold_background = [3]

<hr style="height:2px;">

## Processing

<hr style="height:2px;">

### The following code prints all parameter combinations

In [9]:
#################################
#  Don't modify the code below  #
#################################

# Collect the sweep: every notebook variable whose name is also a key of the
# default parameters contributes its values, and all combinations of those
# values are enumerated.

sweep_vars = []
sweep_values = []

# Iterate over a snapshot: the loops below bind new names, and the live
# namespace must not change size while it is being iterated.
cur_vars = vars().copy()
for var, value in cur_vars.items():
    # `output_dir` is never swept, even if the parameter file has such a key.
    if var in kwargs and var != 'output_dir':
        # A bare string or scalar counts as a single value. Without this,
        # itertools.product would split a string into characters and raise
        # TypeError on a non-iterable scalar.
        if isinstance(value, (str, bytes)) or not hasattr(value, '__iter__'):
            value = [value]
        sweep_vars.append(var)
        sweep_values.append(value)

print('Parameters to sweep over')
for k, v in zip(sweep_vars, sweep_values):
    print(rf"{k}: {v}")

# Cartesian product: one tuple per combination, ordered like `sweep_vars`.
combinations = list(itertools.product(*sweep_values))

print(f'\nNumber of combinations: {len(combinations)}:')
for c in combinations:
    print(c)

print('\nDoes this look correct? If not, go back and specify the parameter values to sweep over')

Parameters to sweep over
threshold_detection: [0.003, 0.002]
threshold_segmentation: [0.001]
threshold_background: [3]

Number of combinations: 2:
(0.003, 0.001, 3)
(0.002, 0.001, 3)

Does this look correct? If not, go back and specify the parameter values to sweep over


### The following code runs the batch analysis for the given parameter combinations or generates bsub commands


In [10]:
#################################
#  Don't modify the code below  #
#################################

# For every parameter combination: write a dedicated parameters.json into its
# own sub-directory of the sweep directory, then either run the analysis
# script right away (`run_now`) or print the matching bsub command.

outputs = []  # output directory of each combination, in `combinations` order
if not run_now:
    # When submitting, record the requested core count as the `n_jobs` parameter.
    kwargs['n_jobs'] = n_cores

for vals in combinations:
    if run_now:
        print('Parameter values:', vals)

    # Shallow copy is sufficient: only top-level keys are overwritten.
    kwargs_new = kwargs.copy()
    kwargs_new.update(zip(sweep_vars, vals))

    # Sub-directory name of the form "<name>=<value>_<name>=<value>". Joining
    # (instead of appending "_" and stripping it afterwards) keeps values that
    # themselves end in "_" intact.
    fn_out = '_'.join(f"{k}={v}" for k, v in zip(sweep_vars, vals))

    kwargs_new['puncta_analysis_dir'] = os.path.join(kwargs['puncta_analysis_dir'], fn_out)
    outputs.append(kwargs_new['puncta_analysis_dir'])
    p_file = os.path.realpath(os.path.join(kwargs_new['puncta_analysis_dir'], 'parameters.json'))

    if run_now:
        print('Output_dir:', fn_out)

    os.makedirs(kwargs_new['puncta_analysis_dir'], exist_ok=True)

    with open(p_file, 'w', encoding='utf-8') as f:
        json.dump(kwargs_new, f, indent=4)

    # Both paths are quoted: the directory name embeds the swept values, so it
    # contains a space whenever a value is itself a list (e.g. "[0.001, 0.0001]").
    command = rf'python "{os.path.realpath(script_name_puncta)}" -p "{p_file}"'
    if run_now:
        print('Run the analysis...')
        exit_status = os.system(command)
        if exit_status != 0:
            # Report the failure instead of silently moving on to the next combination.
            print(f'WARNING: the analysis exited with status {exit_status} for {fn_out}')
    else:
        # The job command is wrapped in double quotes for bsub, so its own
        # double quotes are swapped for single quotes.
        command = (
            f'bsub -P Puncta -J ParamSweep -q standard -n {n_cores} -R "rusage[mem={memory}G]" '
            f' "export PATH={python_path}; '
            + command.replace('"', "'")
            + '"'
        )
        print(command)

    print('\n')
        

bsub -P Puncta -J ParamSweep -q standard -n 20 -R "rusage[mem=5G]"  "export PATH='/research/sharedresources/cbi/public/conda_envs/punctatools/bin/:$PATH'; python '/research/sharedresources/cbi/public/scripts/run_puncta_analysis.py' -p /research/sharedresources/cbi/public/data/_outputs/punctatools/parameter_sweep/threshold_detection=0.003_threshold_segmentation=0.001_threshold_background=3/parameters.json"


bsub -P Puncta -J ParamSweep -q standard -n 20 -R "rusage[mem=5G]"  "export PATH='/research/sharedresources/cbi/public/conda_envs/punctatools/bin/:$PATH'; python '/research/sharedresources/cbi/public/scripts/run_puncta_analysis.py' -p /research/sharedresources/cbi/public/data/_outputs/punctatools/parameter_sweep/threshold_detection=0.002_threshold_segmentation=0.001_threshold_background=3/parameters.json"




### The following code combines statistics

If the analysis was submitted to the cluster, wait for it to complete before running this cell.

In [None]:
#################################
#  Don't modify the code below  #
#################################

# Merge the per-combination result tables into one table per statistic type,
# tagging every row with the parameter values that produced it.

# NOTE(review): the parameter listing recorded earlier in this notebook shows
# 'cell_stat_dir' / 'puncta_stat_dir' but not these two keys - confirm that
# the parameter file defines them, otherwise this raises KeyError.
roi_quant_dr = kwargs['roi_quant_dir']
puncta_quant_dr = kwargs['puncta_quant_dir']
quant_dirs = [roi_quant_dr, puncta_quant_dr]

# One independent list of tables per statistic type. Concatenating once at the
# end avoids re-copying the accumulated table for every combination.
tables = [[] for _ in quant_dirs]

for vals, op in zip(combinations, outputs):
    for i, quant_dir in enumerate(quant_dirs):
        fn = os.path.join(op, quant_dir + '.csv')
        df = pd.read_csv(fn)

        for k, v in zip(sweep_vars, vals):
            # A list/tuple parameter value is repeated for every row; assigning
            # it directly would be interpreted as the column's data.
            df[k] = [v] * len(df) if isinstance(v, (list, tuple)) else v
        tables[i].append(df)

stats = [
    pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    for frames in tables
]

# The combined tables are written next to the per-combination sub-directories.
for i, stat_dir in enumerate(quant_dirs):
    stats[i].to_csv(os.path.join(kwargs['puncta_analysis_dir'], stat_dir + '.csv'))