# Notebook to automatically process multiple datasets
__Written by Haixing Fang, Jon Wright and James Ball__  
__Date: 21/02/2025__

With this notebook, we can process multiple datasets and samples.  
You choose which notebooks you would like to run, and which samples to run them on.  
Notebooks will be run with the same parameters you defined.

In [None]:
import os

# Restrict OpenMP / OpenBLAS / MKL to a single thread each.
# These are set before ImageD11 is imported further down, because such
# variables are typically read when the numerical libraries are first loaded.
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'
os.environ['PYDEVD_DISABLE_FILE_VALIDATION'] = '1'  # ignore papermill debugger warnings

# Run the site installer script inside this notebook's namespace; it is
# expected to provide setup_ImageD11_from_git(), used by the parameters cell.
# NOTE(review): this executes whatever is at that path - keep it a trusted file.
# The context manager closes the file handle deterministically instead of
# leaving it to the garbage collector.
with open('/data/id11/nanoscope/install_ImageD11_from_git.py') as _installer_script:
    exec(_installer_script.read())

In [None]:
# this cell is tagged with 'parameters'
# to view the tag, select the cell, then find the settings gear icon (right or left sidebar) and look for Cell Tags
# The values below are defaults; when the notebook is run through papermill,
# injected parameters with the same names override them.

# Result of the installer helper, forwarded later to utils.prepare_notebooks_for_datasets.
# Example arguments: ( os.path.join( os.environ['HOME'],'Code'), 'ImageD11_git' )
# (presumably the checkout location - confirm in the install script)
PYTHONPATH = setup_ImageD11_from_git( )

# give it a path to an existing dataset to determine the required paths
# (placeholder - must be replaced; only ds.dataroot and ds.analysisroot are used from it)
dset_path = "path/to/dataset.h5"

# you can specify a list of samples to process
# None -> every sub-directory of the dataset's dataroot is used
sample_list = None

# or you can specify a skips_dict and a samples_dict
# samples_dict = None -> it is built by utils.find_datasets_to_process from
# the dataroot, skips_dict, dset_prefix and sample_list
skips_dict = {'sample':['ff1']}  # for example, we already processed layer ff1
samples_dict = None

# common prefix to all datasets
dset_prefix = 'ff'

# which notebooks should be run?
# None -> the default single-phase tomo list defined further down is used;
# otherwise a list of (notebook_name, notebook_params_dict) tuples
notebooks_to_run = None

In [None]:
import ImageD11.sinograms.dataset
from ImageD11.nbGui import segmenter_gui
from ImageD11.nbGui import nb_utils as utils

import pprint

In [None]:
# Load the reference dataset given by dset_path. Later cells only read
# ds.dataroot and ds.analysisroot from it to locate the other samples.
ds = ImageD11.sinograms.dataset.load(dset_path)
print(ds)  # show the loaded dataset so the paths can be checked by eye

In [None]:
# No explicit sample list given: treat every directory found directly
# under the raw-data root as a sample, in alphabetical order.
if sample_list is None:
    sample_list = sorted(
        filter(
            lambda entry: os.path.isdir(os.path.join(ds.dataroot, entry)),
            os.listdir(ds.dataroot),
        )
    )

print(sample_list)

In [None]:
# Unless samples_dict was supplied as a parameter, build it from the data
# root, the skips_dict, the common dataset prefix and the sample list.
# NOTE(review): presumably maps sample name -> dataset names, mirroring the
# layout of skips_dict - confirm against nb_utils.find_datasets_to_process.
if samples_dict is None:
    samples_dict = utils.find_datasets_to_process(ds.dataroot, skips_dict, dset_prefix, sample_list)

print(samples_dict)

In [None]:
# Choose the notebooks to run.
# The fallback below is the single-phase tomo workflow.
# The value is a list of tuples of the form:
# (notebook_name, notebook_params_dict)

if notebooks_to_run is None:
    # every default notebook gets its own empty parameter dict
    notebooks_to_run = [
        (notebook_name, {})
        for notebook_name in (
            '0_segment_and_label.ipynb',
            'tomo_1_index.ipynb',
            'tomo_2_map.ipynb',
            'tomo_3_refinement.ipynb',
            '4_visualise.ipynb',
        )
    ]

The next cell will prepare the notebooks for execution by putting them in PROCESSED_DATA/sample/sample_dataset for each dataset.  
It will skip this if any notebooks are already present in the folder

In [None]:
# Prepare the chosen notebooks for every dataset in samples_dict.
# The result is iterated below as a collection of notebook paths
# (each is split on ds.analysisroot and then passed to the executor).
notebooks_to_execute = utils.prepare_notebooks_for_datasets(samples_dict,
                                                            notebooks_to_run,
                                                            ds.dataroot,
                                                            ds.analysisroot,
                                                            PYTHONPATH=PYTHONPATH)

In [None]:
print('I will execute the following notebooks:')
# Show each path as the part that follows the analysis root, to keep the listing short.
pprint.pprint(
    list(
        map(
            lambda nb: nb.split(ds.analysisroot)[1],
            notebooks_to_execute,
        )
    )
)

In [None]:
# Run the prepared notebooks one after another, in list order.
print('Executing notebooks')
for nb_path in notebooks_to_execute:
    # The same path is passed twice - presumably input and output, i.e. the
    # notebook is executed in place; the trailing None is presumably the
    # parameters argument. TODO confirm against nb_utils.notebook_exec_pmill.
    utils.notebook_exec_pmill(nb_path, nb_path, None)