diff --git a/executorlib/task_scheduler/interactive/blockallocation.py b/executorlib/task_scheduler/interactive/blockallocation.py index 7955899d..005a2ffa 100644 --- a/executorlib/task_scheduler/interactive/blockallocation.py +++ b/executorlib/task_scheduler/interactive/blockallocation.py @@ -116,15 +116,16 @@ def submit( # type: ignore args: arguments for the submitted function kwargs: keyword arguments for the submitted function resource_dict (dict): A dictionary of resources required by the task. With the following keys: - - cores (int): number of MPI cores to be used for each function call - - threads_per_core (int): number of OpenMP threads to be used for each function call - - gpus_per_core (int): number of GPUs per worker - defaults to 0 - - cwd (str/None): current working directory where the parallel python task is executed - - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and - SLURM only) - default False - - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only) - - error_log_file (str): Name of the error log file to use for storing exceptions raised - by the Python functions submitted to the Executor. + - cores (int): number of MPI cores to be used for each function call + - threads_per_core (int): number of OpenMP threads to be used for each function call + - gpus_per_core (int): number of GPUs per worker - defaults to 0 + - cwd (str/None): current working directory where the parallel python task is executed + - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI + and SLURM only) - default False + - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM + only) + - error_log_file (str): Name of the error log file to use for storing exceptions + raised by the Python functions submitted to the Executor. Returns: Future: A Future representing the given call. 
@@ -147,12 +148,10 @@ def shutdown(self, wait: bool = True, *, cancel_futures: bool = False): methods can be called after this one. Args: - wait: If True then shutdown will not return until all running - futures have finished executing and the resources used by the - parallel_executors have been reclaimed. - cancel_futures: If True then shutdown will cancel all pending - futures. Futures that are completed or running will not be - cancelled. + wait: If True then shutdown will not return until all running futures have finished executing and the + resources used by the parallel_executors have been reclaimed. + cancel_futures: If True then shutdown will cancel all pending futures. Futures that are completed or running + will not be cancelled. """ if self._future_queue is not None: if cancel_futures: @@ -197,26 +196,26 @@ def _execute_multiple_tasks( Execute a single tasks in parallel using the message passing interface (MPI). Args: - future_queue (queue.Queue): task queue of dictionary objects which are submitted to the parallel process - cores (int): defines the total number of MPI ranks to use - spawner (BaseSpawner): Spawner to start process on selected compute resources - hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the - context of an HPC cluster this essential to be able to communicate to an - Executor running on a different compute node within the same allocation. And - in principle any computer should be able to resolve that their own hostname - points to the same address as localhost. Still MacOS >= 12 seems to disable - this look up for security reasons. So on MacOS it is required to set this - option to true - init_function (Callable): optional function to preset arguments for functions which are submitted later - cache_directory (str, optional): The directory to store cache files. Defaults to "executorlib_cache". 
- cache_key (str, optional): By default the cache_key is generated based on the function hash, this can be - overwritten by setting the cache_key. - queue_join_on_shutdown (bool): Join communication queue when thread is closed. Defaults to True. - log_obj_size (bool): Enable debug mode which reports the size of the communicated objects. - error_log_file (str): Name of the error log file to use for storing exceptions raised by the Python functions - submitted to the Executor. - worker_id (int): Communicate the worker which ID was assigned to it for future reference and resource - distribution. + future_queue (queue.Queue): task queue of dictionary objects which are submitted to the parallel process + cores (int): defines the total number of MPI ranks to use + spawner (BaseSpawner): Spawner to start process on selected compute resources + hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the + context of an HPC cluster this is essential to be able to communicate to an + Executor running on a different compute node within the same allocation. And + in principle any computer should be able to resolve that their own hostname + points to the same address as localhost. Still MacOS >= 12 seems to disable + this look up for security reasons. So on MacOS it is required to set this + option to true + init_function (Callable): optional function to preset arguments for functions which are submitted later + cache_directory (str, optional): The directory to store cache files. Defaults to "executorlib_cache". + cache_key (str, optional): By default the cache_key is generated based on the function hash, this can be + overwritten by setting the cache_key. + queue_join_on_shutdown (bool): Join communication queue when thread is closed. Defaults to True. + log_obj_size (bool): Enable debug mode which reports the size of the communicated objects. 
+ error_log_file (str): Name of the error log file to use for storing exceptions raised by the Python functions + submitted to the Executor. + worker_id (int): Communicate to the worker which ID was assigned to it for future reference and resource + distribution. """ interface = interface_bootup( command_lst=get_interactive_execute_command( diff --git a/executorlib/task_scheduler/interactive/onetoone.py b/executorlib/task_scheduler/interactive/onetoone.py index 3b631565..96bcf09a 100644 --- a/executorlib/task_scheduler/interactive/onetoone.py +++ b/executorlib/task_scheduler/interactive/onetoone.py @@ -176,12 +176,12 @@ def _wrap_execute_task_in_separate_process( cores which can be used in parallel - just like the max_cores parameter. Using max_cores is recommended, as computers have a limited number of compute cores. hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the - context of an HPC cluster this essential to be able to communicate to an - Executor running on a different compute node within the same allocation. And - in principle any computer should be able to resolve that their own hostname - points to the same address as localhost. Still MacOS >= 12 seems to disable - this look up for security reasons. So on MacOS it is required to set this - option to true + context of an HPC cluster this is essential to be able to communicate to an + Executor running on a different compute node within the same allocation. And + in principle any computer should be able to resolve that their own hostname + points to the same address as localhost. Still MacOS >= 12 seems to disable + this look up for security reasons. So on MacOS it is required to set this + option to true Returns: RaisingThread, dict: thread for communicating with the python process which is executing the function and dictionary containing the future objects and the number of cores they require