From 78409edbb8b62406b81be5797974a3d0a4d60827 Mon Sep 17 00:00:00 2001
From: Jan Janssen
Date: Sun, 7 Sep 2025 07:44:53 +0200
Subject: [PATCH 1/3] Fix docstrings for BlockAllocationTaskScheduler

---
 .../interactive/blockallocation.py | 69 +++++++++----------
 1 file changed, 34 insertions(+), 35 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/blockallocation.py b/executorlib/task_scheduler/interactive/blockallocation.py
index 7955899d..66ac9d27 100644
--- a/executorlib/task_scheduler/interactive/blockallocation.py
+++ b/executorlib/task_scheduler/interactive/blockallocation.py
@@ -116,15 +116,16 @@ def submit(  # type: ignore
             args: arguments for the submitted function
             kwargs: keyword arguments for the submitted function
             resource_dict (dict): A dictionary of resources required by the task. With the following keys:
-                              - cores (int): number of MPI cores to be used for each function call
-                              - threads_per_core (int): number of OpenMP threads to be used for each function call
-                              - gpus_per_core (int): number of GPUs per worker - defaults to 0
-                              - cwd (str/None): current working directory where the parallel python task is executed
-                              - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI and
-                                SLURM only) - default False
-                              - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM only)
-                              - error_log_file (str): Name of the error log file to use for storing exceptions raised
-                                by the Python functions submitted to the Executor.
+                - cores (int): number of MPI cores to be used for each function call
+                - threads_per_core (int): number of OpenMP threads to be used for each function call
+                - gpus_per_core (int): number of GPUs per worker - defaults to 0
+                - cwd (str/None): current working directory where the parallel python task is executed
+                - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI
+                  and SLURM only) - default False
+                - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM
+                  only)
+                - error_log_file (str): Name of the error log file to use for storing exceptions
+                  raised by the Python functions submitted to the Executor.

         Returns:
             Future: A Future representing the given call.
@@ -147,12 +148,10 @@ def shutdown(self, wait: bool = True, *, cancel_futures: bool = False):
         methods can be called after this one.

         Args:
-            wait: If True then shutdown will not return until all running
-                futures have finished executing and the resources used by the
-                parallel_executors have been reclaimed.
-            cancel_futures: If True then shutdown will cancel all pending
-                futures. Futures that are completed or running will not be
-                cancelled.
+            wait: If True then shutdown will not return until all running futures have finished executing and the
+                resources used by the parallel_executors have been reclaimed.
+            cancel_futures: If True then shutdown will cancel all pending futures. Futures that are completed or running
+                will not be cancelled.
         """
         if self._future_queue is not None:
             if cancel_futures:
@@ -197,26 +196,26 @@ def _execute_multiple_tasks(
     Execute a single tasks in parallel using the message passing interface (MPI).

     Args:
-        future_queue (queue.Queue): task queue of dictionary objects which are submitted to the parallel process
-        cores (int): defines the total number of MPI ranks to use
-        spawner (BaseSpawner): Spawner to start process on selected compute resources
-        hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
-                                      context of an HPC cluster this essential to be able to communicate to an
-                                      Executor running on a different compute node within the same allocation. And
-                                      in principle any computer should be able to resolve that their own hostname
-                                      points to the same address as localhost. Still MacOS >= 12 seems to disable
-                                      this look up for security reasons. So on MacOS it is required to set this
-                                      option to true
-        init_function (Callable): optional function to preset arguments for functions which are submitted later
-        cache_directory (str, optional): The directory to store cache files. Defaults to "executorlib_cache".
-        cache_key (str, optional): By default the cache_key is generated based on the function hash, this can be
-                                   overwritten by setting the cache_key.
-        queue_join_on_shutdown (bool): Join communication queue when thread is closed. Defaults to True.
-        log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
-        error_log_file (str): Name of the error log file to use for storing exceptions raised by the Python functions
-                              submitted to the Executor.
-        worker_id (int): Communicate the worker which ID was assigned to it for future reference and resource
-                         distribution.
+        future_queue (queue.Queue): task queue of dictionary objects which are submitted to the parallel process
+        cores (int): defines the total number of MPI ranks to use
+        spawner (BaseSpawner): Spawner to start process on selected compute resources
+        hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
+            context of an HPC cluster this is essential to be able to communicate to an
+            Executor running on a different compute node within the same allocation. And
+            in principle any computer should be able to resolve that their own hostname
+            points to the same address as localhost. Still MacOS >= 12 seems to disable
+            this look up for security reasons. So on MacOS it is required to set this
+            option to true
+        init_function (Callable): optional function to preset arguments for functions which are submitted later
+        cache_directory (str, optional): The directory to store cache files. Defaults to "executorlib_cache".
+        cache_key (str, optional): By default the cache_key is generated based on the function hash, this can be
+            overwritten by setting the cache_key.
+        queue_join_on_shutdown (bool): Join communication queue when thread is closed. Defaults to True.
+        log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
+        error_log_file (str): Name of the error log file to use for storing exceptions raised by the Python functions
+            submitted to the Executor.
+        worker_id (int): Communicate the worker which ID was assigned to it for future reference and resource
+            distribution.
     """
     interface = interface_bootup(
         command_lst=get_interactive_execute_command(

From 87e62e226dbd1138406ee1721f4a93c5c217b480 Mon Sep 17 00:00:00 2001
From: Jan Janssen
Date: Sun, 7 Sep 2025 07:45:41 +0200
Subject: [PATCH 2/3] another small fix

---
 executorlib/task_scheduler/interactive/onetoone.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/onetoone.py b/executorlib/task_scheduler/interactive/onetoone.py
index 3b631565..96bcf09a 100644
--- a/executorlib/task_scheduler/interactive/onetoone.py
+++ b/executorlib/task_scheduler/interactive/onetoone.py
@@ -176,12 +176,12 @@ def _wrap_execute_task_in_separate_process(
             cores which can be used in parallel - just like the max_cores parameter. Using max_cores is recommended, as
                 computers have a limited number of compute cores.
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
-                                      context of an HPC cluster this essential to be able to communicate to an
-                                      Executor running on a different compute node within the same allocation. And
-                                      in principle any computer should be able to resolve that their own hostname
-                                      points to the same address as localhost. Still MacOS >= 12 seems to disable
-                                      this look up for security reasons. So on MacOS it is required to set this
-                                      option to true
+            context of an HPC cluster this is essential to be able to communicate to an
+            Executor running on a different compute node within the same allocation. And
+            in principle any computer should be able to resolve that their own hostname
+            points to the same address as localhost. Still MacOS >= 12 seems to disable
+            this look up for security reasons. So on MacOS it is required to set this
+            option to true
     Returns:
         RaisingThread, dict: thread for communicating with the python process which is executing the function and
             dictionary containing the future objects and the number of cores they require

From f3ffe3e3400cad71da74a3bdc31cb79dd434be50 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 7 Sep 2025 05:46:01 +0000
Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 executorlib/task_scheduler/interactive/blockallocation.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/blockallocation.py b/executorlib/task_scheduler/interactive/blockallocation.py
index 66ac9d27..005a2ffa 100644
--- a/executorlib/task_scheduler/interactive/blockallocation.py
+++ b/executorlib/task_scheduler/interactive/blockallocation.py
@@ -120,11 +120,11 @@ def submit(  # type: ignore
                 - threads_per_core (int): number of OpenMP threads to be used for each function call
                 - gpus_per_core (int): number of GPUs per worker - defaults to 0
                 - cwd (str/None): current working directory where the parallel python task is executed
-                - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI 
+                - openmpi_oversubscribe (bool): adds the `--oversubscribe` command line flag (OpenMPI
                   and SLURM only) - default False
-                - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM 
+                - slurm_cmd_args (list): Additional command line arguments for the srun call (SLURM
                   only)
-                - error_log_file (str): Name of the error log file to use for storing exceptions 
+                - error_log_file (str): Name of the error log file to use for storing exceptions
                   raised by the Python functions submitted to the Executor.
         Returns:
             Future: A Future representing the given call.
@@ -148,7 +148,7 @@ def shutdown(self, wait: bool = True, *, cancel_futures: bool = False):
         methods can be called after this one.

         Args:
-            wait: If True then shutdown will not return until all running futures have finished executing and the 
+            wait: If True then shutdown will not return until all running futures have finished executing and the
                 resources used by the parallel_executors have been reclaimed.
             cancel_futures: If True then shutdown will cancel all pending futures. Futures that are completed or running
                 will not be cancelled.
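
Editor's note: the three patches above only touch docstrings, so for context here is a minimal usage sketch of the parameters those docstrings describe (block allocation, init_function, and the resource_dict keys). The SingleNodeExecutor entry point, its keyword arguments, and the behaviour that init_function presets are injected by parameter name are taken from the executorlib documentation, not from these patches; treat the exact names and signatures as assumptions that may differ between versions.

# Hedged sketch of block-allocation usage; names outside the docstrings above
# (SingleNodeExecutor, max_workers, block_allocation) follow the executorlib docs.
from executorlib import SingleNodeExecutor


def init_function():
    # Preset keyword arguments shared by every worker; submitted functions can
    # declare parameters with these names to receive the values (behaviour as
    # described for init_function in _execute_multiple_tasks above).
    return {"offset": 10}


def add(x, y, offset):
    return x + y + offset


if __name__ == "__main__":
    with SingleNodeExecutor(
        max_workers=2,               # two long-lived workers in block-allocation mode
        block_allocation=True,       # reuse workers instead of one process per task
        init_function=init_function,
        resource_dict={              # keys as documented in the submit() docstring
            "cores": 1,              # MPI ranks per function call
            "threads_per_core": 1,   # OpenMP threads per core
        },
    ) as exe:
        future = exe.submit(add, 1, 2)
        print(future.result())  # 1 + 2 + 10 -> 13, assuming the preset is injected

Because block allocation starts the workers once and reuses them, the resource requirements are attached to the executor rather than renegotiated on every submit() call; per-call resource_dict overrides may be restricted in this mode.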