From a72c2b0c410575011c9bbe2a3fbbdaa942c05dfd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= <janssen@mpie.de>
Date: Sun, 27 Jul 2025 08:36:55 +0200
Subject: [PATCH 01/83] Move scheduler to standalone

---
 executorlib/standalone/scheduler.py           | 66 ++++++++++++++++++
 .../task_scheduler/file/queue_spawner.py      | 67 +------------------
 .../task_scheduler/file/task_scheduler.py     |  6 +-
 tests/test_fluxclusterexecutor.py             |  3 +-
 tests/test_interactive_slurmspawner.py        |  8 +--
 5 files changed, 77 insertions(+), 73 deletions(-)
 create mode 100644 executorlib/standalone/scheduler.py

diff --git a/executorlib/standalone/scheduler.py b/executorlib/standalone/scheduler.py
new file mode 100644
index 00000000..27668c13
--- /dev/null
+++ b/executorlib/standalone/scheduler.py
@@ -0,0 +1,66 @@
+import contextlib
+import subprocess
+from typing import Optional, Union
+
+from pysqa import QueueAdapter
+
+
+
+def terminate_with_pysqa(
+    queue_id: int,
+    config_directory: Optional[str] = None,
+    backend: Optional[str] = None,
+):
+    """
+    Delete a job from the queuing system if a status is reported for it and it is neither finished nor in error state.
+
+    Args:
+        queue_id (int): Queuing system ID of the job to delete.
+        config_directory (str, optional): path to the config directory.
+        backend (str, optional): name of the backend used to spawn tasks ["slurm", "flux"].
+    """
+    qa = QueueAdapter(
+        directory=config_directory,
+        queue_type=backend,
+        execute_command=pysqa_execute_command,
+    )
+    status = qa.get_status_of_job(process_id=queue_id)
+    if status is not None and status not in ["finished", "error"]:
+        with contextlib.suppress(subprocess.CalledProcessError):
+            qa.delete_job(process_id=queue_id)
+
+
+def pysqa_execute_command(
+    commands: Union[str, list[str]],
+    working_directory: Optional[str] = None,
+    split_output: bool = True,
+    shell: bool = False,
+    error_filename: str = "pysqa.err",
+) -> Union[str, list[str]]:
+    """
+    Run a command via subprocess.check_output with stderr merged into stdout. Modified from pysqa to raise an exception
+    (subprocess.CalledProcessError) if the subprocess fails, e.g. when it fails to submit the job to the queue.
+
+    Args:
+        commands (str | list[str]): The command(s) to execute. A string is run through the shell, a list is executed directly.
+        working_directory (str, optional): The directory where the command is executed. Defaults to None.
+        split_output (bool, optional): Boolean flag to split newlines in the output. Defaults to True.
+        shell (bool, optional): If True, a list of commands is joined to a single string and run through the shell. Defaults to False.
+        error_filename (str, optional): Not used by this implementation, no error file is written. Defaults to "pysqa.err".
+
+    Returns:
+        Union[str, List[str]]: Output of the shell command either as a string or as a list of strings
+    """
+    if shell and isinstance(commands, list):
+        commands = " ".join(commands)
+    out = subprocess.check_output(
+        commands,
+        cwd=working_directory,
+        stderr=subprocess.STDOUT,
+        universal_newlines=True,
+        shell=not isinstance(commands, list),  # strings need the shell, lists do not
+    )
+    if out is not None and split_output:
+        return out.split("\n")
+    else:
+        return out
diff --git a/executorlib/task_scheduler/file/queue_spawner.py b/executorlib/task_scheduler/file/queue_spawner.py
index 16dff14f..3cd55587 100644
--- a/executorlib/task_scheduler/file/queue_spawner.py
+++ b/executorlib/task_scheduler/file/queue_spawner.py
@@ -1,11 +1,10 @@
-import contextlib
 import os
-import subprocess
-from typing import Optional, Union
+from typing import Optional
 
 from pysqa import QueueAdapter
 
 from executorlib.standalone.inputcheck import check_file_exists
+from executorlib.standalone.scheduler import terminate_with_pysqa, pysqa_execute_command
 from executorlib.task_scheduler.file.hdf import dump, get_queue_id
 
 
@@ -43,7 +42,7 @@ def execute_with_pysqa(
     qa = QueueAdapter(
         directory=config_directory,
         queue_type=backend,
-        execute_command=_pysqa_execute_command,
+        execute_command=pysqa_execute_command,
     )
     queue_id = get_queue_id(file_name=file_name)
     if os.path.exists(file_name) and (
@@ -91,30 +90,6 @@ def execute_with_pysqa(
     return queue_id
 
 
-def terminate_with_pysqa(
-    queue_id: int,
-    config_directory: Optional[str] = None,
-    backend: Optional[str] = None,
-):
-    """
-    Delete job from queuing system
-
-    Args:
-        queue_id (int): Queuing system ID of the job to delete.
-        config_directory (str, optional): path to the config directory.
-        backend (str, optional): name of the backend used to spawn tasks ["slurm", "flux"].
-    """
-    qa = QueueAdapter(
-        directory=config_directory,
-        queue_type=backend,
-        execute_command=_pysqa_execute_command,
-    )
-    status = qa.get_status_of_job(process_id=queue_id)
-    if status is not None and status not in ["finished", "error"]:
-        with contextlib.suppress(subprocess.CalledProcessError):
-            qa.delete_job(process_id=queue_id)
-
-
 def terminate_tasks_in_cache(
     cache_directory: str,
     config_directory: Optional[str] = None,
@@ -140,39 +115,3 @@ def terminate_tasks_in_cache(
                 config_directory=config_directory,
                 backend=backend,
             )
-
-
-def _pysqa_execute_command(
-    commands: str,
-    working_directory: Optional[str] = None,
-    split_output: bool = True,
-    shell: bool = False,
-    error_filename: str = "pysqa.err",
-) -> Union[str, list[str]]:
-    """
-    A wrapper around the subprocess.check_output function. Modified from pysqa to raise an exception if the subprocess
-    fails to submit the job to the queue.
-
-    Args:
-        commands (str): The command(s) to be executed on the command line
-        working_directory (str, optional): The directory where the command is executed. Defaults to None.
-        split_output (bool, optional): Boolean flag to split newlines in the output. Defaults to True.
-        shell (bool, optional): Additional switch to convert commands to a single string. Defaults to False.
-        error_filename (str, optional): In case the execution fails, the output is written to this file. Defaults to "pysqa.err".
-
-    Returns:
-        Union[str, List[str]]: Output of the shell command either as a string or as a list of strings
-    """
-    if shell and isinstance(commands, list):
-        commands = " ".join(commands)
-    out = subprocess.check_output(
-        commands,
-        cwd=working_directory,
-        stderr=subprocess.STDOUT,
-        universal_newlines=True,
-        shell=not isinstance(commands, list),
-    )
-    if out is not None and split_output:
-        return out.split("\n")
-    else:
-        return out
diff --git a/executorlib/task_scheduler/file/task_scheduler.py b/executorlib/task_scheduler/file/task_scheduler.py
index fe719d8b..47bcda04 100644
--- a/executorlib/task_scheduler/file/task_scheduler.py
+++ b/executorlib/task_scheduler/file/task_scheduler.py
@@ -17,10 +17,8 @@
 )
 
 try:
-    from executorlib.task_scheduler.file.queue_spawner import (
-        execute_with_pysqa,
-        terminate_with_pysqa,
-    )
+    from executorlib.standalone.scheduler import terminate_with_pysqa
+    from executorlib.task_scheduler.file.queue_spawner import execute_with_pysqa
 except ImportError:
     # If pysqa is not available fall back to executing tasks in a subprocess
     execute_with_pysqa = execute_in_subprocess  # type: ignore
diff --git a/tests/test_fluxclusterexecutor.py b/tests/test_fluxclusterexecutor.py
index 51b18500..27645d86 100644
--- a/tests/test_fluxclusterexecutor.py
+++ b/tests/test_fluxclusterexecutor.py
@@ -11,7 +11,8 @@
 try:
     import flux.job
     from executorlib.task_scheduler.file.hdf import dump
-    from executorlib.task_scheduler.file.queue_spawner import terminate_with_pysqa, terminate_tasks_in_cache, execute_with_pysqa
+    from executorlib.task_scheduler.file.queue_spawner import terminate_tasks_in_cache, execute_with_pysqa
+    from executorlib.standalone.scheduler import terminate_with_pysqa
 
     skip_flux_test = "FLUX_URI" not in os.environ
     pmi = os.environ.get("EXECUTORLIB_PMIX", None)
diff --git a/tests/test_interactive_slurmspawner.py b/tests/test_interactive_slurmspawner.py
index a0af5b67..2617b9e9 100644
--- a/tests/test_interactive_slurmspawner.py
+++ b/tests/test_interactive_slurmspawner.py
@@ -2,7 +2,7 @@
 from executorlib.task_scheduler.interactive.slurmspawner import generate_slurm_command
 
 try:
-    from executorlib.task_scheduler.file.queue_spawner import _pysqa_execute_command
+    from executorlib.standalone.scheduler import pysqa_execute_command
 
     skip_pysqa_test = False
 except ImportError:
@@ -14,7 +14,7 @@
 )
 class TestPysqaExecuteCommand(unittest.TestCase):
     def test_pysqa_execute_command_list(self):
-        out = _pysqa_execute_command(
+        out = pysqa_execute_command(
             commands=["echo", "test"],
             working_directory=None,
             split_output=True,
@@ -25,7 +25,7 @@ def test_pysqa_execute_command_list(self):
         self.assertEqual("test", out[0])
 
     def test_pysqa_execute_command_string(self):
-        out = _pysqa_execute_command(
+        out = pysqa_execute_command(
             commands="echo test",
             working_directory=None,
             split_output=False,
@@ -37,7 +37,7 @@ def test_pysqa_execute_command_string(self):
 
     def test_pysqa_execute_command_fail(self):
         with self.assertRaises(FileNotFoundError):
-            _pysqa_execute_command(
+            pysqa_execute_command(
                 commands=["no/executable/available"],
                 working_directory=None,
                 split_output=True,

From 2ad819ead91729f87e6f3d1678a1b56d16085230 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= <janssen@mpie.de>
Date: Sun, 27 Jul 2025 10:12:01 +0200
Subject: [PATCH 02/83] fix subprocess spawner docstring

---
 executorlib/standalone/interactive/spawner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/executorlib/standalone/interactive/spawner.py b/executorlib/standalone/interactive/spawner.py
index 72f98cfb..85f92218 100644
--- a/executorlib/standalone/interactive/spawner.py
+++ b/executorlib/standalone/interactive/spawner.py
@@ -73,7 +73,7 @@ def __init__(
             cwd (str, optional): The current working directory. Defaults to None.
             cores (int, optional): The number of cores to use. Defaults to 1.
             threads_per_core (int, optional): The number of threads per core. Defaults to 1.
-            oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
+            openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
         """
         super().__init__(
             cwd=cwd,

From fc5f1991fbf7f947185302e6e732bf8c8b9796aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= <janssen@mpie.de>
Date: Sun, 27 Jul 2025 10:53:39 +0200
Subject: [PATCH 03/83] file executor fix parallel execution

---
 executorlib/standalone/command.py         | 29 ++++++++++++++++++-----
 executorlib/task_scheduler/file/shared.py |  1 +
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/executorlib/standalone/command.py b/executorlib/standalone/command.py
index aa396caa..0f6835b6 100644
--- a/executorlib/standalone/command.py
+++ b/executorlib/standalone/command.py
@@ -1,6 +1,7 @@
 import importlib.util
 import os
 import sys
+from typing import Optional
 
 
 def get_command_path(executable: str) -> str:
@@ -16,24 +17,40 @@ def get_command_path(executable: str) -> str:
     return os.path.abspath(os.path.join(__file__, "..", "..", "backend", executable))
 
 
-def get_cache_execute_command(file_name: str, cores: int = 1) -> list:
+def get_cache_execute_command(file_name: str, cores: int = 1, backend: Optional[str] = None) -> list:
     """
     Get command to call backend as a list of two strings
 
     Args:
         file_name (str): The name of the file.
         cores (int, optional): Number of cores used to execute the task. Defaults to 1.
+        backend (str, optional): name of the backend used to spawn tasks ["slurm", "flux"].
 
     Returns:
         list[str]: List of strings containing the python executable path and the backend script to execute
     """
     command_lst = [sys.executable]
     if cores > 1 and importlib.util.find_spec("mpi4py") is not None:
-        command_lst = (
-            ["mpiexec", "-n", str(cores)]
-            + command_lst
-            + [get_command_path(executable="cache_parallel.py"), file_name]
-        )
+        if backend is None:
+            command_lst = (
+                ["mpiexec", "-n", str(cores)]
+                + command_lst
+                + [get_command_path(executable="cache_parallel.py"), file_name]
+            )
+        elif backend == "slurm":
+            command_lst = (
+                ["srun", "-n", str(cores)]
+                + command_lst
+                + [get_command_path(executable="cache_parallel.py"), file_name]
+            )
+        elif backend == "flux":
+            command_lst = (
+                ["flux", "run", "-n", str(cores)]
+                + command_lst
+                + [get_command_path(executable="cache_parallel.py"), file_name]
+            )
+        else:
+            raise ValueError("backend should be None, slurm or flux, not {}".format(backend))
     elif cores > 1:
         raise ImportError(
             "mpi4py is required for parallel calculations. Please install mpi4py."
diff --git a/executorlib/task_scheduler/file/shared.py b/executorlib/task_scheduler/file/shared.py
index 0c5ac882..5d8a90f9 100644
--- a/executorlib/task_scheduler/file/shared.py
+++ b/executorlib/task_scheduler/file/shared.py
@@ -154,6 +154,7 @@ def execute_tasks_h5(
                         command=get_cache_execute_command(
                             file_name=file_name,
                             cores=task_resource_dict["cores"],
+                            backend=backend,
                         ),
                         file_name=file_name,
                         data_dict=data_dict,

From 293adc32ddb2a0ba337b20b79e4d493b31c60983 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= <janssen@mpie.de>
Date: Sun, 27 Jul 2025 11:07:19 +0200
Subject: [PATCH 04/83] add command tests

---
 tests/test_standalone_command.py | 57 ++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 tests/test_standalone_command.py

diff --git a/tests/test_standalone_command.py b/tests/test_standalone_command.py
new file mode 100644
index 00000000..f89821d8
--- /dev/null
+++ b/tests/test_standalone_command.py
@@ -0,0 +1,57 @@
+import sys
+from unittest import TestCase
+from executorlib.standalone.command import get_cache_execute_command, get_interactive_execute_command
+
+
+class TestCommands(TestCase):
+    def test_get_interactive_execute_command_serial(self):
+        output = get_interactive_execute_command(cores=1)
+        self.assertEqual(output[0], sys.executable)
+        self.assertEqual(output[1].split("/")[-1], "interactive_serial.py")
+
+    def test_get_interactive_execute_command_parallel(self):
+        output = get_interactive_execute_command(cores=2)
+        self.assertEqual(output[0], sys.executable)
+        self.assertEqual(output[1].split("/")[-1], "interactive_parallel.py")
+
+    def test_get_cache_execute_command_serial(self):
+        file_name = "test.txt"
+        output = get_cache_execute_command(cores=1, file_name=file_name)
+        self.assertEqual(output[0], sys.executable)
+        self.assertEqual(output[1].split("/")[-1], "cache_serial.py")
+        self.assertEqual(output[2], file_name)
+        output = get_cache_execute_command(cores=1, file_name=file_name, backend="slurm")
+        self.assertEqual(output[0], sys.executable)
+        self.assertEqual(output[1].split("/")[-1], "cache_serial.py")
+        self.assertEqual(output[2], file_name)
+        output = get_cache_execute_command(cores=1, file_name=file_name, backend="flux")
+        self.assertEqual(output[0], sys.executable)
+        self.assertEqual(output[1].split("/")[-1], "cache_serial.py")
+        self.assertEqual(output[2], file_name)
+
+    def test_get_cache_execute_command_parallel(self):
+        file_name = "test.txt"
+        output = get_cache_execute_command(cores=2, file_name=file_name)
+        self.assertEqual(output[0], "mpiexec")
+        self.assertEqual(output[1], "-n")
+        self.assertEqual(output[2], str(2))
+        self.assertEqual(output[3], sys.executable)
+        self.assertEqual(output[4].split("/")[-1], "cache_parallel.py")
+        self.assertEqual(output[5], file_name)
+        output = get_cache_execute_command(cores=2, file_name=file_name, backend="slurm")
+        self.assertEqual(output[0], "srun")
+        self.assertEqual(output[1], "-n")
+        self.assertEqual(output[2], str(2))
+        self.assertEqual(output[3], sys.executable)
+        self.assertEqual(output[4].split("/")[-1], "cache_parallel.py")
+        self.assertEqual(output[5], file_name)
+        output = get_cache_execute_command(cores=2, file_name=file_name, backend="flux")
+        self.assertEqual(output[0], "flux")
+        self.assertEqual(output[1], "run")
+        self.assertEqual(output[2], "-n")
+        self.assertEqual(output[3], str(2))
+        self.assertEqual(output[4], sys.executable)
+        self.assertEqual(output[5].split("/")[-1], "cache_parallel.py")
+        self.assertEqual(output[6], file_name)
+        with self.assertRaises(ValueError):
+            get_cache_execute_command(cores=2, file_name=file_name, backend="test")

From 07e84092e5d167569a09da68467bad9cac0042ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= <janssen@mpie.de>
Date: Sun, 27 Jul 2025 11:08:44 +0200
Subject: [PATCH 05/83] move slurm command to standalone

---
 executorlib/standalone/slurm_command.py       | 48 +++++++++++++++++++
 .../interactive/slurmspawner.py               | 47 +-----------------
 tests/test_interactive_slurmspawner.py        |  2 +-
 3 files changed, 50 insertions(+), 47 deletions(-)
 create mode 100644 executorlib/standalone/slurm_command.py

diff --git a/executorlib/standalone/slurm_command.py b/executorlib/standalone/slurm_command.py
new file mode 100644
index 00000000..7b390b48
--- /dev/null
+++ b/executorlib/standalone/slurm_command.py
@@ -0,0 +1,48 @@
+from typing import Optional
+
+
+SLURM_COMMAND = "srun"
+
+
+def generate_slurm_command(
+    cores: int,
+    cwd: Optional[str],
+    threads_per_core: int = 1,
+    gpus_per_core: int = 0,
+    num_nodes: Optional[int] = None,
+    exclusive: bool = False,
+    openmpi_oversubscribe: bool = False,
+    slurm_cmd_args: Optional[list[str]] = None,
+) -> list[str]:
+    """
+    Generate the command list for the SLURM interface.
+
+    Args:
+        cores (int): The number of cores.
+        cwd (str): The current working directory.
+        threads_per_core (int, optional): The number of threads per core. Defaults to 1.
+        gpus_per_core (int, optional): The number of GPUs per core. Defaults to 0.
+        num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None.
+        exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute notes. Defaults to False.
+        openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
+        slurm_cmd_args (list[str], optional): Additional command line arguments. Defaults to [].
+
+    Returns:
+        list[str]: The generated command list.
+    """
+    command_prepend_lst = [SLURM_COMMAND, "-n", str(cores)]
+    if cwd is not None:
+        command_prepend_lst += ["-D", cwd]
+    if num_nodes is not None:
+        command_prepend_lst += ["-N", str(num_nodes)]
+    if threads_per_core > 1:
+        command_prepend_lst += ["--cpus-per-task=" + str(threads_per_core)]
+    if gpus_per_core > 0:
+        command_prepend_lst += ["--gpus-per-task=" + str(gpus_per_core)]
+    if exclusive:
+        command_prepend_lst += ["--exact"]
+    if openmpi_oversubscribe:
+        command_prepend_lst += ["--oversubscribe"]
+    if slurm_cmd_args is not None and len(slurm_cmd_args) > 0:
+        command_prepend_lst += slurm_cmd_args
+    return command_prepend_lst
diff --git a/executorlib/task_scheduler/interactive/slurmspawner.py b/executorlib/task_scheduler/interactive/slurmspawner.py
index 8426012d..309c43d9 100644
--- a/executorlib/task_scheduler/interactive/slurmspawner.py
+++ b/executorlib/task_scheduler/interactive/slurmspawner.py
@@ -2,8 +2,7 @@
 from typing import Optional
 
 from executorlib.standalone.interactive.spawner import SubprocessSpawner
-
-SLURM_COMMAND = "srun"
+from executorlib.standalone.slurm_command import generate_slurm_command
 
 
 def validate_max_workers(max_workers: int, cores: int, threads_per_core: int):
@@ -79,47 +78,3 @@ def generate_command(self, command_lst: list[str]) -> list[str]:
         return super().generate_command(
             command_lst=command_prepend_lst + command_lst,
         )
-
-
-def generate_slurm_command(
-    cores: int,
-    cwd: Optional[str],
-    threads_per_core: int = 1,
-    gpus_per_core: int = 0,
-    num_nodes: Optional[int] = None,
-    exclusive: bool = False,
-    openmpi_oversubscribe: bool = False,
-    slurm_cmd_args: Optional[list[str]] = None,
-) -> list[str]:
-    """
-    Generate the command list for the SLURM interface.
-
-    Args:
-        cores (int): The number of cores.
-        cwd (str): The current working directory.
-        threads_per_core (int, optional): The number of threads per core. Defaults to 1.
-        gpus_per_core (int, optional): The number of GPUs per core. Defaults to 0.
-        num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None.
-        exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute notes. Defaults to False.
-        openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
-        slurm_cmd_args (list[str], optional): Additional command line arguments. Defaults to [].
-
-    Returns:
-        list[str]: The generated command list.
-    """
-    command_prepend_lst = [SLURM_COMMAND, "-n", str(cores)]
-    if cwd is not None:
-        command_prepend_lst += ["-D", cwd]
-    if num_nodes is not None:
-        command_prepend_lst += ["-N", str(num_nodes)]
-    if threads_per_core > 1:
-        command_prepend_lst += ["--cpus-per-task=" + str(threads_per_core)]
-    if gpus_per_core > 0:
-        command_prepend_lst += ["--gpus-per-task=" + str(gpus_per_core)]
-    if exclusive:
-        command_prepend_lst += ["--exact"]
-    if openmpi_oversubscribe:
-        command_prepend_lst += ["--oversubscribe"]
-    if slurm_cmd_args is not None and len(slurm_cmd_args) > 0:
-        command_prepend_lst += slurm_cmd_args
-    return command_prepend_lst
diff --git a/tests/test_interactive_slurmspawner.py b/tests/test_interactive_slurmspawner.py
index 2617b9e9..bb04ad34 100644
--- a/tests/test_interactive_slurmspawner.py
+++ b/tests/test_interactive_slurmspawner.py
@@ -1,5 +1,5 @@
 import unittest
-from executorlib.task_scheduler.interactive.slurmspawner import generate_slurm_command
+from executorlib.standalone.slurm_command import generate_slurm_command
 
 try:
     from executorlib.standalone.scheduler import pysqa_execute_command

From 12208680c1d1331e8cb1dc0d7a333038aa5f588c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= <janssen@mpie.de>
Date: Sun, 27 Jul 2025 11:12:48 +0200
Subject: [PATCH 06/83] implement spawner for pysqa

---
 executorlib/task_scheduler/worker/__init__.py |   0
 executorlib/task_scheduler/worker/spawner.py  | 114 ++++++++++++++++++
 2 files changed, 114 insertions(+)
 create mode 100644 executorlib/task_scheduler/worker/__init__.py
 create mode 100644 executorlib/task_scheduler/worker/spawner.py

diff --git a/executorlib/task_scheduler/worker/__init__.py b/executorlib/task_scheduler/worker/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/executorlib/task_scheduler/worker/spawner.py b/executorlib/task_scheduler/worker/spawner.py
new file mode 100644
index 00000000..269ab64d
--- /dev/null
+++ b/executorlib/task_scheduler/worker/spawner.py
@@ -0,0 +1,114 @@
+from typing import Optional
+
+from pysqa import QueueAdapter
+
+from executorlib.standalone.interactive.spawner import BaseSpawner
+from executorlib.standalone.scheduler import pysqa_execute_command, terminate_with_pysqa
+
+
+class PysqaSpawner(BaseSpawner):
+    def __init__(
+        self,
+        cwd: Optional[str] = None,
+        cores: int = 1,
+        openmpi_oversubscribe: bool = False,
+        threads_per_core: int = 1,
+        config_directory: Optional[str] = None,  # path to the config directory
+        backend: Optional[str] = None,  # queuing backend, e.g. "slurm" or "flux"
+        submission_kwargs: Optional[dict] = None,  # extra kwargs for submit_job()
+    ):
+        """
+        Spawner which submits the worker command as a job to a queuing system via pysqa.
+
+        Args:
+            cwd (str, optional): The current working directory. Defaults to None.
+            cores (int, optional): The number of cores to use. Defaults to 1.
+            threads_per_core (int, optional): The number of threads per core. Defaults to 1.
+            openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
+        """
+        super().__init__(
+            cwd=cwd,
+            cores=cores,
+            openmpi_oversubscribe=openmpi_oversubscribe,
+        )
+        self._process: Optional[int] = None  # queue ID returned by submit_job()
+        self._threads_per_core = threads_per_core
+        self._config_directory = config_directory
+        self._backend = backend
+        self._submission_kwargs = submission_kwargs
+
+    def bootup(
+        self,
+        command_lst: list[str],
+    ):
+        """
+        Submit the command as a job to the queuing system and store its queue ID.
+
+        Args:
+            command_lst (list[str]): The command list to execute.
+        """
+        qa = QueueAdapter(
+            directory=self._config_directory,
+            queue_type=self._backend,
+            execute_command=pysqa_execute_command,
+        )
+        self._process = qa.submit_job(
+            command=" ".join(self.generate_command(command_lst=command_lst)),
+            working_directory=self._cwd,
+            cores=self._cores,
+            **(self._submission_kwargs or {}),  # None (the default) means no extras
+        )
+
+    def generate_command(self, command_lst: list[str]) -> list[str]:
+        """
+        Prepend the parallel launcher (mpiexec, srun or flux run) if more than one core is requested.
+
+        Args:
+            command_lst (list[str]): The command list.
+
+        Returns:
+            list[str]: The generated command list. A ValueError is raised for an unknown backend with cores > 1.
+        """
+        if self._cores > 1 and self._backend is None:
+            command_prepend = ["mpiexec", "-n", str(self._cores)]
+        elif self._cores > 1 and self._backend == "slurm":
+            command_prepend = ["srun", "-n", str(self._cores)]
+        elif self._cores > 1 and self._backend == "flux":
+            command_prepend = ["flux", "run", "-n", str(self._cores)]
+        elif self._cores > 1:
+            raise ValueError("backend should be None, slurm or flux, not {}".format(self._backend))
+        else:
+            command_prepend = []
+        return command_prepend + command_lst
+
+    def shutdown(self, wait: bool = True):
+        """
+        Delete the submitted job (if any) from the queuing system and reset the stored queue ID.
+
+        Args:
+            wait (bool, optional): Not used by this implementation. Defaults to True.
+        """
+        if self._process is not None:
+            terminate_with_pysqa(
+                queue_id=self._process,
+                config_directory=self._config_directory,
+                backend=self._backend,
+            )
+        self._process = None
+
+    def poll(self) -> bool:
+        """
+        Check whether the submitted job is still active in the queuing system.
+
+        Returns:
+            bool: True if a job was submitted and its status is running or pending, False otherwise.
+        """
+        qa = QueueAdapter(
+            directory=self._config_directory,
+            queue_type=self._backend,
+            execute_command=pysqa_execute_command,
+        )
+        if self._process is not None:
+            return qa.get_status_of_job(process_id=self._process) in ["running", "pending"]
+        else:
+            return False

From 62c4c9173192e7c0835cfd4a02ecfe2d6e122e2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= <janssen@mpie.de>
Date: Sun, 27 Jul 2025 21:36:31 +0200
Subject: [PATCH 07/83] add pmi_mode option to generate_slurm_command

---
 executorlib/standalone/slurm_command.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/executorlib/standalone/slurm_command.py b/executorlib/standalone/slurm_command.py
index 7b390b48..4816625c 100644
--- a/executorlib/standalone/slurm_command.py
+++ b/executorlib/standalone/slurm_command.py
@@ -13,6 +13,7 @@ def generate_slurm_command(
     exclusive: bool = False,
     openmpi_oversubscribe: bool = False,
     slurm_cmd_args: Optional[list[str]] = None,
+    pmi_mode: Optional[str] = None,
 ) -> list[str]:
     """
     Generate the command list for the SLURM interface.
@@ -26,6 +27,7 @@ def generate_slurm_command(
         exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute notes. Defaults to False.
         openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
         slurm_cmd_args (list[str], optional): Additional command line arguments. Defaults to [].
+        pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None
 
     Returns:
         list[str]: The generated command list.
@@ -33,6 +35,8 @@ def generate_slurm_command(
     command_prepend_lst = [SLURM_COMMAND, "-n", str(cores)]
     if cwd is not None:
         command_prepend_lst += ["-D", cwd]
+    if pmi_mode is not None:
+        command_prepend_lst += ["--mpi=" + pmi_mode]
     if num_nodes is not None:
         command_prepend_lst += ["-N", str(num_nodes)]
     if threads_per_core > 1:
@@ -45,4 +49,4 @@ def generate_slurm_command(
         command_prepend_lst += ["--oversubscribe"]
     if slurm_cmd_args is not None and len(slurm_cmd_args) > 0:
         command_prepend_lst += slurm_cmd_args
-    return command_prepend_lst
+    return command_prepend_lst
\ No newline at end of file

From 778658569e7283c23433112a70cff0f08896bdce Mon Sep 17 00:00:00 2001
From: pyiron-runner <pyiron@mpie.de>
Date: Sun, 27 Jul 2025 21:15:05 +0000
Subject: [PATCH 08/83] Format black

---
 executorlib/standalone/slurm_command.py      | 2 +-
 executorlib/task_scheduler/worker/spawner.py | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/executorlib/standalone/slurm_command.py b/executorlib/standalone/slurm_command.py
index 4816625c..3d9ca47d 100644
--- a/executorlib/standalone/slurm_command.py
+++ b/executorlib/standalone/slurm_command.py
@@ -49,4 +49,4 @@ def generate_slurm_command(
         command_prepend_lst += ["--oversubscribe"]
     if slurm_cmd_args is not None and len(slurm_cmd_args) > 0:
         command_prepend_lst += slurm_cmd_args
-    return command_prepend_lst
\ No newline at end of file
+    return command_prepend_lst
diff --git a/executorlib/task_scheduler/worker/spawner.py b/executorlib/task_scheduler/worker/spawner.py
index 269ab64d..30328141 100644
--- a/executorlib/task_scheduler/worker/spawner.py
+++ b/executorlib/task_scheduler/worker/spawner.py
@@ -76,7 +76,9 @@ def generate_command(self, command_lst: list[str]) -> list[str]:
         elif self._cores > 1 and self._backend == "flux":
             command_prepend = ["flux", "run", "-n", str(self._cores)]
         elif self._cores > 1:
-            raise ValueError("backend should be None, slurm or flux, not {}".format(self._backend))
+            raise ValueError(
+                "backend should be None, slurm or flux, not {}".format(self._backend)
+            )
         else:
             command_prepend = []
         return command_prepend + command_lst
@@ -109,6 +111,9 @@ def poll(self) -> bool:
             execute_command=pysqa_execute_command,
         )
         if self._process is not None:
-            return qa.get_status_of_job(process_id=self._process) in ["running", "pending"]
+            return qa.get_status_of_job(process_id=self._process) in [
+                "running",
+                "pending",
+            ]
         else:
             return False

From 36b0b47651a5d4ca22acf2bc49c1e15fecdef540 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 27 Jul 2025 21:16:01 +0000
Subject: [PATCH 09/83] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 executorlib/standalone/slurm_command.py      | 1 -
 executorlib/task_scheduler/worker/spawner.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/executorlib/standalone/slurm_command.py b/executorlib/standalone/slurm_command.py
index 3d9ca47d..f9f4f8db 100644
--- a/executorlib/standalone/slurm_command.py
+++ b/executorlib/standalone/slurm_command.py
@@ -1,6 +1,5 @@
 from typing import Optional
 
-
 SLURM_COMMAND = "srun"
 
 
diff --git a/executorlib/task_scheduler/worker/spawner.py b/executorlib/task_scheduler/worker/spawner.py
index 30328141..af615126 100644
--- a/executorlib/task_scheduler/worker/spawner.py
+++ b/executorlib/task_scheduler/worker/spawner.py
@@ -77,7 +77,7 @@ def generate_command(self, command_lst: list[str]) -> list[str]:
             command_prepend = ["flux", "run", "-n", str(self._cores)]
         elif self._cores > 1:
             raise ValueError(
-                "backend should be None, slurm or flux, not {}".format(self._backend)
+                f"backend should be None, slurm or flux, not {self._backend}"
             )
         else:
             command_prepend = []

From 1cc704414aad5b64be20d35a0240a58bc8b4f23c Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Tue, 19 Aug 2025 11:20:06 +0200
Subject: [PATCH 10/83] block_allocation

---
 executorlib/executor/flux.py                  | 58 +++++++++-----
 executorlib/executor/slurm.py                 | 59 +++++++++-----
 .../pysqaspawner.py}                          | 76 ++++++++++++++++++-
 3 files changed, 149 insertions(+), 44 deletions(-)
 rename executorlib/task_scheduler/{worker/spawner.py => interactive/pysqaspawner.py} (59%)

diff --git a/executorlib/executor/flux.py b/executorlib/executor/flux.py
index 864548d6..e9016576 100644
--- a/executorlib/executor/flux.py
+++ b/executorlib/executor/flux.py
@@ -357,28 +357,46 @@ def __init__(
         if not plot_dependency_graph:
             import pysqa  # noqa
 
-            from executorlib.task_scheduler.file.task_scheduler import (
-                create_file_executor,
-            )
+            if block_allocation:
+                from executorlib.task_scheduler.interactive.pysqaspawner import create_pysqa_block_allocation_scheduler
+                
+                super().__init__(
+                    executor=create_pysqa_block_allocation_scheduler(
+                        max_cores=max_cores,
+                        cache_directory=cache_directory,
+                        hostname_localhost=hostname_localhost,
+                        log_obj_size=log_obj_size,
+                        pmi_mode=pmi_mode,
+                        init_function=init_function,
+                        max_workers=max_workers,
+                        resource_dict=resource_dict,
+                        pysqa_config_directory=pysqa_config_directory,
+                        backend="flux",
+                    )
+                )
+            else:
+                from executorlib.task_scheduler.file.task_scheduler import (
+                    create_file_executor,
+                )
 
-            super().__init__(
-                executor=create_file_executor(
-                    max_workers=max_workers,
-                    backend="flux",
-                    max_cores=max_cores,
-                    cache_directory=cache_directory,
-                    resource_dict=resource_dict,
-                    flux_executor=None,
-                    pmi_mode=pmi_mode,
-                    flux_executor_nesting=False,
-                    flux_log_files=False,
-                    pysqa_config_directory=pysqa_config_directory,
-                    hostname_localhost=hostname_localhost,
-                    block_allocation=block_allocation,
-                    init_function=init_function,
-                    disable_dependencies=disable_dependencies,
+                super().__init__(
+                    executor=create_file_executor(
+                        max_workers=max_workers,
+                        backend="flux",
+                        max_cores=max_cores,
+                        cache_directory=cache_directory,
+                        resource_dict=resource_dict,
+                        flux_executor=None,
+                        pmi_mode=pmi_mode,
+                        flux_executor_nesting=False,
+                        flux_log_files=False,
+                        pysqa_config_directory=pysqa_config_directory,
+                        hostname_localhost=hostname_localhost,
+                        block_allocation=block_allocation,
+                        init_function=init_function,
+                        disable_dependencies=disable_dependencies,
+                    )
                 )
-            )
         else:
             super().__init__(
                 executor=DependencyTaskScheduler(
diff --git a/executorlib/executor/slurm.py b/executorlib/executor/slurm.py
index 3a4e202b..655a2d80 100644
--- a/executorlib/executor/slurm.py
+++ b/executorlib/executor/slurm.py
@@ -165,28 +165,47 @@ def __init__(
         if not plot_dependency_graph:
             import pysqa  # noqa
 
-            from executorlib.task_scheduler.file.task_scheduler import (
-                create_file_executor,
-            )
+            if block_allocation:
+                from executorlib.task_scheduler.interactive.pysqaspawner import create_pysqa_block_allocation_scheduler
+                
+                super().__init__(
+                    executor=create_pysqa_block_allocation_scheduler(
+                        max_cores=max_cores,
+                        cache_directory=cache_directory,
+                        hostname_localhost=hostname_localhost,
+                        log_obj_size=log_obj_size,
+                        pmi_mode=pmi_mode,
+                        init_function=init_function,
+                        max_workers=max_workers,
+                        resource_dict=resource_dict,
+                        pysqa_config_directory=pysqa_config_directory,
+                        backend="slurm",
+                    ),
+                )
 
-            super().__init__(
-                executor=create_file_executor(
-                    max_workers=max_workers,
-                    backend="slurm",
-                    max_cores=max_cores,
-                    cache_directory=cache_directory,
-                    resource_dict=resource_dict,
-                    pmi_mode=pmi_mode,
-                    flux_executor=None,
-                    flux_executor_nesting=False,
-                    flux_log_files=False,
-                    pysqa_config_directory=pysqa_config_directory,
-                    hostname_localhost=hostname_localhost,
-                    block_allocation=block_allocation,
-                    init_function=init_function,
-                    disable_dependencies=disable_dependencies,
+            else:
+                from executorlib.task_scheduler.file.task_scheduler import (
+                    create_file_executor,
+                )
+
+                super().__init__(
+                    executor=create_file_executor(
+                        max_workers=max_workers,
+                        backend="slurm",
+                        max_cores=max_cores,
+                        cache_directory=cache_directory,
+                        resource_dict=resource_dict,
+                        pmi_mode=pmi_mode,
+                        flux_executor=None,
+                        flux_executor_nesting=False,
+                        flux_log_files=False,
+                        pysqa_config_directory=pysqa_config_directory,
+                        hostname_localhost=hostname_localhost,
+                        block_allocation=block_allocation,
+                        init_function=init_function,
+                        disable_dependencies=disable_dependencies,
+                    )
                 )
-            )
         else:
             super().__init__(
                 executor=DependencyTaskScheduler(
diff --git a/executorlib/task_scheduler/worker/spawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
similarity index 59%
rename from executorlib/task_scheduler/worker/spawner.py
rename to executorlib/task_scheduler/interactive/pysqaspawner.py
index af615126..0fbc321b 100644
--- a/executorlib/task_scheduler/worker/spawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -1,9 +1,14 @@
-from typing import Optional
+from time import sleep
+from typing import Callable, Optional
 
 from pysqa import QueueAdapter
 
+from executorlib.standalone.inputcheck import validate_number_of_cores
 from executorlib.standalone.interactive.spawner import BaseSpawner
 from executorlib.standalone.scheduler import pysqa_execute_command, terminate_with_pysqa
+from executorlib.task_scheduler.interactive.blockallocation import (
+    BlockAllocationTaskScheduler,
+)
 
 
 class PysqaSpawner(BaseSpawner):
@@ -11,11 +16,15 @@ def __init__(
         self,
         cwd: Optional[str] = None,
         cores: int = 1,
-        openmpi_oversubscribe: bool = False,
         threads_per_core: int = 1,
+        gpus_per_core: int = 0,
+        num_nodes: Optional[int] = None,
+        exclusive: bool = False,
+        openmpi_oversubscribe: bool = False,
+        slurm_cmd_args: Optional[list[str]] = None,
+        pmi_mode: Optional[str] = None,
         config_directory: Optional[str] = None,
         backend: Optional[str] = None,
-        submission_kwargs: Optional[dict] = None,
     ):
         """
         Subprocess interface implementation.
@@ -33,9 +42,13 @@ def __init__(
         )
         self._process: Optional[int] = None
         self._threads_per_core = threads_per_core
+        self._gpus_per_core = gpus_per_core
+        self._num_nodes = num_nodes
+        self._exclusive = exclusive
+        self._slurm_cmd_args = slurm_cmd_args
+        self._pmi_mode = pmi_mode
         self._config_directory = config_directory
         self._backend = backend
-        self._submission_kwargs = submission_kwargs
 
     def bootup(
         self,
@@ -52,12 +65,30 @@ def bootup(
             queue_type=self._backend,
             execute_command=pysqa_execute_command,
         )
+        if self._gpus_per_core > 0:
+            raise ValueError()
+        if self._num_nodes is not None:
+            raise ValueError()
+        if self._exclusive:
+            raise ValueError()
+        if self._pmi_mode is not None:
+            raise ValueError()
         self._process = qa.submit_job(
             command=" ".join(self.generate_command(command_lst=command_lst)),
             working_directory=self._cwd,
             cores=self._cores,
             **self._submission_kwargs,
         )
+        while True:
+            status = qa.get_status_of_job(process_id=self._process)
+            if status in ["running", "pending"]:
+                break
+            elif status is None:
+                raise RuntimeError(
+                    f"Failed to start the process with command: {command_lst}"
+                )
+            else:
+                sleep(1)  # Wait for the process to start
 
     def generate_command(self, command_lst: list[str]) -> list[str]:
         """
@@ -117,3 +148,40 @@ def poll(self) -> bool:
             ]
         else:
             return False
+
+
+def create_pysqa_block_allocation_scheduler(
+    max_cores: Optional[int] = None,
+    cache_directory: Optional[str] = None,
+    hostname_localhost: Optional[bool] = None,
+    log_obj_size: bool = False,
+    pmi_mode: Optional[str] = None,
+    init_function: Optional[Callable] = None,
+    max_workers: Optional[int] = None,
+    resource_dict: Optional[dict] = None,
+    pysqa_config_directory: Optional[str] = None,
+    backend: Optional[str] = None,
+):
+    if backend is None:
+        raise ValueError("Backend must be either 'slurm' or 'flux'.")
+    if resource_dict is None:
+        resource_dict = {}
+    cores_per_worker = resource_dict.get("cores", 1)
+    resource_dict["cache_directory"] = cache_directory
+    resource_dict["hostname_localhost"] = hostname_localhost
+    resource_dict["log_obj_size"] = log_obj_size
+    resource_dict["pmi_mode"] = pmi_mode
+    resource_dict["init_function"] = init_function
+    resource_dict["config_directory"] = pysqa_config_directory
+    resource_dict["backend"] = backend
+    max_workers = validate_number_of_cores(
+        max_cores=max_cores,
+        max_workers=max_workers,
+        cores_per_worker=cores_per_worker,
+        set_local_cores=False,
+    )
+    return BlockAllocationTaskScheduler(
+        max_workers=max_workers,
+        executor_kwargs=resource_dict,
+        spawner=PysqaSpawner,
+    )

From 02f0ce788cee5efaf7869ee6fc4d5fb0375b139d Mon Sep 17 00:00:00 2001
From: pyiron-runner <pyiron@mpie.de>
Date: Tue, 19 Aug 2025 09:20:57 +0000
Subject: [PATCH 11/83] Format black

---
 executorlib/executor/flux.py  | 6 ++++--
 executorlib/executor/slurm.py | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/executorlib/executor/flux.py b/executorlib/executor/flux.py
index e9016576..1d844f28 100644
--- a/executorlib/executor/flux.py
+++ b/executorlib/executor/flux.py
@@ -358,8 +358,10 @@ def __init__(
             import pysqa  # noqa
 
             if block_allocation:
-                from executorlib.task_scheduler.interactive.pysqaspawner import create_pysqa_block_allocation_scheduler
-                
+                from executorlib.task_scheduler.interactive.pysqaspawner import (
+                    create_pysqa_block_allocation_scheduler,
+                )
+
                 super().__init__(
                     executor=create_pysqa_block_allocation_scheduler(
                         max_cores=max_cores,
diff --git a/executorlib/executor/slurm.py b/executorlib/executor/slurm.py
index 655a2d80..ed2410d3 100644
--- a/executorlib/executor/slurm.py
+++ b/executorlib/executor/slurm.py
@@ -166,8 +166,10 @@ def __init__(
             import pysqa  # noqa
 
             if block_allocation:
-                from executorlib.task_scheduler.interactive.pysqaspawner import create_pysqa_block_allocation_scheduler
-                
+                from executorlib.task_scheduler.interactive.pysqaspawner import (
+                    create_pysqa_block_allocation_scheduler,
+                )
+
                 super().__init__(
                     executor=create_pysqa_block_allocation_scheduler(
                         max_cores=max_cores,

From 2804562add35fa36919756028a364fc3766ede73 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Tue, 19 Aug 2025 11:23:28 +0200
Subject: [PATCH 12/83] fix type hint

---
 executorlib/task_scheduler/interactive/pysqaspawner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index 0fbc321b..f58b0975 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -77,7 +77,7 @@ def bootup(
             command=" ".join(self.generate_command(command_lst=command_lst)),
             working_directory=self._cwd,
             cores=self._cores,
-            **self._submission_kwargs,
+            **self._slurm_cmd_args,
         )
         while True:
             status = qa.get_status_of_job(process_id=self._process)

From 6fb86f7b8591e523c55822a8b32c7a9d6addc37d Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Tue, 19 Aug 2025 11:52:33 +0200
Subject: [PATCH 13/83] implement additional options for SLURM

---
 .../interactive/pysqaspawner.py               | 40 ++++++++++++-------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index f58b0975..afedada7 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -65,18 +65,10 @@ def bootup(
             queue_type=self._backend,
             execute_command=pysqa_execute_command,
         )
-        if self._gpus_per_core > 0:
-            raise ValueError()
-        if self._num_nodes is not None:
-            raise ValueError()
-        if self._exclusive:
-            raise ValueError()
-        if self._pmi_mode is not None:
-            raise ValueError()
         self._process = qa.submit_job(
             command=" ".join(self.generate_command(command_lst=command_lst)),
             working_directory=self._cwd,
-            cores=self._cores,
+            cores=int(self._cores * self._threads_per_core),
             **self._slurm_cmd_args,
         )
         while True:
@@ -100,12 +92,34 @@ def generate_command(self, command_lst: list[str]) -> list[str]:
         Returns:
             list[str]: The generated command list.
         """
-        if self._cores > 1 and self._backend is None:
-            command_prepend = ["mpiexec", "-n", str(self._cores)]
-        elif self._cores > 1 and self._backend == "slurm":
+        if self._cores > 1 and self._backend == "slurm":
             command_prepend = ["srun", "-n", str(self._cores)]
+            if self._pmi_mode is not None:
+                command_prepend += ["--mpi=" + self._pmi_mode]
+            if self._num_nodes is not None:
+                command_prepend_lst += ["-N", str(self._num_nodes)]
+            if self._threads_per_core > 1:
+                command_prepend_lst += ["--cpus-per-task=" + str(self._threads_per_core)]
+            if self._gpus_per_core > 0:
+                command_prepend_lst += ["--gpus-per-task=" + str(self._gpus_per_core)]
+            if self._exclusive:
+                command_prepend_lst += ["--exact"]
+            if self._openmpi_oversubscribe:
+                command_prepend_lst += ["--oversubscribe"]
         elif self._cores > 1 and self._backend == "flux":
             command_prepend = ["flux", "run", "-n", str(self._cores)]
+            if self._pmi_mode is not None:
+                command_prepend += ["-o", "pmi=" + self._pmi_mode]
+            if self._num_nodes is not None:
+                raise ValueError()
+            if self._threads_per_core > 1:
+               raise ValueError()
+            if self._gpus_per_core > 0:
+                raise ValueError()
+            if self._exclusive:
+                raise ValueError()
+            if self._openmpi_oversubscribe:
+                raise ValueError()
         elif self._cores > 1:
             raise ValueError(
                 f"backend should be None, slurm or flux, not {self._backend}"
@@ -162,8 +176,6 @@ def create_pysqa_block_allocation_scheduler(
     pysqa_config_directory: Optional[str] = None,
     backend: Optional[str] = None,
 ):
-    if backend is None:
-        raise ValueError("Backend must be either 'slurm' or 'flux'.")
     if resource_dict is None:
         resource_dict = {}
     cores_per_worker = resource_dict.get("cores", 1)

From ff10b0d67f0c0ea4aeacf43900074df6d116f0f0 Mon Sep 17 00:00:00 2001
From: pyiron-runner <pyiron@mpie.de>
Date: Tue, 19 Aug 2025 09:53:15 +0000
Subject: [PATCH 14/83] Format black

---
 executorlib/task_scheduler/interactive/pysqaspawner.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index afedada7..459e2132 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -99,7 +99,9 @@ def generate_command(self, command_lst: list[str]) -> list[str]:
             if self._num_nodes is not None:
                 command_prepend_lst += ["-N", str(self._num_nodes)]
             if self._threads_per_core > 1:
-                command_prepend_lst += ["--cpus-per-task=" + str(self._threads_per_core)]
+                command_prepend_lst += [
+                    "--cpus-per-task=" + str(self._threads_per_core)
+                ]
             if self._gpus_per_core > 0:
                 command_prepend_lst += ["--gpus-per-task=" + str(self._gpus_per_core)]
             if self._exclusive:
@@ -113,7 +115,7 @@ def generate_command(self, command_lst: list[str]) -> list[str]:
             if self._num_nodes is not None:
                 raise ValueError()
             if self._threads_per_core > 1:
-               raise ValueError()
+                raise ValueError()
             if self._gpus_per_core > 0:
                 raise ValueError()
             if self._exclusive:

From 38e022078f0714796fba78ac66fbe92ef84c17f6 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Tue, 19 Aug 2025 12:07:14 +0200
Subject: [PATCH 15/83] fixes

---
 executorlib/task_scheduler/interactive/pysqaspawner.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index 459e2132..8784686a 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -97,17 +97,17 @@ def generate_command(self, command_lst: list[str]) -> list[str]:
             if self._pmi_mode is not None:
                 command_prepend += ["--mpi=" + self._pmi_mode]
             if self._num_nodes is not None:
-                command_prepend_lst += ["-N", str(self._num_nodes)]
+                command_prepend += ["-N", str(self._num_nodes)]
             if self._threads_per_core > 1:
-                command_prepend_lst += [
+                command_prepend += [
                     "--cpus-per-task=" + str(self._threads_per_core)
                 ]
             if self._gpus_per_core > 0:
-                command_prepend_lst += ["--gpus-per-task=" + str(self._gpus_per_core)]
+                command_prepend += ["--gpus-per-task=" + str(self._gpus_per_core)]
             if self._exclusive:
-                command_prepend_lst += ["--exact"]
+                command_prepend += ["--exact"]
             if self._openmpi_oversubscribe:
-                command_prepend_lst += ["--oversubscribe"]
+                command_prepend += ["--oversubscribe"]
         elif self._cores > 1 and self._backend == "flux":
             command_prepend = ["flux", "run", "-n", str(self._cores)]
             if self._pmi_mode is not None:

From 713887841cdffe537af659ccde622039a3c755ff Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 19 Aug 2025 10:07:24 +0000
Subject: [PATCH 16/83] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 executorlib/task_scheduler/interactive/pysqaspawner.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index 8784686a..9fa06e0e 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -99,9 +99,7 @@ def generate_command(self, command_lst: list[str]) -> list[str]:
             if self._num_nodes is not None:
                 command_prepend += ["-N", str(self._num_nodes)]
             if self._threads_per_core > 1:
-                command_prepend += [
-                    "--cpus-per-task=" + str(self._threads_per_core)
-                ]
+                command_prepend += ["--cpus-per-task=" + str(self._threads_per_core)]
             if self._gpus_per_core > 0:
                 command_prepend += ["--gpus-per-task=" + str(self._gpus_per_core)]
             if self._exclusive:

From a9c4c687fdab55026692cea29ecc8220dcaa050f Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Tue, 19 Aug 2025 17:40:52 +0200
Subject: [PATCH 17/83] add test for flux block allocation

---
 tests/test_fluxclusterexecutor.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tests/test_fluxclusterexecutor.py b/tests/test_fluxclusterexecutor.py
index 0968fabb..fc2c1bc9 100644
--- a/tests/test_fluxclusterexecutor.py
+++ b/tests/test_fluxclusterexecutor.py
@@ -51,6 +51,20 @@ def test_executor(self):
             self.assertEqual(len(os.listdir("executorlib_cache")), 4)
             self.assertTrue(fs1.done())
 
+    def test_executor_blockallocation(self):
+        with FluxClusterExecutor(
+            resource_dict={"cores": 2, "cwd": "executorlib_cache"},
+            block_allocation=True,
+            cache_directory="executorlib_cache",
+            pmi_mode=pmi,
+        ) as exe:
+            cloudpickle_register(ind=1)
+            fs1 = exe.submit(mpi_funct, 1)
+            self.assertFalse(fs1.done())
+            self.assertEqual(fs1.result(), [(1, 2, 0), (1, 2, 1)])
+            self.assertEqual(len(os.listdir("executorlib_cache")), 4)
+            self.assertTrue(fs1.done())
+
     def test_executor_no_cwd(self):
         with FluxClusterExecutor(
             resource_dict={"cores": 2},

From 0e60b287749174a6cab94c2ee72bf17d72eb91f8 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Tue, 19 Aug 2025 17:47:00 +0200
Subject: [PATCH 18/83] fixes

---
 executorlib/task_scheduler/worker/__init__.py | 0
 tests/test_fluxclusterexecutor.py             | 1 +
 2 files changed, 1 insertion(+)
 delete mode 100644 executorlib/task_scheduler/worker/__init__.py

diff --git a/executorlib/task_scheduler/worker/__init__.py b/executorlib/task_scheduler/worker/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/test_fluxclusterexecutor.py b/tests/test_fluxclusterexecutor.py
index fc2c1bc9..9bcc1f5a 100644
--- a/tests/test_fluxclusterexecutor.py
+++ b/tests/test_fluxclusterexecutor.py
@@ -57,6 +57,7 @@ def test_executor_blockallocation(self):
             block_allocation=True,
             cache_directory="executorlib_cache",
             pmi_mode=pmi,
+            max_workers=2,
         ) as exe:
             cloudpickle_register(ind=1)
             fs1 = exe.submit(mpi_funct, 1)

From faf4c50691651a8612d9756070c2738514a0d28d Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Tue, 19 Aug 2025 17:55:58 +0200
Subject: [PATCH 19/83] more fixes

---
 executorlib/task_scheduler/interactive/pysqaspawner.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index 9fa06e0e..2e3e52d2 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -25,6 +25,7 @@ def __init__(
         pmi_mode: Optional[str] = None,
         config_directory: Optional[str] = None,
         backend: Optional[str] = None,
+        **kwargs,
     ):
         """
         Subprocess interface implementation.
@@ -49,6 +50,7 @@ def __init__(
         self._pmi_mode = pmi_mode
         self._config_directory = config_directory
         self._backend = backend
+        self._pysqa_submission_kwargs = kwargs
 
     def bootup(
         self,
@@ -69,7 +71,7 @@ def bootup(
             command=" ".join(self.generate_command(command_lst=command_lst)),
             working_directory=self._cwd,
             cores=int(self._cores * self._threads_per_core),
-            **self._slurm_cmd_args,
+            **self._pysqa_submission_kwargs,
         )
         while True:
             status = qa.get_status_of_job(process_id=self._process)
@@ -106,6 +108,8 @@ def generate_command(self, command_lst: list[str]) -> list[str]:
                 command_prepend += ["--exact"]
             if self._openmpi_oversubscribe:
                 command_prepend += ["--oversubscribe"]
+            if self._slurm_cmd_args is not None and len(self._slurm_cmd_args) > 0:
+                command_prepend += self._slurm_cmd_args
         elif self._cores > 1 and self._backend == "flux":
             command_prepend = ["flux", "run", "-n", str(self._cores)]
             if self._pmi_mode is not None:

From 4bd0001bbc687e6ba2b7276a6970b0221a2b268f Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Tue, 19 Aug 2025 21:10:19 +0200
Subject: [PATCH 20/83] fixes

---
 executorlib/task_scheduler/interactive/pysqaspawner.py | 4 +++-
 tests/test_fluxclusterexecutor.py                      | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index 2e3e52d2..e724b111 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -1,3 +1,4 @@
+import os
 from time import sleep
 from typing import Callable, Optional
 
@@ -183,7 +184,8 @@ def create_pysqa_block_allocation_scheduler(
     if resource_dict is None:
         resource_dict = {}
     cores_per_worker = resource_dict.get("cores", 1)
-    resource_dict["cache_directory"] = cache_directory
+    resource_dict["cwd"] = os.path.abspath(resource_dict["cwd"])
+    resource_dict["cache_directory"] = os.path.abspath(cache_directory)
     resource_dict["hostname_localhost"] = hostname_localhost
     resource_dict["log_obj_size"] = log_obj_size
     resource_dict["pmi_mode"] = pmi_mode
diff --git a/tests/test_fluxclusterexecutor.py b/tests/test_fluxclusterexecutor.py
index 9bcc1f5a..0231a14e 100644
--- a/tests/test_fluxclusterexecutor.py
+++ b/tests/test_fluxclusterexecutor.py
@@ -57,7 +57,7 @@ def test_executor_blockallocation(self):
             block_allocation=True,
             cache_directory="executorlib_cache",
             pmi_mode=pmi,
-            max_workers=2,
+            max_workers=1,
         ) as exe:
             cloudpickle_register(ind=1)
             fs1 = exe.submit(mpi_funct, 1)

From cf1cfe9899766a8f4aad70bf91a4ccfb59532c20 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Tue, 19 Aug 2025 21:26:23 +0200
Subject: [PATCH 21/83] handle different types

---
 executorlib/task_scheduler/interactive/pysqaspawner.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index e724b111..a4225301 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -184,8 +184,12 @@ def create_pysqa_block_allocation_scheduler(
     if resource_dict is None:
         resource_dict = {}
     cores_per_worker = resource_dict.get("cores", 1)
-    resource_dict["cwd"] = os.path.abspath(resource_dict["cwd"])
-    resource_dict["cache_directory"] = os.path.abspath(cache_directory)
+    if "cwd" in resource_dict:
+        resource_dict["cwd"] = os.path.abspath(resource_dict["cwd"])
+    if cache_directory is None:
+        resource_dict["cache_directory"] = os.path.abspath(cache_directory)
+    else:
+        resource_dict["cache_directory"] = None
     resource_dict["hostname_localhost"] = hostname_localhost
     resource_dict["log_obj_size"] = log_obj_size
     resource_dict["pmi_mode"] = pmi_mode

From 3887d1616624f7c4262d7ec41ac184531f8669a0 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Tue, 19 Aug 2025 23:09:05 +0200
Subject: [PATCH 22/83] fixes

---
 executorlib/task_scheduler/interactive/pysqaspawner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index a4225301..9a36fede 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -186,7 +186,7 @@ def create_pysqa_block_allocation_scheduler(
     cores_per_worker = resource_dict.get("cores", 1)
     if "cwd" in resource_dict:
         resource_dict["cwd"] = os.path.abspath(resource_dict["cwd"])
-    if cache_directory is None:
+    if cache_directory is not None:
         resource_dict["cache_directory"] = os.path.abspath(cache_directory)
     else:
         resource_dict["cache_directory"] = None

From b3ab3a25470888cdec714962f6acca7370a56d65 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Sun, 24 Aug 2025 17:10:12 +0200
Subject: [PATCH 23/83] Add print commands

---
 executorlib/standalone/scheduler.py                   |  2 ++
 .../task_scheduler/interactive/blockallocation.py     |  5 +++++
 .../task_scheduler/interactive/pysqaspawner.py        | 11 +++++------
 executorlib/task_scheduler/interactive/shared.py      |  4 ++++
 4 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/executorlib/standalone/scheduler.py b/executorlib/standalone/scheduler.py
index bc68187b..ce36a15e 100644
--- a/executorlib/standalone/scheduler.py
+++ b/executorlib/standalone/scheduler.py
@@ -23,6 +23,7 @@ def terminate_with_pysqa(
         queue_type=backend,
         execute_command=pysqa_execute_command,
     )
+    print(qa.get_queue_status())
     status = qa.get_status_of_job(process_id=queue_id)
     if status is not None and status not in ["finished", "error"]:
         with contextlib.suppress(subprocess.CalledProcessError):
@@ -52,6 +53,7 @@ def pysqa_execute_command(
     """
     if shell and isinstance(commands, list):
         commands = " ".join(commands)
+    print(commands, working_directory)
     out = subprocess.check_output(
         commands,
         cwd=working_directory,
diff --git a/executorlib/task_scheduler/interactive/blockallocation.py b/executorlib/task_scheduler/interactive/blockallocation.py
index 96cec2c1..a6e0ae7a 100644
--- a/executorlib/task_scheduler/interactive/blockallocation.py
+++ b/executorlib/task_scheduler/interactive/blockallocation.py
@@ -156,14 +156,19 @@ def shutdown(self, wait: bool = True, *, cancel_futures: bool = False):
             if cancel_futures:
                 cancel_items_in_queue(que=self._future_queue)
             if isinstance(self._process, list):
+                print(len(self._process), wait)
                 for _ in range(len(self._process)):
                     self._future_queue.put({"shutdown": True, "wait": wait})
+                print("after submission", wait)
                 if wait:
                     for process in self._process:
+                        print("join")
                         process.join()
+                    print("join done")
                     self._future_queue.join()
         self._process = None
         self._future_queue = None
+        print("block shutdown done")
 
     def _set_process(self, process: list[Thread]):  # type: ignore
         """
diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index 9a36fede..8ff95e0d 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -68,12 +68,14 @@ def bootup(
             queue_type=self._backend,
             execute_command=pysqa_execute_command,
         )
+        print(self._process, self)
         self._process = qa.submit_job(
             command=" ".join(self.generate_command(command_lst=command_lst)),
             working_directory=self._cwd,
             cores=int(self._cores * self._threads_per_core),
             **self._pysqa_submission_kwargs,
         )
+        print(self._process, self)
         while True:
             status = qa.get_status_of_job(process_id=self._process)
             if status in ["running", "pending"]:
@@ -147,6 +149,7 @@ def shutdown(self, wait: bool = True):
                 backend=self._backend,
             )
         self._process = None
+        print("terminate done")
 
     def poll(self) -> bool:
         """
@@ -184,12 +187,8 @@ def create_pysqa_block_allocation_scheduler(
     if resource_dict is None:
         resource_dict = {}
     cores_per_worker = resource_dict.get("cores", 1)
-    if "cwd" in resource_dict:
-        resource_dict["cwd"] = os.path.abspath(resource_dict["cwd"])
-    if cache_directory is not None:
-        resource_dict["cache_directory"] = os.path.abspath(cache_directory)
-    else:
-        resource_dict["cache_directory"] = None
+    resource_dict["cwd"] = os.path.abspath(resource_dict["cwd"])
+    resource_dict["cache_directory"] = os.path.abspath(cache_directory)
     resource_dict["hostname_localhost"] = hostname_localhost
     resource_dict["log_obj_size"] = log_obj_size
     resource_dict["pmi_mode"] = pmi_mode
diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index 02162308..baf754a7 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -68,10 +68,14 @@ def execute_tasks(
     while True:
         task_dict = future_queue.get()
         if "shutdown" in task_dict and task_dict["shutdown"]:
+            print("before shutdown", interface, interface._process, interface._spawner, interface._spawner._process)
             interface.shutdown(wait=task_dict["wait"])
+            print("before done")
             _task_done(future_queue=future_queue)
+            print("before join", queue_join_on_shutdown)
             if queue_join_on_shutdown:
                 future_queue.join()
+            print("break")
             break
         elif "fn" in task_dict and "future" in task_dict:
             if error_log_file is not None:

From 3936620830897cf26402d8d5cd3f7631e7c57f1c Mon Sep 17 00:00:00 2001
From: pyiron-runner <pyiron@mpie.de>
Date: Sun, 24 Aug 2025 15:10:48 +0000
Subject: [PATCH 24/83] Format black

---
 executorlib/task_scheduler/interactive/shared.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index baf754a7..eebcc4e9 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -68,7 +68,13 @@ def execute_tasks(
     while True:
         task_dict = future_queue.get()
         if "shutdown" in task_dict and task_dict["shutdown"]:
-            print("before shutdown", interface, interface._process, interface._spawner, interface._spawner._process)
+            print(
+                "before shutdown",
+                interface,
+                interface._process,
+                interface._spawner,
+                interface._spawner._process,
+            )
             interface.shutdown(wait=task_dict["wait"])
             print("before done")
             _task_done(future_queue=future_queue)

From a9f4eea39668622e1788cb7f33ed0c7d8525784c Mon Sep 17 00:00:00 2001
From: jan-janssen <jan.janssen@outlook.com>
Date: Sun, 24 Aug 2025 18:26:35 +0200
Subject: [PATCH 25/83] hash for worker directory

---
 executorlib/standalone/scheduler.py                    |  2 --
 .../task_scheduler/interactive/blockallocation.py      |  5 -----
 executorlib/task_scheduler/interactive/pysqaspawner.py |  7 +++----
 executorlib/task_scheduler/interactive/shared.py       | 10 ----------
 4 files changed, 3 insertions(+), 21 deletions(-)

diff --git a/executorlib/standalone/scheduler.py b/executorlib/standalone/scheduler.py
index ce36a15e..bc68187b 100644
--- a/executorlib/standalone/scheduler.py
+++ b/executorlib/standalone/scheduler.py
@@ -23,7 +23,6 @@ def terminate_with_pysqa(
         queue_type=backend,
         execute_command=pysqa_execute_command,
     )
-    print(qa.get_queue_status())
     status = qa.get_status_of_job(process_id=queue_id)
     if status is not None and status not in ["finished", "error"]:
         with contextlib.suppress(subprocess.CalledProcessError):
@@ -53,7 +52,6 @@ def pysqa_execute_command(
     """
     if shell and isinstance(commands, list):
         commands = " ".join(commands)
-    print(commands, working_directory)
     out = subprocess.check_output(
         commands,
         cwd=working_directory,
diff --git a/executorlib/task_scheduler/interactive/blockallocation.py b/executorlib/task_scheduler/interactive/blockallocation.py
index a6e0ae7a..96cec2c1 100644
--- a/executorlib/task_scheduler/interactive/blockallocation.py
+++ b/executorlib/task_scheduler/interactive/blockallocation.py
@@ -156,19 +156,14 @@ def shutdown(self, wait: bool = True, *, cancel_futures: bool = False):
             if cancel_futures:
                 cancel_items_in_queue(que=self._future_queue)
             if isinstance(self._process, list):
-                print(len(self._process), wait)
                 for _ in range(len(self._process)):
                     self._future_queue.put({"shutdown": True, "wait": wait})
-                print("after submission", wait)
                 if wait:
                     for process in self._process:
-                        print("join")
                         process.join()
-                    print("join done")
                     self._future_queue.join()
         self._process = None
         self._future_queue = None
-        print("block shutdown done")
 
     def _set_process(self, process: list[Thread]):  # type: ignore
         """
diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index 8ff95e0d..a2caa965 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -1,4 +1,5 @@
 import os
+import hashlib
 from time import sleep
 from typing import Callable, Optional
 
@@ -68,14 +69,13 @@ def bootup(
             queue_type=self._backend,
             execute_command=pysqa_execute_command,
         )
-        print(self._process, self)
+        hash = hashlib.md5(str(self).encode()).hexdigest()
         self._process = qa.submit_job(
             command=" ".join(self.generate_command(command_lst=command_lst)),
-            working_directory=self._cwd,
+            working_directory=os.path.join(self._cwd, hash),
             cores=int(self._cores * self._threads_per_core),
             **self._pysqa_submission_kwargs,
         )
-        print(self._process, self)
         while True:
             status = qa.get_status_of_job(process_id=self._process)
             if status in ["running", "pending"]:
@@ -149,7 +149,6 @@ def shutdown(self, wait: bool = True):
                 backend=self._backend,
             )
         self._process = None
-        print("terminate done")
 
     def poll(self) -> bool:
         """
diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index eebcc4e9..02162308 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -68,20 +68,10 @@ def execute_tasks(
     while True:
         task_dict = future_queue.get()
         if "shutdown" in task_dict and task_dict["shutdown"]:
-            print(
-                "before shutdown",
-                interface,
-                interface._process,
-                interface._spawner,
-                interface._spawner._process,
-            )
             interface.shutdown(wait=task_dict["wait"])
-            print("before done")
             _task_done(future_queue=future_queue)
-            print("before join", queue_join_on_shutdown)
             if queue_join_on_shutdown:
                 future_queue.join()
-            print("break")
             break
         elif "fn" in task_dict and "future" in task_dict:
             if error_log_file is not None:

From 6fb2decc7b12e096ab3effb72132e9450b645d60 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 24 Aug 2025 16:26:44 +0000
Subject: [PATCH 26/83] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 executorlib/task_scheduler/interactive/pysqaspawner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index a2caa965..62846957 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -1,5 +1,5 @@
-import os
 import hashlib
+import os
 from time import sleep
 from typing import Callable, Optional
 

From ef7f5bf540bce7574ed819da44a8184d619ff607 Mon Sep 17 00:00:00 2001
From: Jan Janssen <jan-janssen@users.noreply.github.com>
Date: Sun, 24 Aug 2025 18:51:38 +0200
Subject: [PATCH 27/83] Update test_fluxclusterexecutor.py

---
 tests/test_fluxclusterexecutor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_fluxclusterexecutor.py b/tests/test_fluxclusterexecutor.py
index 0231a14e..654a719a 100644
--- a/tests/test_fluxclusterexecutor.py
+++ b/tests/test_fluxclusterexecutor.py
@@ -63,7 +63,7 @@ def test_executor_blockallocation(self):
             fs1 = exe.submit(mpi_funct, 1)
             self.assertFalse(fs1.done())
             self.assertEqual(fs1.result(), [(1, 2, 0), (1, 2, 1)])
-            self.assertEqual(len(os.listdir("executorlib_cache")), 4)
+            self.assertEqual(len(os.listdir("executorlib_cache")), 2)
             self.assertTrue(fs1.done())
 
     def test_executor_no_cwd(self):

From 199d3d834c269f331f4abef4a712d77537bba1f0 Mon Sep 17 00:00:00 2001
From: jan-janssen <jan.janssen@outlook.com>
Date: Sun, 24 Aug 2025 20:13:12 +0200
Subject: [PATCH 28/83] fixes

---
 .../interactive/pysqaspawner.py               | 11 +++++++--
 tests/test_fluxclusterexecutor.py             | 24 +++++++++++++++++++
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index 62846957..83daf90c 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -70,9 +70,13 @@ def bootup(
             execute_command=pysqa_execute_command,
         )
         hash = hashlib.md5(str(self).encode()).hexdigest()
+        if self._cwd is not None:
+            working_directory = os.path.join(self._cwd, hash)
+        else:
+            working_directory = os.path.abspath(hash)
         self._process = qa.submit_job(
             command=" ".join(self.generate_command(command_lst=command_lst)),
-            working_directory=os.path.join(self._cwd, hash),
+            working_directory=working_directory,
             cores=int(self._cores * self._threads_per_core),
             **self._pysqa_submission_kwargs,
         )
@@ -187,7 +191,10 @@ def create_pysqa_block_allocation_scheduler(
         resource_dict = {}
     cores_per_worker = resource_dict.get("cores", 1)
     resource_dict["cwd"] = os.path.abspath(resource_dict["cwd"])
-    resource_dict["cache_directory"] = os.path.abspath(cache_directory)
+    if cache_directory is not None:
+        resource_dict["cache_directory"] = os.path.abspath(cache_directory)
+    else:
+        resource_dict["cache_directory"] = os.path.abspath(".")
     resource_dict["hostname_localhost"] = hostname_localhost
     resource_dict["log_obj_size"] = log_obj_size
     resource_dict["pmi_mode"] = pmi_mode
diff --git a/tests/test_fluxclusterexecutor.py b/tests/test_fluxclusterexecutor.py
index 654a719a..9b6c36d0 100644
--- a/tests/test_fluxclusterexecutor.py
+++ b/tests/test_fluxclusterexecutor.py
@@ -24,6 +24,11 @@
 skip_mpi4py_test = importlib.util.find_spec("mpi4py") is None
 
 
+def echo(i):
+    sleep(1)
+    return i
+
+
 def mpi_funct(i):
     from mpi4py import MPI
 
@@ -66,6 +71,25 @@ def test_executor_blockallocation(self):
             self.assertEqual(len(os.listdir("executorlib_cache")), 2)
             self.assertTrue(fs1.done())
 
+    def test_executor_blockallocation_echo(self):
+        with FluxClusterExecutor(
+            resource_dict={"cores": 1, "cwd": "executorlib_cache"},
+            block_allocation=True,
+            cache_directory="executorlib_cache",
+            pmi_mode=pmi,
+            max_workers=2,
+        ) as exe:
+            cloudpickle_register(ind=1)
+            fs1 = exe.submit(echo, 1)
+            fs2 = exe.submit(echo, 2)
+            self.assertFalse(fs1.done())
+            self.assertFalse(fs2.done())
+            self.assertEqual(fs1.result(), 1)
+            self.assertEqual(fs2.result(), 2)
+            self.assertEqual(len(os.listdir("executorlib_cache")), 2)
+            self.assertTrue(fs1.done())
+            self.assertTrue(fs2.done())
+
     def test_executor_no_cwd(self):
         with FluxClusterExecutor(
             resource_dict={"cores": 2},

From 18e2b016be8b5f6a2a92a83a63ef0032cba58e8d Mon Sep 17 00:00:00 2001
From: jan-janssen <jan.janssen@outlook.com>
Date: Sun, 24 Aug 2025 20:15:45 +0200
Subject: [PATCH 29/83] fix test

---
 tests/test_fluxclusterexecutor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_fluxclusterexecutor.py b/tests/test_fluxclusterexecutor.py
index 9b6c36d0..6a40bed4 100644
--- a/tests/test_fluxclusterexecutor.py
+++ b/tests/test_fluxclusterexecutor.py
@@ -86,7 +86,7 @@ def test_executor_blockallocation_echo(self):
             self.assertFalse(fs2.done())
             self.assertEqual(fs1.result(), 1)
             self.assertEqual(fs2.result(), 2)
-            self.assertEqual(len(os.listdir("executorlib_cache")), 2)
+            self.assertEqual(len(os.listdir("executorlib_cache")), 4)
             self.assertTrue(fs1.done())
             self.assertTrue(fs2.done())
 

From 15b69d24b709b49576ef36fe221086d003444b2b Mon Sep 17 00:00:00 2001
From: jan-janssen <jan.janssen@outlook.com>
Date: Fri, 29 Aug 2025 13:21:38 +0200
Subject: [PATCH 30/83] only receive jobs when worker is running

---
 executorlib/task_scheduler/interactive/pysqaspawner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index 83daf90c..edcb289f 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -82,7 +82,7 @@ def bootup(
         )
         while True:
             status = qa.get_status_of_job(process_id=self._process)
-            if status in ["running", "pending"]:
+            if status == "running":
                 break
             elif status is None:
                 raise RuntimeError(

From fc7a3825783e1b7fdd9f3df3f40e4291fa497d29 Mon Sep 17 00:00:00 2001
From: jan-janssen <jan.janssen@outlook.com>
Date: Fri, 29 Aug 2025 13:56:58 +0200
Subject: [PATCH 31/83] fix job resubmission

---
 .../interactive/pysqaspawner.py               | 30 +++++++++++--------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index edcb289f..8cc3249b 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -69,25 +69,18 @@ def bootup(
             queue_type=self._backend,
             execute_command=pysqa_execute_command,
         )
-        hash = hashlib.md5(str(self).encode()).hexdigest()
-        if self._cwd is not None:
-            working_directory = os.path.join(self._cwd, hash)
-        else:
-            working_directory = os.path.abspath(hash)
-        self._process = qa.submit_job(
-            command=" ".join(self.generate_command(command_lst=command_lst)),
-            working_directory=working_directory,
-            cores=int(self._cores * self._threads_per_core),
-            **self._pysqa_submission_kwargs,
-        )
+        job_id = self._start_process_helper(command_lst=command_lst, queue_adapter=qa)
         while True:
-            status = qa.get_status_of_job(process_id=self._process)
+            status = qa.get_status_of_job(process_id=job_id)
             if status == "running":
+                self._process = job_id
                 break
             elif status is None:
                 raise RuntimeError(
                     f"Failed to start the process with command: {command_lst}"
                 )
+            elif status == "error":
+                job_id = self._start_process_helper(command_lst=command_lst, queue_adapter=qa)
             else:
                 sleep(1)  # Wait for the process to start
 
@@ -173,6 +166,19 @@ def poll(self) -> bool:
             ]
         else:
             return False
+        
+    def _start_process_helper(self, command_lst: str, queue_adapter: QueueAdapter) -> int:
+        hash = hashlib.md5(str(self).encode()).hexdigest()
+        if self._cwd is not None:
+            working_directory = os.path.join(self._cwd, hash)
+        else:
+            working_directory = os.path.abspath(hash)
+        return queue_adapter.submit_job(
+            command=" ".join(self.generate_command(command_lst=command_lst)),
+            working_directory=working_directory,
+            cores=int(self._cores * self._threads_per_core),
+            **self._pysqa_submission_kwargs,
+        )
 
 
 def create_pysqa_block_allocation_scheduler(

From 7a3b19125decf26e73b192e036f0f03e05a83fc7 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 29 Aug 2025 11:57:09 +0000
Subject: [PATCH 32/83] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 executorlib/task_scheduler/interactive/pysqaspawner.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index 8cc3249b..2f73b725 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -80,7 +80,9 @@ def bootup(
                     f"Failed to start the process with command: {command_lst}"
                 )
             elif status == "error":
-                job_id = self._start_process_helper(command_lst=command_lst, queue_adapter=qa)
+                job_id = self._start_process_helper(
+                    command_lst=command_lst, queue_adapter=qa
+                )
             else:
                 sleep(1)  # Wait for the process to start
 
@@ -166,8 +168,10 @@ def poll(self) -> bool:
             ]
         else:
             return False
-        
-    def _start_process_helper(self, command_lst: str, queue_adapter: QueueAdapter) -> int:
+
+    def _start_process_helper(
+        self, command_lst: str, queue_adapter: QueueAdapter
+    ) -> int:
         hash = hashlib.md5(str(self).encode()).hexdigest()
         if self._cwd is not None:
             working_directory = os.path.join(self._cwd, hash)

From 4033bf39acf3fcd78327501f8bf6f0c11ffd440d Mon Sep 17 00:00:00 2001
From: jan-janssen <jan.janssen@outlook.com>
Date: Fri, 29 Aug 2025 13:59:20 +0200
Subject: [PATCH 33/83] fix type hints

---
 executorlib/task_scheduler/interactive/pysqaspawner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index 2f73b725..6ad2265e 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -170,7 +170,7 @@ def poll(self) -> bool:
             return False
 
     def _start_process_helper(
-        self, command_lst: str, queue_adapter: QueueAdapter
+        self, command_lst: list[str], queue_adapter: QueueAdapter
     ) -> int:
         hash = hashlib.md5(str(self).encode()).hexdigest()
         if self._cwd is not None:

From 1c3e26307014975d17c21becb5b7a11abd1df665 Mon Sep 17 00:00:00 2001
From: jan-janssen <jan.janssen@outlook.com>
Date: Fri, 29 Aug 2025 17:14:11 +0200
Subject: [PATCH 34/83] restart workers after they were killed

---
 .../standalone/interactive/communication.py   | 18 +++++++++--
 .../task_scheduler/interactive/shared.py      | 32 +++++++++++++++----
 2 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/executorlib/standalone/interactive/communication.py b/executorlib/standalone/interactive/communication.py
index 4a198882..78d82f79 100644
--- a/executorlib/standalone/interactive/communication.py
+++ b/executorlib/standalone/interactive/communication.py
@@ -7,6 +7,10 @@
 import zmq
 
 
+class ExecutorlibSockerError(RuntimeError):
+    pass
+
+
 class SocketInterface:
     """
     The SocketInterface is an abstraction layer on top of the zero message queue.
@@ -16,7 +20,7 @@ class SocketInterface:
         log_obj_size (boolean): Enable debug mode which reports the size of the communicated objects.
     """
 
-    def __init__(self, spawner=None, log_obj_size=False):
+    def __init__(self, spawner=None, log_obj_size: bool = False, time_out_ms: int = 1000):
         """
         Initialize the SocketInterface.
 
@@ -25,12 +29,16 @@ def __init__(self, spawner=None, log_obj_size=False):
         """
         self._context = zmq.Context()
         self._socket = self._context.socket(zmq.PAIR)
+        self._poller = zmq.Poller()
+        self._poller.register(self._socket, zmq.POLLIN) 
         self._process = None
+        self._time_out_ms = time_out_ms
         if log_obj_size:
             self._logger = logging.getLogger("executorlib")
         else:
             self._logger = None
         self._spawner = spawner
+        self._command_lst = []
 
     def send_dict(self, input_dict: dict):
         """
@@ -52,7 +60,12 @@ def receive_dict(self) -> dict:
         Returns:
             dict: dictionary with response received from the connected client
         """
-        data = self._socket.recv()
+        response_lst = []
+        while len(response_lst) == 0:
+            response_lst = self._poller.poll(self._time_out_ms)
+            if not self._spawner.poll():
+                raise ExecutorlibSockerError()
+        data = self._socket.recv(zmq.NOBLOCK)
         if self._logger is not None:
             self._logger.warning(
                 "Received dictionary of size: " + str(sys.getsizeof(data))
@@ -97,6 +110,7 @@ def bootup(
         Args:
             command_lst (list): list of strings to start the client process
         """
+        self._command_lst = command_lst
         self._spawner.bootup(
             command_lst=command_lst,
         )
diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index 02162308..06b884b4 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -3,10 +3,12 @@
 import queue
 import time
 from typing import Callable, Optional
+from concurrent.futures._base import PENDING
 
 from executorlib.standalone.command import get_interactive_execute_command
 from executorlib.standalone.interactive.communication import (
     SocketInterface,
+    ExecutorlibSockerError,
     interface_bootup,
 )
 from executorlib.standalone.interactive.spawner import BaseSpawner, MpiExecSpawner
@@ -107,9 +109,17 @@ def _execute_task_without_cache(
         try:
             f.set_result(interface.send_and_receive_dict(input_dict=task_dict))
         except Exception as thread_exception:
-            interface.shutdown(wait=True)
-            _task_done(future_queue=future_queue)
-            f.set_exception(exception=thread_exception)
+            if isinstance(thread_exception, ExecutorlibSockerError):
+                f._state = PENDING
+                _task_done(future_queue=future_queue)
+                future_queue.put(task_dict | {"future": f})
+                interface._spawner.bootup(
+                    command_lst=interface._command_lst,
+                )
+            else:
+                interface.shutdown(wait=True)
+                _task_done(future_queue=future_queue)
+                f.set_exception(exception=thread_exception)
         else:
             _task_done(future_queue=future_queue)
 
@@ -154,10 +164,18 @@ def _execute_task_with_cache(
                 dump(file_name=file_name, data_dict=data_dict)
                 f.set_result(result)
             except Exception as thread_exception:
-                interface.shutdown(wait=True)
-                _task_done(future_queue=future_queue)
-                f.set_exception(exception=thread_exception)
-                raise thread_exception
+                if isinstance(thread_exception, ExecutorlibSockerError):
+                    f._state = PENDING
+                    _task_done(future_queue=future_queue)
+                    future_queue.put(task_dict | {"future": f})
+                    interface._spawner.bootup(
+                        command_lst=interface._command_lst,
+                    )
+                else:
+                    interface.shutdown(wait=True)
+                    _task_done(future_queue=future_queue)
+                    f.set_exception(exception=thread_exception)
+                    raise thread_exception
             else:
                 _task_done(future_queue=future_queue)
     else:

From cea4ca15101b530d49e606a034f19fdd600b56c5 Mon Sep 17 00:00:00 2001
From: pyiron-runner <pyiron@mpie.de>
Date: Fri, 29 Aug 2025 15:14:48 +0000
Subject: [PATCH 35/83] Format black

---
 executorlib/standalone/interactive/communication.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/executorlib/standalone/interactive/communication.py b/executorlib/standalone/interactive/communication.py
index 78d82f79..d9f7ffed 100644
--- a/executorlib/standalone/interactive/communication.py
+++ b/executorlib/standalone/interactive/communication.py
@@ -20,7 +20,9 @@ class SocketInterface:
         log_obj_size (boolean): Enable debug mode which reports the size of the communicated objects.
     """
 
-    def __init__(self, spawner=None, log_obj_size: bool = False, time_out_ms: int = 1000):
+    def __init__(
+        self, spawner=None, log_obj_size: bool = False, time_out_ms: int = 1000
+    ):
         """
         Initialize the SocketInterface.
 
@@ -30,7 +32,7 @@ def __init__(self, spawner=None, log_obj_size: bool = False, time_out_ms: int =
         self._context = zmq.Context()
         self._socket = self._context.socket(zmq.PAIR)
         self._poller = zmq.Poller()
-        self._poller.register(self._socket, zmq.POLLIN) 
+        self._poller.register(self._socket, zmq.POLLIN)
         self._process = None
         self._time_out_ms = time_out_ms
         if log_obj_size:

From 35a937224081d904d0e539e5cc5b1ea4bc0faf1e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 29 Aug 2025 15:15:38 +0000
Subject: [PATCH 36/83] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 executorlib/task_scheduler/interactive/shared.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index 06b884b4..1cbc564a 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -2,13 +2,13 @@
 import os
 import queue
 import time
-from typing import Callable, Optional
 from concurrent.futures._base import PENDING
+from typing import Callable, Optional
 
 from executorlib.standalone.command import get_interactive_execute_command
 from executorlib.standalone.interactive.communication import (
-    SocketInterface,
     ExecutorlibSockerError,
+    SocketInterface,
     interface_bootup,
 )
 from executorlib.standalone.interactive.spawner import BaseSpawner, MpiExecSpawner

From 17f1c3ad6e61ee94c0e92a15bdc01503a43bf05f Mon Sep 17 00:00:00 2001
From: jan-janssen <jan.janssen@outlook.com>
Date: Fri, 29 Aug 2025 17:24:24 +0200
Subject: [PATCH 37/83] type fixes

---
 executorlib/standalone/interactive/communication.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/executorlib/standalone/interactive/communication.py b/executorlib/standalone/interactive/communication.py
index d9f7ffed..d4d7696c 100644
--- a/executorlib/standalone/interactive/communication.py
+++ b/executorlib/standalone/interactive/communication.py
@@ -1,7 +1,7 @@
 import logging
 import sys
 from socket import gethostname
-from typing import Optional
+from typing import Any, Optional
 
 import cloudpickle
 import zmq
@@ -35,12 +35,11 @@ def __init__(
         self._poller.register(self._socket, zmq.POLLIN)
         self._process = None
         self._time_out_ms = time_out_ms
+        self._logger: Optional[logging.Logger] = None
         if log_obj_size:
             self._logger = logging.getLogger("executorlib")
-        else:
-            self._logger = None
         self._spawner = spawner
-        self._command_lst = []
+        self._command_lst: list[str] = []
 
     def send_dict(self, input_dict: dict):
         """
@@ -62,7 +61,7 @@ def receive_dict(self) -> dict:
         Returns:
             dict: dictionary with response received from the connected client
         """
-        response_lst = []
+        response_lst: list[tuple[Any, int]] = []
         while len(response_lst) == 0:
             response_lst = self._poller.poll(self._time_out_ms)
             if not self._spawner.poll():

From acd91fe85e6a3ebee3d37cde9f2be978c618ffc7 Mon Sep 17 00:00:00 2001
From: jan-janssen <jan.janssen@outlook.com>
Date: Fri, 29 Aug 2025 17:50:42 +0200
Subject: [PATCH 38/83] helper function

---
 executorlib/task_scheduler/interactive/shared.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index 1cbc564a..2c8038cd 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -2,6 +2,7 @@
 import os
 import queue
 import time
+from concurrent.futures import Future
 from concurrent.futures._base import PENDING
 from typing import Callable, Optional
 
@@ -110,9 +111,7 @@ def _execute_task_without_cache(
             f.set_result(interface.send_and_receive_dict(input_dict=task_dict))
         except Exception as thread_exception:
             if isinstance(thread_exception, ExecutorlibSockerError):
-                f._state = PENDING
-                _task_done(future_queue=future_queue)
-                future_queue.put(task_dict | {"future": f})
+                _reset_task_dict(future_obj=f, future_queue=future_queue, task_dict=task_dict)
                 interface._spawner.bootup(
                     command_lst=interface._command_lst,
                 )
@@ -165,9 +164,7 @@ def _execute_task_with_cache(
                 f.set_result(result)
             except Exception as thread_exception:
                 if isinstance(thread_exception, ExecutorlibSockerError):
-                    f._state = PENDING
-                    _task_done(future_queue=future_queue)
-                    future_queue.put(task_dict | {"future": f})
+                    _reset_task_dict(future_obj=f, future_queue=future_queue, task_dict=task_dict)
                     interface._spawner.bootup(
                         command_lst=interface._command_lst,
                     )
@@ -188,3 +185,9 @@ def _execute_task_with_cache(
 def _task_done(future_queue: queue.Queue):
     with contextlib.suppress(ValueError):
         future_queue.task_done()
+
+
+def _reset_task_dict(future_obj: Future, future_queue: queue.Queue, task_dict: dict):
+    future_obj._state = PENDING
+    _task_done(future_queue=future_queue)
+    future_queue.put(task_dict | {"future": future_obj})

From 337fa450d6f58fad41006cd8558e82f9c206978e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 29 Aug 2025 15:50:52 +0000
Subject: [PATCH 39/83] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 executorlib/task_scheduler/interactive/shared.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index 2c8038cd..6c8cfbbb 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -111,7 +111,9 @@ def _execute_task_without_cache(
             f.set_result(interface.send_and_receive_dict(input_dict=task_dict))
         except Exception as thread_exception:
             if isinstance(thread_exception, ExecutorlibSockerError):
-                _reset_task_dict(future_obj=f, future_queue=future_queue, task_dict=task_dict)
+                _reset_task_dict(
+                    future_obj=f, future_queue=future_queue, task_dict=task_dict
+                )
                 interface._spawner.bootup(
                     command_lst=interface._command_lst,
                 )
@@ -164,7 +166,9 @@ def _execute_task_with_cache(
                 f.set_result(result)
             except Exception as thread_exception:
                 if isinstance(thread_exception, ExecutorlibSockerError):
-                    _reset_task_dict(future_obj=f, future_queue=future_queue, task_dict=task_dict)
+                    _reset_task_dict(
+                        future_obj=f, future_queue=future_queue, task_dict=task_dict
+                    )
                     interface._spawner.bootup(
                         command_lst=interface._command_lst,
                     )

From 19e4cbf93390aef7f9825a1c2ac149407beaaea5 Mon Sep 17 00:00:00 2001
From: jan-janssen <jan.janssen@outlook.com>
Date: Fri, 29 Aug 2025 17:54:50 +0200
Subject: [PATCH 40/83] introduce restart function

---
 executorlib/standalone/interactive/communication.py | 8 ++++++++
 executorlib/task_scheduler/interactive/shared.py    | 8 ++------
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/executorlib/standalone/interactive/communication.py b/executorlib/standalone/interactive/communication.py
index d4d7696c..9824e838 100644
--- a/executorlib/standalone/interactive/communication.py
+++ b/executorlib/standalone/interactive/communication.py
@@ -116,6 +116,14 @@ def bootup(
             command_lst=command_lst,
         )
 
+    def restart(self):
+        """
+        Restart the client process to onnect to the SocketInterface.
+        """
+        self._spawner.bootup(
+            command_lst=self._command_lst,
+        )
+
     def shutdown(self, wait: bool = True):
         """
         Shutdown the SocketInterface and the connected client process.
diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index 6c8cfbbb..7e3c8c69 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -114,9 +114,7 @@ def _execute_task_without_cache(
                 _reset_task_dict(
                     future_obj=f, future_queue=future_queue, task_dict=task_dict
                 )
-                interface._spawner.bootup(
-                    command_lst=interface._command_lst,
-                )
+                interface.restart()
             else:
                 interface.shutdown(wait=True)
                 _task_done(future_queue=future_queue)
@@ -169,9 +167,7 @@ def _execute_task_with_cache(
                     _reset_task_dict(
                         future_obj=f, future_queue=future_queue, task_dict=task_dict
                     )
-                    interface._spawner.bootup(
-                        command_lst=interface._command_lst,
-                    )
+                    interface.restart()
                 else:
                     interface.shutdown(wait=True)
                     _task_done(future_queue=future_queue)

From 9053074f4adc1babc8c2e8cdaf2b932dfd81be20 Mon Sep 17 00:00:00 2001
From: jan-janssen <jan.janssen@outlook.com>
Date: Fri, 29 Aug 2025 18:00:36 +0200
Subject: [PATCH 41/83] fix spelling

---
 executorlib/standalone/interactive/communication.py | 4 ++--
 executorlib/task_scheduler/interactive/shared.py    | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/executorlib/standalone/interactive/communication.py b/executorlib/standalone/interactive/communication.py
index 9824e838..0900a8cf 100644
--- a/executorlib/standalone/interactive/communication.py
+++ b/executorlib/standalone/interactive/communication.py
@@ -7,7 +7,7 @@
 import zmq
 
 
-class ExecutorlibSockerError(RuntimeError):
+class ExecutorlibSocketError(RuntimeError):
     pass
 
 
@@ -65,7 +65,7 @@ def receive_dict(self) -> dict:
         while len(response_lst) == 0:
             response_lst = self._poller.poll(self._time_out_ms)
             if not self._spawner.poll():
-                raise ExecutorlibSockerError()
+                raise ExecutorlibSocketError()
         data = self._socket.recv(zmq.NOBLOCK)
         if self._logger is not None:
             self._logger.warning(
diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index 7e3c8c69..883c3dac 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -8,7 +8,7 @@
 
 from executorlib.standalone.command import get_interactive_execute_command
 from executorlib.standalone.interactive.communication import (
-    ExecutorlibSockerError,
+    ExecutorlibSocketError,
     SocketInterface,
     interface_bootup,
 )
@@ -110,7 +110,7 @@ def _execute_task_without_cache(
         try:
             f.set_result(interface.send_and_receive_dict(input_dict=task_dict))
         except Exception as thread_exception:
-            if isinstance(thread_exception, ExecutorlibSockerError):
+            if isinstance(thread_exception, ExecutorlibSocketError):
                 _reset_task_dict(
                     future_obj=f, future_queue=future_queue, task_dict=task_dict
                 )
@@ -163,7 +163,7 @@ def _execute_task_with_cache(
                 dump(file_name=file_name, data_dict=data_dict)
                 f.set_result(result)
             except Exception as thread_exception:
-                if isinstance(thread_exception, ExecutorlibSockerError):
+                if isinstance(thread_exception, ExecutorlibSocketError):
                     _reset_task_dict(
                         future_obj=f, future_queue=future_queue, task_dict=task_dict
                     )

From 5362c73e27ffe464ebe0e8dd3cd2b143bf3e410d Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Sat, 30 Aug 2025 11:39:58 +0200
Subject: [PATCH 42/83] shutdown on del

---
 .../interactive/pysqaspawner.py               | 52 ++++++++++---------
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index 6ad2265e..48a14291 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -43,7 +43,6 @@ def __init__(
             cores=cores,
             openmpi_oversubscribe=openmpi_oversubscribe,
         )
-        self._process: Optional[int] = None
         self._threads_per_core = threads_per_core
         self._gpus_per_core = gpus_per_core
         self._num_nodes = num_nodes
@@ -53,6 +52,8 @@ def __init__(
         self._config_directory = config_directory
         self._backend = backend
         self._pysqa_submission_kwargs = kwargs
+        self._process: Optional[int] = None
+        self._queue_adapter: Optional[QueueAdapter] = None
 
     def bootup(
         self,
@@ -64,25 +65,18 @@ def bootup(
         Args:
             command_lst (list[str]): The command list to execute.
         """
-        qa = QueueAdapter(
+        self._queue_adapter = QueueAdapter(
             directory=self._config_directory,
             queue_type=self._backend,
             execute_command=pysqa_execute_command,
         )
-        job_id = self._start_process_helper(command_lst=command_lst, queue_adapter=qa)
+        self._process = self._start_process_helper(
+            command_lst=command_lst, 
+            queue_adapter=self._queue_adapter,
+        )
         while True:
-            status = qa.get_status_of_job(process_id=job_id)
-            if status == "running":
-                self._process = job_id
+            if self._check_process_helper(command_lst=command_lst):
                 break
-            elif status is None:
-                raise RuntimeError(
-                    f"Failed to start the process with command: {command_lst}"
-                )
-            elif status == "error":
-                job_id = self._start_process_helper(
-                    command_lst=command_lst, queue_adapter=qa
-                )
             else:
                 sleep(1)  # Wait for the process to start
 
@@ -147,7 +141,7 @@ def shutdown(self, wait: bool = True):
                 config_directory=self._config_directory,
                 backend=self._backend,
             )
-        self._process = None
+            self._process = None
 
     def poll(self) -> bool:
         """
@@ -156,16 +150,9 @@ def poll(self) -> bool:
         Returns:
             bool: True if the interface is running, False otherwise.
         """
-        qa = QueueAdapter(
-            directory=self._config_directory,
-            queue_type=self._backend,
-            execute_command=pysqa_execute_command,
-        )
         if self._process is not None:
-            return qa.get_status_of_job(process_id=self._process) in [
-                "running",
-                "pending",
-            ]
+            status = self._queue_adapter.get_status_of_job(process_id=self._process)
+            return status in ["running", "pending"]
         else:
             return False
 
@@ -183,6 +170,23 @@ def _start_process_helper(
             cores=int(self._cores * self._threads_per_core),
             **self._pysqa_submission_kwargs,
         )
+    
+    def _check_process_helper(self, command_lst: list[str]) -> bool:
+        status = self._queue_adapter.get_status_of_job(process_id=self._process)
+        if status == "running":
+            return True
+        elif status is None:
+            raise RuntimeError(
+                f"Failed to start the process with command: {command_lst}"
+            )
+        elif status == "error":
+            self._process = self._start_process_helper(
+                command_lst=command_lst, queue_adapter=self._queue_adapter
+            )
+        return False
+    
+    def __del__(self):
+        self.shutdown(wait=True)
 
 
 def create_pysqa_block_allocation_scheduler(

From 1b4baa90a96ff9b3de01a9a82bd740c47c16eb9f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 30 Aug 2025 09:40:07 +0000
Subject: [PATCH 43/83] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 executorlib/task_scheduler/interactive/pysqaspawner.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index 48a14291..a20eca60 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -71,7 +71,7 @@ def bootup(
             execute_command=pysqa_execute_command,
         )
         self._process = self._start_process_helper(
-            command_lst=command_lst, 
+            command_lst=command_lst,
             queue_adapter=self._queue_adapter,
         )
         while True:
@@ -170,7 +170,7 @@ def _start_process_helper(
             cores=int(self._cores * self._threads_per_core),
             **self._pysqa_submission_kwargs,
         )
-    
+
     def _check_process_helper(self, command_lst: list[str]) -> bool:
         status = self._queue_adapter.get_status_of_job(process_id=self._process)
         if status == "running":
@@ -184,7 +184,7 @@ def _check_process_helper(self, command_lst: list[str]) -> bool:
                 command_lst=command_lst, queue_adapter=self._queue_adapter
             )
         return False
-    
+
     def __del__(self):
         self.shutdown(wait=True)
 

From 0855ee99f7e8981043454cbd754b2f32ff815563 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Sat, 30 Aug 2025 11:43:10 +0200
Subject: [PATCH 44/83] type fixes

---
 executorlib/task_scheduler/interactive/pysqaspawner.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index a20eca60..73a8cb87 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -150,7 +150,7 @@ def poll(self) -> bool:
         Returns:
             bool: True if the interface is running, False otherwise.
         """
-        if self._process is not None:
+        if self._process is not None and self._queue_adapter is not None:
             status = self._queue_adapter.get_status_of_job(process_id=self._process)
             return status in ["running", "pending"]
         else:
@@ -172,7 +172,10 @@ def _start_process_helper(
         )
 
     def _check_process_helper(self, command_lst: list[str]) -> bool:
-        status = self._queue_adapter.get_status_of_job(process_id=self._process)
+        if self._queue_adapter is not None:
+            status = self._queue_adapter.get_status_of_job(process_id=self._process)
+        else:
+            status = None
         if status == "running":
             return True
         elif status is None:

From 8cb53caec7ae1ae75cb9644e120a739c9a7aaadd Mon Sep 17 00:00:00 2001
From: Jan Janssen <jan-janssen@users.noreply.github.com>
Date: Sat, 30 Aug 2025 18:22:21 +0200
Subject: [PATCH 45/83] Introduce stop function (#791)

* all tasks are stopped with stop function

* Format black

* add additional break

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix typing

* fixes

* shutdown

* restructure

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* the interface can only be none when it was cancelled before it started

* fix type hints

* fixes

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* be more explicit with types

---------

Co-authored-by: pyiron-runner <pyiron@mpie.de>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .../standalone/interactive/communication.py   | 40 ++++++++++++++-----
 executorlib/standalone/interactive/spawner.py |  9 +++--
 .../interactive/blockallocation.py            | 13 +++++-
 .../task_scheduler/interactive/fluxspawner.py |  6 ++-
 .../interactive/pysqaspawner.py               |  6 ++-
 .../task_scheduler/interactive/shared.py      | 35 +++++++++++-----
 6 files changed, 80 insertions(+), 29 deletions(-)

diff --git a/executorlib/standalone/interactive/communication.py b/executorlib/standalone/interactive/communication.py
index b5af3c56..b0c4bc39 100644
--- a/executorlib/standalone/interactive/communication.py
+++ b/executorlib/standalone/interactive/communication.py
@@ -1,7 +1,7 @@
 import logging
 import sys
 from socket import gethostname
-from typing import Any, Optional
+from typing import Any, Callable, Optional
 
 import cloudpickle
 import zmq
@@ -43,6 +43,7 @@ def __init__(
             self._logger = logging.getLogger("executorlib")
         self._spawner = spawner
         self._command_lst: list[str] = []
+        self._stop_function: Optional[Callable] = None
 
     def send_dict(self, input_dict: dict):
         """
@@ -107,7 +108,8 @@ def bind_to_random_port(self) -> int:
     def bootup(
         self,
         command_lst: list[str],
-    ):
+        stop_function: Optional[Callable] = None,
+    ) -> bool:
         """
         Boot up the client process to connect to the SocketInterface.
 
@@ -115,17 +117,26 @@ def bootup(
             command_lst (list): list of strings to start the client process
         """
         self._command_lst = command_lst
-        self._spawner.bootup(
+        self._stop_function = stop_function
+        if not self._spawner.bootup(
             command_lst=command_lst,
-        )
+            stop_function=stop_function,
+        ):
+            self._reset_socket()
+            return False
+        return True
 
     def restart(self):
         """
         Restart the client process to onnect to the SocketInterface.
         """
-        self._spawner.bootup(
+        if not self._spawner.bootup(
             command_lst=self._command_lst,
-        )
+            stop_function=self._stop_function,
+        ):
+            self._reset_socket()
+            return False
+        return True
 
     def shutdown(self, wait: bool = True):
         """
@@ -140,6 +151,10 @@ def shutdown(self, wait: bool = True):
                 input_dict={"shutdown": True, "wait": wait}
             )
             self._spawner.shutdown(wait=wait)
+        self._reset_socket()
+        return result
+
+    def _reset_socket(self):
         if self._socket is not None:
             self._socket.close()
         if self._context is not None:
@@ -147,7 +162,6 @@ def shutdown(self, wait: bool = True):
         self._process = None
         self._socket = None
         self._context = None
-        return result
 
     def __del__(self):
         """
@@ -163,7 +177,8 @@ def interface_bootup(
     hostname_localhost: Optional[bool] = None,
     log_obj_size: bool = False,
     worker_id: Optional[int] = None,
-) -> SocketInterface:
+    stop_function: Optional[Callable] = None,
+) -> Optional[SocketInterface]:
     """
     Start interface for ZMQ communication
 
@@ -202,10 +217,13 @@ def interface_bootup(
         "--zmqport",
         str(interface.bind_to_random_port()),
     ]
-    interface.bootup(
+    if interface.bootup(
         command_lst=command_lst,
-    )
-    return interface
+        stop_function=stop_function,
+    ):
+        return interface
+    else:
+        return None
 
 
 def interface_connect(host: str, port: str) -> tuple[zmq.Context, zmq.Socket]:
diff --git a/executorlib/standalone/interactive/spawner.py b/executorlib/standalone/interactive/spawner.py
index 4a5cb390..ce90052b 100644
--- a/executorlib/standalone/interactive/spawner.py
+++ b/executorlib/standalone/interactive/spawner.py
@@ -1,7 +1,7 @@
 import os
 import subprocess
 from abc import ABC, abstractmethod
-from typing import Optional
+from typing import Callable, Optional
 
 MPI_COMMAND = "mpiexec"
 
@@ -29,7 +29,8 @@ def __init__(
     def bootup(
         self,
         command_lst: list[str],
-    ):
+        stop_function: Optional[Callable] = None,
+    ) -> bool:
         """
         Method to start the interface.
 
@@ -87,7 +88,8 @@ def __init__(
     def bootup(
         self,
         command_lst: list[str],
-    ):
+        stop_function: Optional[Callable] = None,
+    ) -> bool:
         """
         Method to start the subprocess interface.
 
@@ -101,6 +103,7 @@ def bootup(
             cwd=self._cwd,
             stdin=subprocess.DEVNULL,
         )
+        return True
 
     def generate_command(self, command_lst: list[str]) -> list[str]:
         """
diff --git a/executorlib/task_scheduler/interactive/blockallocation.py b/executorlib/task_scheduler/interactive/blockallocation.py
index 96cec2c1..2e1d1f02 100644
--- a/executorlib/task_scheduler/interactive/blockallocation.py
+++ b/executorlib/task_scheduler/interactive/blockallocation.py
@@ -12,6 +12,8 @@
 from executorlib.task_scheduler.base import TaskSchedulerBase
 from executorlib.task_scheduler.interactive.shared import execute_tasks
 
+_task_schedulder_dict: dict = {}
+
 
 class BlockAllocationTaskScheduler(TaskSchedulerBase):
     """
@@ -61,11 +63,18 @@ def __init__(
         executor_kwargs["queue_join_on_shutdown"] = False
         self._process_kwargs = executor_kwargs
         self._max_workers = max_workers
+        self_id = id(self)
+        self._self_id = self_id
+        _task_schedulder_dict[self._self_id] = False
         self._set_process(
             process=[
                 Thread(
                     target=execute_tasks,
-                    kwargs=executor_kwargs | {"worker_id": worker_id},
+                    kwargs=executor_kwargs
+                    | {
+                        "worker_id": worker_id,
+                        "stop_function": lambda: _task_schedulder_dict[self_id],
+                    },
                 )
                 for worker_id in range(self._max_workers)
             ],
@@ -155,7 +164,9 @@ def shutdown(self, wait: bool = True, *, cancel_futures: bool = False):
         if self._future_queue is not None:
             if cancel_futures:
                 cancel_items_in_queue(que=self._future_queue)
+            self._shutdown_flag = True
             if isinstance(self._process, list):
+                _task_schedulder_dict[self._self_id] = True
                 for _ in range(len(self._process)):
                     self._future_queue.put({"shutdown": True, "wait": wait})
                 if wait:
diff --git a/executorlib/task_scheduler/interactive/fluxspawner.py b/executorlib/task_scheduler/interactive/fluxspawner.py
index 5a35dd5c..378bbe92 100644
--- a/executorlib/task_scheduler/interactive/fluxspawner.py
+++ b/executorlib/task_scheduler/interactive/fluxspawner.py
@@ -1,5 +1,5 @@
 import os
-from typing import Optional
+from typing import Callable, Optional
 
 import flux
 import flux.job
@@ -75,7 +75,8 @@ def __init__(
     def bootup(
         self,
         command_lst: list[str],
-    ):
+        stop_function: Optional[Callable] = None,
+    ) -> bool:
         """
         Boot up the client process to connect to the SocketInterface.
 
@@ -126,6 +127,7 @@ def bootup(
             )
         else:
             self._future = self._flux_executor.submit(jobspec=jobspec)
+        return True
 
     def shutdown(self, wait: bool = True):
         """
diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/pysqaspawner.py
index 73a8cb87..31f57c8b 100644
--- a/executorlib/task_scheduler/interactive/pysqaspawner.py
+++ b/executorlib/task_scheduler/interactive/pysqaspawner.py
@@ -58,6 +58,7 @@ def __init__(
     def bootup(
         self,
         command_lst: list[str],
+        stop_function: Optional[Callable] = None,
     ):
         """
         Method to start the subprocess interface.
@@ -76,7 +77,10 @@ def bootup(
         )
         while True:
             if self._check_process_helper(command_lst=command_lst):
-                break
+                return True
+            elif stop_function is not None and stop_function():
+                self.shutdown(wait=True)
+                return False
             else:
                 sleep(1)  # Wait for the process to start
 
diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index 883c3dac..fea9f86a 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -28,6 +28,7 @@ def execute_tasks(
     log_obj_size: bool = False,
     error_log_file: Optional[str] = None,
     worker_id: Optional[int] = None,
+    stop_function: Optional[Callable] = None,
     **kwargs,
 ) -> None:
     """
@@ -63,15 +64,17 @@ def execute_tasks(
         hostname_localhost=hostname_localhost,
         log_obj_size=log_obj_size,
         worker_id=worker_id,
+        stop_function=stop_function,
     )
-    if init_function is not None:
+    if init_function is not None and interface is not None:
         interface.send_dict(
             input_dict={"init": True, "fn": init_function, "args": (), "kwargs": {}}
         )
     while True:
         task_dict = future_queue.get()
         if "shutdown" in task_dict and task_dict["shutdown"]:
-            interface.shutdown(wait=task_dict["wait"])
+            if interface is not None:
+                interface.shutdown(wait=task_dict["wait"])
             _task_done(future_queue=future_queue)
             if queue_join_on_shutdown:
                 future_queue.join()
@@ -79,23 +82,31 @@ def execute_tasks(
         elif "fn" in task_dict and "future" in task_dict:
             if error_log_file is not None:
                 task_dict["error_log_file"] = error_log_file
-            if cache_directory is None:
-                _execute_task_without_cache(
-                    interface=interface, task_dict=task_dict, future_queue=future_queue
+            if cache_directory is None and interface is not None:
+                result_flag = _execute_task_without_cache(
+                    interface=interface,
+                    task_dict=task_dict,
+                    future_queue=future_queue,
                 )
-            else:
-                _execute_task_with_cache(
+            elif cache_directory is not None and interface is not None:
+                result_flag = _execute_task_with_cache(
                     interface=interface,
                     task_dict=task_dict,
                     future_queue=future_queue,
                     cache_directory=cache_directory,
                     cache_key=cache_key,
                 )
+            else:
+                raise ValueError()
+            if not result_flag:
+                if queue_join_on_shutdown:
+                    future_queue.join()
+                break
 
 
 def _execute_task_without_cache(
     interface: SocketInterface, task_dict: dict, future_queue: queue.Queue
-):
+) -> bool:
     """
     Execute the task in the task_dict by communicating it via the interface.
 
@@ -114,13 +125,14 @@ def _execute_task_without_cache(
                 _reset_task_dict(
                     future_obj=f, future_queue=future_queue, task_dict=task_dict
                 )
-                interface.restart()
+                return interface.restart()
             else:
                 interface.shutdown(wait=True)
                 _task_done(future_queue=future_queue)
                 f.set_exception(exception=thread_exception)
         else:
             _task_done(future_queue=future_queue)
+    return True
 
 
 def _execute_task_with_cache(
@@ -129,7 +141,7 @@ def _execute_task_with_cache(
     future_queue: queue.Queue,
     cache_directory: str,
     cache_key: Optional[str] = None,
-):
+) -> bool:
     """
     Execute the task in the task_dict by communicating it via the interface using the cache in the cache directory.
 
@@ -167,7 +179,7 @@ def _execute_task_with_cache(
                     _reset_task_dict(
                         future_obj=f, future_queue=future_queue, task_dict=task_dict
                     )
-                    interface.restart()
+                    return interface.restart()
                 else:
                     interface.shutdown(wait=True)
                     _task_done(future_queue=future_queue)
@@ -180,6 +192,7 @@ def _execute_task_with_cache(
         future = task_dict["future"]
         future.set_result(result)
         _task_done(future_queue=future_queue)
+    return True
 
 
 def _task_done(future_queue: queue.Queue):

From 79842e6efc08ef0ea80499725a4806be49de3333 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Sun, 31 Aug 2025 12:26:22 +0200
Subject: [PATCH 46/83] merge changes

---
 executorlib/task_scheduler/interactive/shared.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index 69c2d2e9..404fde91 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -81,7 +81,7 @@ def execute_multiple_tasks(
             break
         elif "fn" in task_dict and "future" in task_dict:
             if interface is not None:
-                result_flag =_execute_task_dict(
+                result_flag = _execute_task_dict(
                     task_dict=task_dict,
                     interface=interface,
                     cache_directory=cache_directory,

From 8551eda9d92b6b97625d27ed3d5732bd4dd36e67 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Sun, 31 Aug 2025 12:27:06 +0200
Subject: [PATCH 47/83] fix docstring

---
 executorlib/task_scheduler/interactive/shared.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index 404fde91..16fd9002 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -113,7 +113,8 @@ def execute_single_task(
     Execute a single tasks in parallel using the message passing interface (MPI).
 
     Args:
-        future_queue (queue.Queue): task queue of dictionary objects which are submitted to the parallel process
+        task_dict (dict): task submitted to the executor as dictionary. This dictionary has the following keys
+                          {"fn": Callable, "args": (), "kwargs": {}, "resource_dict": {}}
         cores (int): defines the total number of MPI ranks to use
         spawner (BaseSpawner): Spawner to start process on selected compute resources
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
@@ -123,11 +124,9 @@ def execute_single_task(
                                       points to the same address as localhost. Still MacOS >= 12 seems to disable
                                       this look up for security reasons. So on MacOS it is required to set this
                                       option to true
-        init_function (Callable): optional function to preset arguments for functions which are submitted later
         cache_directory (str, optional): The directory to store cache files. Defaults to "executorlib_cache".
         cache_key (str, optional): By default the cache_key is generated based on the function hash, this can be
                                    overwritten by setting the cache_key.
-        queue_join_on_shutdown (bool): Join communication queue when thread is closed. Defaults to True.
         log_obj_size (bool): Enable debug mode which reports the size of the communicated objects.
         error_log_file (str): Name of the error log file to use for storing exceptions raised by the Python functions
                               submitted to the Executor.

From a969dd9929150d955282e12907e1d10c62e91642 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Sun, 31 Aug 2025 12:33:01 +0200
Subject: [PATCH 48/83] fixes

---
 .../task_scheduler/interactive/shared.py      | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index 16fd9002..bea60460 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -88,7 +88,15 @@ def execute_multiple_tasks(
                     cache_key=cache_key,
                     error_log_file=error_log_file,
                 )
-                _task_done(future_queue=future_queue)
+                if not result_flag:
+                    _task_done(future_queue=future_queue)
+                    f = task_dict.pop("future")
+                    _reset_task_dict(
+                        future_obj=f, future_queue=future_queue, task_dict=task_dict
+                    )
+                    interface.restart()
+                else:
+                    _task_done(future_queue=future_queue)
             else:
                 raise ValueError()
             if not result_flag:
@@ -198,10 +206,7 @@ def _execute_task_without_cache(interface: SocketInterface, task_dict: dict) ->
             f.set_result(interface.send_and_receive_dict(input_dict=task_dict))
         except Exception as thread_exception:
             if isinstance(thread_exception, ExecutorlibSocketError):
-                _reset_task_dict(
-                    future_obj=f, future_queue=future_queue, task_dict=task_dict
-                )
-                return interface.restart()
+                return False
             else:
                 interface.shutdown(wait=True)
                 f.set_exception(exception=thread_exception)
@@ -247,10 +252,7 @@ def _execute_task_with_cache(
                 f.set_result(result)
             except Exception as thread_exception:
                 if isinstance(thread_exception, ExecutorlibSocketError):
-                    _reset_task_dict(
-                        future_obj=f, future_queue=future_queue, task_dict=task_dict
-                    )
-                    return interface.restart()
+                    return False
                 else:
                     interface.shutdown(wait=True)
                     f.set_exception(exception=thread_exception)

From 928465b6099993115e8fcca95e0535524b5863ac Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Sun, 31 Aug 2025 12:37:32 +0200
Subject: [PATCH 49/83] fix types

---
 .../task_scheduler/interactive/shared.py      | 38 +++++++++----------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index bea60460..91b5953e 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -80,25 +80,23 @@ def execute_multiple_tasks(
                 future_queue.join()
             break
         elif "fn" in task_dict and "future" in task_dict:
-            if interface is not None:
-                result_flag = _execute_task_dict(
-                    task_dict=task_dict,
-                    interface=interface,
-                    cache_directory=cache_directory,
-                    cache_key=cache_key,
-                    error_log_file=error_log_file,
+            result_flag = _execute_task_dict(
+                task_dict=task_dict,
+                interface=interface,
+                cache_directory=cache_directory,
+                cache_key=cache_key,
+                error_log_file=error_log_file,
+            )
+            if not result_flag:
+                _task_done(future_queue=future_queue)
+                f = task_dict.pop("future")
+                _reset_task_dict(
+                    future_obj=f, future_queue=future_queue, task_dict=task_dict
                 )
-                if not result_flag:
-                    _task_done(future_queue=future_queue)
-                    f = task_dict.pop("future")
-                    _reset_task_dict(
-                        future_obj=f, future_queue=future_queue, task_dict=task_dict
-                    )
+                if interface is not None:
                     interface.restart()
-                else:
-                    _task_done(future_queue=future_queue)
             else:
-                raise ValueError()
+                _task_done(future_queue=future_queue)
             if not result_flag:
                 if queue_join_on_shutdown:
                     future_queue.join()
@@ -160,7 +158,7 @@ def execute_single_task(
 
 def _execute_task_dict(
     task_dict: dict,
-    interface: SocketInterface,
+    interface: Optional[SocketInterface] = None,
     cache_directory: Optional[str] = None,
     cache_key: Optional[str] = None,
     error_log_file: Optional[str] = None,
@@ -180,15 +178,17 @@ def _execute_task_dict(
     """
     if error_log_file is not None:
         task_dict["error_log_file"] = error_log_file
-    if cache_directory is None:
+    if cache_directory is None and interface is not None:
         return _execute_task_without_cache(interface=interface, task_dict=task_dict)
-    else:
+    elif cache_directory is not None and interface is not None:
         return _execute_task_with_cache(
             interface=interface,
             task_dict=task_dict,
             cache_directory=cache_directory,
             cache_key=cache_key,
         )
+    else:
+        raise ValueError()
 
 
 def _execute_task_without_cache(interface: SocketInterface, task_dict: dict) -> bool:

From 3e36a5a1b2071186265e7f1aa1a603c4f7405461 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Sun, 31 Aug 2025 13:29:05 +0200
Subject: [PATCH 50/83] consistent naming scheme

---
 executorlib/executor/flux.py                                    | 2 +-
 executorlib/executor/slurm.py                                   | 2 +-
 .../interactive/{pysqaspawner.py => spawner_pysqa.py}           | 0
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename executorlib/task_scheduler/interactive/{pysqaspawner.py => spawner_pysqa.py} (100%)

diff --git a/executorlib/executor/flux.py b/executorlib/executor/flux.py
index e8952047..4d7c2826 100644
--- a/executorlib/executor/flux.py
+++ b/executorlib/executor/flux.py
@@ -358,7 +358,7 @@ def __init__(
             import pysqa  # noqa
 
             if block_allocation:
-                from executorlib.task_scheduler.interactive.pysqaspawner import (
+                from executorlib.task_scheduler.interactive.spawner_pysqa import (
                     create_pysqa_block_allocation_scheduler,
                 )
 
diff --git a/executorlib/executor/slurm.py b/executorlib/executor/slurm.py
index 2624dd91..f0bd3342 100644
--- a/executorlib/executor/slurm.py
+++ b/executorlib/executor/slurm.py
@@ -166,7 +166,7 @@ def __init__(
             import pysqa  # noqa
 
             if block_allocation:
-                from executorlib.task_scheduler.interactive.pysqaspawner import (
+                from executorlib.task_scheduler.interactive.spawner_pysqa import (
                     create_pysqa_block_allocation_scheduler,
                 )
 
diff --git a/executorlib/task_scheduler/interactive/pysqaspawner.py b/executorlib/task_scheduler/interactive/spawner_pysqa.py
similarity index 100%
rename from executorlib/task_scheduler/interactive/pysqaspawner.py
rename to executorlib/task_scheduler/interactive/spawner_pysqa.py

From ad6ca173f8910ea8732ab027fe2bbff0ff43400e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 31 Aug 2025 15:56:13 +0000
Subject: [PATCH 51/83] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 executorlib/task_scheduler/interactive/blockallocation.py | 6 +++++-
 executorlib/task_scheduler/interactive/shared.py          | 5 ++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/blockallocation.py b/executorlib/task_scheduler/interactive/blockallocation.py
index b334b528..2142ea2f 100644
--- a/executorlib/task_scheduler/interactive/blockallocation.py
+++ b/executorlib/task_scheduler/interactive/blockallocation.py
@@ -12,7 +12,11 @@
 from executorlib.standalone.interactive.spawner import BaseSpawner, MpiExecSpawner
 from executorlib.standalone.queue import cancel_items_in_queue
 from executorlib.task_scheduler.base import TaskSchedulerBase
-from executorlib.task_scheduler.interactive.shared import execute_task_dict, task_done, reset_task_dict
+from executorlib.task_scheduler.interactive.shared import (
+    execute_task_dict,
+    reset_task_dict,
+    task_done,
+)
 
 _task_schedulder_dict: dict = {}
 
diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index 38994e0d..4d61de7a 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -6,7 +6,10 @@
 from concurrent.futures._base import PENDING
 from typing import Optional
 
-from executorlib.standalone.interactive.communication import ExecutorlibSocketError, SocketInterface
+from executorlib.standalone.interactive.communication import (
+    ExecutorlibSocketError,
+    SocketInterface,
+)
 from executorlib.standalone.serialize import serialize_funct
 
 

From f1d0afffd13bc57ecafe8febed1bda50b1457e53 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Sun, 31 Aug 2025 18:05:05 +0200
Subject: [PATCH 52/83] remove duplicated task_done() call

---
 executorlib/task_scheduler/interactive/blockallocation.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/blockallocation.py b/executorlib/task_scheduler/interactive/blockallocation.py
index b334b528..1a3bdde6 100644
--- a/executorlib/task_scheduler/interactive/blockallocation.py
+++ b/executorlib/task_scheduler/interactive/blockallocation.py
@@ -261,7 +261,6 @@ def _execute_multiple_tasks(
                 cache_key=cache_key,
                 error_log_file=error_log_file,
             )
-            task_done(future_queue=future_queue)
             if not result_flag:
                 task_done(future_queue=future_queue)
                 f = task_dict.pop("future")

From 88d0cd60363fe00a01cf72e5940e08ae0b914688 Mon Sep 17 00:00:00 2001
From: jan-janssen <jan.janssen@outlook.com>
Date: Sun, 31 Aug 2025 18:33:29 +0200
Subject: [PATCH 53/83] fixes

---
 executorlib/task_scheduler/interactive/blockallocation.py | 5 ++++-
 executorlib/task_scheduler/interactive/shared.py          | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/blockallocation.py b/executorlib/task_scheduler/interactive/blockallocation.py
index 1e969603..e02cf741 100644
--- a/executorlib/task_scheduler/interactive/blockallocation.py
+++ b/executorlib/task_scheduler/interactive/blockallocation.py
@@ -234,6 +234,7 @@ def _execute_multiple_tasks(
        worker_id (int): Communicate the worker which ID was assigned to it for future reference and resource
                         distribution.
     """
+    # The interface becomes None when the job was cancelled before computing resources were allocated. 
     interface = interface_bootup(
         command_lst=get_interactive_execute_command(
             cores=cores,
@@ -259,7 +260,7 @@ def _execute_multiple_tasks(
             break
         elif "fn" in task_dict and "future" in task_dict:
             result_flag = execute_task_dict(
-                task_dict=task_dict,
+                task_dict=task_dict.copy(),  # this copy is expensive and should be fixed
                 interface=interface,
                 cache_directory=cache_directory,
                 cache_key=cache_key,
@@ -273,5 +274,7 @@ def _execute_multiple_tasks(
                 )
                 if interface is not None:
                     interface.restart()
+                else:
+                    break
             else:
                 task_done(future_queue=future_queue)
diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index 4d61de7a..80fa0acc 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -45,7 +45,7 @@ def execute_task_dict(
             cache_key=cache_key,
         )
     else:
-        raise ValueError()
+        return False
 
 
 def task_done(future_queue: queue.Queue):

From 68b14082e6b0967004805f20e9bb455aa4fd1e4a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 31 Aug 2025 16:33:38 +0000
Subject: [PATCH 54/83] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 executorlib/task_scheduler/interactive/blockallocation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/blockallocation.py b/executorlib/task_scheduler/interactive/blockallocation.py
index e02cf741..f71afa41 100644
--- a/executorlib/task_scheduler/interactive/blockallocation.py
+++ b/executorlib/task_scheduler/interactive/blockallocation.py
@@ -234,7 +234,7 @@ def _execute_multiple_tasks(
        worker_id (int): Communicate the worker which ID was assigned to it for future reference and resource
                         distribution.
     """
-    # The interface becomes None when the job was cancelled before computing resources were allocated. 
+    # The interface becomes None when the job was cancelled before computing resources were allocated.
     interface = interface_bootup(
         command_lst=get_interactive_execute_command(
             cores=cores,

From 14580797e1fa8a3c6b9c159d01de74d5dfa39c3b Mon Sep 17 00:00:00 2001
From: jan-janssen <jan.janssen@outlook.com>
Date: Sun, 31 Aug 2025 18:41:06 +0200
Subject: [PATCH 55/83] cancel items in queue

---
 executorlib/task_scheduler/interactive/blockallocation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/blockallocation.py b/executorlib/task_scheduler/interactive/blockallocation.py
index f71afa41..0c22fc15 100644
--- a/executorlib/task_scheduler/interactive/blockallocation.py
+++ b/executorlib/task_scheduler/interactive/blockallocation.py
@@ -172,12 +172,13 @@ def shutdown(self, wait: bool = True, *, cancel_futures: bool = False):
                 cancel_items_in_queue(que=self._future_queue)
             self._shutdown_flag = True
             if isinstance(self._process, list):
-                _task_schedulder_dict[self._self_id] = True
+                _task_schedulder_dict[self._self_id] = True  # This is a hard shutdown
                 for _ in range(len(self._process)):
                     self._future_queue.put({"shutdown": True, "wait": wait})
                 if wait:
                     for process in self._process:
                         process.join()
+                    cancel_items_in_queue(que=self._future_queue)
                     self._future_queue.join()
         self._process = None
         self._future_queue = None

From 095385f6c7fb0428de0fc7a63a738a5f556e2698 Mon Sep 17 00:00:00 2001
From: jan-janssen <jan.janssen@outlook.com>
Date: Sun, 31 Aug 2025 19:00:45 +0200
Subject: [PATCH 56/83] fixes

---
 .../interactive/blockallocation.py            |  5 ++-
 .../task_scheduler/interactive/onetoone.py    |  8 +++-
 .../task_scheduler/interactive/shared.py      | 42 +++++++++++++------
 3 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/blockallocation.py b/executorlib/task_scheduler/interactive/blockallocation.py
index a3d5c7c8..929a5d45 100644
--- a/executorlib/task_scheduler/interactive/blockallocation.py
+++ b/executorlib/task_scheduler/interactive/blockallocation.py
@@ -260,8 +260,10 @@ def _execute_multiple_tasks(
                 future_queue.join()
             break
         elif "fn" in task_dict and "future" in task_dict:
+            f = task_dict.pop("future")
             result_flag = execute_task_dict(
-                task_dict=task_dict.copy(),  # this copy is expensive and should be fixed
+                future_obj=f,
+                task_dict=task_dict,
                 interface=interface,
                 cache_directory=cache_directory,
                 cache_key=cache_key,
@@ -269,7 +271,6 @@ def _execute_multiple_tasks(
             )
             if not result_flag:
                 task_done(future_queue=future_queue)
-                f = task_dict.pop("future")
                 reset_task_dict(
                     future_obj=f, future_queue=future_queue, task_dict=task_dict
                 )
diff --git a/executorlib/task_scheduler/interactive/onetoone.py b/executorlib/task_scheduler/interactive/onetoone.py
index b3ffddbd..dbdd2c82 100644
--- a/executorlib/task_scheduler/interactive/onetoone.py
+++ b/executorlib/task_scheduler/interactive/onetoone.py
@@ -1,6 +1,7 @@
 import queue
 from threading import Thread
 from typing import Optional
+from concurrent.futures import Future
 
 from executorlib.standalone.command import get_interactive_execute_command
 from executorlib.standalone.interactive.communication import interface_bootup
@@ -186,6 +187,7 @@ def _wrap_execute_task_in_separate_process(
                              dictionary containing the future objects and the number of cores they require
     """
     resource_dict = task_dict.pop("resource_dict").copy()
+    f = task_dict.pop("future")
     if "cores" not in resource_dict or (
         resource_dict["cores"] == 1 and executor_kwargs["cores"] >= 1
     ):
@@ -197,12 +199,13 @@ def _wrap_execute_task_in_separate_process(
         max_cores=max_cores,
         max_workers=max_workers,
     )
-    active_task_dict[task_dict["future"]] = slots_required
+    active_task_dict[f] = slots_required
     task_kwargs = executor_kwargs.copy()
     task_kwargs.update(resource_dict)
     task_kwargs.update(
         {
             "task_dict": task_dict,
+            "future_obj": f,
             "spawner": spawner,
             "hostname_localhost": hostname_localhost,
         }
@@ -217,6 +220,7 @@ def _wrap_execute_task_in_separate_process(
 
 def _execute_task_in_thread(
     task_dict: dict,
+    future_obj: Future,
     cores: int = 1,
     spawner: type[BaseSpawner] = MpiExecSpawner,
     hostname_localhost: Optional[bool] = None,
@@ -233,6 +237,7 @@ def _execute_task_in_thread(
     Args:
         task_dict (dict): task submitted to the executor as dictionary. This dictionary has the following keys
                           {"fn": Callable, "args": (), "kwargs": {}, "resource_dict": {}}
+        future_obj (Future): A Future representing the given call.
         cores (int): defines the total number of MPI ranks to use
         spawner (BaseSpawner): Spawner to start process on selected compute resources
         hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
@@ -253,6 +258,7 @@ def _execute_task_in_thread(
     """
     execute_task_dict(
         task_dict=task_dict,
+        future_obj=future_obj,
         interface=interface_bootup(
             command_lst=get_interactive_execute_command(
                 cores=cores,
diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index 80fa0acc..cf4de6ff 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -15,6 +15,7 @@
 
 def execute_task_dict(
     task_dict: dict,
+    future_obj: Future,
     interface: Optional[SocketInterface] = None,
     cache_directory: Optional[str] = None,
     cache_key: Optional[str] = None,
@@ -26,6 +27,7 @@ def execute_task_dict(
     Args:
         task_dict (dict): task submitted to the executor as dictionary. This dictionary has the following keys
                           {"fn": Callable, "args": (), "kwargs": {}, "resource_dict": {}}
+        future_obj (Future): A Future representing the given call.
         interface (SocketInterface): socket interface for zmq communication
         cache_directory (str, optional): The directory to store cache files. Defaults to "executorlib_cache".
         cache_key (str, optional): By default the cache_key is generated based on the function hash, this can be
@@ -36,30 +38,46 @@ def execute_task_dict(
     if error_log_file is not None:
         task_dict["error_log_file"] = error_log_file
     if cache_directory is None and interface is not None:
-        return _execute_task_without_cache(interface=interface, task_dict=task_dict)
+        return _execute_task_without_cache(interface=interface, task_dict=task_dict, future_obj=future_obj)
     elif cache_directory is not None and interface is not None:
         return _execute_task_with_cache(
             interface=interface,
             task_dict=task_dict,
             cache_directory=cache_directory,
             cache_key=cache_key,
+            future_obj=future_obj,
         )
     else:
         return False
 
 
 def task_done(future_queue: queue.Queue):
+    """
+    Mark the current task as done in the current queue. 
+    
+    Args:
+        future_queue (queue): Queue of task dictionaries waiting for execution.
+    """
     with contextlib.suppress(ValueError):
         future_queue.task_done()
 
 
 def reset_task_dict(future_obj: Future, future_queue: queue.Queue, task_dict: dict):
+    """
+    Reset the task dictionary for resubmission to the queue.
+
+    Args:
+        future_obj (Future): A Future representing the given call.
+        future_queue (queue): Queue of task dictionaries waiting for execution.
+        task_dict (dict): task submitted to the executor as dictionary. This dictionary has the following keys
+                          {"fn": Callable, "args": (), "kwargs": {}, "resource_dict": {}}
+    """
     future_obj._state = PENDING
     _task_done(future_queue=future_queue)
     future_queue.put(task_dict | {"future": future_obj})
 
 
-def _execute_task_without_cache(interface: SocketInterface, task_dict: dict) -> bool:
+def _execute_task_without_cache(interface: SocketInterface, task_dict: dict, future_obj: Future) -> bool:
     """
     Execute the task in the task_dict by communicating it via the interface.
 
@@ -67,23 +85,24 @@ def _execute_task_without_cache(interface: SocketInterface, task_dict: dict) ->
         interface (SocketInterface): socket interface for zmq communication
         task_dict (dict): task submitted to the executor as dictionary. This dictionary has the following keys
                           {"fn": Callable, "args": (), "kwargs": {}, "resource_dict": {}}
+        future_obj (Future): A Future representing the given call.
     """
-    f = task_dict.pop("future")
-    if not f.done() and f.set_running_or_notify_cancel():
+    if not future_obj.done() and future_obj.set_running_or_notify_cancel():
         try:
-            f.set_result(interface.send_and_receive_dict(input_dict=task_dict))
+            future_obj.set_result(interface.send_and_receive_dict(input_dict=task_dict))
         except Exception as thread_exception:
             if isinstance(thread_exception, ExecutorlibSocketError):
                 return False
             else:
                 interface.shutdown(wait=True)
-                f.set_exception(exception=thread_exception)
+                future_obj.set_exception(exception=thread_exception)
     return True
 
 
 def _execute_task_with_cache(
     interface: SocketInterface,
     task_dict: dict,
+    future_obj: Future,
     cache_directory: str,
     cache_key: Optional[str] = None,
 ) -> bool:
@@ -94,6 +113,7 @@ def _execute_task_with_cache(
         interface (SocketInterface): socket interface for zmq communication
         task_dict (dict): task submitted to the executor as dictionary. This dictionary has the following keys
                           {"fn": Callable, "args": (), "kwargs": {}, "resource_dict": {}}
+        future_obj (Future): A Future representing the given call.
         cache_directory (str): The directory to store cache files.
         cache_key (str, optional): By default the cache_key is generated based on the function hash, this can be
                                   overwritten by setting the cache_key.
@@ -109,25 +129,23 @@ def _execute_task_with_cache(
     )
     file_name = os.path.abspath(os.path.join(cache_directory, task_key + "_o.h5"))
     if file_name not in get_cache_files(cache_directory=cache_directory):
-        f = task_dict.pop("future")
-        if f.set_running_or_notify_cancel():
+        if future_obj.set_running_or_notify_cancel():
             try:
                 time_start = time.time()
                 result = interface.send_and_receive_dict(input_dict=task_dict)
                 data_dict["output"] = result
                 data_dict["runtime"] = time.time() - time_start
                 dump(file_name=file_name, data_dict=data_dict)
-                f.set_result(result)
+                future_obj.set_result(result)
             except Exception as thread_exception:
                 if isinstance(thread_exception, ExecutorlibSocketError):
                     return False
                 else:
                     interface.shutdown(wait=True)
-                    f.set_exception(exception=thread_exception)
+                    future_obj.set_exception(exception=thread_exception)
     else:
         _, _, result = get_output(file_name=file_name)
-        future = task_dict["future"]
-        future.set_result(result)
+        future_obj.set_result(result)
     return True
 
 

From ae7ac003f1e2c5442b95cdecafa1224e711e9383 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 31 Aug 2025 17:00:54 +0000
Subject: [PATCH 57/83] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 executorlib/task_scheduler/interactive/onetoone.py |  2 +-
 executorlib/task_scheduler/interactive/shared.py   | 12 ++++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/onetoone.py b/executorlib/task_scheduler/interactive/onetoone.py
index dbdd2c82..c6f678cd 100644
--- a/executorlib/task_scheduler/interactive/onetoone.py
+++ b/executorlib/task_scheduler/interactive/onetoone.py
@@ -1,7 +1,7 @@
 import queue
+from concurrent.futures import Future
 from threading import Thread
 from typing import Optional
-from concurrent.futures import Future
 
 from executorlib.standalone.command import get_interactive_execute_command
 from executorlib.standalone.interactive.communication import interface_bootup
diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index cf4de6ff..74032551 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -38,7 +38,9 @@ def execute_task_dict(
     if error_log_file is not None:
         task_dict["error_log_file"] = error_log_file
     if cache_directory is None and interface is not None:
-        return _execute_task_without_cache(interface=interface, task_dict=task_dict, future_obj=future_obj)
+        return _execute_task_without_cache(
+            interface=interface, task_dict=task_dict, future_obj=future_obj
+        )
     elif cache_directory is not None and interface is not None:
         return _execute_task_with_cache(
             interface=interface,
@@ -53,8 +55,8 @@ def execute_task_dict(
 
 def task_done(future_queue: queue.Queue):
     """
-    Mark the current task as done in the current queue. 
-    
+    Mark the current task as done in the current queue.
+
     Args:
         future_queue (queue): Queue of task dictionaries waiting for execution.
     """
@@ -77,7 +79,9 @@ def reset_task_dict(future_obj: Future, future_queue: queue.Queue, task_dict: di
     future_queue.put(task_dict | {"future": future_obj})
 
 
-def _execute_task_without_cache(interface: SocketInterface, task_dict: dict, future_obj: Future) -> bool:
+def _execute_task_without_cache(
+    interface: SocketInterface, task_dict: dict, future_obj: Future
+) -> bool:
     """
     Execute the task in the task_dict by communicating it via the interface.
 

From c7f9eaf50392bb48470e5d6fc6bfb5021ae8ac89 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Sun, 31 Aug 2025 20:17:15 +0200
Subject: [PATCH 58/83] fix return

---
 executorlib/task_scheduler/interactive/shared.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index 2ba6126b..3cf20362 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -102,6 +102,7 @@ def _execute_task_without_cache(
         else:
             interface.shutdown(wait=True)
             future_obj.set_exception(exception=thread_exception)
+    return True
 
 
 def _execute_task_with_cache(

From d4babd8d651faf8f2597620b5fd66bbfe54f4ac1 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Sun, 31 Aug 2025 20:21:50 +0200
Subject: [PATCH 59/83] fix duplicated arguments

---
 executorlib/task_scheduler/interactive/onetoone.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/onetoone.py b/executorlib/task_scheduler/interactive/onetoone.py
index 74182203..c6f678cd 100644
--- a/executorlib/task_scheduler/interactive/onetoone.py
+++ b/executorlib/task_scheduler/interactive/onetoone.py
@@ -208,7 +208,6 @@ def _wrap_execute_task_in_separate_process(
             "future_obj": f,
             "spawner": spawner,
             "hostname_localhost": hostname_localhost,
-            "future_obj": f,
         }
     )
     process = Thread(

From 72da39d43c523e7edc5f3ce92952f974539eeb90 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Sun, 31 Aug 2025 20:44:48 +0200
Subject: [PATCH 60/83] resort

---
 executorlib/task_scheduler/interactive/onetoone.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/onetoone.py b/executorlib/task_scheduler/interactive/onetoone.py
index c6f678cd..3b631565 100644
--- a/executorlib/task_scheduler/interactive/onetoone.py
+++ b/executorlib/task_scheduler/interactive/onetoone.py
@@ -205,9 +205,9 @@ def _wrap_execute_task_in_separate_process(
     task_kwargs.update(
         {
             "task_dict": task_dict,
-            "future_obj": f,
             "spawner": spawner,
             "hostname_localhost": hostname_localhost,
+            "future_obj": f,
         }
     )
     process = Thread(

From 60e2deeed161ca5a5cfd8d72eacd26bea0fefeea Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Sun, 31 Aug 2025 21:18:39 +0200
Subject: [PATCH 61/83] remove unused statement

---
 executorlib/task_scheduler/interactive/blockallocation.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/blockallocation.py b/executorlib/task_scheduler/interactive/blockallocation.py
index 929a5d45..756068ce 100644
--- a/executorlib/task_scheduler/interactive/blockallocation.py
+++ b/executorlib/task_scheduler/interactive/blockallocation.py
@@ -170,7 +170,6 @@ def shutdown(self, wait: bool = True, *, cancel_futures: bool = False):
         if self._future_queue is not None:
             if cancel_futures:
                 cancel_items_in_queue(que=self._future_queue)
-            self._shutdown_flag = True
             if isinstance(self._process, list):
                 _task_schedulder_dict[self._self_id] = True  # This is a hard shutdown
                 for _ in range(len(self._process)):

From dbf3e65224a681f01b213b3833f49b203b1d5ba6 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Sun, 31 Aug 2025 21:55:49 +0200
Subject: [PATCH 62/83] rename variable

---
 executorlib/task_scheduler/interactive/blockallocation.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/blockallocation.py b/executorlib/task_scheduler/interactive/blockallocation.py
index 756068ce..fdb1e5c6 100644
--- a/executorlib/task_scheduler/interactive/blockallocation.py
+++ b/executorlib/task_scheduler/interactive/blockallocation.py
@@ -18,7 +18,7 @@
     task_done,
 )
 
-_task_schedulder_dict: dict = {}
+_interrupt_interface_bootup_dict: dict = {}
 
 
 class BlockAllocationTaskScheduler(TaskSchedulerBase):
@@ -71,7 +71,7 @@ def __init__(
         self._max_workers = max_workers
         self_id = id(self)
         self._self_id = self_id
-        _task_schedulder_dict[self._self_id] = False
+        _interrupt_interface_bootup_dict[self._self_id] = False
         self._set_process(
             process=[
                 Thread(
@@ -79,7 +79,7 @@ def __init__(
                     kwargs=executor_kwargs
                     | {
                         "worker_id": worker_id,
-                        "stop_function": lambda: _task_schedulder_dict[self_id],
+                        "stop_function": lambda: _interrupt_interface_bootup_dict[self_id],
                     },
                 )
                 for worker_id in range(self._max_workers)
@@ -171,7 +171,7 @@ def shutdown(self, wait: bool = True, *, cancel_futures: bool = False):
             if cancel_futures:
                 cancel_items_in_queue(que=self._future_queue)
             if isinstance(self._process, list):
-                _task_schedulder_dict[self._self_id] = True  # This is a hard shutdown
+                _interrupt_interface_bootup_dict[self._self_id] = True
                 for _ in range(len(self._process)):
                     self._future_queue.put({"shutdown": True, "wait": wait})
                 if wait:

From 6b73ab7b928299aac05f67bdbe94bd9ac7b887f2 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 31 Aug 2025 19:55:57 +0000
Subject: [PATCH 63/83] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 executorlib/task_scheduler/interactive/blockallocation.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/blockallocation.py b/executorlib/task_scheduler/interactive/blockallocation.py
index fdb1e5c6..bf74ff5b 100644
--- a/executorlib/task_scheduler/interactive/blockallocation.py
+++ b/executorlib/task_scheduler/interactive/blockallocation.py
@@ -79,7 +79,9 @@ def __init__(
                     kwargs=executor_kwargs
                     | {
                         "worker_id": worker_id,
-                        "stop_function": lambda: _interrupt_interface_bootup_dict[self_id],
+                        "stop_function": lambda: _interrupt_interface_bootup_dict[
+                            self_id
+                        ],
                     },
                 )
                 for worker_id in range(self._max_workers)

From fd9b630c8d0a0bbe5e4cd7235c7fed0687b3f028 Mon Sep 17 00:00:00 2001
From: Jan Janssen <jan-janssen@users.noreply.github.com>
Date: Sun, 31 Aug 2025 22:28:47 +0200
Subject: [PATCH 64/83] Update shared.py

---
 executorlib/task_scheduler/interactive/shared.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/shared.py b/executorlib/task_scheduler/interactive/shared.py
index 3cf20362..b81a1c93 100644
--- a/executorlib/task_scheduler/interactive/shared.py
+++ b/executorlib/task_scheduler/interactive/shared.py
@@ -152,8 +152,3 @@ def _execute_task_with_cache(
         _, _, result = get_output(file_name=file_name)
         future_obj.set_result(result)
     return True
-
-
-def _task_done(future_queue: queue.Queue):
-    with contextlib.suppress(ValueError):
-        future_queue.task_done()

From b60d3a205957530a03e08985613026a3e5952037 Mon Sep 17 00:00:00 2001
From: Jan Janssen <jan-janssen@users.noreply.github.com>
Date: Sun, 31 Aug 2025 22:30:22 +0200
Subject: [PATCH 65/83] Update blockallocation.py

---
 .../task_scheduler/interactive/blockallocation.py   | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/blockallocation.py b/executorlib/task_scheduler/interactive/blockallocation.py
index bf74ff5b..54aaf4cc 100644
--- a/executorlib/task_scheduler/interactive/blockallocation.py
+++ b/executorlib/task_scheduler/interactive/blockallocation.py
@@ -18,7 +18,7 @@
     task_done,
 )
 
-_interrupt_interface_bootup_dict: dict = {}
+_interrupt_bootup_dict: dict = {}
 
 
 class BlockAllocationTaskScheduler(TaskSchedulerBase):
@@ -71,17 +71,14 @@ def __init__(
         self._max_workers = max_workers
         self_id = id(self)
         self._self_id = self_id
-        _interrupt_interface_bootup_dict[self._self_id] = False
+        _interrupt_bootup_dict[self._self_id] = False
         self._set_process(
             process=[
                 Thread(
                     target=_execute_multiple_tasks,
-                    kwargs=executor_kwargs
-                    | {
+                    kwargs=executor_kwargs | {
                         "worker_id": worker_id,
-                        "stop_function": lambda: _interrupt_interface_bootup_dict[
-                            self_id
-                        ],
+                        "stop_function": lambda: _interrupt_bootup_dict[self_id],
                     },
                 )
                 for worker_id in range(self._max_workers)
@@ -173,7 +170,7 @@ def shutdown(self, wait: bool = True, *, cancel_futures: bool = False):
             if cancel_futures:
                 cancel_items_in_queue(que=self._future_queue)
             if isinstance(self._process, list):
-                _interrupt_interface_bootup_dict[self._self_id] = True
+                _interrupt_bootup_dict[self._self_id] = True
                 for _ in range(len(self._process)):
                     self._future_queue.put({"shutdown": True, "wait": wait})
                 if wait:

From c27713f2b0de95cbbc6a5f3feea380cebe29d4b2 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 31 Aug 2025 20:30:27 +0000
Subject: [PATCH 66/83] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 executorlib/task_scheduler/interactive/blockallocation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/blockallocation.py b/executorlib/task_scheduler/interactive/blockallocation.py
index 54aaf4cc..5338deba 100644
--- a/executorlib/task_scheduler/interactive/blockallocation.py
+++ b/executorlib/task_scheduler/interactive/blockallocation.py
@@ -76,7 +76,8 @@ def __init__(
             process=[
                 Thread(
                     target=_execute_multiple_tasks,
-                    kwargs=executor_kwargs | {
+                    kwargs=executor_kwargs
+                    | {
                         "worker_id": worker_id,
                         "stop_function": lambda: _interrupt_bootup_dict[self_id],
                     },

From a647574878a99f02e321b2a59f594b5e1bc1e273 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Mon, 8 Sep 2025 09:46:10 +0200
Subject: [PATCH 67/83] Add docstrings

---
 .../interactive/spawner_pysqa.py               | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/executorlib/task_scheduler/interactive/spawner_pysqa.py b/executorlib/task_scheduler/interactive/spawner_pysqa.py
index 31f57c8b..c2f60d70 100644
--- a/executorlib/task_scheduler/interactive/spawner_pysqa.py
+++ b/executorlib/task_scheduler/interactive/spawner_pysqa.py
@@ -34,9 +34,17 @@ def __init__(
 
         Args:
             cwd (str, optional): The current working directory. Defaults to None.
-            cores (int, optional): The number of cores to use. Defaults to 1.
-            threads_per_core (int, optional): The number of threads per core. Defaults to 1.
-            openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
+            cores (int): The number of cores to use. Defaults to 1.
+            threads_per_core (int): The number of threads per core. Defaults to 1.
+            gpus_per_core (int): number of GPUs per worker - defaults to 0
+            num_nodes (int, optional): The number of compute nodes to use for executing the task.  Defaults to None.
+            exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute notes. Defaults 
+                              to False.
+            openmpi_oversubscribe (bool): Whether to oversubscribe the cores. Defaults to False.
+            slurm_cmd_args (list, optional): Additional command line arguments for the srun call (SLURM only)
+            pmi_mode (str, optional): PMI interface to use (OpenMPI v5 requires pmix) default is None
+            config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
+            backend (str): name of the backend used to spawn tasks.
         """
         super().__init__(
             cwd=cwd,
@@ -65,6 +73,10 @@ def bootup(
 
         Args:
             command_lst (list[str]): The command list to execute.
+            stop_function (Callable): Function to stop the interface.
+
+        Returns:
+            bool: Whether the interface was successfully started.
         """
         self._queue_adapter = QueueAdapter(
             directory=self._config_directory,

From 4e097b6cfdff14da6d6d940a5d4de47f890cfcbe Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Mon, 8 Sep 2025 09:46:25 +0200
Subject: [PATCH 68/83] test for generate_command()

---
 tests/test_standalone_interactive_backend.py | 42 ++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/tests/test_standalone_interactive_backend.py b/tests/test_standalone_interactive_backend.py
index f4b46e30..efdaf6af 100644
--- a/tests/test_standalone_interactive_backend.py
+++ b/tests/test_standalone_interactive_backend.py
@@ -6,6 +6,13 @@
 from executorlib.standalone.interactive.spawner import MpiExecSpawner
 from executorlib.task_scheduler.interactive.spawner_slurm import SrunSpawner
 
+try:
+    from executorlib.task_scheduler.interactive.spawner_pysqa import PysqaSpawner
+
+    skip_pysqa_test = False
+except ImportError:
+    skip_pysqa_test = True
+
 
 class TestParser(unittest.TestCase):
     def test_command_local(self):
@@ -121,3 +128,38 @@ def test_command_slurm_user_command(self):
             ),
         )
         self.assertEqual(result_dict, parse_arguments(command_lst))
+
+    @unittest.skipIf(skip_pysqa_test, "pysqa is not installed, so the pysqa tests are skipped.")
+    def test_command_pysqa(self):
+        interface_slurm = PysqaSpawner(backend="slurm", cores=2, pmi_mode="pmix", num_nodes=2, threads_per_core=2, gpus_per_core=1, exclusive=True, openmpi_oversubscribe=True, slurm_cmd_args=["test"])
+        output = ['srun', '-n', '2', '--mpi=pmix', '-N', '2', '--cpus-per-task=2', '--gpus-per-task=1', '--exact', '--oversubscribe', 'test']
+        self.assertEqual(interface_slurm.generate_command(command_lst=[]), output)
+
+        interface_flux = PysqaSpawner(backend="flux", cores=2, pmi_mode="pmix")
+        output = ['flux', 'run', '-n', '2', '-o', 'pmi=pmix']
+        self.assertEqual(interface_flux.generate_command(command_lst=[]), output)
+
+        interface_flux = PysqaSpawner(backend="flux", cores=2, pmi_mode="pmix", num_nodes=2)
+        with self.assertRaises(ValueError):
+            interface_flux.generate_command(command_lst=[])
+
+        interface_flux = PysqaSpawner(backend="flux", cores=2, pmi_mode="pmix", threads_per_core=2)
+        with self.assertRaises(ValueError):
+            interface_flux.generate_command(command_lst=[])
+
+        interface_flux = PysqaSpawner(backend="flux", cores=2, pmi_mode="pmix", gpus_per_core=1)
+        with self.assertRaises(ValueError):
+            interface_flux.generate_command(command_lst=[])
+
+        interface_flux = PysqaSpawner(backend="flux", cores=2, pmi_mode="pmix", exclusive=True)
+        with self.assertRaises(ValueError):
+            interface_flux.generate_command(command_lst=[])
+
+        interface_flux = PysqaSpawner(backend="flux", cores=2, pmi_mode="pmix", openmpi_oversubscribe=True)
+        with self.assertRaises(ValueError):
+            interface_flux.generate_command(command_lst=[])
+
+        interface_nobackend = PysqaSpawner(cores=2)
+        with self.assertRaises(ValueError):
+            interface_nobackend.generate_command(command_lst=[])
+        
\ No newline at end of file

From 1c0148a1517bb125910f627a8a25b82767a74c9c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 8 Sep 2025 07:46:34 +0000
Subject: [PATCH 69/83] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 executorlib/task_scheduler/interactive/spawner_pysqa.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/spawner_pysqa.py b/executorlib/task_scheduler/interactive/spawner_pysqa.py
index c2f60d70..8fb2ccd6 100644
--- a/executorlib/task_scheduler/interactive/spawner_pysqa.py
+++ b/executorlib/task_scheduler/interactive/spawner_pysqa.py
@@ -38,7 +38,7 @@ def __init__(
             threads_per_core (int): The number of threads per core. Defaults to 1.
             gpus_per_core (int): number of GPUs per worker - defaults to 0
             num_nodes (int, optional): The number of compute nodes to use for executing the task.  Defaults to None.
-            exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute notes. Defaults 
+            exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute notes. Defaults
                               to False.
             openmpi_oversubscribe (bool): Whether to oversubscribe the cores. Defaults to False.
             slurm_cmd_args (list, optional): Additional command line arguments for the srun call (SLURM only)

From 62d48987e7c279badf67e7a6a4d5b55891a3b73f Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Mon, 8 Sep 2025 11:36:58 +0200
Subject: [PATCH 70/83] Add more tests

---
 tests/test_fluxclusterexecutor.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tests/test_fluxclusterexecutor.py b/tests/test_fluxclusterexecutor.py
index 54b64ff1..8a89faf4 100644
--- a/tests/test_fluxclusterexecutor.py
+++ b/tests/test_fluxclusterexecutor.py
@@ -14,6 +14,7 @@
     from executorlib.standalone.hdf import dump
     from executorlib.task_scheduler.file.spawner_pysqa import execute_with_pysqa
     from executorlib.standalone.scheduler import terminate_with_pysqa
+    from executorlib.task_scheduler.interactive.spawner_pysqa import PysqaSpawner
 
     skip_flux_test = "FLUX_URI" not in os.environ
     pmi = os.environ.get("EXECUTORLIB_PMIX", None)
@@ -37,6 +38,10 @@ def mpi_funct(i):
     return i, size, rank
 
 
+def stop_function():
+    return True
+
+
 @unittest.skipIf(
     skip_flux_test or skip_mpi4py_test,
     "h5py or mpi4py or flux are not installed, so the h5py, flux and mpi4py tests are skipped.",
@@ -161,3 +166,17 @@ def test_terminate_tasks_in_cache(self):
 
     def tearDown(self):
         shutil.rmtree("executorlib_cache", ignore_errors=True)
+
+
+@unittest.skipIf(
+    skip_flux_test,
+    "flux is not installed, so the flux tests are skipped.",
+)
+class TestPysqaSpawner(unittest.TestCase):
+    def test_pysqa_spawner_sleep(self):
+        interface_flux = PysqaSpawner(backend="flux", cores=1)
+        self.assertTrue(interface_flux.bootup(command_lst=["sleep", "1"]))
+
+    def test_pysqa_spawner_stop_function(self):
+        interface_flux = PysqaSpawner(backend="flux", cores=1)
+        self.assertFalse(interface_flux.bootup(command_lst=["exit"], stop_function=stop_function))

From dba3b48a147fa8f434e0929fef90814479cf3477 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Mon, 8 Sep 2025 11:40:15 +0200
Subject: [PATCH 71/83] smaller tests

---
 tests/test_fluxclusterexecutor.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/tests/test_fluxclusterexecutor.py b/tests/test_fluxclusterexecutor.py
index 8a89faf4..2a0f39ae 100644
--- a/tests/test_fluxclusterexecutor.py
+++ b/tests/test_fluxclusterexecutor.py
@@ -38,10 +38,6 @@ def mpi_funct(i):
     return i, size, rank
 
 
-def stop_function():
-    return True
-
-
 @unittest.skipIf(
     skip_flux_test or skip_mpi4py_test,
     "h5py or mpi4py or flux are not installed, so the h5py, flux and mpi4py tests are skipped.",
@@ -176,7 +172,3 @@ class TestPysqaSpawner(unittest.TestCase):
     def test_pysqa_spawner_sleep(self):
         interface_flux = PysqaSpawner(backend="flux", cores=1)
         self.assertTrue(interface_flux.bootup(command_lst=["sleep", "1"]))
-
-    def test_pysqa_spawner_stop_function(self):
-        interface_flux = PysqaSpawner(backend="flux", cores=1)
-        self.assertFalse(interface_flux.bootup(command_lst=["exit"], stop_function=stop_function))

From ad4e45c47711ee983561465eeaeeffa8e2f69488 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Mon, 8 Sep 2025 11:42:54 +0200
Subject: [PATCH 72/83] submit a big job

---
 tests/test_fluxclusterexecutor.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/test_fluxclusterexecutor.py b/tests/test_fluxclusterexecutor.py
index 2a0f39ae..4d6bab66 100644
--- a/tests/test_fluxclusterexecutor.py
+++ b/tests/test_fluxclusterexecutor.py
@@ -38,6 +38,10 @@ def mpi_funct(i):
     return i, size, rank
 
 
+def stop_function():
+    return True
+
+
 @unittest.skipIf(
     skip_flux_test or skip_mpi4py_test,
     "h5py or mpi4py or flux are not installed, so the h5py, flux and mpi4py tests are skipped.",
@@ -172,3 +176,7 @@ class TestPysqaSpawner(unittest.TestCase):
     def test_pysqa_spawner_sleep(self):
         interface_flux = PysqaSpawner(backend="flux", cores=1)
         self.assertTrue(interface_flux.bootup(command_lst=["sleep", "1"]))
+
+    def test_pysqa_spawner_big(self):
+        interface_flux = PysqaSpawner(backend="flux", cores=100)
+        self.assertFalse(interface_flux.bootup(command_lst=["sleep", "1"], stop_function=stop_function))

From 624856182d1cdffd4dc26cd10309b32e34d0efbf Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Mon, 8 Sep 2025 11:59:00 +0200
Subject: [PATCH 73/83] extend tests

---
 tests/test_fluxclusterexecutor.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/test_fluxclusterexecutor.py b/tests/test_fluxclusterexecutor.py
index 4d6bab66..c0bc6458 100644
--- a/tests/test_fluxclusterexecutor.py
+++ b/tests/test_fluxclusterexecutor.py
@@ -176,6 +176,19 @@ class TestPysqaSpawner(unittest.TestCase):
     def test_pysqa_spawner_sleep(self):
         interface_flux = PysqaSpawner(backend="flux", cores=1)
         self.assertTrue(interface_flux.bootup(command_lst=["sleep", "1"]))
+        self.assertTrue(interface_flux._check_process_helper(command_lst=[]))
+        self.assertTrue(interface_flux.poll())
+        process_id = interface_flux._process
+        interface_flux.shutdown(wait=True)
+        interface_flux._process = process_id
+        self.assertFalse(interface_flux.poll())
+        self.assertFalse(interface_flux._check_process_helper(command_lst=["sleep", "1"]))
+        self.assertTrue(interface_flux.poll())
+
+    def test_pysqa_spawner_error(self):
+        interface_flux = PysqaSpawner(backend="flux", cores=1)
+        with self.assertRaises(RuntimeError):
+            interface_flux.bootup(command_lst=["--unknonwn", "1"])
 
     def test_pysqa_spawner_big(self):
         interface_flux = PysqaSpawner(backend="flux", cores=100)

From 1e2d21c031727feddaf7d4e04cacbc50ef766ade Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Mon, 8 Sep 2025 12:03:57 +0200
Subject: [PATCH 74/83] no command

---
 tests/test_fluxclusterexecutor.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/test_fluxclusterexecutor.py b/tests/test_fluxclusterexecutor.py
index c0bc6458..a21b62d5 100644
--- a/tests/test_fluxclusterexecutor.py
+++ b/tests/test_fluxclusterexecutor.py
@@ -183,12 +183,11 @@ def test_pysqa_spawner_sleep(self):
         interface_flux._process = process_id
         self.assertFalse(interface_flux.poll())
         self.assertFalse(interface_flux._check_process_helper(command_lst=["sleep", "1"]))
-        self.assertTrue(interface_flux.poll())
 
     def test_pysqa_spawner_error(self):
         interface_flux = PysqaSpawner(backend="flux", cores=1)
         with self.assertRaises(RuntimeError):
-            interface_flux.bootup(command_lst=["--unknonwn", "1"])
+            interface_flux.bootup(command_lst=[])
 
     def test_pysqa_spawner_big(self):
         interface_flux = PysqaSpawner(backend="flux", cores=100)

From 10db91ce09372d65ba3d4edd110646e5ad9c6708 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Mon, 8 Sep 2025 12:07:51 +0200
Subject: [PATCH 75/83] remove error test

---
 tests/test_fluxclusterexecutor.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tests/test_fluxclusterexecutor.py b/tests/test_fluxclusterexecutor.py
index a21b62d5..04680c34 100644
--- a/tests/test_fluxclusterexecutor.py
+++ b/tests/test_fluxclusterexecutor.py
@@ -184,11 +184,6 @@ def test_pysqa_spawner_sleep(self):
         self.assertFalse(interface_flux.poll())
         self.assertFalse(interface_flux._check_process_helper(command_lst=["sleep", "1"]))
 
-    def test_pysqa_spawner_error(self):
-        interface_flux = PysqaSpawner(backend="flux", cores=1)
-        with self.assertRaises(RuntimeError):
-            interface_flux.bootup(command_lst=[])
-
     def test_pysqa_spawner_big(self):
         interface_flux = PysqaSpawner(backend="flux", cores=100)
         self.assertFalse(interface_flux.bootup(command_lst=["sleep", "1"], stop_function=stop_function))

From 8daa42c8ad31a3be43da2332d80930462e70f85c Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Mon, 8 Sep 2025 12:16:36 +0200
Subject: [PATCH 76/83] extend tests

---
 tests/test_standalone_interactive_backend.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tests/test_standalone_interactive_backend.py b/tests/test_standalone_interactive_backend.py
index efdaf6af..b146a526 100644
--- a/tests/test_standalone_interactive_backend.py
+++ b/tests/test_standalone_interactive_backend.py
@@ -7,7 +7,7 @@
 from executorlib.task_scheduler.interactive.spawner_slurm import SrunSpawner
 
 try:
-    from executorlib.task_scheduler.interactive.spawner_pysqa import PysqaSpawner
+    from executorlib.task_scheduler.interactive.spawner_pysqa import PysqaSpawner, create_pysqa_block_allocation_scheduler
 
     skip_pysqa_test = False
 except ImportError:
@@ -135,6 +135,9 @@ def test_command_pysqa(self):
         output = ['srun', '-n', '2', '--mpi=pmix', '-N', '2', '--cpus-per-task=2', '--gpus-per-task=1', '--exact', '--oversubscribe', 'test']
         self.assertEqual(interface_slurm.generate_command(command_lst=[]), output)
 
+        with self.assertRaises(RuntimeError):
+            interface_slurm.bootup(command_lst=["sleep", "1"])
+
         interface_flux = PysqaSpawner(backend="flux", cores=2, pmi_mode="pmix")
         output = ['flux', 'run', '-n', '2', '-o', 'pmi=pmix']
         self.assertEqual(interface_flux.generate_command(command_lst=[]), output)
@@ -162,4 +165,7 @@ def test_command_pysqa(self):
         interface_nobackend = PysqaSpawner(cores=2)
         with self.assertRaises(ValueError):
             interface_nobackend.generate_command(command_lst=[])
+
+        with self.assertRaises(ValueError):
+            create_pysqa_block_allocation_scheduler()
         
\ No newline at end of file

From 77ae767f341d4d28f8b6cf4e3ed09dd36cc25edd Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Mon, 8 Sep 2025 12:19:32 +0200
Subject: [PATCH 77/83] change error name

---
 tests/test_standalone_interactive_backend.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_standalone_interactive_backend.py b/tests/test_standalone_interactive_backend.py
index b146a526..bdbb9922 100644
--- a/tests/test_standalone_interactive_backend.py
+++ b/tests/test_standalone_interactive_backend.py
@@ -166,6 +166,6 @@ def test_command_pysqa(self):
         with self.assertRaises(ValueError):
             interface_nobackend.generate_command(command_lst=[])
 
-        with self.assertRaises(ValueError):
+        with self.assertRaises(FileNotFoundError):
             create_pysqa_block_allocation_scheduler()
         
\ No newline at end of file

From 54c5a23efdd4559f8ad5558a9bac2f4175923512 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Mon, 8 Sep 2025 12:21:52 +0200
Subject: [PATCH 78/83] check more errors

---
 tests/test_standalone_interactive_backend.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/test_standalone_interactive_backend.py b/tests/test_standalone_interactive_backend.py
index bdbb9922..7be32234 100644
--- a/tests/test_standalone_interactive_backend.py
+++ b/tests/test_standalone_interactive_backend.py
@@ -166,6 +166,9 @@ def test_command_pysqa(self):
         with self.assertRaises(ValueError):
             interface_nobackend.generate_command(command_lst=[])
 
+        with self.assertRaises(RuntimeError):
+            interface_nobackend._check_process_helper(command_lst=[])
+
         with self.assertRaises(FileNotFoundError):
             create_pysqa_block_allocation_scheduler()
         
\ No newline at end of file

From 676b4ecedc0c4d774cbfd241cb396932e6f19c92 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Mon, 8 Sep 2025 12:27:58 +0200
Subject: [PATCH 79/83] clean up

---
 tests/test_standalone_interactive_backend.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/tests/test_standalone_interactive_backend.py b/tests/test_standalone_interactive_backend.py
index 7be32234..3279c8fd 100644
--- a/tests/test_standalone_interactive_backend.py
+++ b/tests/test_standalone_interactive_backend.py
@@ -7,7 +7,7 @@
 from executorlib.task_scheduler.interactive.spawner_slurm import SrunSpawner
 
 try:
-    from executorlib.task_scheduler.interactive.spawner_pysqa import PysqaSpawner, create_pysqa_block_allocation_scheduler
+    from executorlib.task_scheduler.interactive.spawner_pysqa import PysqaSpawner
 
     skip_pysqa_test = False
 except ImportError:
@@ -135,7 +135,7 @@ def test_command_pysqa(self):
         output = ['srun', '-n', '2', '--mpi=pmix', '-N', '2', '--cpus-per-task=2', '--gpus-per-task=1', '--exact', '--oversubscribe', 'test']
         self.assertEqual(interface_slurm.generate_command(command_lst=[]), output)
 
-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(FileNotFoundError):
             interface_slurm.bootup(command_lst=["sleep", "1"])
 
         interface_flux = PysqaSpawner(backend="flux", cores=2, pmi_mode="pmix")
@@ -168,7 +168,4 @@ def test_command_pysqa(self):
 
         with self.assertRaises(RuntimeError):
             interface_nobackend._check_process_helper(command_lst=[])
-
-        with self.assertRaises(FileNotFoundError):
-            create_pysqa_block_allocation_scheduler()
         
\ No newline at end of file

From 9b497d27d51a59ba56894852276c818348d6d56e Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Mon, 8 Sep 2025 12:33:17 +0200
Subject: [PATCH 80/83] extend tests

---
 tests/test_standalone_interactive_backend.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/test_standalone_interactive_backend.py b/tests/test_standalone_interactive_backend.py
index 3279c8fd..9fa00536 100644
--- a/tests/test_standalone_interactive_backend.py
+++ b/tests/test_standalone_interactive_backend.py
@@ -7,7 +7,7 @@
 from executorlib.task_scheduler.interactive.spawner_slurm import SrunSpawner
 
 try:
-    from executorlib.task_scheduler.interactive.spawner_pysqa import PysqaSpawner
+    from executorlib.task_scheduler.interactive.spawner_pysqa import PysqaSpawner, create_pysqa_block_allocation_scheduler
 
     skip_pysqa_test = False
 except ImportError:
@@ -168,4 +168,9 @@ def test_command_pysqa(self):
 
         with self.assertRaises(RuntimeError):
             interface_nobackend._check_process_helper(command_lst=[])
-        
\ No newline at end of file
+
+        with self.assertRaises(KeyError):
+            create_pysqa_block_allocation_scheduler()
+
+        with self.assertRaises(ValueError):
+            create_pysqa_block_allocation_scheduler(resource_dict={"cwd": "."})
\ No newline at end of file

From 6c624bdf95d4c293df002129ad8c757f734951e6 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Mon, 8 Sep 2025 12:38:58 +0200
Subject: [PATCH 81/83] more tests

---
 .../task_scheduler/interactive/spawner_pysqa.py    |  3 ++-
 tests/test_slurmclusterexecutor.py                 | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/executorlib/task_scheduler/interactive/spawner_pysqa.py b/executorlib/task_scheduler/interactive/spawner_pysqa.py
index 8fb2ccd6..b91178be 100644
--- a/executorlib/task_scheduler/interactive/spawner_pysqa.py
+++ b/executorlib/task_scheduler/interactive/spawner_pysqa.py
@@ -223,7 +223,8 @@ def create_pysqa_block_allocation_scheduler(
     if resource_dict is None:
         resource_dict = {}
     cores_per_worker = resource_dict.get("cores", 1)
-    resource_dict["cwd"] = os.path.abspath(resource_dict["cwd"])
+    if "cwd" in resource_dict and resource_dict["cwd"] is not None:
+        resource_dict["cwd"] = os.path.abspath(resource_dict["cwd"])
     if cache_directory is not None:
         resource_dict["cache_directory"] = os.path.abspath(cache_directory)
     else:
diff --git a/tests/test_slurmclusterexecutor.py b/tests/test_slurmclusterexecutor.py
index a26524e7..704b3a91 100644
--- a/tests/test_slurmclusterexecutor.py
+++ b/tests/test_slurmclusterexecutor.py
@@ -20,6 +20,13 @@
 except ImportError:
     skip_h5py_test = True
 
+try:
+    import pysqa
+
+    skip_pysqa_test = False
+except ImportError:
+    skip_pysqa_test = True
+
 submission_template = """\
 #!/bin/bash
 #SBATCH --output=time.out
@@ -108,3 +115,10 @@ def test_executor_existing_files(self):
 
     def tearDown(self):
         shutil.rmtree("executorlib_cache", ignore_errors=True)
+
+
+@unittest.skipIf(skip_pysqa_test, "pysqa is not installed, so the pysqa tests are skipped.")
+class TestSlurmClusterInit(unittest.TestCase):
+    def test_slurm_cluster_init(self):
+        with self.assertRaises(ValueError):
+            SlurmClusterExecutor(block_allocation=True)
\ No newline at end of file

From 6c0284560a5b5118e688df173dd20966188d4086 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Mon, 8 Sep 2025 12:42:13 +0200
Subject: [PATCH 82/83] validate initialization

---
 tests/test_slurmclusterexecutor.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/test_slurmclusterexecutor.py b/tests/test_slurmclusterexecutor.py
index 704b3a91..41057119 100644
--- a/tests/test_slurmclusterexecutor.py
+++ b/tests/test_slurmclusterexecutor.py
@@ -119,6 +119,9 @@ def tearDown(self):
 
 @unittest.skipIf(skip_pysqa_test, "pysqa is not installed, so the pysqa tests are skipped.")
 class TestSlurmClusterInit(unittest.TestCase):
-    def test_slurm_cluster_init(self):
+    def test_slurm_cluster_block_allocation(self):
         with self.assertRaises(ValueError):
-            SlurmClusterExecutor(block_allocation=True)
\ No newline at end of file
+            SlurmClusterExecutor(block_allocation=True)
+
+    def test_slurm_cluster_file(self):
+        self.assertTrue(SlurmClusterExecutor(block_allocation=False))
\ No newline at end of file

From 5f7b676bee4f36c988fc67ca26dade6e85fbe9c4 Mon Sep 17 00:00:00 2001
From: Jan Janssen <janssen@mpie.de>
Date: Mon, 8 Sep 2025 12:43:03 +0200
Subject: [PATCH 83/83] fix test

---
 tests/test_standalone_interactive_backend.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_standalone_interactive_backend.py b/tests/test_standalone_interactive_backend.py
index 9fa00536..ed3745e3 100644
--- a/tests/test_standalone_interactive_backend.py
+++ b/tests/test_standalone_interactive_backend.py
@@ -169,7 +169,7 @@ def test_command_pysqa(self):
         with self.assertRaises(RuntimeError):
             interface_nobackend._check_process_helper(command_lst=[])
 
-        with self.assertRaises(KeyError):
+        with self.assertRaises(ValueError):
             create_pysqa_block_allocation_scheduler()
 
         with self.assertRaises(ValueError):