From c15c1c18ddd8d2de7dad15158a448564c41c2b3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Sat, 29 Mar 2025 07:22:13 +0100 Subject: [PATCH 01/10] Add example for get_cache_data() --- executorlib/__init__.py | 2 + notebooks/1-single-node.ipynb | 187 ++++++++++++++++++++++++---------- 2 files changed, 138 insertions(+), 51 deletions(-) diff --git a/executorlib/__init__.py b/executorlib/__init__.py index 16180430..520256e8 100644 --- a/executorlib/__init__.py +++ b/executorlib/__init__.py @@ -8,6 +8,7 @@ SlurmClusterExecutor, SlurmJobExecutor, ) +from executorlib.standalone.hdf import get_cache_data __version__ = _get_versions()["version"] __all__: list = [ @@ -16,4 +17,5 @@ "SingleNodeExecutor", "SlurmJobExecutor", "SlurmClusterExecutor", + "get_cache_data", ] diff --git a/notebooks/1-single-node.ipynb b/notebooks/1-single-node.ipynb index a1fa930e..91fd7349 100644 --- a/notebooks/1-single-node.ipynb +++ b/notebooks/1-single-node.ipynb @@ -26,13 +26,17 @@ "id": "b1907f12-7378-423b-9b83-1b65fc0a20f5", "metadata": {}, "outputs": [], - "source": "from executorlib import SingleNodeExecutor" + "source": [ + "from executorlib import SingleNodeExecutor" + ] }, { "cell_type": "markdown", "id": "1654679f-38b3-4699-9bfe-b48cbde0b2db", "metadata": {}, - "source": "It is recommended to use the `SingleNodeExecutor` class in combination with a `with`-statement. This guarantees the processes created by the `SingleNodeExecutor` class to evaluate the Python functions are afterward closed and do not remain ghost processes. A function is then submitted using the `submit(fn, /, *args, **kwargs)` function which executes a given function `fn` as `fn(*args, **kwargs)`. The `submit()` function returns a [concurrent.futures.Future](https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Future) object, as defined by the Python Standard Library. 
As a first example we submit the function `sum()` to calculate the sum of the list `[1, 1]`:" + "source": [ + "It is recommended to use the `SingleNodeExecutor` class in combination with a `with`-statement. This guarantees the processes created by the `SingleNodeExecutor` class to evaluate the Python functions are afterward closed and do not remain ghost processes. A function is then submitted using the `submit(fn, /, *args, **kwargs)` function which executes a given function `fn` as `fn(*args, **kwargs)`. The `submit()` function returns a [concurrent.futures.Future](https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Future) object, as defined by the Python Standard Library. As a first example we submit the function `sum()` to calculate the sum of the list `[1, 1]`:" + ] }, { "cell_type": "code", @@ -45,8 +49,8 @@ "output_type": "stream", "text": [ "2\n", - "CPU times: user 100 ms, sys: 70.7 ms, total: 171 ms\n", - "Wall time: 1.94 s\n" + "CPU times: user 84.4 ms, sys: 59.3 ms, total: 144 ms\n", + "Wall time: 482 ms\n" ] } ], @@ -61,7 +65,9 @@ "cell_type": "markdown", "id": "a1109584-9db2-4f9d-b3ed-494d96241396", "metadata": {}, - "source": "As expected the result of the summation `sum([1, 1])` is `2`. The same result is retrieved from the [concurrent.futures.Future](https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Future) object received from the submission of the `sum()` as it is printed here `print(future.result())`. For most Python functions and especially the `sum()` function it is computationally not efficient to initialize the `SingleNodeExecutor` class only for the execution of a single function call, rather it is more computationally efficient to initialize the `SingleNodeExecutor` class once and then submit a number of functions. This can be achieved with a loop. 
For example the sum of the pairs `[2, 2]`, `[3, 3]` and `[4, 4]` can be achieved with a for-loop inside the context of the `SingleNodeExecutor()` class as provided by the `with`-statement." + "source": [ + "As expected the result of the summation `sum([1, 1])` is `2`. The same result is retrieved from the [concurrent.futures.Future](https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Future) object received from the submission of the `sum()` as it is printed here `print(future.result())`. For most Python functions and especially the `sum()` function it is computationally not efficient to initialize the `SingleNodeExecutor` class only for the execution of a single function call, rather it is more computationally efficient to initialize the `SingleNodeExecutor` class once and then submit a number of functions. This can be achieved with a loop. For example the sum of the pairs `[2, 2]`, `[3, 3]` and `[4, 4]` can be achieved with a for-loop inside the context of the `SingleNodeExecutor()` class as provided by the `with`-statement." + ] }, { "cell_type": "code", @@ -74,8 +80,8 @@ "output_type": "stream", "text": [ "[4, 6, 8]\n", - "CPU times: user 49.4 ms, sys: 29.2 ms, total: 78.7 ms\n", - "Wall time: 1.75 s\n" + "CPU times: user 39.7 ms, sys: 26.8 ms, total: 66.5 ms\n", + "Wall time: 524 ms\n" ] } ], @@ -105,8 +111,8 @@ "output_type": "stream", "text": [ "[10, 12, 14]\n", - "CPU times: user 40.5 ms, sys: 28.1 ms, total: 68.6 ms\n", - "Wall time: 1.09 s\n" + "CPU times: user 28 ms, sys: 23.1 ms, total: 51.1 ms\n", + "Wall time: 517 ms\n" ] } ], @@ -121,7 +127,9 @@ "cell_type": "markdown", "id": "ac86bf47-4eb6-4d7c-acae-760b880803a8", "metadata": {}, - "source": "These three examples cover the general functionality of the `SingleNodeExecutor` class. Following the [Executor](https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Executor) interface as it is defined in the Python standard library." 
+ "source": [ + "These three examples cover the general functionality of the `SingleNodeExecutor` class, which follows the [Executor](https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Executor) interface as it is defined in the Python standard library." + ] }, { "cell_type": "markdown", @@ -349,8 +357,8 @@ "output_type": "stream", "text": [ "2\n", - "CPU times: user 37.1 ms, sys: 21.8 ms, total: 58.9 ms\n", - "Wall time: 1.09 s\n" + "CPU times: user 31.1 ms, sys: 19.1 ms, total: 50.1 ms\n", + "Wall time: 394 ms\n" ] } ], @@ -388,7 +396,9 @@ "cell_type": "markdown", "id": "9e1212c4-e3fb-4e21-be43-0a4f0a08b856", "metadata": {}, - "source": "Still the resource dictionary parameter can still be set during the initialisation of the `SingleNodeExecutor` class. Internally, this groups the created Python processes in fixed allocations and afterwards submit Python functions to these allocations." + "source": [ + "Still, the resource dictionary parameter can also be set during the initialisation of the `SingleNodeExecutor` class. Internally, this groups the created Python processes in fixed allocations and afterwards submits Python functions to these allocations." + ] }, { "cell_type": "code", @@ -413,34 +423,7 @@ "experience performance degradation.\n", "\n", " Local host: MacBook-Pro.local\n", - " System call: unlink(2) /var/folders/z7/3vhrmssx60v240x_ndq448h80000gn/T//ompi.MacBook-Pro.501/pid.22031/1/vader_segment.MacBook-Pro.501.17620001.1\n", - " Error: No such file or directory (errno 2)\n", - "--------------------------------------------------------------------------\n", - "--------------------------------------------------------------------------\n", - "A system call failed during shared memory initialization that should\n", - "not have. 
It is likely that your MPI job will now either abort or\n", - "experience performance degradation.\n", - "\n", - " Local host: MacBook-Pro.local\n", - " System call: unlink(2) /var/folders/z7/3vhrmssx60v240x_ndq448h80000gn/T//ompi.MacBook-Pro.501/pid.22028/1/vader_segment.MacBook-Pro.501.17610001.1\n", - " Error: No such file or directory (errno 2)\n", - "--------------------------------------------------------------------------\n", - "--------------------------------------------------------------------------\n", - "A system call failed during shared memory initialization that should\n", - "not have. It is likely that your MPI job will now either abort or\n", - "experience performance degradation.\n", - "\n", - " Local host: MacBook-Pro.local\n", - " System call: unlink(2) /var/folders/z7/3vhrmssx60v240x_ndq448h80000gn/T//ompi.MacBook-Pro.501/pid.22030/1/vader_segment.MacBook-Pro.501.17630001.1\n", - " Error: No such file or directory (errno 2)\n", - "--------------------------------------------------------------------------\n", - "--------------------------------------------------------------------------\n", - "A system call failed during shared memory initialization that should\n", - "not have. It is likely that your MPI job will now either abort or\n", - "experience performance degradation.\n", - "\n", - " Local host: MacBook-Pro.local\n", - " System call: unlink(2) /var/folders/z7/3vhrmssx60v240x_ndq448h80000gn/T//ompi.MacBook-Pro.501/pid.22029/1/vader_segment.MacBook-Pro.501.17600001.1\n", + " System call: unlink(2) /var/folders/z7/3vhrmssx60v240x_ndq448h80000gn/T//ompi.MacBook-Pro.501/pid.55070/1/vader_segment.MacBook-Pro.501.96730001.1\n", " Error: No such file or directory (errno 2)\n", "--------------------------------------------------------------------------\n" ] @@ -486,7 +469,9 @@ "cell_type": "markdown", "id": "d07cf107-3627-4cb0-906c-647497d6e0d2", "metadata": {}, - "source": "The function `calc_with_preload()` requires three inputs `i`, `j` and `k`. 
But when the function is submitted to the executor only two inputs are provided `fs = exe.submit(calc, 2, j=5)`. In this case the first input parameter is mapped to `i=2`, the second input parameter is specified explicitly `j=5` but the third input parameter `k` is not provided. So the `SingleNodeExecutor` automatically checks the keys set in the `init_function()` function. In this case the returned dictionary `{\"j\": 4, \"k\": 3, \"l\": 2}` defines `j=4`, `k=3` and `l=2`. For this specific call of the `calc_with_preload()` function, `i` and `j` are already provided so `j` is not required, but `k=3` is used from the `init_function()` and as the `calc_with_preload()` function does not define the `l` parameter this one is also ignored." + "source": [ + "The function `calc_with_preload()` requires three inputs `i`, `j` and `k`. But when the function is submitted to the executor only two inputs are provided `fs = exe.submit(calc, 2, j=5)`. In this case the first input parameter is mapped to `i=2`, the second input parameter is specified explicitly `j=5` but the third input parameter `k` is not provided. So the `SingleNodeExecutor` automatically checks the keys set in the `init_function()` function. In this case the returned dictionary `{\"j\": 4, \"k\": 3, \"l\": 2}` defines `j=4`, `k=3` and `l=2`. For this specific call of the `calc_with_preload()` function, `i` and `j` are already provided so `j` is not required, but `k=3` is used from the `init_function()` and as the `calc_with_preload()` function does not define the `l` parameter this one is also ignored." 
+ ] }, { "cell_type": "code", @@ -538,8 +523,8 @@ "output_type": "stream", "text": [ "[2, 4, 6]\n", - "CPU times: user 547 ms, sys: 161 ms, total: 708 ms\n", - "Wall time: 1.33 s\n" + "CPU times: user 512 ms, sys: 138 ms, total: 650 ms\n", + "Wall time: 865 ms\n" ] } ], @@ -571,8 +556,8 @@ "output_type": "stream", "text": [ "[2, 4, 6]\n", - "CPU times: user 52.1 ms, sys: 41.1 ms, total: 93.2 ms\n", - "Wall time: 1.13 s\n" + "CPU times: user 56.7 ms, sys: 32.5 ms, total: 89.2 ms\n", + "Wall time: 620 ms\n" ] } ], @@ -583,6 +568,106 @@ " print([f.result() for f in future_lst])" ] }, + { + "cell_type": "markdown", + "id": "5144a035-633e-4e60-a362-f3b15b28848b", + "metadata": {}, + "source": [ + "An additional advantage of the cache is the option to gather the results of previously submitted functions. Using the `get_cache_data()` function, the result of each Python function call is converted to a dictionary. This list of dictionaries can be converted to a `pandas.DataFrame` for further processing:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "f574b9e1-de55-4e38-aef7-a4bed540e040", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
functioninput_argsinput_kwargsoutputruntimefilename
0<built-in function sum>([1, 1],){}20.001686sum0d968285d17368d1c34ea7392309bcc5.h5out
1<built-in function sum>([3, 3],){}60.136151sum0102e33bb2921ae07a3bbe3db5d3dec9.h5out
2<built-in function sum>([2, 2],){}40.136006sum6270955d7c8022a0c1027aafaee64439.h5out
\n", + "
" + ], + "text/plain": [ + " function input_args input_kwargs output runtime \\\n", + "0 ([1, 1],) {} 2 0.001686 \n", + "1 ([3, 3],) {} 6 0.136151 \n", + "2 ([2, 2],) {} 4 0.136006 \n", + "\n", + " filename \n", + "0 sum0d968285d17368d1c34ea7392309bcc5.h5out \n", + "1 sum0102e33bb2921ae07a3bbe3db5d3dec9.h5out \n", + "2 sum6270955d7c8022a0c1027aafaee64439.h5out " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas\n", + "from executorlib import get_cache_data\n", + "\n", + "df = pandas.DataFrame(get_cache_data(cache_directory=\"./cache\"))\n", + "df" + ] + }, { "cell_type": "markdown", "id": "68092479-e846-494a-9ac9-d9638b102bd8", @@ -593,7 +678,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "id": "34a9316d-577f-4a63-af14-736fb4e6b219", "metadata": {}, "outputs": [ @@ -601,7 +686,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "['sumb6a5053f96b7031239c2e8d0e7563ce4.h5out', 'sum5171356dfe527405c606081cfbd2dffe.h5out', 'sumd1bf4ee658f1ac42924a2e4690e797f4.h5out']\n" + "['sum0d968285d17368d1c34ea7392309bcc5.h5out', 'sum0102e33bb2921ae07a3bbe3db5d3dec9.h5out', 'sum6270955d7c8022a0c1027aafaee64439.h5out']\n" ] } ], @@ -637,7 +722,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "id": "d8b75a26-479d-405e-8895-a8d56b3f0f4b", "metadata": {}, "outputs": [], @@ -658,7 +743,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "id": "35fd5747-c57d-4926-8d83-d5c55a130ad6", "metadata": {}, "outputs": [ @@ -692,7 +777,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "id": "f67470b5-af1d-4add-9de8-7f259ca67324", "metadata": {}, "outputs": [ From dd0eb0a16e6d91745f5edec31552b259bb39264f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Sat, 29 Mar 2025 11:00:05 +0100 Subject: [PATCH 02/10] fix minimal --- executorlib/__init__.py | 11 ++++++++--- 1 file 
changed, 8 insertions(+), 3 deletions(-) diff --git a/executorlib/__init__.py b/executorlib/__init__.py index 520256e8..33617729 100644 --- a/executorlib/__init__.py +++ b/executorlib/__init__.py @@ -8,7 +8,13 @@ SlurmClusterExecutor, SlurmJobExecutor, ) -from executorlib.standalone.hdf import get_cache_data + +try: + from executorlib.standalone.hdf import get_cache_data +except ImportError: + hdf_lst: list = [] +else: + hdf_lst: list = [get_cache_data] __version__ = _get_versions()["version"] __all__: list = [ @@ -17,5 +23,4 @@ "SingleNodeExecutor", "SlurmJobExecutor", "SlurmClusterExecutor", - "get_cache_data", -] +] + hdf_lst From be2b2185131115d59be76c59345242f396f15109 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Sat, 29 Mar 2025 11:06:49 +0100 Subject: [PATCH 03/10] fix list of tools --- executorlib/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/executorlib/__init__.py b/executorlib/__init__.py index 33617729..6064b1b2 100644 --- a/executorlib/__init__.py +++ b/executorlib/__init__.py @@ -9,12 +9,14 @@ SlurmJobExecutor, ) +hdf_lst: list = [] + try: from executorlib.standalone.hdf import get_cache_data except ImportError: - hdf_lst: list = [] + pass else: - hdf_lst: list = [get_cache_data] + hdf_lst += [get_cache_data] __version__ = _get_versions()["version"] __all__: list = [ From 16a81422efbc185a9f1b6cc136bce27a68a0cfdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Sat, 29 Mar 2025 11:07:21 +0100 Subject: [PATCH 04/10] hid list --- executorlib/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/executorlib/__init__.py b/executorlib/__init__.py index 6064b1b2..e560ab51 100644 --- a/executorlib/__init__.py +++ b/executorlib/__init__.py @@ -9,14 +9,14 @@ SlurmJobExecutor, ) -hdf_lst: list = [] +_hdf_lst: list = [] try: from executorlib.standalone.hdf import get_cache_data except ImportError: pass else: - hdf_lst += [get_cache_data] + _hdf_lst += 
[get_cache_data] __version__ = _get_versions()["version"] __all__: list = [ @@ -25,4 +25,4 @@ "SingleNodeExecutor", "SlurmJobExecutor", "SlurmClusterExecutor", -] + hdf_lst +] + _hdf_lst From c42411f977116c71cab26acf7d045f1288406db5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Sat, 29 Mar 2025 11:09:22 +0100 Subject: [PATCH 05/10] add list step by step --- executorlib/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/executorlib/__init__.py b/executorlib/__init__.py index e560ab51..09bd8f4a 100644 --- a/executorlib/__init__.py +++ b/executorlib/__init__.py @@ -25,4 +25,5 @@ "SingleNodeExecutor", "SlurmJobExecutor", "SlurmClusterExecutor", -] + _hdf_lst +] +__all__ += _hdf_lst From 86b522c55d3247fb8a9690e42fa498255c7ab2d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Sat, 29 Mar 2025 11:13:40 +0100 Subject: [PATCH 06/10] add to list --- executorlib/__init__.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/executorlib/__init__.py b/executorlib/__init__.py index 09bd8f4a..01aa1078 100644 --- a/executorlib/__init__.py +++ b/executorlib/__init__.py @@ -9,21 +9,19 @@ SlurmJobExecutor, ) -_hdf_lst: list = [] +__all__: list = [ + "FluxJobExecutor", + "FluxClusterExecutor", + "SingleNodeExecutor", + "SlurmJobExecutor", + "SlurmClusterExecutor", +] try: from executorlib.standalone.hdf import get_cache_data except ImportError: pass else: - _hdf_lst += [get_cache_data] + __all__ += [get_cache_data] __version__ = _get_versions()["version"] -__all__: list = [ - "FluxJobExecutor", - "FluxClusterExecutor", - "SingleNodeExecutor", - "SlurmJobExecutor", - "SlurmClusterExecutor", -] -__all__ += _hdf_lst From 05fdccad8cd77f2c37b11b6b10aab7fa680e2cbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Sat, 29 Mar 2025 11:14:42 +0100 Subject: [PATCH 07/10] ignore --- executorlib/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/executorlib/__init__.py b/executorlib/__init__.py index 01aa1078..255257f3 100644 --- a/executorlib/__init__.py +++ b/executorlib/__init__.py @@ -22,6 +22,6 @@ except ImportError: pass else: - __all__ += [get_cache_data] + __all__ += [get_cache_data] # noqa: PLE0605 __version__ = _get_versions()["version"] From 2ab024f17353b1deaf0e581dad0f5f7638632745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Sat, 29 Mar 2025 11:15:51 +0100 Subject: [PATCH 08/10] add strings --- executorlib/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/executorlib/__init__.py b/executorlib/__init__.py index 255257f3..ce5e4c4b 100644 --- a/executorlib/__init__.py +++ b/executorlib/__init__.py @@ -22,6 +22,6 @@ except ImportError: pass else: - __all__ += [get_cache_data] # noqa: PLE0605 + __all__ += ["get_cache_data"] __version__ = _get_versions()["version"] From 85f33bcb6c3c9bf14946fa0ab47299c2e5f135b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Sat, 29 Mar 2025 11:16:21 +0100 Subject: [PATCH 09/10] extend type hint --- executorlib/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/executorlib/__init__.py b/executorlib/__init__.py index ce5e4c4b..127fd879 100644 --- a/executorlib/__init__.py +++ b/executorlib/__init__.py @@ -9,7 +9,7 @@ SlurmJobExecutor, ) -__all__: list = [ +__all__: list[str] = [ "FluxJobExecutor", "FluxClusterExecutor", "SingleNodeExecutor", From 4e216e64e079b412b63e4705e8b13a3100f53e84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jan=C3=9Fen?= Date: Sat, 29 Mar 2025 11:24:17 +0100 Subject: [PATCH 10/10] fix test to increase coverage --- tests/test_singlenodeexecutor_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_singlenodeexecutor_cache.py b/tests/test_singlenodeexecutor_cache.py index 71bb49b8..bded9cab 100644 --- a/tests/test_singlenodeexecutor_cache.py +++ b/tests/test_singlenodeexecutor_cache.py @@ -6,7 +6,7 @@ from 
executorlib.standalone.serialize import cloudpickle_register try: - from executorlib.standalone.hdf import get_cache_data + from executorlib import get_cache_data skip_h5py_test = False except ImportError: