Skip to content

Commit

Permalink
Merge pull request #37 from rabernat/prefect-executor
Browse files Browse the repository at this point in the history
Add Prefect executor
  • Loading branch information
TomAugspurger committed Aug 5, 2020
2 parents 685a8b2 + 2af8fd0 commit 48b0eda
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 3 deletions.
5 changes: 5 additions & 0 deletions rechunker/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import zarr
import dask
import dask.array

from rechunker.algorithm import rechunking_plan
from rechunker.types import ArrayProxy, CopySpec, Executor
Expand Down Expand Up @@ -156,6 +157,10 @@ def _get_executor(name: str) -> Executor:
from rechunker.executors.beam import BeamExecutor

return BeamExecutor()
elif name.lower() == "prefect":
from rechunker.executors.prefect import PrefectExecutor

return PrefectExecutor()
elif name.lower() == "python":
from rechunker.executors.python import PythonExecutor

Expand Down
53 changes: 53 additions & 0 deletions rechunker/executors/prefect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from typing import Iterable, Tuple

import prefect

from rechunker.executors.util import chunk_keys, split_into_direct_copies
from rechunker.types import (
CopySpec,
Executor,
ReadableArray,
WriteableArray,
)


class PrefectExecutor(Executor[prefect.Flow]):
    """Prefect-backed execution engine for rechunking plans.

    Works with any array pair that supports ``__getitem__`` /
    ``__setitem__`` with tuples of ``slice`` objects; the arrays must
    also be picklable so Prefect can serialize them.

    The plan produced by this executor is a ``prefect.Flow``.
    """

    def prepare_plan(self, specs: Iterable[CopySpec]) -> prefect.Flow:
        """Turn the copy specifications into a runnable Prefect flow."""
        flow = _make_flow(specs)
        return flow

    def execute_plan(self, plan: prefect.Flow, **kwargs):
        """Run the flow; extra keyword arguments go to ``Flow.run``."""
        return plan.run(**kwargs)


@prefect.task
def _copy_chunk(
    source: ReadableArray, target: WriteableArray, key: Tuple[int, ...]
) -> None:
    """Prefect task that copies the single chunk selected by ``key``
    from ``source`` into ``target``.

    NOTE(review): ``key`` is annotated ``Tuple[int, ...]`` but comes
    from ``chunk_keys`` and is used directly as a subscript on both
    arrays — presumably a tuple of ``slice`` objects; verify against
    ``rechunker.executors.util.chunk_keys``.
    """
    target[key] = source[key]


def _make_flow(specs: Iterable[CopySpec]) -> prefect.Flow:
    """Build a ``prefect.Flow`` performing every copy described by ``specs``.

    Each array in the group contributes one or more copy stages; within a
    stage the per-chunk copies are fanned out with ``Task.map``, and
    consecutive stages of the same array are chained so a stage only
    starts after the previous one has finished.
    """
    flow = prefect.Flow("Rechunker")
    with flow:
        # One pass per array in the group.
        for spec in specs:
            stage_tasks = []
            # One mapped task per copy stage of this array.
            for source, target, chunks in split_into_direct_copies(spec):
                stage_keys = list(chunk_keys(source.shape, chunks))
                mapped = _copy_chunk.map(
                    prefect.unmapped(source), prefect.unmapped(target), stage_keys
                )
                stage_tasks.append(mapped)
            # Chain stages: each one waits on its predecessor.
            for upstream, downstream in zip(stage_tasks, stage_tasks[1:]):
                downstream.set_upstream(upstream)
    return flow
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
]

extras_require = {
"complete": install_requires + ["apache_beam", "pyyaml", "fsspec"],
"complete": install_requires + ["apache_beam", "pyyaml", "fsspec", "prefect"],
"docs": doc_requires,
}
extras_require["dev"] = extras_require["complete"] + [
Expand Down
5 changes: 3 additions & 2 deletions tests/test_rechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def requires_import(module, *args):


requires_beam = partial(requires_import, "apache_beam")
requires_prefect = partial(requires_import, "prefect")


@pytest.fixture(params=[(8000, 200), {"y": 8000, "x": 200}])
Expand All @@ -37,7 +38,7 @@ def target_chunks(request):
@pytest.mark.parametrize("dtype", ["f4"])
@pytest.mark.parametrize("max_mem", [25600000, "25.6MB"])
@pytest.mark.parametrize(
"executor", ["dask", "python", requires_beam("beam")],
"executor", ["dask", "python", requires_beam("beam"), requires_prefect("prefect")],
)
@pytest.mark.parametrize(
"dims,target_chunks",
Expand Down Expand Up @@ -130,7 +131,7 @@ def test_rechunk_dask_array(


@pytest.mark.parametrize(
"executor", ["dask", "python", requires_beam("beam")],
"executor", ["dask", "python", requires_beam("beam"), requires_prefect("prefect")],
)
def test_rechunk_group(tmp_path, executor):
store_source = str(tmp_path / "source.zarr")
Expand Down

0 comments on commit 48b0eda

Please sign in to comment.