diff --git a/.gitignore b/.gitignore index cbb95c36..ec80016e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,19 @@ +# Generated by Cargo +# will have compiled files and executables +debug/ +target/ + + +# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries +# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html +Cargo.lock + +# These are backup files generated by rustfmt +**/*.rs.bk + +# Skip wheel +**/pynumaflow_lite-*.whl + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/packages/pynumaflow-lite/Cargo.toml b/packages/pynumaflow-lite/Cargo.toml new file mode 100644 index 00000000..032f8b68 --- /dev/null +++ b/packages/pynumaflow-lite/Cargo.toml @@ -0,0 +1,50 @@ +[package] +name = "pynumaflow-lite" +version = "0.1.0" +edition = "2024" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +name = "pynumaflow_lite" +crate-type = ["cdylib", "rlib"] + +[dependencies] +numaflow = { git = "https://github.com/numaproj/numaflow-rs.git", branch = "export-accum-items" } +pyo3 = { version = "0.26.0", features = ["chrono", "experimental-inspect"] } +tokio = "1.47.1" +tonic = "0.14.2" +tokio-stream = "0.1.17" +tower = "0.5.2" +hyper-util = "0.1.16" +prost-types = "0.14.1" +chrono = "0.4.42" +pyo3-async-runtimes = { version = "0.26.0", features = ["tokio-runtime"] } +futures-core = "0.3.31" +pin-project = "1.1.10" + + +[[bin]] +name = "test_map" +path = "tests/bin/map.rs" + +[[bin]] +name = "test_batchmap" +path = "tests/bin/batchmap.rs" + + +[[bin]] +name = "test_mapstream" +path = "tests/bin/mapstream.rs" + + +[[bin]] +name = "test_reduce" +path = "tests/bin/reduce.rs" + +[[bin]] +name = "test_session_reduce" +path = "tests/bin/session_reduce.rs" + +[[bin]] +name = "test_accumulator" +path = "tests/bin/accumulator.rs" diff --git a/packages/pynumaflow-lite/Makefile b/packages/pynumaflow-lite/Makefile new file mode 100644 index 00000000..ae7d02a0 --- /dev/null +++ b/packages/pynumaflow-lite/Makefile @@ -0,0 +1,44 @@ +.PHONY: help build develop stubgen clean test test-rust + +# Default Python package/module name +MODULE ?= pynumaflow_lite + +# Optional args to pass through to cargo test, e.g., ARGS="--lib pyiterables::tests::py_async_iter_stream_yields_incrementally" +ARGS ?= + + +help: + @echo "Targets:" + @echo " build - cargo build the Rust library" + @echo " develop - maturin develop (install in current Python env)" + @echo " test - run end-to-end pytest (depends on develop)" + @echo " test-rust - cargo test with PYTHONHOME set; pass args via ARGS=\"...\"" + + @echo " clean - cargo clean" + +build: + cargo build + +# Installs the extension into the active Python environment. +# You can then discover the installed .so path to run stubgen against it if preferred. +develop: + maturin develop + +# Run pytest end-to-end tests. Assumes a working Python env with pytest installed. +# Example: (cd pynumaflow-lite && make test) +# Note: we do not install pytest here to avoid mutating global envs. 
+test: develop
+	pytest -v
+
+
+# Run cargo tests with PYTHONHOME pointed at base_prefix so embedded CPython finds stdlib
+# Usage examples:
+#   make test-rust ARGS="--lib"
+#   make test-rust ARGS="--lib pyiterables::tests::py_async_iter_stream_yields_incrementally"
+test-rust:
+	@export PYTHONHOME="$(shell python -c 'import sys; print(sys.base_prefix)')" && \
+	cargo test $(ARGS)
+
+
+clean:
+	cargo clean
diff --git a/packages/pynumaflow-lite/README.md b/packages/pynumaflow-lite/README.md
new file mode 100644
index 00000000..0af3fcd7
--- /dev/null
+++ b/packages/pynumaflow-lite/README.md
@@ -0,0 +1,37 @@
+## Development Setup
+
+```bash
+# create a new venv (uv creates it at .venv by default)
+uv venv
+
+# activate the venv
+source .venv/bin/activate
+
+uv pip install maturin
+
+# install dependencies
+uv sync
+```
+
+### Testing
+
+```bash
+make test
+```
+
+### HOWTO create .whl
+
+Go to the `pynumaflow-lite` (top-level) directory and run the command below.
+
+```bash
+docker run --rm -v $(pwd):/io ghcr.io/pyo3/maturin build -i python3.11 --release
+```
+
+This creates the `wheel` file in the `target/wheels/` directory. Copy it to the directory
+containing the Python code that references this library.
+
+e.g.,
+
+```bash
+cp target/wheels/pynumaflow_lite-0.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl manifests/simple-async-map/
+```
\ No newline at end of file
diff --git a/packages/pynumaflow-lite/manifests/accumulator/Dockerfile b/packages/pynumaflow-lite/manifests/accumulator/Dockerfile
new file mode 100644
index 00000000..445f85dc
--- /dev/null
+++ b/packages/pynumaflow-lite/manifests/accumulator/Dockerfile
@@ -0,0 +1,39 @@
+FROM python:3.11-slim-bullseye AS builder
+
+ENV PYTHONFAULTHANDLER=1 \
+    PYTHONUNBUFFERED=1 \
+    PYTHONHASHSEED=random \
+    PIP_NO_CACHE_DIR=on \
+    PIP_DISABLE_PIP_VERSION_CHECK=on \
+    PIP_DEFAULT_TIMEOUT=100 \
+    POETRY_HOME="/opt/poetry" \
+    POETRY_VIRTUALENVS_IN_PROJECT=true \
+    POETRY_NO_INTERACTION=1 \
+    PYSETUP_PATH="/opt/pysetup"
+
+ENV PATH="$POETRY_HOME/bin:$PATH"
+
+RUN apt-get update \
+    && apt-get install --no-install-recommends -y \
+    curl \
+    wget \
+    # deps for building python deps
+    build-essential \
+    && apt-get install -y git \
+    && apt-get clean && rm -rf /var/lib/apt/lists/* \
+    && curl -sSL https://install.python-poetry.org | python3 -
+
+FROM builder AS udf
+
+WORKDIR $PYSETUP_PATH
+COPY ./ ./
+
+# NOTE: place the built wheel in this directory before building the image
+RUN pip install $PYSETUP_PATH/pynumaflow_lite-0.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
+
+RUN poetry lock
+RUN poetry install --no-cache --no-root && \
+    rm -rf ~/.cache/pypoetry/
+
+CMD ["python", "accumulator_stream_sorter.py"]
+
diff --git a/packages/pynumaflow-lite/manifests/accumulator/README.md b/packages/pynumaflow-lite/manifests/accumulator/README.md
new file mode 100644
index 00000000..455c5114
--- /dev/null
+++ b/packages/pynumaflow-lite/manifests/accumulator/README.md
@@ -0,0 +1,37 @@
+To create the `wheel` file, refer to the [root README](../../README.md).
+
+## HOWTO build Image
+
+```bash
+docker build . -t quay.io/numaio/numaflow/pynumaflow-lite-accumulator-stream-sorter:v1 --load
+```
+
+Load it now to `k3d` or `minikube`.
+
+### `k3d`
+
+```bash
+k3d image import quay.io/numaio/numaflow/pynumaflow-lite-accumulator-stream-sorter:v1
+```
+
+### Minikube
+
+```bash
+minikube image load quay.io/numaio/numaflow/pynumaflow-lite-accumulator-stream-sorter:v1
+```
+
+#### Delete image from minikube
+
+`minikube` doesn't like the same image being pushed again; delete and re-load if you are using
+the same tag.
+
+```bash
+minikube image rm quay.io/numaio/numaflow/pynumaflow-lite-accumulator-stream-sorter:v1
+```
+
+## Run the pipeline
+
+```bash
+kubectl apply -f pipeline.yaml
+```
+
diff --git a/packages/pynumaflow-lite/manifests/accumulator/accumulator_stream_sorter.py b/packages/pynumaflow-lite/manifests/accumulator/accumulator_stream_sorter.py
new file mode 100644
index 00000000..85d25ae0
--- /dev/null
+++ b/packages/pynumaflow-lite/manifests/accumulator/accumulator_stream_sorter.py
@@ -0,0 +1,133 @@
+"""
+Stream sorter accumulator example.
+
+This accumulator buffers incoming data and sorts it by event time,
+flushing sorted data when the watermark advances.
+"""
+import asyncio
+from datetime import datetime
+from typing import AsyncIterator
+
+from pynumaflow_lite.accumulator import Datum, Message, AccumulatorAsyncServer, Accumulator
+
+
+class StreamSorter(Accumulator):
+    """
+    A stream sorter that buffers and sorts data by event time,
+    flushing when watermark advances.
+    """
+
+    def __init__(self):
+        from datetime import timezone
+        # Initialize with a very old timestamp (timezone-aware)
+        self.latest_wm = datetime.fromtimestamp(-1, tz=timezone.utc)
+        self.sorted_buffer: list[Datum] = []
+        print("StreamSorter initialized")
+
+    async def handler(self, datums: AsyncIterator[Datum]) -> AsyncIterator[Message]:
+        """
+        Buffer and sort datums, yielding sorted messages when watermark advances.
+        """
+        print("Handler started, waiting for datums...")
+        datum_count = 0
+
+        async for datum in datums:
+            datum_count += 1
+            print(f"Received datum #{datum_count}: event_time={datum.event_time}, "
+                  f"watermark={datum.watermark}, value={datum.value}")
+
+            # If watermark has moved forward
+            if datum.watermark and datum.watermark > self.latest_wm:
+                old_wm = self.latest_wm
+                self.latest_wm = datum.watermark
+                print(f"Watermark advanced from {old_wm} to {self.latest_wm}")
+
+                # Flush buffer
+                flushed = 0
+                async for msg in self.flush_buffer():
+                    flushed += 1
+                    yield msg
+
+                if flushed > 0:
+                    print(f"Flushed {flushed} messages from buffer")
+
+            # Insert into sorted buffer
+            self.insert_sorted(datum)
+            print(f"Buffer size: {len(self.sorted_buffer)}")
+
+        print(f"Handler finished. Total datums processed: {datum_count}")
+        print(f"Remaining in buffer: {len(self.sorted_buffer)}")
+
+        # Flush any remaining items in the buffer at the end
+        if self.sorted_buffer:
+            print("Flushing remaining buffer at end...")
+            for datum in self.sorted_buffer:
+                print(f"  Flushing: event_time={datum.event_time}, value={datum.value}")
+                # Use Message.from_datum to preserve all metadata
+                yield Message.from_datum(datum)
+            self.sorted_buffer = []
+
+    def insert_sorted(self, datum: Datum):
+        """Binary insert to keep sorted buffer in order by event_time."""
+        left, right = 0, len(self.sorted_buffer)
+        while left < right:
+            mid = (left + right) // 2
+            if self.sorted_buffer[mid].event_time > datum.event_time:
+                right = mid
+            else:
+                left = mid + 1
+        self.sorted_buffer.insert(left, datum)
+
+    async def flush_buffer(self) -> AsyncIterator[Message]:
+        """Flush all items from buffer that are before or at the watermark."""
+        i = 0
+        for datum in self.sorted_buffer:
+            if datum.event_time > self.latest_wm:
+                break
+            print(f"  Flushing: event_time={datum.event_time}, value={datum.value}")
+            # Use Message.from_datum to preserve all metadata (id, headers, event_time, watermark)
+            yield Message.from_datum(datum)
+            i += 1
+
+        # Remove flushed items
+        self.sorted_buffer = self.sorted_buffer[i:]
+
+
+async def main():
+    """
+    Start the accumulator server.
+    """
+    import signal
+
+    server = AccumulatorAsyncServer()
+
+    # Set up signal handlers for graceful shutdown
+    loop = asyncio.get_running_loop()
+    try:
+        loop.add_signal_handler(signal.SIGINT, lambda: server.stop())
+        loop.add_signal_handler(signal.SIGTERM, lambda: server.stop())
+    except (NotImplementedError, RuntimeError):
+        pass
+
+    try:
+        print("Starting Stream Sorter Accumulator Server...")
+        await server.start(StreamSorter)
+        print("Shutting down gracefully...")
+    except asyncio.CancelledError:
+        try:
+            server.stop()
+        except Exception:
+            pass
+        return
+
+
+# Optional: ensure default signal handlers are in place so asyncio.run can handle them cleanly.
+import signal
+signal.signal(signal.SIGINT, signal.default_int_handler)
+try:
+    signal.signal(signal.SIGTERM, signal.SIG_DFL)
+except AttributeError:
+    pass
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/packages/pynumaflow-lite/manifests/accumulator/pipeline.yaml b/packages/pynumaflow-lite/manifests/accumulator/pipeline.yaml
new file mode 100644
index 00000000..f4ad3f53
--- /dev/null
+++ b/packages/pynumaflow-lite/manifests/accumulator/pipeline.yaml
@@ -0,0 +1,39 @@
+apiVersion: numaflow.numaproj.io/v1alpha1
+kind: Pipeline
+metadata:
+  name: accumulator-stream-sorter
+spec:
+  watermark:
+    idleSource:
+      threshold: 5s
+      incrementBy: 3s
+      stepInterval: 2s
+  vertices:
+    - name: in
+      source:
+        # A self data generating source
+        http: { }
+    - name: stream-sorter
+      udf:
+        container:
+          # buffer and sort the stream by event time
+          image: quay.io/numaio/numaflow/pynumaflow-lite-accumulator-stream-sorter:v1
+        groupBy:
+          window:
+            accumulator:
+              timeout: 10s
+          keyed: true
+          storage:
+            persistentVolumeClaim:
+              volumeSize: 1Gi
+              accessMode: ReadWriteOnce
+    - name: out
+      sink:
+        # A simple log printing sink
+        log: { }
+  edges:
+    - from: in
+      to: stream-sorter
+    - from: stream-sorter
+      to: out
+
diff --git a/packages/pynumaflow-lite/manifests/accumulator/pyproject.toml b/packages/pynumaflow-lite/manifests/accumulator/pyproject.toml
new file mode 100644
index 00000000..78ffa64c
--- /dev/null
+++ b/packages/pynumaflow-lite/manifests/accumulator/pyproject.toml
@@ -0,0 +1,16 @@
+[project]
+name = "stream-sorter"
+version = "0.1.0"
+description = "Stream sorter accumulator example using pynumaflow-lite"
+authors = [
+    { name = "Vigith Maurice", email = "vigith@gmail.com" }
+]
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+]
+
+[build-system]
+requires = ["poetry-core>=2.0.0,<3.0.0"]
+build-backend = "poetry.core.masonry.api"
+
diff --git a/packages/pynumaflow-lite/manifests/batchmap/Dockerfile b/packages/pynumaflow-lite/manifests/batchmap/Dockerfile
new file mode 100644
index 00000000..af12e945
--- /dev/null
+++ b/packages/pynumaflow-lite/manifests/batchmap/Dockerfile
@@ -0,0 +1,37 @@
+FROM python:3.11-slim-bullseye AS builder
+
+ENV PYTHONFAULTHANDLER=1 \
+    PYTHONUNBUFFERED=1 \
+    PYTHONHASHSEED=random \
+    PIP_NO_CACHE_DIR=on \
+    PIP_DISABLE_PIP_VERSION_CHECK=on \
+    PIP_DEFAULT_TIMEOUT=100 \
+    POETRY_HOME="/opt/poetry" \
+    POETRY_VIRTUALENVS_IN_PROJECT=true \
+    POETRY_NO_INTERACTION=1 \
+    PYSETUP_PATH="/opt/pysetup"
+
+ENV PATH="$POETRY_HOME/bin:$PATH"
+
+RUN apt-get update \
+    && apt-get install --no-install-recommends -y \
+    curl \
+    wget \
+    # deps for building python deps
+    build-essential \
+    && apt-get install -y git \
+    && apt-get clean && rm -rf /var/lib/apt/lists/* \
+    && curl -sSL https://install.python-poetry.org | python3 -
+
+FROM builder AS udf
+
+WORKDIR $PYSETUP_PATH
+COPY ./ ./
+
+RUN pip install $PYSETUP_PATH/pynumaflow_lite-0.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
+
+RUN poetry lock
+RUN poetry install --no-cache --no-root && \
+    rm -rf ~/.cache/pypoetry/
+
+CMD ["python", "batchmap_cat.py"]
\ No newline at end of file
diff --git a/packages/pynumaflow-lite/manifests/batchmap/README.md b/packages/pynumaflow-lite/manifests/batchmap/README.md
new file mode 100644
index 00000000..44b84212
--- /dev/null
+++ b/packages/pynumaflow-lite/manifests/batchmap/README.md
@@ -0,0 +1,19 @@
+To create the `wheel` file, refer to the [root README](../../README.md).
+
+## HOWTO build Image
+
+```bash
+docker build . -t quay.io/numaio/numaflow/pynumaflow-lite-batchmap-cat:v2 --load
+```
+
+Load it now to `k3d`
+
+```bash
+k3d image import quay.io/numaio/numaflow/pynumaflow-lite-batchmap-cat:v2
+```
+
+## Run the pipeline
+
+```bash
+kubectl apply -f pipeline.yaml
+```
\ No newline at end of file
diff --git a/packages/pynumaflow-lite/manifests/batchmap/batchmap_cat.py b/packages/pynumaflow-lite/manifests/batchmap/batchmap_cat.py
new file mode 100644
index 00000000..11b2ee56
--- /dev/null
+++ b/packages/pynumaflow-lite/manifests/batchmap/batchmap_cat.py
@@ -0,0 +1,55 @@
+import asyncio
+import signal
+from collections.abc import AsyncIterable
+
+from pynumaflow_lite import batchmapper
+from pynumaflow_lite.batchmapper import Message
+
+
+class SimpleBatchCat(batchmapper.BatchMapper):
+    async def handler(self, batch: AsyncIterable[batchmapper.Datum]) -> batchmapper.BatchResponses:
+        responses = batchmapper.BatchResponses()
+        async for d in batch:
+            resp = batchmapper.BatchResponse(d.id)
+            if d.value == b"bad world":
+                # a dropped message still needs a response for this id
+                resp.append(Message.message_to_drop())
+            else:
+                resp.append(Message(d.value, d.keys))
+            responses.append(resp)
+        return responses
+
+
+# Optional: ensure default signal handlers are in place so asyncio.run can handle them cleanly.
+signal.signal(signal.SIGINT, signal.default_int_handler)
+try:
+    signal.signal(signal.SIGTERM, signal.SIG_DFL)
+except AttributeError:
+    pass
+
+
+async def start(f: callable):
+    server = batchmapper.BatchMapAsyncServer()
+
+    # Register loop-level signal handlers so we control shutdown and avoid asyncio.run
+    loop = asyncio.get_running_loop()
+    try:
+        loop.add_signal_handler(signal.SIGINT, lambda: server.stop())
+        loop.add_signal_handler(signal.SIGTERM, lambda: server.stop())
+    except (NotImplementedError, RuntimeError):
+        pass
+
+    try:
+        await server.start(f)
+        print("Shutting down gracefully...")
+    except asyncio.CancelledError:
+        try:
+            server.stop()
+        except Exception:
+            pass
+        return
+
+
+if __name__ == "__main__":
+    async_handler = SimpleBatchCat()
+    asyncio.run(start(async_handler))
diff --git a/packages/pynumaflow-lite/manifests/batchmap/pipeline.yaml b/packages/pynumaflow-lite/manifests/batchmap/pipeline.yaml
new file mode 100644
index 00000000..3f3372b7
--- /dev/null
+++ b/packages/pynumaflow-lite/manifests/batchmap/pipeline.yaml
@@ -0,0 +1,30 @@
+apiVersion: numaflow.numaproj.io/v1alpha1
+kind: Pipeline
+metadata:
+  name: batchmap-cat
+spec:
+  vertices:
+    - name: in
+      source:
+        # A self data generating source
+        generator:
+          rpu: 500
+          duration: 1s
+    - name: batchmap
+      partitions: 2
+      scale:
+        min: 1
+      udf:
+        container:
+          image: quay.io/numaio/numaflow/pynumaflow-lite-batchmap-cat:v2
+          imagePullPolicy: Never
+    - name: sink
+      scale:
+        min: 1
+      sink:
+        log: { }
+  edges:
+    - from: in
+      to: batchmap
+    - from: batchmap
+      to: sink
\ No newline at end of file
diff --git a/packages/pynumaflow-lite/manifests/batchmap/pyproject.toml b/packages/pynumaflow-lite/manifests/batchmap/pyproject.toml
new file mode 100644
index 00000000..de7ee701
--- /dev/null
+++ b/packages/pynumaflow-lite/manifests/batchmap/pyproject.toml
@@ -0,0 +1,16 @@
+[project]
+name = "batchmap-cat"
+version = "0.1.0"
+description = "Testing Python FFI"
+authors = [
+    { name = "Vigith Maurice", email = "vigith@gmail.com" }
+]
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+]
+
+
+[build-system]
+requires = ["poetry-core>=2.0.0,<3.0.0"]
+build-backend = "poetry.core.masonry.api"
diff --git a/packages/pynumaflow-lite/manifests/map/Dockerfile b/packages/pynumaflow-lite/manifests/map/Dockerfile
new file mode 100644
index 00000000..ef530774
--- /dev/null
+++ b/packages/pynumaflow-lite/manifests/map/Dockerfile
@@ -0,0 +1,37 @@
+FROM python:3.11-slim-bullseye AS builder
+
+ENV PYTHONFAULTHANDLER=1 \
+    PYTHONUNBUFFERED=1 \
+    PYTHONHASHSEED=random \
+    PIP_NO_CACHE_DIR=on \
+    PIP_DISABLE_PIP_VERSION_CHECK=on \
+    PIP_DEFAULT_TIMEOUT=100 \
+    POETRY_HOME="/opt/poetry" \
+    POETRY_VIRTUALENVS_IN_PROJECT=true \
+    POETRY_NO_INTERACTION=1 \
+    PYSETUP_PATH="/opt/pysetup"
+
+ENV PATH="$POETRY_HOME/bin:$PATH"
+
+RUN apt-get update \
+    && apt-get install --no-install-recommends -y \
+    curl \
+    wget \
+    # deps for building python deps
+    build-essential \
+    && apt-get install -y git \
+    && apt-get clean && rm -rf /var/lib/apt/lists/* \
+    && curl -sSL https://install.python-poetry.org | python3 -
+
+FROM builder AS udf
+
+WORKDIR $PYSETUP_PATH
+COPY ./ ./
+
+RUN pip install $PYSETUP_PATH/pynumaflow_lite-0.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
+
+RUN poetry lock
+RUN poetry install --no-cache --no-root && \
+    rm -rf ~/.cache/pypoetry/
+
+CMD ["python", "map_cat.py"]
\ No newline at end of file
diff --git a/packages/pynumaflow-lite/manifests/map/README.md b/packages/pynumaflow-lite/manifests/map/README.md
new file mode 100644
index 00000000..ecef703d
--- /dev/null
+++ b/packages/pynumaflow-lite/manifests/map/README.md
@@ -0,0 +1,19 @@
+To create the `wheel` file, refer to the [root README](../../README.md).
+
+## HOWTO build Image
+
+```bash
+docker build . -t quay.io/numaio/numaflow/pynumaflow-lite-map-cat:v2 --load
+```
+
+Load it now to `k3d`
+
+```bash
+k3d image import quay.io/numaio/numaflow/pynumaflow-lite-map-cat:v2
+```
+
+## Run the pipeline
+
+```bash
+kubectl apply -f pipeline.yaml
+```
\ No newline at end of file
diff --git a/packages/pynumaflow-lite/manifests/map/map_cat.py b/packages/pynumaflow-lite/manifests/map/map_cat.py
new file mode 100644
index 00000000..a45df476
--- /dev/null
+++ b/packages/pynumaflow-lite/manifests/map/map_cat.py
@@ -0,0 +1,59 @@
+import asyncio
+import signal
+from pynumaflow_lite import mapper
+
+
+class SimpleCat(mapper.Mapper):
+    async def handler(
+        self, keys: list[str], payload: mapper.Datum
+    ) -> mapper.Messages:
+
+        messages = mapper.Messages()
+
+        if payload.value == b"bad world":
+            messages.append(mapper.Message.message_to_drop())
+        else:
+            messages.append(mapper.Message(payload.value, keys))
+
+        return messages
+
+
+# Optional: ensure default signal handlers are in place so asyncio.run can handle them cleanly.
+signal.signal(signal.SIGINT, signal.default_int_handler)
+try:
+    signal.signal(signal.SIGTERM, signal.SIG_DFL)
+except AttributeError:
+    pass
+
+
+async def start(f: callable):
+    server = mapper.MapAsyncServer()
+
+    # Register loop-level signal handlers so we control shutdown and avoid asyncio.run
+    # converting it into KeyboardInterrupt/CancelledError traces.
+    loop = asyncio.get_running_loop()
+    loop.set_debug(True)
+    print("Registering signal handlers", loop)
+    try:
+        loop.add_signal_handler(signal.SIGINT, lambda: server.stop())
+        loop.add_signal_handler(signal.SIGTERM, lambda: server.stop())
+    except (NotImplementedError, RuntimeError):
+        print("Failed to register signal handlers")
+        # add_signal_handler may not be available on some platforms/contexts; fallback below.
+ pass + + try: + await server.start(f) + print("Shutting down gracefully...") + except asyncio.CancelledError: + # Fallback in case the task was cancelled by the runner + try: + server.stop() + except Exception: + pass + return + + +if __name__ == "__main__": + async_handler = SimpleCat() + asyncio.run(start(async_handler)) diff --git a/packages/pynumaflow-lite/manifests/map/pipeline.yaml b/packages/pynumaflow-lite/manifests/map/pipeline.yaml new file mode 100644 index 00000000..25311f2e --- /dev/null +++ b/packages/pynumaflow-lite/manifests/map/pipeline.yaml @@ -0,0 +1,30 @@ +apiVersion: numaflow.numaproj.io/v1alpha1 +kind: Pipeline +metadata: + name: map-cat +spec: + vertices: + - name: in + source: + # A self data generating source + generator: + rpu: 500 + duration: 1s + - name: map + partitions: 2 + scale: + min: 1 + udf: + container: + image: quay.io/numaio/numaflow/pynumaflow-lite-map-cat:v2 + imagePullPolicy: Never + - name: sink + scale: + min: 1 + sink: + log: { } + edges: + - from: in + to: map + - from: map + to: sink \ No newline at end of file diff --git a/packages/pynumaflow-lite/manifests/map/pyproject.toml b/packages/pynumaflow-lite/manifests/map/pyproject.toml new file mode 100644 index 00000000..cc1f0ad2 --- /dev/null +++ b/packages/pynumaflow-lite/manifests/map/pyproject.toml @@ -0,0 +1,16 @@ +[project] +name = "map-cat" +version = "0.1.0" +description = "Testing Python FFI" +authors = [ + { name = "Vigith Maurice", email = "vigith@gmail.com" } +] +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ +] + + +[build-system] +requires = ["poetry-core>=2.0.0,<3.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/packages/pynumaflow-lite/manifests/mapstream/Dockerfile b/packages/pynumaflow-lite/manifests/mapstream/Dockerfile new file mode 100644 index 00000000..3caa0111 --- /dev/null +++ b/packages/pynumaflow-lite/manifests/mapstream/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.11-slim-bullseye AS builder + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PIP_NO_CACHE_DIR=on \ + PIP_DISABLE_PIP_VERSION_CHECK=on \ + PIP_DEFAULT_TIMEOUT=100 \ + POETRY_HOME="/opt/poetry" \ + POETRY_VIRTUALENVS_IN_PROJECT=true \ + POETRY_NO_INTERACTION=1 \ + PYSETUP_PATH="/opt/pysetup" + + ENV PATH="$POETRY_HOME/bin:$PATH" + +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + curl \ + wget \ + # deps for building python deps + build-essential \ + && apt-get install -y git \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + && curl -sSL https://install.python-poetry.org | python3 - + +FROM builder AS udf + +WORKDIR $PYSETUP_PATH +COPY ./ ./ + +RUN pip install $PYSETUP_PATH/pynumaflow_lite-0.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl + +RUN poetry lock +RUN poetry install --no-cache --no-root && \ + rm -rf ~/.cache/pypoetry/ + +CMD ["python", "mapstream_cat.py"] + diff --git a/packages/pynumaflow-lite/manifests/mapstream/README.md b/packages/pynumaflow-lite/manifests/mapstream/README.md new file mode 100644 index 00000000..f52da098 --- /dev/null +++ b/packages/pynumaflow-lite/manifests/mapstream/README.md @@ -0,0 +1,37 @@ +To create the `wheel` file, refer [root](../../README.md) + +## HOWTO build Image + +```bash +docker build . -t quay.io/numaio/numaflow/pynumaflow-lite-mapstream-cat:v2 --load +``` + +Load it now to `k3d` or `minikube`. 
+ +### `k3d` + +```bash +k3d image import quay.io/numaio/numaflow/pynumaflow-lite-mapstream-cat:v2 +``` + +### Minikube + +```bash +minikube image load quay.io/numaio/numaflow/pynumaflow-lite-mapstream-cat:v2 +``` + +#### Delete image from minikube + +`minikube` doesn't like pushing the same image over, delete and load if you are using +the same tag. + +```bash +minikube image rm quay.io/numaio/numaflow/pynumaflow-lite-mapstream-cat:v2 +``` + +## Run the pipeline + +```bash +kubectl apply -f pipeline.yaml +``` + diff --git a/packages/pynumaflow-lite/manifests/mapstream/mapstream_cat.py b/packages/pynumaflow-lite/manifests/mapstream/mapstream_cat.py new file mode 100644 index 00000000..888bb74d --- /dev/null +++ b/packages/pynumaflow-lite/manifests/mapstream/mapstream_cat.py @@ -0,0 +1,53 @@ +import asyncio +import signal +from collections.abc import AsyncIterator + +from pynumaflow_lite import mapstreamer +from pynumaflow_lite.mapstreamer import Message + + +class SimpleStreamCat(mapstreamer.MapStreamer): + async def handler(self, keys: list[str], datum: mapstreamer.Datum) -> AsyncIterator[Message]: + parts = datum.value.decode("utf-8").split(",") + if not parts: + yield Message.to_drop() + return + for s in parts: + yield Message(s.encode(), keys) + + +# Optional: ensure default signal handlers are in place so asyncio.run can handle them cleanly. +signal.signal(signal.SIGINT, signal.default_int_handler) +try: + signal.signal(signal.SIGTERM, signal.SIG_DFL) +except AttributeError: + pass + + +async def start(f: callable): + # Use default socket/info file locations; no explicit sock file passed + server = mapstreamer.MapStreamAsyncServer() + + # Register loop-level signal handlers so we control shutdown and avoid asyncio.run noise. + loop = asyncio.get_running_loop() + try: + loop.add_signal_handler(signal.SIGINT, lambda: server.stop()) + loop.add_signal_handler(signal.SIGTERM, lambda: server.stop()) + except (NotImplementedError, RuntimeError): + pass + + try: + await server.start(f) + print("Shutting down gracefully...") + except asyncio.CancelledError: + try: + server.stop() + except Exception: + pass + return + + +if __name__ == "__main__": + async_handler = SimpleStreamCat() + asyncio.run(start(async_handler)) + diff --git a/packages/pynumaflow-lite/manifests/mapstream/pipeline.yaml b/packages/pynumaflow-lite/manifests/mapstream/pipeline.yaml new file mode 100644 index 00000000..5e90375b --- /dev/null +++ b/packages/pynumaflow-lite/manifests/mapstream/pipeline.yaml @@ -0,0 +1,31 @@ +apiVersion: numaflow.numaproj.io/v1alpha1 +kind: Pipeline +metadata: + name: mapstream-cat +spec: + vertices: + - name: in + source: + # A self data generating source + generator: + rpu: 500 + duration: 1s + - name: mapstream + partitions: 2 + scale: + min: 1 + udf: + container: + image: quay.io/numaio/numaflow/pynumaflow-lite-mapstream-cat:v2 + imagePullPolicy: Never + - name: sink + scale: + min: 1 + sink: + log: { } + edges: + - from: in + to: mapstream + - from: mapstream + to: sink + diff --git a/packages/pynumaflow-lite/manifests/mapstream/pyproject.toml b/packages/pynumaflow-lite/manifests/mapstream/pyproject.toml new file mode 100644 index 00000000..73b6aba5 --- /dev/null +++ b/packages/pynumaflow-lite/manifests/mapstream/pyproject.toml @@ -0,0 +1,16 @@ +[project] +name = "mapstream-cat" +version = "0.1.0" +description = "Testing Python FFI" +authors = [ + { name = "Vigith Maurice", email = "vigith@gmail.com" } +] +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ +] + + 
+[build-system] +requires = ["poetry-core>=2.0.0,<3.0.0"] +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git a/packages/pynumaflow-lite/manifests/reduce/Dockerfile b/packages/pynumaflow-lite/manifests/reduce/Dockerfile new file mode 100644 index 00000000..fa1417eb --- /dev/null +++ b/packages/pynumaflow-lite/manifests/reduce/Dockerfile @@ -0,0 +1,39 @@ +FROM python:3.11-slim-bullseye AS builder + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PIP_NO_CACHE_DIR=on \ + PIP_DISABLE_PIP_VERSION_CHECK=on \ + PIP_DEFAULT_TIMEOUT=100 \ + POETRY_HOME="/opt/poetry" \ + POETRY_VIRTUALENVS_IN_PROJECT=true \ + POETRY_NO_INTERACTION=1 \ + PYSETUP_PATH="/opt/pysetup" + +ENV PATH="$POETRY_HOME/bin:$PATH" + +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + curl \ + wget \ + # deps for building python deps + build-essential \ + && apt-get install -y git \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + && curl -sSL https://install.python-poetry.org | python3 - + +FROM builder AS udf + +WORKDIR $PYSETUP_PATH +COPY ./ ./ + +# NOTE: place the built wheel in this directory before building the image +RUN pip install $PYSETUP_PATH/pynumaflow_lite-0.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl + +RUN poetry lock +RUN poetry install --no-cache --no-root && \ + rm -rf ~/.cache/pypoetry/ + +CMD ["python", "reduce_counter_class.py"] + diff --git a/packages/pynumaflow-lite/manifests/reduce/README.md b/packages/pynumaflow-lite/manifests/reduce/README.md new file mode 100644 index 00000000..767e1468 --- /dev/null +++ b/packages/pynumaflow-lite/manifests/reduce/README.md @@ -0,0 +1,37 @@ +To create the `wheel` file, refer [root](../../README.md) + +## HOWTO build Image + +```bash +docker build . -t quay.io/numaio/numaflow/pynumaflow-lite-reduce-counter:v1 --load +``` + +Load it now to `k3d` + +### `k3d` + +```bash +k3d image import quay.io/numaio/numaflow/pynumaflow-lite-reduce-counter:v1 +``` + +### Minikube + +```bash +minikube image load quay.io/numaio/numaflow/pynumaflow-lite-reduce-counter:v1 +``` + +#### Delete image from minikube + +`minikube` doesn't like pushing the same image over, delete and load if you are using +the same tag. 
+ +```bash +minikube image rm quay.io/numaio/numaflow/pynumaflow-lite-reduce-counter:v1 +``` + +## Run the pipeline + +```bash +kubectl apply -f pipeline.yaml +``` + diff --git a/packages/pynumaflow-lite/manifests/reduce/pipeline.yaml b/packages/pynumaflow-lite/manifests/reduce/pipeline.yaml new file mode 100644 index 00000000..1fd22fc9 --- /dev/null +++ b/packages/pynumaflow-lite/manifests/reduce/pipeline.yaml @@ -0,0 +1,36 @@ +apiVersion: numaflow.numaproj.io/v1alpha1 +kind: Pipeline +metadata: + name: reduce-counter +spec: + vertices: + - name: in + source: + # A self data generating source + generator: + rpu: 500 + duration: 1s + - name: reduce + partitions: 1 # keyed: false + udf: + container: + image: quay.io/numaio/numaflow/pynumaflow-lite-reduce-counter:v1 + imagePullPolicy: Never + groupBy: + window: + fixed: + length: 10s + keyed: false + storage: + emptyDir: { } + - name: sink + scale: + min: 1 + sink: + log: { } + edges: + - from: in + to: reduce + - from: reduce + to: sink + diff --git a/packages/pynumaflow-lite/manifests/reduce/pyproject.toml b/packages/pynumaflow-lite/manifests/reduce/pyproject.toml new file mode 100644 index 00000000..e36b2026 --- /dev/null +++ b/packages/pynumaflow-lite/manifests/reduce/pyproject.toml @@ -0,0 +1,16 @@ +[project] +name = "reduce-counter" +version = "0.1.0" +description = "Reduce counter example using pynumaflow-lite" +authors = [ + { name = "Vigith Maurice", email = "vigith@gmail.com" } +] +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ +] + +[build-system] +requires = ["poetry-core>=2.0.0,<3.0.0"] +build-backend = "poetry.core.masonry.api" + diff --git a/packages/pynumaflow-lite/manifests/reduce/reduce_counter_class.py b/packages/pynumaflow-lite/manifests/reduce/reduce_counter_class.py new file mode 100644 index 00000000..a10fb346 --- /dev/null +++ b/packages/pynumaflow-lite/manifests/reduce/reduce_counter_class.py @@ -0,0 +1,60 @@ +import asyncio +import signal +from collections.abc import AsyncIterable + +from pynumaflow_lite import reducer + + +class ReduceCounter(reducer.Reducer): + def __init__(self, initial: int = 0) -> None: + self.counter = initial + + async def handler( + self, keys: list[str], datums: AsyncIterable[reducer.Datum], md: reducer.Metadata + ) -> reducer.Messages: + iw = md.interval_window + self.counter = 0 + async for _ in datums: + self.counter += 1 + msg = ( + f"counter:{self.counter} interval_window_start:{iw.start} interval_window_end:{iw.end}" + ).encode() + out = reducer.Messages() + out.append(reducer.Message(msg, keys)) + return out + + +# Optional: ensure default signal handlers are in place so asyncio.run can handle them cleanly. 
+signal.signal(signal.SIGINT, signal.default_int_handler) +try: + signal.signal(signal.SIGTERM, signal.SIG_DFL) +except AttributeError: + pass + + +async def start(creator: type, init_args: tuple): + sock_file = "/var/run/numaflow/reduce.sock" + server_info_file = "/var/run/numaflow/reducer-server-info" + server = reducer.ReduceAsyncServer(sock_file, server_info_file) + + loop = asyncio.get_running_loop() + try: + loop.add_signal_handler(signal.SIGINT, lambda: server.stop()) + loop.add_signal_handler(signal.SIGTERM, lambda: server.stop()) + except (NotImplementedError, RuntimeError): + pass + + try: + await server.start(creator, init_args) + print("Shutting down gracefully...") + except asyncio.CancelledError: + try: + server.stop() + except Exception: + pass + return + + +if __name__ == "__main__": + asyncio.run(start(ReduceCounter, (0,))) + diff --git a/packages/pynumaflow-lite/manifests/session_reduce/Dockerfile b/packages/pynumaflow-lite/manifests/session_reduce/Dockerfile new file mode 100644 index 00000000..a5f6bdda --- /dev/null +++ b/packages/pynumaflow-lite/manifests/session_reduce/Dockerfile @@ -0,0 +1,39 @@ +FROM python:3.11-slim-bullseye AS builder + +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PIP_NO_CACHE_DIR=on \ + PIP_DISABLE_PIP_VERSION_CHECK=on \ + PIP_DEFAULT_TIMEOUT=100 \ + POETRY_HOME="/opt/poetry" \ + POETRY_VIRTUALENVS_IN_PROJECT=true \ + POETRY_NO_INTERACTION=1 \ + PYSETUP_PATH="/opt/pysetup" + +ENV PATH="$POETRY_HOME/bin:$PATH" + +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + curl \ + wget \ + # deps for building python deps + build-essential \ + && apt-get install -y git \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ + && curl -sSL https://install.python-poetry.org | python3 - + +FROM builder AS udf + +WORKDIR $PYSETUP_PATH +COPY ./ ./ + +# NOTE: place the built wheel in this directory before building the image +RUN pip install $PYSETUP_PATH/pynumaflow_lite-0.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl + +RUN poetry lock +RUN poetry install --no-cache --no-root && \ + rm -rf ~/.cache/pypoetry/ + +CMD ["python", "session_reduce_counter_class.py"] + diff --git a/packages/pynumaflow-lite/manifests/session_reduce/README.md b/packages/pynumaflow-lite/manifests/session_reduce/README.md new file mode 100644 index 00000000..61b174c7 --- /dev/null +++ b/packages/pynumaflow-lite/manifests/session_reduce/README.md @@ -0,0 +1,37 @@ +To create the `wheel` file, refer [root](../../README.md) + +## HOWTO build Image + +```bash +docker build . -t quay.io/numaio/numaflow/pynumaflow-lite-session-reduce-counter:v1 --load +``` + +Load it now to `k3d` + +### `k3d` + +```bash +k3d image import quay.io/numaio/numaflow/pynumaflow-lite-session-reduce-counter:v1 +``` + +### Minikube + +```bash +minikube image load quay.io/numaio/numaflow/pynumaflow-lite-session-reduce-counter:v1 +``` + +#### Delete image from minikube + +`minikube` doesn't like pushing the same image over, delete and load if you are using +the same tag. 
+ +```bash +minikube image rm quay.io/numaio/numaflow/pynumaflow-lite-session-reduce-counter:v1 +``` + +## Run the pipeline + +```bash +kubectl apply -f pipeline.yaml +``` + diff --git a/packages/pynumaflow-lite/manifests/session_reduce/pipeline.yaml b/packages/pynumaflow-lite/manifests/session_reduce/pipeline.yaml new file mode 100644 index 00000000..a7e25ede --- /dev/null +++ b/packages/pynumaflow-lite/manifests/session_reduce/pipeline.yaml @@ -0,0 +1,39 @@ +apiVersion: numaflow.numaproj.io/v1alpha1 +kind: Pipeline +metadata: + name: simple-session-reduce +spec: + watermark: + idleSource: + threshold: 5s + incrementBy: 3s + stepInterval: 2s + vertices: + - name: in + source: + # A self data generating source + http: { } + - name: session-counter + udf: + container: + # count element in sessions + image: quay.io/numaio/numaflow/pynumaflow-lite-session-reduce-counter:v1 + groupBy: + window: + session: + timeout: 30s + keyed: true + storage: + persistentVolumeClaim: + volumeSize: 1Gi + accessMode: ReadWriteOnce + - name: out + sink: + # A simple log printing sink + log: { } + edges: + - from: in + to: session-counter + - from: session-counter + to: out + diff --git a/packages/pynumaflow-lite/manifests/session_reduce/pyproject.toml b/packages/pynumaflow-lite/manifests/session_reduce/pyproject.toml new file mode 100644 index 00000000..9f034274 --- /dev/null +++ b/packages/pynumaflow-lite/manifests/session_reduce/pyproject.toml @@ -0,0 +1,16 @@ +[project] +name = "session-reduce-counter" +version = "0.1.0" +description = "Session reduce counter example using pynumaflow-lite" +authors = [ + { name = "Vigith Maurice", email = "vigith@gmail.com" } +] +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ +] + +[build-system] +requires = ["poetry-core>=2.0.0,<3.0.0"] +build-backend = "poetry.core.masonry.api" + diff --git a/packages/pynumaflow-lite/manifests/session_reduce/session_reduce_counter_class.py b/packages/pynumaflow-lite/manifests/session_reduce/session_reduce_counter_class.py new file mode 100644 index 00000000..d43f8c41 --- /dev/null +++ b/packages/pynumaflow-lite/manifests/session_reduce/session_reduce_counter_class.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +""" +Example session reduce handler that counts messages per session. + +This demonstrates: +- session_reduce: counts incoming messages and yields the count +- accumulator: returns current count as bytes +- merge_accumulator: merges counts from another session +""" + +import asyncio +import signal +from collections.abc import AsyncIterable, AsyncIterator + +from pynumaflow_lite import session_reducer + + +class SessionReduceCounter(session_reducer.SessionReducer): + """ + A session reducer that counts all messages in a session. + When sessions are merged, the counts are added together. + """ + + def __init__(self, initial: int = 0) -> None: + self.counter = initial + + async def session_reduce( + self, keys: list[str], datums: AsyncIterable[session_reducer.Datum] + ) -> AsyncIterator[session_reducer.Message]: + """ + Count all incoming messages in this session and yield the count. + """ + # Count all incoming messages in this session + async for _ in datums: + self.counter += 1 + + # Emit the current count as bytes with the same keys + yield session_reducer.Message(str(self.counter).encode(), keys) + + async def accumulator(self) -> bytes: + """ + Return current count as bytes for merging with other sessions. 
+ """ + return str(self.counter).encode() + + async def merge_accumulator(self, accumulator: bytes) -> None: + """ + Parse serialized accumulator and add to our count. + """ + try: + self.counter += int(accumulator.decode("utf-8"), 10) + except Exception as e: + import sys + + print(f"Failed to parse accumulator value: {e}", file=sys.stderr) + + +async def main(): + """ + Start the session reduce server. + """ + server = session_reducer.SessionReduceAsyncServer() + # Set up signal handlers for graceful shutdown + loop = asyncio.get_running_loop() + try: + loop.add_signal_handler(signal.SIGINT, lambda: server.stop()) + loop.add_signal_handler(signal.SIGTERM, lambda: server.stop()) + except (NotImplementedError, RuntimeError): + pass + + try: + await server.start(SessionReduceCounter) + print("Shutting down gracefully...") + except asyncio.CancelledError: + try: + server.stop() + except Exception: + pass + return + + +# Optional: ensure default signal handlers are in place so asyncio.run can handle them cleanly. +signal.signal(signal.SIGINT, signal.default_int_handler) +try: + signal.signal(signal.SIGTERM, signal.SIG_DFL) +except AttributeError: + pass + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/packages/pynumaflow-lite/pynumaflow_lite/__init__.py b/packages/pynumaflow-lite/pynumaflow_lite/__init__.py new file mode 100644 index 00000000..680d066b --- /dev/null +++ b/packages/pynumaflow-lite/pynumaflow_lite/__init__.py @@ -0,0 +1,86 @@ +from .pynumaflow_lite import * + +# Ensure the `mapper`, `batchmapper`, and `mapstreamer` submodules are importable as attributes of the package +# even though they're primarily registered by the extension module. +try: + from importlib import import_module as _import_module + + mapper = _import_module(__name__ + ".mapper") +except Exception: # pragma: no cover - avoid hard failures if extension not built + mapper = None + +try: + batchmapper = _import_module(__name__ + ".batchmapper") +except Exception: # pragma: no cover + batchmapper = None + +try: + mapstreamer = _import_module(__name__ + ".mapstreamer") +except Exception: # pragma: no cover + mapstreamer = None +try: + reducer = _import_module(__name__ + ".reducer") +except Exception: # pragma: no cover + reducer = None + +try: + session_reducer = _import_module(__name__ + ".session_reducer") +except Exception: # pragma: no cover + session_reducer = None + +try: + accumulator = _import_module(__name__ + ".accumulator") +except Exception: # pragma: no cover + accumulator = None + +# Surface the Python Mapper, BatchMapper, MapStreamer, Reducer, SessionReducer, and Accumulator classes under the extension submodules for convenient access +from ._map_dtypes import Mapper +from ._batchmapper_dtypes import BatchMapper +from ._mapstream_dtypes import MapStreamer +from ._reduce_dtypes import Reducer +from ._session_reduce_dtypes import SessionReducer +from ._accumulator_dtypes import Accumulator + +if mapper is not None: + try: + setattr(mapper, "Mapper", Mapper) + except Exception: + pass + +if batchmapper is not None: + try: + setattr(batchmapper, "BatchMapper", BatchMapper) + except Exception: + pass + +if mapstreamer is not None: + try: + setattr(mapstreamer, "MapStreamer", MapStreamer) + except Exception: + pass + +if reducer is not None: + try: + setattr(reducer, "Reducer", Reducer) + except Exception: + pass + +if session_reducer is not None: + try: + setattr(session_reducer, "SessionReducer", SessionReducer) + except Exception: + pass + +if accumulator is not None: + try: + 
+        setattr(accumulator, "Accumulator", Accumulator)
+    except Exception:
+        pass
+
+# Public API
+__all__ = ["mapper", "batchmapper", "mapstreamer", "reducer", "session_reducer", "accumulator"]
+
+__doc__ = pynumaflow_lite.__doc__
+if hasattr(pynumaflow_lite, "__all__"):
+    # Merge to keep our package-level exports
+    __all__ = list(set(__all__) | set(pynumaflow_lite.__all__))
diff --git a/packages/pynumaflow-lite/pynumaflow_lite/__init__.pyi b/packages/pynumaflow-lite/pynumaflow_lite/__init__.pyi
new file mode 100644
index 00000000..7bcf17db
--- /dev/null
+++ b/packages/pynumaflow-lite/pynumaflow_lite/__init__.pyi
@@ -0,0 +1,12 @@
+import _typeshed
+
+def __getattr__(name: str) -> _typeshed.Incomplete: ...
+
+from . import mapper as mapper
+from . import batchmapper as batchmapper
+from . import mapstreamer as mapstreamer
+from . import reducer as reducer
+from . import session_reducer as session_reducer
+from . import accumulator as accumulator
+
+__all__ = ['mapper', 'batchmapper', 'mapstreamer', 'reducer', 'session_reducer', 'accumulator']
diff --git a/packages/pynumaflow-lite/pynumaflow_lite/_accumulator_dtypes.py b/packages/pynumaflow-lite/pynumaflow_lite/_accumulator_dtypes.py
new file mode 100644
index 00000000..a6250647
--- /dev/null
+++ b/packages/pynumaflow-lite/pynumaflow_lite/_accumulator_dtypes.py
@@ -0,0 +1,34 @@
+from abc import ABCMeta, abstractmethod
+from typing import AsyncIterator
+
+from pynumaflow_lite.accumulator import Datum, Message
+
+
+class Accumulator(metaclass=ABCMeta):
+    """
+    Provides an interface to write an Accumulator
+    which will be exposed over a gRPC server.
+    """
+
+    def __call__(self, *args, **kwargs):
+        """
+        This allows executing the handler directly when the
+        class instance is used as a callable.
+        """
+        return self.handler(*args, **kwargs)
+
+    @abstractmethod
+    async def handler(self, datums: AsyncIterator[Datum]) -> AsyncIterator[Message]:
+        """
+        Accumulate can read unordered from the input stream and emit the ordered data to the output stream.
+        Once the watermark (WM) of the output stream progresses, the data in WAL until that WM will be garbage collected.
+        NOTE: A message can be silently dropped if need be, and it will be cleared from the WAL when the WM progresses.
+
+        Args:
+            datums: An async iterator of Datum objects
+
+        Yields:
+            Message objects to be sent to the next vertex
+        """
+        pass
+
diff --git a/packages/pynumaflow-lite/pynumaflow_lite/_batchmapper_dtypes.py b/packages/pynumaflow-lite/pynumaflow_lite/_batchmapper_dtypes.py
new file mode 100644
index 00000000..6fe8d1da
--- /dev/null
+++ b/packages/pynumaflow-lite/pynumaflow_lite/_batchmapper_dtypes.py
@@ -0,0 +1,20 @@
+from abc import ABCMeta, abstractmethod
+from pynumaflow_lite.batchmapper import Datum, BatchResponse, BatchResponses
+from collections.abc import AsyncIterable
+
+
+class BatchMapper(metaclass=ABCMeta):
+    """
+    Provides an interface to write a BatchMap servicer.
+    """
+
+    def __call__(self, *args, **kwargs):
+        return self.handler(*args, **kwargs)
+
+    @abstractmethod
+    async def handler(self, batch: AsyncIterable[Datum]) -> BatchResponses:
+        """
+        Implement this handler function for batch mapping.
+        The returned responses must contain one BatchResponse per input Datum, matched by id.
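+
+        A minimal sketch (names from `pynumaflow_lite.batchmapper`; assumes
+        `Message` is also imported):
+
+            responses = BatchResponses()
+            async for d in batch:
+                resp = BatchResponse(d.id)  # one response per input id
+                resp.append(Message(d.value, d.keys))
+                responses.append(resp)
+            return responses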
+        """
+        pass
diff --git a/packages/pynumaflow-lite/pynumaflow_lite/_map_dtypes.py b/packages/pynumaflow-lite/pynumaflow_lite/_map_dtypes.py
new file mode 100644
index 00000000..6c7207c5
--- /dev/null
+++ b/packages/pynumaflow-lite/pynumaflow_lite/_map_dtypes.py
@@ -0,0 +1,23 @@
+from abc import ABCMeta, abstractmethod
+from pynumaflow_lite.mapper import Datum, Messages
+
+
+class Mapper(metaclass=ABCMeta):
+    """
+    Provides an interface to write an async map handler
+    which will be exposed over a gRPC server.
+    """
+
+    def __call__(self, *args, **kwargs):
+        """
+        This allows executing the handler directly when the
+        class instance is used as a callable.
+        """
+        return self.handler(*args, **kwargs)
+
+    @abstractmethod
+    async def handler(self, keys: list[str], payload: Datum) -> Messages:
+        """
+        Implement this handler function, which implements the MapAsyncCallable interface.
+        """
+        pass
diff --git a/packages/pynumaflow-lite/pynumaflow_lite/_mapstream_dtypes.py b/packages/pynumaflow-lite/pynumaflow_lite/_mapstream_dtypes.py
new file mode 100644
index 00000000..bc4608ec
--- /dev/null
+++ b/packages/pynumaflow-lite/pynumaflow_lite/_mapstream_dtypes.py
@@ -0,0 +1,22 @@
+from abc import ABCMeta, abstractmethod
+from pynumaflow_lite.mapstreamer import Datum, Message
+from collections.abc import AsyncIterator
+
+
+class MapStreamer(metaclass=ABCMeta):
+    """
+    Provides an interface to write a streaming map servicer.
+    The handler yields outputs incrementally as an async iterator.
+    """
+
+    def __call__(self, *args, **kwargs):
+        return self.handler(*args, **kwargs)
+
+    @abstractmethod
+    async def handler(self, keys: list[str], datum: Datum) -> AsyncIterator[Message]:
+        """
+        Implement this handler function for streaming mapping.
+        It should be an async generator yielding Message objects.
+        """
+        pass
+
diff --git a/packages/pynumaflow-lite/pynumaflow_lite/_reduce_dtypes.py b/packages/pynumaflow-lite/pynumaflow_lite/_reduce_dtypes.py
new file mode 100644
index 00000000..cef2537b
--- /dev/null
+++ b/packages/pynumaflow-lite/pynumaflow_lite/_reduce_dtypes.py
@@ -0,0 +1,20 @@
+from abc import ABCMeta, abstractmethod
+from pynumaflow_lite.reducer import Datum, Messages, Metadata
+from collections.abc import AsyncIterable
+
+
+class Reducer(metaclass=ABCMeta):
+    """
+    Interface for reduce handlers. A new instance will be created per window.
+    """
+
+    def __call__(self, *args, **kwargs):
+        return self.handler(*args, **kwargs)
+
+    @abstractmethod
+    async def handler(self, keys: list[str], datums: AsyncIterable[Datum], md: Metadata) -> Messages:
+        """
+        Implement this handler; consume `datums` async iterable and return Messages.
+        """
+        pass
+
diff --git a/packages/pynumaflow-lite/pynumaflow_lite/_session_reduce_dtypes.py b/packages/pynumaflow-lite/pynumaflow_lite/_session_reduce_dtypes.py
new file mode 100644
index 00000000..440ccc33
--- /dev/null
+++ b/packages/pynumaflow-lite/pynumaflow_lite/_session_reduce_dtypes.py
@@ -0,0 +1,34 @@
+from abc import ABCMeta, abstractmethod
+from collections.abc import AsyncIterator
+
+from pynumaflow_lite.session_reducer import Datum, Message
+
+
+class SessionReducer(metaclass=ABCMeta):
+    """
+    Interface for session reduce handlers. A new instance will be created per keyed window.
+    """
+
+    @abstractmethod
+    async def session_reduce(
+        self, keys: list[str], datums: AsyncIterator[Datum]
+    ) -> AsyncIterator[Message]:
+        """
+        Implement this handler; consume `datums` async iterable and yield Messages.
+        This is called for each session window.
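+
+        A minimal counting sketch (assumes the count is kept on `self`,
+        as in manifests/session_reduce in this diff):
+
+            async for _ in datums:
+                self.counter += 1
+            yield Message(str(self.counter).encode(), keys)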
+ """ + pass + + @abstractmethod + async def accumulator(self) -> bytes: + """ + Return the current state as bytes. Called when this session is merged with another. + """ + pass + + @abstractmethod + async def merge_accumulator(self, accumulator: bytes) -> None: + """ + Merge the given accumulator (from another session) into this session's state. + """ + pass diff --git a/packages/pynumaflow-lite/pynumaflow_lite/accumulator.pyi b/packages/pynumaflow-lite/pynumaflow_lite/accumulator.pyi new file mode 100644 index 00000000..13493ace --- /dev/null +++ b/packages/pynumaflow-lite/pynumaflow_lite/accumulator.pyi @@ -0,0 +1,118 @@ +from datetime import datetime +from typing import AsyncIterator, Optional + +class Message: + """ + A message to be sent to the next vertex from an accumulator handler. + """ + + keys: Optional[list[str]] + value: bytes + tags: Optional[list[str]] + id: str + headers: dict[str, str] + event_time: datetime + watermark: datetime + + def __init__( + self, + value: bytes, + keys: list[str] | None = None, + tags: list[str] | None = None, + id: str = "", + headers: dict[str, str] | None = None, + event_time: datetime | None = None, + watermark: datetime | None = None, + ) -> None: ... + @staticmethod + def message_to_drop() -> Message: + """ + Drop a Message, do not forward to the next vertex. + """ + ... + @staticmethod + def from_datum( + datum: Datum, + value: bytes | None = None, + keys: list[str] | None = None, + tags: list[str] | None = None, + ) -> Message: + """ + Create a Message from a Datum, preserving all metadata (id, headers, event_time, watermark). + + Args: + datum: The source Datum to copy metadata from + value: Optional new value (defaults to datum.value) + keys: Optional new keys (defaults to datum.keys) + tags: Optional tags for conditional forwarding + + Returns: + A new Message with metadata from the datum + """ + ... + +class Datum: + """ + The incoming AccumulatorRequest accessible in Python function (streamed). + """ + + keys: list[str] + value: bytes + watermark: datetime + event_time: datetime + headers: dict[str, str] + id: str + +class AccumulatorAsyncServer: + """ + Async Accumulator Server that can be started from Python code. + """ + + def __init__( + self, + sock_file: str | None = "/var/run/numaflow/accumulator.sock", + info_file: str | None = "/var/run/numaflow/accumulator-server-info", + ) -> None: ... + async def start( + self, py_creator: object, init_args: object | None = None + ) -> None: + """ + Start the server with the given Python class (creator). + + Args: + py_creator: The Python class to instantiate per key + init_args: Optional tuple of positional arguments for class instantiation + """ + ... + def stop(self) -> None: + """ + Trigger server shutdown from Python (idempotent). + """ + ... + +class PyAsyncDatumStream: + """ + Python-visible async iterator that yields Datum items from a Tokio mpsc channel. + """ + + def __init__(self) -> None: ... + def __aiter__(self) -> PyAsyncDatumStream: ... + def __anext__(self) -> Datum: ... + +class Accumulator: + """ + Base class for implementing an Accumulator. + """ + + async def handler(self, datums: AsyncIterator[Datum]) -> AsyncIterator[Message]: + """ + Accumulate can read unordered from the input stream and emit the ordered data to the output stream. + + Args: + datums: An async iterator of Datum objects + + Yields: + Message objects to be sent to the next vertex + """ + ... 
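+
+# A minimal handler sketch (assumption: mirrors manifests/accumulator/accumulator_stream_sorter.py
+# in this diff; not part of the generated stub):
+#
+#     class Sorter(Accumulator):
+#         async def handler(self, datums):
+#             async for d in datums:
+#                 # from_datum preserves id, headers, event_time, and watermark
+#                 yield Message.from_datum(d)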
+ diff --git a/packages/pynumaflow-lite/pynumaflow_lite/batchmapper.pyi b/packages/pynumaflow-lite/pynumaflow_lite/batchmapper.pyi new file mode 100644 index 00000000..c63a5328 --- /dev/null +++ b/packages/pynumaflow-lite/pynumaflow_lite/batchmapper.pyi @@ -0,0 +1,76 @@ +from __future__ import annotations + +from typing import Optional, List, Dict, Callable, Awaitable, Any, AsyncIterator +import datetime as _dt + + +class Message: + keys: Optional[List[str]] + value: bytes + tags: Optional[List[str]] + + def __init__( + self, + value: bytes, + keys: Optional[List[str]] = ..., + tags: Optional[List[str]] = ..., + ) -> None: ... + + @staticmethod + def message_to_drop() -> Message: ... + + +class Datum: + keys: List[str] + value: bytes + watermark: _dt.datetime + eventtime: _dt.datetime + id: str + headers: Dict[str, str] + + def __repr__(self) -> str: ... + + def __str__(self) -> str: ... + + +class BatchResponse: + id: str + + def __init__(self, id: str) -> None: ... + + @staticmethod + def from_id(id: str) -> BatchResponse: ... + + def append(self, message: Message) -> None: ... + + +class BatchResponses: + def __init__(self) -> None: ... + + def append(self, response: BatchResponse) -> None: ... + + +class BatchMapAsyncServer: + def __init__( + self, + sock_file: str | None = ..., + info_file: str | None = ..., + ) -> None: ... + + def start(self, py_func: Callable[..., Any]) -> Awaitable[None]: ... + + def stop(self) -> None: ... + + +class BatchMapper: + async def handler(self, batch: AsyncIterator[Datum]) -> BatchResponses: ... + + +__all__ = [ + "Message", + "Datum", + "BatchResponse", + "BatchResponses", + "BatchMapAsyncServer", + "BatchMapper", +] diff --git a/packages/pynumaflow-lite/pynumaflow_lite/mapper.pyi b/packages/pynumaflow-lite/pynumaflow_lite/mapper.pyi new file mode 100644 index 00000000..3d9e2977 --- /dev/null +++ b/packages/pynumaflow-lite/pynumaflow_lite/mapper.pyi @@ -0,0 +1,70 @@ +from __future__ import annotations + +from typing import Optional, List, Dict, Callable, Awaitable, Any +import datetime as _dt + +# Re-export the Python ABC for user convenience and typing +from ._map_dtypes import Mapper as Mapper + + +class Messages: + def __init__(self) -> None: ... + + def append(self, message: Message) -> None: ... + + def __repr__(self) -> str: ... + + def __str__(self) -> str: ... + + +class Message: + keys: Optional[List[str]] + value: bytes + tags: Optional[List[str]] + + def __init__( + self, + value: bytes, + keys: Optional[List[str]] = ..., + tags: Optional[List[str]] = ..., + ) -> None: ... + + @staticmethod + def message_to_drop() -> Message: ... + + +class Datum: + # Read-only attributes provided by the extension + keys: List[str] + value: bytes + watermark: _dt.datetime + eventtime: _dt.datetime + headers: Dict[str, str] + + def __repr__(self) -> str: ... + + def __str__(self) -> str: ... + + +class MapAsyncServer: + def __init__( + self, + sock_file: str | None = ..., + info_file: str | None = ..., + ) -> None: ... + + def start(self, py_func: Callable[..., Any]) -> Awaitable[None]: ... + + def stop(self) -> None: ... 
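+
+# A usage sketch (assumed; mirrors manifests/map/map_cat.py in this diff):
+#
+#     server = MapAsyncServer()
+#     await server.start(my_mapper)  # my_mapper: a Mapper instance; serves until stop() is called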
+
+
+__all__ = [
+    "Messages",
+    "Message",
+    "Datum",
+    "MapAsyncServer",
+    "Mapper",
+]
diff --git a/packages/pynumaflow-lite/pynumaflow_lite/mapstreamer.pyi b/packages/pynumaflow-lite/pynumaflow_lite/mapstreamer.pyi
new file mode 100644
index 00000000..c7601f7f
--- /dev/null
+++ b/packages/pynumaflow-lite/pynumaflow_lite/mapstreamer.pyi
@@ -0,0 +1,62 @@
+from __future__ import annotations
+
+from typing import Optional, List, Dict, Callable, Awaitable, Any
+import datetime as _dt
+
+# Re-export the Python ABC for user convenience and typing
+from ._mapstream_dtypes import MapStreamer as MapStreamer
+
+
+class Message:
+    keys: Optional[List[str]]
+    value: bytes
+    tags: Optional[List[str]]
+
+    def __init__(
+        self,
+        value: bytes,
+        keys: Optional[List[str]] = ...,
+        tags: Optional[List[str]] = ...,
+    ) -> None: ...
+
+    @staticmethod
+    def message_to_drop() -> Message: ...
+
+    @staticmethod
+    def to_drop() -> Message: ...
+
+
+class Datum:
+    keys: List[str]
+    value: bytes
+    watermark: _dt.datetime
+    eventtime: _dt.datetime
+    headers: Dict[str, str]
+
+    def __repr__(self) -> str: ...
+
+    def __str__(self) -> str: ...
+
+
+class MapStreamAsyncServer:
+    def __init__(
+        self,
+        sock_file: str | None = ...,
+        info_file: str | None = ...,
+    ) -> None: ...
+
+    def start(self, py_func: Callable[..., Any]) -> Awaitable[None]: ...
+
+    def stop(self) -> None: ...
+
+
+__all__ = [
+    "Message",
+    "Datum",
+    "MapStreamAsyncServer",
+    "MapStreamer",
+]
diff --git a/packages/pynumaflow-lite/pynumaflow_lite/py.typed b/packages/pynumaflow-lite/pynumaflow_lite/py.typed
new file mode 100644
index 00000000..8521f826
--- /dev/null
+++ b/packages/pynumaflow-lite/pynumaflow_lite/py.typed
@@ -0,0 +1,2 @@
+# These stubs were generated by an AI agent; they will be redone once the typing toolset is integrated with `maturin`.
+# More details in: https://pyo3.rs/main/type-stub.html
\ No newline at end of file
diff --git a/packages/pynumaflow-lite/pynumaflow_lite/reducer.pyi b/packages/pynumaflow-lite/pynumaflow_lite/reducer.pyi
new file mode 100644
index 00000000..907261e4
--- /dev/null
+++ b/packages/pynumaflow-lite/pynumaflow_lite/reducer.pyi
@@ -0,0 +1,80 @@
+from __future__ import annotations
+
+import datetime as _dt
+from typing import Optional, List, Dict, Awaitable
+
+# Re-export the Python ABC for user convenience and typing
+from ._reduce_dtypes import Reducer as Reducer
+
+
+class Message:
+    keys: Optional[List[str]]
+    value: bytes
+    tags: Optional[List[str]]
+
+    def __init__(
+        self,
+        value: bytes,
+        keys: Optional[List[str]] = ...,
+        tags: Optional[List[str]] = ...,
+    ) -> None: ...
+
+    @staticmethod
+    def message_to_drop() -> Message: ...
+
+    @staticmethod
+    def to_drop() -> Message: ...
+
+
+class Messages:
+    def __init__(self) -> None: ...
+
+    def append(self, message: Message) -> None: ...
+
+    def __repr__(self) -> str: ...
+
+    def __str__(self) -> str: ...
+
+
+class Datum:
+    keys: List[str]
+    value: bytes
+    watermark: _dt.datetime
+    eventtime: _dt.datetime
+    headers: Dict[str, str]
+
+    def __repr__(self) -> str: ...
+
+    def __str__(self) -> str: ...
+
+
+class IntervalWindow:
+    start: _dt.datetime
+    end: _dt.datetime
+
+
+class Metadata:
+    interval_window: IntervalWindow
+
+
+class ReduceAsyncServer:
+    def __init__(
+        self,
+        sock_file: str | None = ...,
+        info_file: str | None = ...,
+    ) -> None: ...
+ + def start(self, py_creator: type, init_args: tuple | None = ...) -> Awaitable[None]: ... + + def stop(self) -> None: ... + + +__all__ = [ + "Message", + "Messages", + "Datum", + "IntervalWindow", + "Metadata", + "ReduceAsyncServer", + "Reducer", +] diff --git a/packages/pynumaflow-lite/pynumaflow_lite/session_reducer.pyi b/packages/pynumaflow-lite/pynumaflow_lite/session_reducer.pyi new file mode 100644 index 00000000..e63d52a2 --- /dev/null +++ b/packages/pynumaflow-lite/pynumaflow_lite/session_reducer.pyi @@ -0,0 +1,56 @@ +from __future__ import annotations + +import datetime as _dt +from typing import Optional, List, Dict, Awaitable + +# Re-export the Python ABC for user convenience and typing +from ._session_reduce_dtypes import SessionReducer as SessionReducer + + +class Message: + keys: Optional[List[str]] + value: bytes + tags: Optional[List[str]] + + def __init__( + self, + value: bytes, + keys: Optional[List[str]] = ..., + tags: Optional[List[str]] = ..., + ) -> None: ... + + @staticmethod + def message_to_drop() -> Message: ... + + +class Datum: + keys: List[str] + value: bytes + watermark: _dt.datetime + eventtime: _dt.datetime + headers: Dict[str, str] + + def __repr__(self) -> str: ... + + def __str__(self) -> str: ... + + +class SessionReduceAsyncServer: + def __init__( + self, + sock_file: str | None = ..., + info_file: str | None = ..., + ) -> None: ... + + def start(self, py_creator: type, init_args: tuple | None = ...) -> Awaitable[None]: ... + + def stop(self) -> None: ... + + +__all__ = [ + "Message", + "Datum", + "SessionReduceAsyncServer", + "SessionReducer", +] + diff --git a/packages/pynumaflow-lite/pyproject.toml b/packages/pynumaflow-lite/pyproject.toml new file mode 100644 index 00000000..6784ba57 --- /dev/null +++ b/packages/pynumaflow-lite/pyproject.toml @@ -0,0 +1,20 @@ +[build-system] +requires = ["maturin>=1.8,<2.0"] +build-backend = "maturin" + +[project] +name = "pynumaflow-lite" +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dynamic = ["version"] +[tool.maturin] +features = ["pyo3/extension-module"] + +[dependency-groups] +dev = [ + "pytest>=8.3.5", +] diff --git a/packages/pynumaflow-lite/src/accumulate/mod.rs b/packages/pynumaflow-lite/src/accumulate/mod.rs new file mode 100644 index 00000000..18b28db6 --- /dev/null +++ b/packages/pynumaflow-lite/src/accumulate/mod.rs @@ -0,0 +1,296 @@ +use chrono::{DateTime, Utc}; +use numaflow::accumulator; +use std::collections::HashMap; +use std::sync::Mutex; + +pub mod server; + +use pyo3::prelude::*; +use tokio::sync::mpsc; + +/// A message to be sent to the next vertex from an accumulator handler. +#[pyclass(module = "pynumaflow_lite.accumulator")] +#[derive(Clone, Debug)] +pub struct Message { + #[pyo3(get)] + /// Keys are a collection of strings which will be passed on to the next vertex as is. It can + /// be an empty collection. + pub keys: Option>, + #[pyo3(get)] + /// Value is the value passed to the next vertex. + pub value: Vec, + #[pyo3(get)] + /// Tags are used for [conditional forwarding](https://numaflow.numaproj.io/user-guide/reference/conditional-forwarding/). + pub tags: Option>, + #[pyo3(get)] + /// ID is used for deduplication. Read-only, set from the input datum. + pub id: String, + #[pyo3(get)] + /// Headers for the message. Read-only, set from the input datum. 
+ pub headers: HashMap, + #[pyo3(get)] + /// Time of the element as seen at source or aligned after a reduce operation. Read-only, set from the input datum. + pub event_time: DateTime, + #[pyo3(get)] + /// Watermark represented by time is a guarantee that we will not see an element older than this time. Read-only, set from the input datum. + pub watermark: DateTime, +} + +#[pymethods] +impl Message { + #[new] + #[pyo3(signature = ( + value, + keys=None, + tags=None, + id=String::new(), + headers=HashMap::new(), + event_time=chrono::Utc::now(), + watermark=chrono::Utc::now() + ))] + fn new( + value: Vec, + keys: Option>, + tags: Option>, + id: String, + headers: HashMap, + event_time: DateTime, + watermark: DateTime, + ) -> Self { + Self { + keys, + value, + tags, + id, + headers, + event_time, + watermark, + } + } + + /// Drop a Message, do not forward to the next vertex. + #[pyo3(signature = ())] + #[staticmethod] + fn message_to_drop() -> Self { + Self { + keys: None, + value: vec![], + tags: Some(vec![numaflow::shared::DROP.to_string()]), + id: String::new(), + headers: HashMap::new(), + event_time: chrono::Utc::now(), + watermark: chrono::Utc::now(), + } + } + + /// Create a Message from a Datum, preserving all metadata. + #[pyo3(signature = (datum, value=None, keys=None, tags=None))] + #[staticmethod] + fn from_datum( + datum: &Datum, + value: Option>, + keys: Option>, + tags: Option>, + ) -> Self { + Self { + keys: keys.or_else(|| Some(datum.keys.clone())), + value: value.unwrap_or_else(|| datum.value.clone()), + tags, + id: datum.id.clone(), + headers: datum.headers.clone(), + event_time: datum.event_time, + watermark: datum.watermark, + } + } +} + +impl From for accumulator::Message { + fn from(value: Message) -> Self { + // Create an AccumulatorRequest with all the fields from the Message + let request = accumulator::AccumulatorRequest { + keys: value.keys.clone().unwrap_or_default(), + value: value.value.clone(), + watermark: value.watermark, + event_time: value.event_time, + headers: value.headers.clone(), + id: value.id.clone(), + }; + + let mut msg = Self::from_accumulator_request(request); + + // Update with optional fields + if let Some(keys) = value.keys { + msg = msg.with_keys(keys); + } + msg = msg.with_value(value.value); + if let Some(tags) = value.tags { + msg = msg.with_tags(tags); + } + msg + } +} + +/// The incoming AccumulatorRequest accessible in Python function (streamed). 
+#[pyclass(module = "pynumaflow_lite.accumulator")] +pub struct Datum { + #[pyo3(get)] + pub keys: Vec, + #[pyo3(get)] + pub value: Vec, + #[pyo3(get)] + pub watermark: DateTime, + #[pyo3(get)] + pub event_time: DateTime, + #[pyo3(get)] + pub headers: HashMap, + #[pyo3(get)] + pub id: String, +} + +impl Datum { + fn new( + keys: Vec, + value: Vec, + watermark: DateTime, + event_time: DateTime, + headers: HashMap, + id: String, + ) -> Self { + Self { + keys, + value, + watermark, + event_time, + headers, + id, + } + } + + fn __repr__(&self) -> String { + format!( + "Datum(keys={:?}, value={:?}, watermark={}, event_time={}, headers={:?}, id={:?})", + self.keys, self.value, self.watermark, self.event_time, self.headers, self.id + ) + } + + fn __str__(&self) -> String { + format!( + "Datum(keys={:?}, value={:?}, watermark={}, event_time={}, headers={:?}, id={:?})", + self.keys, + String::from_utf8_lossy(&self.value), + self.watermark, + self.event_time, + self.headers, + self.id + ) + } +} + +impl From for Datum { + fn from(value: accumulator::AccumulatorRequest) -> Self { + Self::new( + value.keys, + value.value, + value.watermark, + value.event_time, + value.headers, + value.id, + ) + } +} + +/// Python-visible async iterator that yields Datum items from a Tokio mpsc channel. +/// This is a thin wrapper around the generic AsyncChannelStream implementation. +#[pyclass(module = "pynumaflow_lite.accumulator")] +pub struct PyAsyncDatumStream { + inner: crate::pyiterables::AsyncChannelStream, +} + +#[pymethods] +impl PyAsyncDatumStream { + #[new] + fn new() -> Self { + let (_tx, rx) = mpsc::channel::(1); + Self { + inner: crate::pyiterables::AsyncChannelStream::new(rx), + } + } + + fn __aiter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { + slf + } + + fn __anext__<'a>(&self, py: Python<'a>) -> PyResult> { + self.inner.py_anext(py) + } +} + +impl PyAsyncDatumStream { + pub fn new_with(rx: mpsc::Receiver) -> Self { + Self { + inner: crate::pyiterables::AsyncChannelStream::new(rx), + } + } +} + +/// Async Accumulator Server that can be started from Python code, taking a class (creator). +#[pyclass(module = "pynumaflow_lite.accumulator")] +pub struct AccumulatorAsyncServer { + sock_file: String, + info_file: String, + shutdown_tx: Mutex>>, +} + +#[pymethods] +impl AccumulatorAsyncServer { + #[new] + #[pyo3(signature = (sock_file="/var/run/numaflow/accumulator.sock".to_string(), info_file="/var/run/numaflow/accumulator-server-info".to_string()))] + fn new(sock_file: String, info_file: String) -> Self { + Self { + sock_file, + info_file, + shutdown_tx: Mutex::new(None), + } + } + + /// Start the server with the given Python class (creator). + /// - For class-based: pass the class and optionally init_args (tuple). + #[pyo3(signature = (py_creator, init_args=None))] + pub fn start<'a>( + &self, + py: Python<'a>, + py_creator: Py, + init_args: Option>, + ) -> PyResult> { + let sock_file = self.sock_file.clone(); + let info_file = self.info_file.clone(); + let (tx, rx) = tokio::sync::oneshot::channel::<()>(); + { + let mut guard = self.shutdown_tx.lock().unwrap(); + *guard = Some(tx); + } + + pyo3_async_runtimes::tokio::future_into_py(py, async move { + crate::accumulate::server::start(py_creator, init_args, sock_file, info_file, rx) + .await?; + Ok(()) + }) + } + + /// Trigger server shutdown from Python (idempotent). 
+ pub fn stop(&self) -> PyResult<()> { + if let Some(tx) = self.shutdown_tx.lock().unwrap().take() { + let _ = tx.send(()); + } + Ok(()) + } +} + +/// Helper to populate a PyModule with accumulator types/functions. +pub(crate) fn populate_py_module(m: &Bound) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + Ok(()) +} diff --git a/packages/pynumaflow-lite/src/accumulate/server.rs b/packages/pynumaflow-lite/src/accumulate/server.rs new file mode 100644 index 00000000..842cb682 --- /dev/null +++ b/packages/pynumaflow-lite/src/accumulate/server.rs @@ -0,0 +1,162 @@ +use crate::accumulate::{Datum as PyDatum, Message as PyMessage, PyAsyncDatumStream}; +use crate::pyiterables::PyAsyncIterStream; +use numaflow::accumulator; +use numaflow::shared::ServerExtras; +use pyo3::prelude::*; +use pyo3::types::PyTuple; +use std::sync::Arc; +use tokio_stream::StreamExt; + +pub(crate) struct PyAccumulatorCreator { + /// handle to Python event loop + pub(crate) event_loop: Arc>, + /// Python class to instantiate per key + pub(crate) py_creator: Arc>, + /// optional tuple of positional args + pub(crate) init_args: Option>>, +} + +pub(crate) struct PyAccumulatorRunner { + /// handle to Python event loop + pub(crate) event_loop: Arc>, + /// Instance of class per key + pub(crate) py_instance: Arc>, +} + +impl accumulator::AccumulatorCreator for PyAccumulatorCreator { + type A = PyAccumulatorRunner; + + fn create(&self) -> Self::A { + // Instantiate the Python class synchronously under the GIL. + let inst = Python::attach(|py| { + let class = self.py_creator.as_ref(); + match &self.init_args { + Some(args) => { + let bound = args.as_ref().bind(py); + let py_tuple = bound.downcast::()?; + class.call1(py, py_tuple) + } + None => class.call0(py), + } + }) + .expect("failed to instantiate Python accumulator class"); + + PyAccumulatorRunner { + event_loop: self.event_loop.clone(), + py_instance: Arc::new(inst), + } + } +} + +#[tonic::async_trait] +impl accumulator::Accumulator for PyAccumulatorRunner { + async fn accumulate( + &self, + mut input: tokio::sync::mpsc::Receiver, + output: tokio::sync::mpsc::Sender, + ) { + // Create a channel to stream Datum into Python as an async iterator + let (tx, rx) = tokio::sync::mpsc::channel::(64); + + // Spawn a task forwarding incoming datums to the Python-facing channel + let forwarder = tokio::spawn(async move { + while let Some(req) = input.recv().await { + let datum = PyDatum::from(req); + if tx.send(datum).await.is_err() { + break; + } + } + // When input ends, dropping tx closes the channel and that is when + // the Python async iterable will stop. 
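On the Python side, the instance produced by `PyAccumulatorCreator` must expose an async-generator `handler` that consumes this datum stream, matching the `obj.handler(datums) -> AsyncIterator[Message]` call made just below. A minimal pass-through sketch (the class name and logic are illustrative):

```python
from pynumaflow_lite.accumulator import Datum, Message


class PassThroughAccumulator:
    async def handler(self, datums):
        # Forward each datum unchanged; from_datum preserves the id,
        # headers, event-time, and watermark metadata.
        async for d in datums:
            yield Message.from_datum(d)
```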
+ }); + + // Call the Python coroutine: + // obj.handler(datums: AsyncIterable[Datum]) -> AsyncIterator[Message] + let agen_obj = Python::attach(|py| { + let obj = self.py_instance.clone(); + let stream = PyAsyncDatumStream::new_with(rx); + + // Call handler method + let agen = obj + .call_method1(py, "handler", (stream,)) + .expect("python handler method raised before returning async iterable"); + + // Keep as Py + agen.extract(py).unwrap_or(agen) + }); + + // Wrap the Python AsyncIterable in a Rust Stream that yields incrementally + let mut stream = PyAsyncIterStream::::new(agen_obj, self.event_loop.clone()) + .expect("failed to construct PyAsyncIterStream"); + + // Forward each yielded message immediately to the output sender + while let Some(item) = stream.next().await { + match item { + Ok(py_msg) => { + let out: accumulator::Message = py_msg.into(); + if output.send(out).await.is_err() { + break; + } + } + Err(e) => { + // Non-stop errors are surfaced per-item; log and stop this stream. + eprintln!("Python async iteration error in accumulator: {:?}", e); + break; + } + } + } + + // Ensure forwarder completes + let _ = forwarder.await; + } +} + +/// Start the accumulator server by spinning up a dedicated Python asyncio loop and wiring shutdown. +pub(super) async fn start( + py_creator: Py, + init_args: Option>, + sock_file: String, + info_file: String, + shutdown_rx: tokio::sync::oneshot::Receiver<()>, +) -> Result<(), pyo3::PyErr> { + let (tx, rx) = tokio::sync::oneshot::channel(); + let py_asyncio_loop_handle = tokio::task::spawn_blocking(move || crate::pyrs::run_asyncio(tx)); + let event_loop = rx.await.unwrap(); + + let (sig_handle, combined_rx) = crate::pyrs::setup_sig_handler(shutdown_rx); + + let creator = PyAccumulatorCreator { + event_loop: event_loop.clone(), + py_creator: Arc::new(py_creator), + init_args: init_args.map(Arc::new), + }; + + let server = accumulator::Server::new(creator) + .with_socket_file(sock_file) + .with_server_info_file(info_file); + + let result = server + .start_with_shutdown(combined_rx) + .await + .map_err(|e| pyo3::PyErr::new::(e.to_string())); + + // Ensure the event loop is stopped even if shutdown came from elsewhere. + Python::attach(|py| { + if let Ok(stop_cb) = event_loop.getattr(py, "stop") { + let _ = event_loop.call_method1(py, "call_soon_threadsafe", (stop_cb,)); + } + }); + + println!("Numaflow Core (accumulator) has shutdown..."); + + // Wait for the blocking asyncio thread to finish. + let _ = py_asyncio_loop_handle.await; + + // if not finished, abort it + if !sig_handle.is_finished() { + println!("Aborting signal handler"); + let _ = sig_handle.abort(); + } + + result +} diff --git a/packages/pynumaflow-lite/src/batchmap/mod.rs b/packages/pynumaflow-lite/src/batchmap/mod.rs new file mode 100644 index 00000000..bab1a3f4 --- /dev/null +++ b/packages/pynumaflow-lite/src/batchmap/mod.rs @@ -0,0 +1,297 @@ +use std::collections::HashMap; + +use numaflow::batchmap; + +use chrono::{DateTime, Utc}; + +/// BatchMap interface managed by Python. Python code will start the server +/// and can pass in the Python coroutine. +pub mod server; + +use tokio::sync::mpsc; + +use pyo3::prelude::*; +use std::sync::Mutex; + +/// A message to be sent for a single datum in batch response. +#[pyclass(module = "pynumaflow_lite.batchmapper")] +#[derive(Clone, Default, Debug)] +pub struct Message { + /// Keys are a collection of strings which will be passed on to the next vertex as is. It can + /// be an empty collection. 
+ pub keys: Option>, + /// Value is the value passed to the next vertex. + pub value: Vec, + /// Tags are used for conditional forwarding. + pub tags: Option>, +} + +#[pymethods] +impl Message { + /// Create a new [Message] with the given value, keys, and tags. + #[new] + #[pyo3(signature = (value: "bytes", keys: "list[str] | None"=None, tags: "list[str] | None"=None) -> "Message" + )] + fn new(value: Vec, keys: Option>, tags: Option>) -> Self { + Self { keys, value, tags } + } + + /// Drop a [Message], do not forward to the next vertex. + #[pyo3(signature = ())] + #[staticmethod] + fn message_to_drop() -> Self { + Self { + keys: None, + value: vec![], + tags: Some(vec![numaflow::shared::DROP.to_string()]), + } + } +} + +impl From for batchmap::Message { + fn from(value: Message) -> Self { + Self { + keys: value.keys, + value: value.value, + tags: value.tags, + } + } +} + +/// The incoming Datum for BatchMap +#[pyclass(module = "pynumaflow_lite.batchmapper")] +pub struct Datum { + /// Set of keys in the (key, value) terminology of map/reduce paradigm. + #[pyo3(get)] + pub keys: Vec, + /// The value in the (key, value) terminology of map/reduce paradigm. + #[pyo3(get)] + pub value: Vec, + /// watermark represented by time is a guarantee that we will not see an element older than this time. + #[pyo3(get)] + pub watermark: DateTime, + /// Time of the element as seen at source or aligned after a reduce operation. + #[pyo3(get)] + pub eventtime: DateTime, + /// ID is the unique id of the message + #[pyo3(get)] + pub id: String, + /// Headers for the message. + #[pyo3(get)] + pub headers: HashMap, +} + +impl Datum { + fn new( + keys: Vec, + value: Vec, + watermark: DateTime, + eventtime: DateTime, + id: String, + headers: HashMap, + ) -> Self { + Self { + keys, + value, + watermark, + eventtime, + id, + headers, + } + } + + fn __repr__(&self) -> String { + format!( + "Datum(keys={:?}, value={:?}, watermark={}, eventtime={}, id={}, headers={:?})", + self.keys, self.value, self.watermark, self.eventtime, self.id, self.headers + ) + } + + fn __str__(&self) -> String { + format!( + "Datum(keys={:?}, value={:?}, watermark={}, eventtime={}, id={}, headers={:?})", + self.keys, + String::from_utf8_lossy(&self.value), + self.watermark, + self.eventtime, + self.id, + self.headers + ) + } +} + +impl From for Datum { + fn from(value: batchmap::Datum) -> Self { + Datum::new( + value.keys, + value.value, + value.watermark, + value.event_time, + value.id, + value.headers, + ) + } +} + +/// BatchResponse mirrors numaflow::batchmap::BatchResponse for Python +#[pyclass(module = "pynumaflow_lite.batchmapper")] +#[derive(Clone, Debug)] +pub struct BatchResponse { + #[pyo3(get)] + pub id: String, + pub messages: Vec, +} + +#[pymethods] +impl BatchResponse { + #[new] + #[pyo3(signature = (id: "str") -> "BatchResponse")] + fn new(id: String) -> Self { + Self { + id, + messages: Vec::new(), + } + } + + #[staticmethod] + #[pyo3(signature = (id: "str") -> "BatchResponse")] + fn from_id(id: String) -> Self { + Self { + id, + messages: Vec::new(), + } + } + + #[pyo3(signature = (message))] + fn append(&mut self, message: Message) { + self.messages.push(message); + } +} + +/// A collection of BatchResponse objects for a batch. 
+#[pyclass(module = "pynumaflow_lite.batchmapper")] +#[derive(Clone, Debug)] +pub struct BatchResponses { + pub(crate) responses: Vec, +} + +#[pymethods] +impl BatchResponses { + #[new] + #[pyo3(signature = () -> "BatchResponses")] + fn new() -> Self { + Self { responses: vec![] } + } + + /// Append a BatchResponse to the collection. + #[pyo3(signature = (response: "BatchResponse"))] + fn append(&mut self, response: BatchResponse) { + self.responses.push(response); + } +} + +impl From for batchmap::BatchResponse { + fn from(value: BatchResponse) -> Self { + let mut resp = batchmap::BatchResponse::from_id(value.id); + for m in value.messages.into_iter() { + resp.append(m.into()); + } + resp + } +} + +/// Python-visible async iterator that yields Datum items from a Tokio mpsc channel. +/// This is a thin wrapper around the generic AsyncChannelStream implementation. +#[pyclass(module = "pynumaflow_lite.batchmapper")] +pub struct PyAsyncDatumStream { + inner: crate::pyiterables::AsyncChannelStream, +} + +#[pymethods] +impl PyAsyncDatumStream { + #[new] + fn new() -> Self { + let (_tx, rx) = mpsc::channel::(1); + Self { + inner: crate::pyiterables::AsyncChannelStream::new(rx), + } + } + + fn __aiter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { + slf + } + + fn __anext__<'a>(&self, py: Python<'a>) -> PyResult> { + self.inner.py_anext(py) + } +} + +impl PyAsyncDatumStream { + pub fn new_with(rx: mpsc::Receiver) -> Self { + Self { + inner: crate::pyiterables::AsyncChannelStream::new(rx), + } + } +} + +/// Async Batch Map Server that can be started from Python code +#[pyclass(module = "pynumaflow_lite.batchmapper")] +pub struct BatchMapAsyncServer { + sock_file: String, + info_file: String, + shutdown_tx: Mutex>>, +} + +#[pymethods] +impl BatchMapAsyncServer { + #[new] + #[pyo3(signature = (sock_file: "str | None"=batchmap::SOCK_ADDR.to_string(), info_file: "str | None"=batchmap::SERVER_INFO_FILE.to_string()) -> "BatchMapAsyncServer" + )] + fn new(sock_file: String, info_file: String) -> Self { + Self { + sock_file, + info_file, + shutdown_tx: Mutex::new(None), + } + } + + #[pyo3(signature = (py_func: "callable") -> "None")] + pub fn start<'a>(&self, py: Python<'a>, py_func: Py) -> PyResult> { + let sock_file = self.sock_file.clone(); + let info_file = self.info_file.clone(); + let (tx, rx) = tokio::sync::oneshot::channel::<()>(); + { + let mut guard = self.shutdown_tx.lock().unwrap(); + *guard = Some(tx); + } + + pyo3_async_runtimes::tokio::future_into_py(py, async move { + // batch server uses the same runner loop and shutdown composition for now + // dedicated start is wired below + crate::batchmap::server::start(py_func, sock_file, info_file, rx) + .await + .expect("server failed to start"); + Ok(()) + }) + } + + #[pyo3(signature = () -> "None")] + pub fn stop(&self) -> PyResult<()> { + if let Some(tx) = self.shutdown_tx.lock().unwrap().take() { + let _ = tx.send(()); + } + Ok(()) + } +} + +/// Helper to populate a PyModule with batch map types/functions. 
+pub(crate) fn populate_py_module(m: &Bound) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + Ok(()) +} diff --git a/packages/pynumaflow-lite/src/batchmap/server.rs b/packages/pynumaflow-lite/src/batchmap/server.rs new file mode 100644 index 00000000..65a5d879 --- /dev/null +++ b/packages/pynumaflow-lite/src/batchmap/server.rs @@ -0,0 +1,106 @@ +// use crate::batchmap::Datum; +use numaflow::batchmap; +use numaflow::shared::ServerExtras; + +use pyo3::prelude::*; +use std::sync::Arc; + +pub(crate) struct PyBatchMapRunner { + pub(crate) event_loop: Arc>, + pub(crate) py_func: Arc>, +} + +#[tonic::async_trait] +impl batchmap::BatchMapper for PyBatchMapRunner { + async fn batchmap( + &self, + mut input: tokio::sync::mpsc::Receiver, + ) -> Vec { + // Create a channel to stream Datum into Python as an async iterator + let (tx, rx) = tokio::sync::mpsc::channel::(64); + + // Spawn a task forwarding incoming datums to the Python-facing channel + let forwarder = tokio::spawn(async move { + while let Some(d) = input.recv().await { + if tx.send(d.into()).await.is_err() { + break; + } + } + // When input ends, dropping tx closes the channel + }); + + // Call the Python coroutine: py_func(batch: AsyncIterable[Datum]) -> BatchResponses + let fut = Python::attach(|py| { + let locals = pyo3_async_runtimes::TaskLocals::new(self.event_loop.bind(py).clone()); + let py_func = self.py_func.clone(); + + let stream = crate::batchmap::PyAsyncDatumStream::new_with(rx); + let coro = py_func.call1(py, (stream,)).unwrap().into_bound(py); + pyo3_async_runtimes::into_future_with_locals(&locals, coro).unwrap() + }); + + let result = fut.await.unwrap(); + + // Ensure forwarder completes + let _ = forwarder.await; + + let responses = Python::attach(|py| { + let x: crate::batchmap::BatchResponses = result.extract(py).unwrap(); + x + }); + + responses + .responses + .into_iter() + .map(|resp| resp.into()) + .collect::>() + } +} + +// Start the batchmap server by spinning up a dedicated Python asyncio loop and wiring shutdown. +pub(super) async fn start( + py_func: Py, + sock_file: String, + info_file: String, + shutdown_rx: tokio::sync::oneshot::Receiver<()>, +) -> Result<(), pyo3::PyErr> { + let (tx, rx) = tokio::sync::oneshot::channel(); + let py_asyncio_loop_handle = tokio::task::spawn_blocking(move || crate::pyrs::run_asyncio(tx)); + let event_loop = rx.await.unwrap(); + + let (sig_handle, combined_rx) = crate::pyrs::setup_sig_handler(shutdown_rx); + + let py_runner = PyBatchMapRunner { + py_func: Arc::new(py_func), + event_loop: event_loop.clone(), + }; + + let server = numaflow::batchmap::Server::new(py_runner) + .with_socket_file(sock_file) + .with_server_info_file(info_file); + + let result = server + .start_with_shutdown(combined_rx) + .await + .map_err(|e| pyo3::PyErr::new::(e.to_string())); + + // Ensure the event loop is stopped even if shutdown came from elsewhere. + Python::attach(|py| { + if let Ok(stop_cb) = event_loop.getattr(py, "stop") { + let _ = event_loop.call_method1(py, "call_soon_threadsafe", (stop_cb,)); + } + }); + + println!("Numaflow Core (batch) has shutdown..."); + + // Wait for the blocking asyncio thread to finish. 
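The Python callable awaited in `batchmap` above receives the whole batch as an async iterator and must return a `BatchResponses` holding one `BatchResponse` per input `id`. A hedged sketch of a conforming handler (the echo logic is illustrative):

```python
from pynumaflow_lite.batchmapper import (
    BatchResponse,
    BatchResponses,
    Datum,
    Message,
)


async def batch_handler(datums) -> BatchResponses:
    responses = BatchResponses()
    async for d in datums:
        # One response per datum, correlated by its unique id.
        resp = BatchResponse.from_id(d.id)
        resp.append(Message(d.value, keys=d.keys))
        responses.append(resp)
    return responses
```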
+ let _ = py_asyncio_loop_handle.await; + + // if not finished, abort it + if !sig_handle.is_finished() { + println!("Aborting signal handler"); + let _ = sig_handle.abort(); + } + + result +} diff --git a/packages/pynumaflow-lite/src/lib.rs b/packages/pynumaflow-lite/src/lib.rs new file mode 100644 index 00000000..89339eb5 --- /dev/null +++ b/packages/pynumaflow-lite/src/lib.rs @@ -0,0 +1,120 @@ +pub mod accumulate; +pub mod batchmap; +pub mod map; +pub mod mapstream; +pub mod pyiterables; +pub mod pyrs; +pub mod reduce; +pub mod session_reduce; + +use pyo3::prelude::*; + +/// Submodule: pynumaflow_lite.mapper +#[pymodule] +fn mapper(_py: Python, m: &Bound) -> PyResult<()> { + crate::map::populate_py_module(m)?; + Ok(()) +} + +/// Submodule: pynumaflow_lite.batchmapper +#[pymodule] +fn batchmapper(_py: Python, m: &Bound) -> PyResult<()> { + crate::batchmap::populate_py_module(m)?; + Ok(()) +} + +/// Submodule: pynumaflow_lite.mapstreamer +#[pymodule] +fn mapstreamer(_py: Python, m: &Bound) -> PyResult<()> { + crate::mapstream::populate_py_module(m)?; + Ok(()) +} + +/// Submodule: pynumaflow_lite.reducer +#[pymodule] +fn reducer(_py: Python, m: &Bound) -> PyResult<()> { + crate::reduce::populate_py_module(m)?; + Ok(()) +} + +/// Submodule: pynumaflow_lite.session_reducer +#[pymodule] +fn session_reducer(_py: Python, m: &Bound) -> PyResult<()> { + crate::session_reduce::populate_py_module(m)?; + Ok(()) +} + +/// Submodule: pynumaflow_lite.accumulator +#[pymodule] +fn accumulator(_py: Python, m: &Bound) -> PyResult<()> { + crate::accumulate::populate_py_module(m)?; + Ok(()) +} + +/// Top-level Python module `pynumaflow_lite` with submodules like `mapper`, `batchmapper`, and `mapstreamer`. +#[pymodule] +fn pynumaflow_lite(py: Python, m: &Bound) -> PyResult<()> { + // Register the submodules via wrap_pymodule! + m.add_wrapped(pyo3::wrap_pymodule!(mapper))?; + m.add_wrapped(pyo3::wrap_pymodule!(batchmapper))?; + m.add_wrapped(pyo3::wrap_pymodule!(mapstreamer))?; + m.add_wrapped(pyo3::wrap_pymodule!(reducer))?; + m.add_wrapped(pyo3::wrap_pymodule!(session_reducer))?; + m.add_wrapped(pyo3::wrap_pymodule!(accumulator))?; + + // Ensure it's importable as `pynumaflow_lite.mapper` as well as attribute access + let binding = m.getattr("mapper")?; + let sub = binding.downcast::()?; + let fullname = "pynumaflow_lite.mapper"; + sub.setattr("__name__", fullname)?; + py.import("sys")? + .getattr("modules")? + .set_item(fullname, &sub)?; + + // Ensure it's importable as `pynumaflow_lite.batchmapper` as well + let binding = m.getattr("batchmapper")?; + let sub = binding.downcast::()?; + let fullname = "pynumaflow_lite.batchmapper"; + sub.setattr("__name__", fullname)?; + py.import("sys")? + .getattr("modules")? + .set_item(fullname, &sub)?; + + // Ensure it's importable as `pynumaflow_lite.mapstreamer` as well + let binding = m.getattr("mapstreamer")?; + let sub = binding.downcast::()?; + let fullname = "pynumaflow_lite.mapstreamer"; + sub.setattr("__name__", fullname)?; + py.import("sys")? + .getattr("modules")? + .set_item(fullname, &sub)?; + + // Ensure it's importable as `pynumaflow_lite.reducer` as well + let binding = m.getattr("reducer")?; + let sub = binding.downcast::()?; + let fullname = "pynumaflow_lite.reducer"; + sub.setattr("__name__", fullname)?; + py.import("sys")? + .getattr("modules")? 
+ .set_item(fullname, &sub)?; + + // Ensure it's importable as `pynumaflow_lite.session_reducer` as well + let binding = m.getattr("session_reducer")?; + let sub = binding.downcast::()?; + let fullname = "pynumaflow_lite.session_reducer"; + sub.setattr("__name__", fullname)?; + py.import("sys")? + .getattr("modules")? + .set_item(fullname, &sub)?; + + // Ensure it's importable as `pynumaflow_lite.accumulator` as well + let binding = m.getattr("accumulator")?; + let sub = binding.downcast::()?; + let fullname = "pynumaflow_lite.accumulator"; + sub.setattr("__name__", fullname)?; + py.import("sys")? + .getattr("modules")? + .set_item(fullname, &sub)?; + + Ok(()) +} diff --git a/packages/pynumaflow-lite/src/map/mod.rs b/packages/pynumaflow-lite/src/map/mod.rs new file mode 100644 index 00000000..b40d19c8 --- /dev/null +++ b/packages/pynumaflow-lite/src/map/mod.rs @@ -0,0 +1,216 @@ +use std::collections::HashMap; + +use numaflow::map; + +use chrono::{DateTime, Utc}; + +/// Map interface managed by Python. It means Python code will start the server +/// and can pass in the Python function. +pub mod server; + +use pyo3::prelude::*; +use std::sync::Mutex; + +/// A collection of [Message]s. +#[pyclass(module = "pynumaflow_lite.mapper")] +#[derive(Clone, Debug)] +pub struct Messages { + pub(crate) messages: Vec, +} + +#[pymethods] +impl Messages { + #[new] + #[pyo3(signature = () -> "Messages")] + fn new() -> Self { + Self { messages: vec![] } + } + + /// Append a [Message] to the collection. + #[pyo3(signature = (message: "Message"))] + fn append(&mut self, message: Message) { + self.messages.push(message); + } + + fn __repr__(&self) -> String { + format!("Messages({:?})", self.messages) + } + + fn __str__(&self) -> String { + format!("Messages({:?})", self.messages) + } +} + +/// A message to be sent to the next vertex. +#[pyclass(module = "pynumaflow_lite.mapper")] +#[derive(Clone, Default, Debug)] +pub struct Message { + /// Keys are a collection of strings which will be passed on to the next vertex as is. It can + /// be an empty collection. + pub keys: Option>, + /// Value is the value passed to the next vertex. + pub value: Vec, + /// Tags are used for [conditional forwarding](https://numaflow.numaproj.io/user-guide/reference/conditional-forwarding/). + pub tags: Option>, +} + +#[pymethods] +impl Message { + /// Create a new [Message] with the given value, keys, and tags. + #[new] + #[pyo3(signature = (value: "bytes", keys: "list[str] | None"=None, tags: "list[str] | None"=None) -> "Message" + )] + fn new(value: Vec, keys: Option>, tags: Option>) -> Self { + Self { keys, value, tags } + } + + /// Drop a [Message], do not forward to the next vertex. + #[pyo3(signature = ())] + #[staticmethod] + fn message_to_drop() -> Self { + Self { + keys: None, + value: vec![], + tags: Some(vec![numaflow::shared::DROP.to_string()]), + } + } +} + +impl From for map::Message { + fn from(value: Message) -> Self { + Self { + keys: value.keys, + value: value.value, + tags: value.tags, + } + } +} + +/// The incoming [MapRequest] accessible in Python function. +#[pyclass(module = "pynumaflow_lite.mapper")] +pub struct Datum { + /// Set of keys in the (key, value) terminology of map/reduce paradigm. + #[pyo3(get)] + pub keys: Vec, + /// The value in the (key, value) terminology of map/reduce paradigm. + #[pyo3(get)] + pub value: Vec, + /// [watermark](https://numaflow.numaproj.io/core-concepts/watermarks/) represented by time is a + /// guarantee that we will not see an element older than this time. 
+ #[pyo3(get)] + pub watermark: DateTime, + /// Time of the element as seen at source or aligned after a reduce operation. + #[pyo3(get)] + pub eventtime: DateTime, + /// Headers for the message. + #[pyo3(get)] + pub headers: HashMap, +} + +impl Datum { + fn new( + keys: Vec, + value: Vec, + watermark: DateTime, + eventtime: DateTime, + headers: HashMap, + ) -> Self { + Self { + keys, + value, + watermark, + eventtime, + headers, + } + } + + fn __repr__(&self) -> String { + format!( + "Datum(keys={:?}, value={:?}, watermark={}, eventtime={}, headers={:?})", + self.keys, self.value, self.watermark, self.eventtime, self.headers + ) + } + + fn __str__(&self) -> String { + format!( + "Datum(keys={:?}, value={:?}, watermark={}, eventtime={}, headers={:?})", + self.keys, + String::from_utf8_lossy(&self.value), + self.watermark, + self.eventtime, + self.headers + ) + } +} + +impl From for Datum { + fn from(value: map::MapRequest) -> Self { + Datum::new( + value.keys, + value.value, + value.watermark, + value.eventtime, + value.headers, + ) + } +} + +/// Async Map Server that can be started from Python code which will run the Python UDF function. +#[pyclass(module = "pynumaflow_lite.mapper")] +pub struct MapAsyncServer { + sock_file: String, + info_file: String, + shutdown_tx: Mutex>>, +} + +#[pymethods] +impl MapAsyncServer { + #[new] + #[pyo3(signature = (sock_file: "str | None"=map::SOCK_ADDR.to_string(), info_file: "str | None"=map::SERVER_INFO_FILE.to_string()) -> "MapAsyncServer" + )] + fn new(sock_file: String, info_file: String) -> Self { + Self { + sock_file, + info_file, + shutdown_tx: Mutex::new(None), + } + } + + /// Start the server with the given Python function. + #[pyo3(signature = (py_func: "callable") -> "None")] + pub fn start<'a>(&self, py: Python<'a>, py_func: Py) -> PyResult> { + let sock_file = self.sock_file.clone(); + let info_file = self.info_file.clone(); + let (tx, rx) = tokio::sync::oneshot::channel::<()>(); + { + let mut guard = self.shutdown_tx.lock().unwrap(); + *guard = Some(tx); + } + + pyo3_async_runtimes::tokio::future_into_py(py, async move { + crate::map::server::start(py_func, sock_file, info_file, rx) + .await + .expect("server failed to start"); + Ok(()) + }) + } + + /// Trigger server shutdown from Python (idempotent). + #[pyo3(signature = () -> "None")] + pub fn stop(&self) -> PyResult<()> { + if let Some(tx) = self.shutdown_tx.lock().unwrap().take() { + let _ = tx.send(()); + } + Ok(()) + } +} + +/// Helper to populate a PyModule with map types/functions. 
+pub(crate) fn populate_py_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
+    m.add_class::<Messages>()?;
+    m.add_class::<Message>()?;
+    m.add_class::<Datum>()?;
+    m.add_class::<MapAsyncServer>()?;
+
+    Ok(())
+}
diff --git a/packages/pynumaflow-lite/src/map/server.rs b/packages/pynumaflow-lite/src/map/server.rs
new file mode 100644
index 00000000..8817e6ff
--- /dev/null
+++ b/packages/pynumaflow-lite/src/map/server.rs
@@ -0,0 +1,85 @@
+use crate::map::{Datum, Messages};
+use numaflow::map;
+use numaflow::shared::ServerExtras;
+
+use pyo3::prelude::*;
+use std::sync::Arc;
+
+pub(crate) struct PyMapRunner {
+    pub(crate) event_loop: Arc<Py<PyAny>>,
+    pub(crate) py_func: Arc<Py<PyAny>>,
+}
+
+#[tonic::async_trait]
+impl map::Mapper for PyMapRunner {
+    async fn map(&self, input: map::MapRequest) -> Vec<map::Message> {
+        let fut = Python::attach(|py| {
+            let keys = input.keys.clone();
+            let input: Datum = input.into();
+            let py_func = self.py_func.clone();
+
+            let locals = pyo3_async_runtimes::TaskLocals::new(self.event_loop.bind(py).clone());
+
+            let coro = py_func.call1(py, (keys, input)).unwrap().into_bound(py);
+
+            pyo3_async_runtimes::into_future_with_locals(&locals, coro).unwrap()
+        });
+
+        let result = fut.await.unwrap();
+
+        let result = Python::attach(|py| {
+            let x: Messages = result.extract(py).unwrap();
+            x
+        });
+
+        result.messages.into_iter().map(|m| m.into()).collect()
+    }
+}
+
+// Start the map server by spinning up a dedicated Python asyncio loop and wiring shutdown.
+pub(super) async fn start(
+    py_func: Py<PyAny>,
+    sock_file: String,
+    info_file: String,
+    shutdown_rx: tokio::sync::oneshot::Receiver<()>,
+) -> Result<(), pyo3::PyErr> {
+    let (tx, rx) = tokio::sync::oneshot::channel();
+    let py_asyncio_loop_handle = tokio::task::spawn_blocking(move || crate::pyrs::run_asyncio(tx));
+    let event_loop = rx.await.unwrap();
+
+    let (sig_handle, combined_rx) = crate::pyrs::setup_sig_handler(shutdown_rx);
+
+    let py_map_runner = PyMapRunner {
+        py_func: Arc::new(py_func),
+        event_loop: event_loop.clone(),
+    };
+
+    let server = numaflow::map::Server::new(py_map_runner)
+        .with_socket_file(sock_file)
+        .with_server_info_file(info_file);
+
+    let result = server
+        .start_with_shutdown(combined_rx)
+        .await
+        .map_err(|e| pyo3::PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(e.to_string()));
+
+    // Ensure the event loop is stopped even if shutdown came from elsewhere.
+    Python::attach(|py| {
+        if let Ok(stop_cb) = event_loop.getattr(py, "stop") {
+            let _ = event_loop.call_method1(py, "call_soon_threadsafe", (stop_cb,));
+        }
+    });
+
+    println!("Numaflow Core has shutdown...");
+
+    // Wait for the blocking asyncio thread to finish.
+    let _ = py_asyncio_loop_handle.await;
+
+    // if not finished, abort it
+    if !sig_handle.is_finished() {
+        println!("Aborting signal handler");
+        let _ = sig_handle.abort();
+    }
+
+    result
+}
diff --git a/packages/pynumaflow-lite/src/mapstream/mod.rs b/packages/pynumaflow-lite/src/mapstream/mod.rs
new file mode 100644
index 00000000..35566f9a
--- /dev/null
+++ b/packages/pynumaflow-lite/src/mapstream/mod.rs
@@ -0,0 +1,169 @@
+use chrono::{DateTime, Utc};
+use numaflow::mapstream;
+use std::collections::HashMap;
+use std::sync::Mutex;
+
+pub mod server;
+
+// Types for the streaming handler
+use pyo3::prelude::*;
+
+/// Streaming Datum mirrors MapStreamRequest for Python
+#[pyclass(module = "pynumaflow_lite.mapstreamer")]
+#[derive(Clone)]
+pub struct Datum {
+    /// Set of keys in the (key, value) terminology of the map/reduce paradigm.
+    #[pyo3(get)]
+    pub keys: Vec<String>,
+    /// The value in the (key, value) terminology of the map/reduce paradigm.
+ #[pyo3(get)] + pub value: Vec, + /// [Watermark](https://numaflow.numaproj.io/core-concepts/watermarks/) represented by time is a + /// guarantee that we will not see an element older than this time. + #[pyo3(get)] + pub watermark: DateTime, + /// Time of the element as seen at source or aligned after a reduce operation. + #[pyo3(get)] + pub eventtime: DateTime, + /// Headers associated with the message. + #[pyo3(get)] + pub headers: HashMap, +} + +impl Datum { + pub(crate) fn new( + keys: Vec, + value: Vec, + watermark: DateTime, + eventtime: DateTime, + headers: HashMap, + ) -> Self { + Self { + keys, + value, + watermark, + eventtime, + headers, + } + } +} + +impl From for Datum { + fn from(value: numaflow::mapstream::MapStreamRequest) -> Self { + Self::new( + value.keys, + value.value, + value.watermark, + value.eventtime, + value.headers, + ) + } +} + +/// A message to be sent downstream from a streaming handler. +#[pyclass(module = "pynumaflow_lite.mapstreamer")] +#[derive(Clone, Default, Debug)] +pub struct Message { + /// Keys are a collection of strings which will be passed on to the next vertex as is. + pub keys: Option>, + /// Value is the value passed to the next vertex. + pub value: Vec, + /// Tags are used for conditional forwarding. + pub tags: Option>, +} + +#[pymethods] +impl Message { + /// Create a new Message with the given value, keys, and tags. + #[new] + #[pyo3(signature = (value: "bytes", keys: "list[str] | None"=None, tags: "list[str] | None"=None) -> "Message")] + fn new(value: Vec, keys: Option>, tags: Option>) -> Self { + Self { keys, value, tags } + } + + /// Drop a Message, do not forward to the next vertex. + #[pyo3(signature = ())] + #[staticmethod] + fn message_to_drop() -> Self { + Self { + keys: None, + value: vec![], + tags: Some(vec![numaflow::shared::DROP.to_string()]), + } + } + + /// Convenience alias to match example usage: Message.to_drop() + #[pyo3(signature = ())] + #[staticmethod] + fn to_drop() -> Self { + Self::message_to_drop() + } +} + +impl From for mapstream::Message { + fn from(value: Message) -> Self { + Self { + keys: value.keys, + value: value.value, + tags: value.tags, + } + } +} + +/// Async MapStream Server that can be started from Python code which will run the Python UDF async generator. +#[pyclass(module = "pynumaflow_lite.mapstreamer")] +pub struct MapStreamAsyncServer { + sock_file: String, + info_file: String, + shutdown_tx: Mutex>>, +} + +#[pymethods] +impl MapStreamAsyncServer { + #[new] + #[pyo3(signature = (sock_file: "str | None"=mapstream::SOCK_ADDR.to_string(), info_file: "str | None"=mapstream::SERVER_INFO_FILE.to_string()) -> "MapStreamAsyncServer" + )] + fn new(sock_file: String, info_file: String) -> Self { + Self { + sock_file, + info_file, + shutdown_tx: Mutex::new(None), + } + } + + /// Start the server with the given Python async generator function. + #[pyo3(signature = (py_func: "callable") -> "None")] + pub fn start<'a>(&self, py: Python<'a>, py_func: Py) -> PyResult> { + let sock_file = self.sock_file.clone(); + let info_file = self.info_file.clone(); + let (tx, rx) = tokio::sync::oneshot::channel::<()>(); + { + let mut guard = self.shutdown_tx.lock().unwrap(); + *guard = Some(tx); + } + + pyo3_async_runtimes::tokio::future_into_py(py, async move { + crate::mapstream::server::start(py_func, sock_file, info_file, rx) + .await + .expect("server failed to start"); + Ok(()) + }) + } + + /// Trigger server shutdown from Python (idempotent). 
+ #[pyo3(signature = () -> "None")] + pub fn stop(&self) -> PyResult<()> { + if let Some(tx) = self.shutdown_tx.lock().unwrap().take() { + let _ = tx.send(()); + } + Ok(()) + } +} + +/// Helper to populate a PyModule with mapstream types/functions. +pub(crate) fn populate_py_module(m: &Bound) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + Ok(()) +} diff --git a/packages/pynumaflow-lite/src/mapstream/server.rs b/packages/pynumaflow-lite/src/mapstream/server.rs new file mode 100644 index 00000000..19f25cab --- /dev/null +++ b/packages/pynumaflow-lite/src/mapstream/server.rs @@ -0,0 +1,102 @@ +use crate::mapstream::Datum; +use crate::mapstream::Message as PyMessage; +use crate::pyiterables::PyAsyncIterStream; + +use numaflow::mapstream; +use numaflow::shared::ServerExtras; + +use pyo3::prelude::*; +use std::sync::Arc; +use tokio::sync::mpsc::Sender; +use tokio_stream::StreamExt; + +pub(crate) struct PyMapStreamRunner { + pub(crate) event_loop: Arc>, + pub(crate) py_func: Arc>, +} + +#[tonic::async_trait] +impl mapstream::MapStreamer for PyMapStreamRunner { + async fn map_stream(&self, input: mapstream::MapStreamRequest, tx: Sender) { + // Call Python handler: handler(keys, datum) -> AsyncIterator + let agen_obj = Python::attach(|py| { + let keys = input.keys.clone(); + let datum: Datum = input.into(); + let py_func = self.py_func.clone(); + let agen = py_func + .call1(py, (keys, datum)) + .expect("python handler raised before returning async iterable"); + // Keep as Py + agen.extract(py).unwrap_or(agen) + }); + + // Wrap the Python AsyncIterable in a Rust Stream that yields incrementally + let mut stream = PyAsyncIterStream::::new(agen_obj, self.event_loop.clone()) + .expect("failed to construct PyAsyncIterStream"); + + // Forward each yielded message immediately to the sender + while let Some(item) = stream.next().await { + match item { + Ok(py_msg) => { + let out: mapstream::Message = py_msg.into(); + if tx.send(out).await.is_err() { + break; + } + } + Err(e) => { + // Non-stop errors are surfaced per-item; log and stop this stream. + eprintln!("Python async iteration error: {:?}", e); + break; + } + } + } + } +} + +/// Start the mapstream server by spinning up a dedicated Python asyncio loop and wiring shutdown. +pub(super) async fn start( + py_func: Py, + sock_file: String, + info_file: String, + shutdown_rx: tokio::sync::oneshot::Receiver<()>, +) -> Result<(), pyo3::PyErr> { + let (tx, rx) = tokio::sync::oneshot::channel(); + let py_asyncio_loop_handle = tokio::task::spawn_blocking(move || crate::pyrs::run_asyncio(tx)); + let event_loop = rx.await.unwrap(); + + let (sig_handle, combined_rx) = crate::pyrs::setup_sig_handler(shutdown_rx); + + let py_runner = PyMapStreamRunner { + py_func: Arc::new(py_func), + event_loop: event_loop.clone(), + }; + + let server = numaflow::mapstream::Server::new(py_runner) + .with_socket_file(sock_file) + .with_server_info_file(info_file); + + let result = server + .start_with_shutdown(combined_rx) + .await + .map_err(|e| pyo3::PyErr::new::(e.to_string())); + + // Ensure the event loop is stopped even if shutdown came from elsewhere. + Python::attach(|py| { + if let Ok(stop_cb) = event_loop.getattr(py, "stop") { + let _ = event_loop.call_method1(py, "call_soon_threadsafe", (stop_cb,)); + } + }); + + println!("Numaflow Core (stream) has shutdown..."); + + // Wait for the blocking asyncio thread to finish. 
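The handler driven by `PyMapStreamRunner` above is a Python async generator taking `(keys, datum)` and yielding `Message`s, which `PyAsyncIterStream` forwards downstream one item at a time instead of buffering. A minimal sketch (the tokenising logic is illustrative):

```python
from pynumaflow_lite.mapstreamer import Datum, Message


async def stream_handler(keys: list[str], datum: Datum):
    # Emit one message per whitespace-separated token in the payload.
    for token in datum.value.decode().split():
        yield Message(token.encode(), keys=keys)
```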
+ let _ = py_asyncio_loop_handle.await; + + // if not finished, abort it + if !sig_handle.is_finished() { + println!("Aborting signal handler"); + let _ = sig_handle.abort(); + } + + result +} diff --git a/packages/pynumaflow-lite/src/pyiterables.rs b/packages/pynumaflow-lite/src/pyiterables.rs new file mode 100644 index 00000000..197b0e28 --- /dev/null +++ b/packages/pynumaflow-lite/src/pyiterables.rs @@ -0,0 +1,631 @@ +//! Convert Python AsyncIterator into Rust Stream and Rust channel into Python AsyncIterator. + +use std::future::Future; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + +use futures_core::Stream; +use pin_project::pin_project; +use pyo3::{PyClass, exceptions::PyStopAsyncIteration, prelude::*}; +use tokio::sync::Mutex as AsyncMutex; +use tokio::sync::mpsc; + +/// Stream over a Python AsyncIterator, yielding `M` as soon as each value is produced. +/// `M` must be extractable from the Python object. +/// +/// Keep a handle to the target asyncio event loop and await each `__anext__` using +/// pyo3-async-runtimes so items are yielded incrementally without buffering. +#[pin_project] +pub struct PyAsyncIterStream { + // The Python object that is the async-iterator (result of __aiter__()). + aiter: Py, + // The asyncio event loop this stream should use. + event_loop: Arc>, + // In-flight future for the next item (converted from Python awaitable). + #[pin] + next_fut: Option>> + Send + 'static>>>, + // Phantom: we yield M + _marker: std::marker::PhantomData, +} + +impl PyAsyncIterStream +where + M: for<'py> FromPyObject<'py> + Send + 'static, +{ + /// Given a Python AsyncIterator and the event loop, build a stream over its items. + /// It calls `__aiter__` on the `async_iterable` to get the iterator. + pub fn new(async_iterable: Py, event_loop: Arc>) -> PyResult { + let aiter = Python::attach(|py| async_iterable.call_method0(py, "__aiter__"))?; + Ok(Self { + aiter, + event_loop, + next_fut: None, + _marker: std::marker::PhantomData, + }) + } +} + +impl Stream for PyAsyncIterStream +where + M: for<'py> FromPyObject<'py> + Send + 'static, +{ + type Item = PyResult; + + /// Polls the next item from the Python AsyncIterable. + /// + /// Overview + /// - Lazily creates a new Python `__anext__()` awaitable when there is no in-flight + /// future and binds it to the target asyncio event loop via pyo3-async-runtimes. + /// - Drives that awaitable as a Rust `Future`, mapping results to the `Stream` API: + /// - On success, extracts the yielded Python object into `M` and returns + /// `Poll::Ready(Some(Ok(M)))`. + /// - On `StopAsyncIteration`, returns `Poll::Ready(None)` to signal end of stream. + /// - On any other Python exception, returns `Poll::Ready(Some(Err(PyErr)))`. + /// + /// State machine + /// - `next_fut: Option` holds the in-flight future for the next element. + /// - When `next_fut` is `None`, we create a new future by calling `__anext__()` + /// on the Python async iterator, then converting that awaitable into a Rust + /// future using `into_future_with_locals()` bound to the saved event loop. + /// - When the future resolves (Ready), we immediately clear `next_fut` so that the + /// subsequent poll will create a new future for the next item (or end on stop). + /// + /// Event loop / threading + /// - We do not run Python on the current thread's event loop. Instead, we keep an + /// owned reference to the target loop (`self.event_loop`) and use + /// `TaskLocals::new(loop)` so the `__anext__` awaitable runs on that loop. 
+ /// - All Python API interactions are performed via `Python::attach(|py| ...)`, + /// which safely acquires the GIL on the current thread when needed. + /// + /// Error and termination semantics + /// - If `__anext__` raises `StopAsyncIteration`, we return `None` and the stream + /// is finished. + /// - If `__anext__` raises any other exception, we surface it as `Some(Err(err))`. + /// Typically, an async generator that raises will be terminated by Python; the + /// next call to `__anext__` will then yield `StopAsyncIteration` and we return + /// `None` on the following poll. + fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let mut this = self.project(); + + // If we don't currently have an in-flight future, create one. + if this.next_fut.is_none() { + let event_loop = this.event_loop.clone(); + let fut = Python::attach(|py| { + // Call __anext__ on the bound iterator to get an awaitable. + let awaitable = this.aiter.bind(py).call_method0("__anext__")?; + // Build TaskLocals tied to our target loop, and convert awaitable to a Rust Future. + let locals = pyo3_async_runtimes::TaskLocals::new(event_loop.bind(py).clone()); + let f = pyo3_async_runtimes::into_future_with_locals(&locals, awaitable.clone()) + .expect("failed to create future for __anext__"); + Ok::<_, PyErr>(f) + })?; + *this.next_fut = Some(Box::pin(fut)); + } + + // Poll the Python -> Rust future. + match this.next_fut.as_mut().as_pin_mut() { + None => Poll::Pending, + Some(fut) => match fut.poll(cx) { + Poll::Pending => Poll::Pending, + Poll::Ready(res) => { + // Consume and clear the in-flight future. This is crucial: it ensures + // the next call to `poll_next` will construct a fresh `__anext__()` + // awaitable for the subsequent element (or detect StopAsyncIteration). + // Without clearing this, we’d keep polling a completed future. + this.next_fut.set(None); + match res { + Ok(obj) => { + // Convert PyObject -> M + let m = Python::attach(|py| obj.extract::(py)); + Poll::Ready(Some(m)) + } + Err(err) => { + // Stop on StopAsyncIteration; otherwise surface the error. + let is_stop = + Python::attach(|py| err.is_instance_of::(py)); + if is_stop { + Poll::Ready(None) + } else { + Poll::Ready(Some(Err(err))) + } + } + } + } + }, + } + } +} + +/// Generic async iterator that yields items of type `T` from a Tokio mpsc channel. +/// This is the internal implementation used by module-specific PyAsyncDatumStream wrappers. +/// +/// This struct is NOT exposed to Python directly. Instead, each module (reduce, session_reduce, +/// batchmap) wraps this in a `#[pyclass]` struct that delegates to this implementation. +/// +/// Type `T` must implement `PyClass` so items can be converted to Python objects via `Py::new`. +pub struct AsyncChannelStream { + rx: Arc>>, +} + +impl AsyncChannelStream +where + T: PyClass + Send + 'static, + T: Into>, +{ + /// Create a new AsyncChannelStream from a Tokio mpsc receiver. + pub fn new(rx: mpsc::Receiver) -> Self { + Self { + rx: Arc::new(AsyncMutex::new(rx)), + } + } + + /// Implementation of Python's __aiter__ protocol. + /// Returns a reference to self (the iterator itself). + /// + /// This should be called from the `#[pyclass]` wrapper's __aiter__ method. + pub fn py_aiter<'a>(&self, slf: &'a Bound<'_, PyAny>) -> PyResult> { + Ok(slf.clone()) + } + + /// Implementation of Python's __anext__ protocol. + /// Returns a future that resolves to the next item from the channel. + /// + /// This should be called from the `#[pyclass]` wrapper's __anext__ method. 
+ pub fn py_anext<'a>(&self, py: Python<'a>) -> PyResult> { + let rx = self.rx.clone(); + pyo3_async_runtimes::tokio::future_into_py(py, async move { + let mut guard = rx.lock().await; + match guard.recv().await { + Some(item) => Python::attach(|py| { + let bound = Bound::new(py, item)?; + Ok(bound.into_any().unbind()) + }), + None => Err(PyStopAsyncIteration::new_err("stream closed")), + } + }) + } +} + +#[cfg(test)] +mod tests { + use super::{AsyncChannelStream, PyAsyncIterStream}; + use pyo3::prelude::*; + use std::sync::Arc; + use tokio::sync::mpsc; + use tokio_stream::StreamExt; + + // Spawn a dedicated asyncio event loop running forever and return it. + async fn spawn_event_loop() -> (tokio::task::JoinHandle<()>, Arc>) { + let (tx, rx) = tokio::sync::oneshot::channel(); + let handle = tokio::task::spawn_blocking(move || { + Python::attach(|py| { + let aio: Py = py.import("asyncio").unwrap().into(); + let event_loop = aio.call_method0(py, "new_event_loop").unwrap(); + let _ = tx.send(event_loop.clone_ref(py)); + event_loop.call_method0(py, "run_forever").unwrap(); + }); + }); + let loop_obj = rx.await.expect("event loop create"); + (handle, std::sync::Arc::new(loop_obj)) + } + + #[tokio::test] + async fn py_async_iter_stream_yields_incrementally() { + // Initialize the Python interpreter for use from Rust + pyo3::Python::initialize(); + let (loop_handle, loop_obj) = spawn_event_loop().await; + + // Create a Python async generator that yields items with delays + let agen_obj = Python::attach(|py| { + use pyo3::types::PyDict; + let code = r#" +import asyncio +from typing import AsyncIterator +async def agen() -> AsyncIterator[int]: + for i in range(3): + await asyncio.sleep(0.05) + yield i +result = agen() +"#; + let globals = PyDict::new(py); + use std::ffi::CString; + let code_c = CString::new(code).unwrap(); + py.run(&code_c, Some(&globals), None).unwrap(); + let any: Option> = globals.get_item("result").expect("result missing"); + let obj: Py = any.expect("result missing").unbind(); + obj + }); + + let mut stream = + PyAsyncIterStream::::new(agen_obj, loop_obj.clone()).expect("construct stream"); + + let t0 = tokio::time::Instant::now(); + let v1 = stream.next().await.unwrap().unwrap(); + let t1 = tokio::time::Instant::now(); + let v2 = stream.next().await.unwrap().unwrap(); + let t2 = tokio::time::Instant::now(); + let v3 = stream.next().await.unwrap().unwrap(); + let _t3 = tokio::time::Instant::now(); + let end = stream.next().await; + + assert_eq!(v1, 0); + assert!(t1.duration_since(t0) >= tokio::time::Duration::from_millis(40)); + assert_eq!(v2, 1); + assert!(t2.duration_since(t1) >= tokio::time::Duration::from_millis(40)); + assert_eq!(v3, 2); + assert!(end.is_none()); + + // Stop the event loop and wait for the loop thread to finish + Python::attach(|py| { + if let Ok(stop_cb) = loop_obj.getattr(py, "stop") { + let _ = loop_obj.call_method1(py, "call_soon_threadsafe", (stop_cb,)); + } + }); + let _ = loop_handle.await; + } + + #[tokio::test] + async fn py_async_iter_stream_yields_error() { + // No delay; the async iterable should error on first next + pyo3::Python::initialize(); + let (loop_handle, loop_obj) = spawn_event_loop().await; + + // Create a Python async generator that immediately raises + let agen_obj = Python::attach(|py| { + use pyo3::types::PyDict; + let code = r#" +from typing import AsyncIterator +async def agen_fail() -> AsyncIterator[int]: + if True: + raise RuntimeError('boom') + yield 0 +result = agen_fail() +"#; + let globals = PyDict::new(py); + use 
std::ffi::CString; + let code_c = CString::new(code).unwrap(); + py.run(&code_c, Some(&globals), None).unwrap(); + let any = globals.get_item("result").expect("result missing"); + any.expect("result missing").unbind() + }); + + let mut stream = PyAsyncIterStream::::new(agen_obj, loop_obj.clone()).unwrap(); + + // First next should be an error + let first = stream.next().await; + assert!(first.is_some()); + let err = first + .unwrap() + .expect_err("expected error from async iterable"); + let is_runtime = + Python::attach(|py| err.is_instance_of::(py)); + assert!(is_runtime, "error should be RuntimeError"); + + // Subsequent next should terminate the stream (StopAsyncIteration) + let end = stream.next().await; + assert!(end.is_none()); + + // Stop the event loop and wait for the loop thread to finish + Python::attach(|py| { + if let Ok(stop_cb) = loop_obj.getattr(py, "stop") { + let _ = loop_obj.call_method1(py, "call_soon_threadsafe", (stop_cb,)); + } + }); + let _ = loop_handle.await; + } + + // Test data structure for AsyncChannelStream tests + #[pyclass] + #[derive(Clone)] + struct TestDatum { + #[pyo3(get)] + value: i32, + #[pyo3(get)] + message: String, + } + + #[pymethods] + impl TestDatum { + #[new] + fn new(value: i32, message: String) -> Self { + Self { value, message } + } + } + + #[tokio::test] + async fn async_channel_stream_yields_items() { + // Initialize Python interpreter + pyo3::Python::initialize(); + + // Create a channel and send some items + let (tx, rx) = mpsc::channel::(10); + + // Send test data + tx.send(TestDatum::new(1, "first".to_string())) + .await + .unwrap(); + tx.send(TestDatum::new(2, "second".to_string())) + .await + .unwrap(); + tx.send(TestDatum::new(3, "third".to_string())) + .await + .unwrap(); + drop(tx); // Close the channel + + // Create AsyncChannelStream + let stream = AsyncChannelStream::new(rx); + + // Test by calling from Python + Python::attach(|py| { + // Create a Python wrapper to test the stream + let code = r#" +async def consume_stream(stream): + results = [] + async for item in stream: + results.append((item.value, item.message)) + return results +"#; + use pyo3::types::PyDict; + let globals = PyDict::new(py); + use std::ffi::CString; + let code_c = CString::new(code).unwrap(); + py.run(&code_c, Some(&globals), None).unwrap(); + + let consume_fn = globals.get_item("consume_stream").unwrap().unwrap(); + + // Create a simple wrapper class to expose the stream + #[pyclass] + struct StreamWrapper { + inner: AsyncChannelStream, + } + + #[pymethods] + impl StreamWrapper { + fn __aiter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { + slf + } + + fn __anext__<'a>(&self, py: Python<'a>) -> PyResult> { + self.inner.py_anext(py) + } + } + + let wrapper = Py::new(py, StreamWrapper { inner: stream }).unwrap(); + + // Run the async function + let asyncio = py.import("asyncio").unwrap(); + let result = asyncio + .call_method1("run", (consume_fn.call1((wrapper,)).unwrap(),)) + .unwrap(); + + // Verify results + let results: Vec<(i32, String)> = result.extract().unwrap(); + assert_eq!(results.len(), 3); + assert_eq!(results[0], (1, "first".to_string())); + assert_eq!(results[1], (2, "second".to_string())); + assert_eq!(results[2], (3, "third".to_string())); + }); + } + + #[tokio::test] + async fn async_channel_stream_handles_empty_channel() { + pyo3::Python::initialize(); + + // Create a channel and immediately close it + let (tx, rx) = mpsc::channel::(10); + drop(tx); + + let stream = AsyncChannelStream::new(rx); + + Python::attach(|py| { + let code = r#" 
+async def consume_empty_stream(stream):
+    count = 0
+    async for _ in stream:
+        count += 1
+    return count
+"#;
+            use pyo3::types::PyDict;
+            let globals = PyDict::new(py);
+            use std::ffi::CString;
+            let code_c = CString::new(code).unwrap();
+            py.run(&code_c, Some(&globals), None).unwrap();
+
+            let consume_fn = globals.get_item("consume_empty_stream").unwrap().unwrap();
+
+            #[pyclass]
+            struct StreamWrapper {
+                inner: AsyncChannelStream<TestDatum>,
+            }
+
+            #[pymethods]
+            impl StreamWrapper {
+                fn __aiter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
+                    slf
+                }
+
+                fn __anext__<'a>(&self, py: Python<'a>) -> PyResult<Bound<'a, PyAny>> {
+                    self.inner.py_anext(py)
+                }
+            }
+
+            let wrapper = Py::new(py, StreamWrapper { inner: stream }).unwrap();
+
+            let asyncio = py.import("asyncio").unwrap();
+            let result = asyncio
+                .call_method1("run", (consume_fn.call1((wrapper,)).unwrap(),))
+                .unwrap();
+
+            let count: i32 = result.extract().unwrap();
+            assert_eq!(count, 0, "Empty stream should yield no items");
+        });
+    }
+
+    #[tokio::test]
+    async fn async_channel_stream_yields_multiple_items() {
+        pyo3::Python::initialize();
+
+        let (tx, rx) = mpsc::channel::<TestDatum>(10);
+
+        // Send multiple items
+        for i in 1..=5 {
+            tx.send(TestDatum::new(i, format!("item_{}", i)))
+                .await
+                .unwrap();
+        }
+        drop(tx); // Close the channel
+
+        let stream = AsyncChannelStream::new(rx);
+
+        Python::attach(|py| {
+            let code = r#"
+async def consume_all(stream):
+    results = []
+    async for item in stream:
+        results.append((item.value, item.message))
+    return results
+"#;
+            use pyo3::types::PyDict;
+            let globals = PyDict::new(py);
+            use std::ffi::CString;
+            let code_c = CString::new(code).unwrap();
+            py.run(&code_c, Some(&globals), None).unwrap();
+
+            let consume_fn = globals.get_item("consume_all").unwrap().unwrap();
+
+            #[pyclass]
+            struct StreamWrapper {
+                inner: AsyncChannelStream<TestDatum>,
+            }
+
+            #[pymethods]
+            impl StreamWrapper {
+                fn __aiter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
+                    slf
+                }
+
+                fn __anext__<'a>(&self, py: Python<'a>) -> PyResult<Bound<'a, PyAny>> {
+                    self.inner.py_anext(py)
+                }
+            }
+
+            let wrapper = Py::new(py, StreamWrapper { inner: stream }).unwrap();
+
+            let asyncio = py.import("asyncio").unwrap();
+            let result = asyncio
+                .call_method1("run", (consume_fn.call1((wrapper,)).unwrap(),))
+                .unwrap();
+
+            let results: Vec<(i32, String)> = result.extract().unwrap();
+            assert_eq!(results.len(), 5);
+            for i in 1..=5 {
+                assert_eq!(results[i - 1], (i as i32, format!("item_{}", i)));
+            }
+        });
+    }
+
+    #[test]
+    fn async_channel_stream_production_workload() {
+        // This test simulates a production workload where:
+        // - the channel buffer size is 1 (backpressure)
+        // - data is produced incrementally by a Tokio task
+        // - data is streamed to Python as it arrives (not buffered)
+        pyo3::Python::initialize();
+
+        // Create a Tokio runtime for this test
+        let rt = tokio::runtime::Runtime::new().unwrap();
+
+        rt.block_on(async {
+            // Buffer size of 1 to simulate real backpressure
+            let (tx, rx) = mpsc::channel::<TestDatum>(1);
+
+            // Spawn a producer task that sends items with small delays.
+            // This simulates data arriving from an external source (e.g., a gRPC stream).
+            let producer = tokio::spawn(async move {
+                for i in 1..=10 {
+                    // Simulate some processing/network delay
+                    tokio::time::sleep(tokio::time::Duration::from_millis(5)).await;
+
+                    // This will block if the channel is full (backpressure).
+                    // With buffer size 1, this ensures items are sent one at a time.
+                    if tx
+                        .send(TestDatum::new(i, format!("datum_{}", i)))
+                        .await
+                        .is_err()
+                    {
+                        break; // Receiver dropped
+                    }
+                }
+                // Close the channel after sending all items (tx is dropped here)
+            });
+
+            let stream = AsyncChannelStream::new(rx);
+
+            // Test by manually calling __anext__ multiple times.
+            // This simulates how Python would consume the stream.
+            let results = Python::attach(|py| {
+                #[pyclass]
+                struct StreamWrapper {
+                    inner: AsyncChannelStream<TestDatum>,
+                }
+
+                #[pymethods]
+                impl StreamWrapper {
+                    fn __aiter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
+                        slf
+                    }
+
+                    fn __anext__<'a>(&self, py: Python<'a>) -> PyResult<Bound<'a, PyAny>> {
+                        self.inner.py_anext(py)
+                    }
+                }
+
+                let wrapper = Py::new(py, StreamWrapper { inner: stream }).unwrap();
+
+                // Manually consume items by calling __anext__ in a loop.
+                // This is what Python's "async for" does under the hood.
+                let code = r#"
+async def consume_stream(stream):
+    results = []
+    try:
+        while True:
+            item = await stream.__anext__()
+            results.append((item.value, item.message))
+    except StopAsyncIteration:
+        pass
+    return results
+"#;
+                use pyo3::types::PyDict;
+                let globals = PyDict::new(py);
+                use std::ffi::CString;
+                let code_c = CString::new(code).unwrap();
+                py.run(&code_c, Some(&globals), None).unwrap();
+
+                let consume_fn = globals.get_item("consume_stream").unwrap().unwrap();
+
+                // Use asyncio.run to execute the async function
+                let asyncio = py.import("asyncio").unwrap();
+                let result = asyncio
+                    .call_method1("run", (consume_fn.call1((wrapper,)).unwrap(),))
+                    .unwrap();
+
+                result.extract::<Vec<(i32, String)>>().unwrap()
+            });
+
+            // Wait for the producer to complete
+            producer.await.unwrap();
+
+            // Verify all items were received in order
+            assert_eq!(results.len(), 10, "Should receive all 10 items");
+            for i in 1..=10 {
+                assert_eq!(
+                    results[i - 1],
+                    (i as i32, format!("datum_{}", i)),
+                    "Item {} should match",
+                    i
+                );
+            }
+        });
+    }
+}
diff --git a/packages/pynumaflow-lite/src/pyrs.rs b/packages/pynumaflow-lite/src/pyrs.rs
new file mode 100644
index 00000000..27436338
--- /dev/null
+++ b/packages/pynumaflow-lite/src/pyrs.rs
@@ -0,0 +1,53 @@
+use pyo3::{Py, PyAny, Python};
+use std::sync::Arc;
+use tokio::sync::oneshot::{Receiver, Sender};
+use tokio::task::JoinHandle;
+
+/// Start an asyncio event loop and block on it forever.
+pub(crate) fn run_asyncio(tx: Sender<Arc<Py<PyAny>>>) {
+    let event_loop: Py<PyAny> = Python::attach(|py| {
+        let aio: Py<PyAny> = py.import("asyncio").unwrap().into();
+        aio.call_method0(py, "new_event_loop").unwrap()
+    });
+    let event_loop = Arc::new(event_loop);
+    let _ = tx.send(event_loop.clone());
+    Python::attach(|py| {
+        println!("Starting NumaflowCore: event_loop={:?}", event_loop);
+        event_loop.call_method0(py, "run_forever").unwrap();
+    });
+}
+
+pub(crate) fn setup_sig_handler(shutdown_rx: Receiver<()>) -> (JoinHandle<()>, Receiver<()>) {
+    // Listen for OS signals (Ctrl+C and SIGTERM) to trigger shutdown from Rust as well.
+    let (os_sig_tx, mut os_sig_rx) = tokio::sync::oneshot::channel::<()>();
+
+    let sig_handle = tokio::spawn(async move {
+        let ctrl_c = tokio::signal::ctrl_c();
+        #[cfg(unix)]
+        let mut sigterm_stream =
+            tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
+                .expect("failed to install SIGTERM handler");
+        #[cfg(unix)]
+        let sigterm = sigterm_stream.recv();
+        #[cfg(not(unix))]
+        let sigterm = std::future::pending::<()>();
+        tokio::select! {
+            _ = ctrl_c => {},
+            _ = sigterm => {},
+        }
+        let _ = os_sig_tx.send(());
+    });
+
+    // Combine Python-initiated shutdown and OS signal shutdown into one channel for the server.
+    let (combined_tx, combined_rx) = tokio::sync::oneshot::channel::<()>();
+
+    tokio::spawn(async move {
+        tokio::select! {
+            _ = shutdown_rx => {},
+            _ = &mut os_sig_rx => {},
+        }
+        let _ = combined_tx.send(());
+    });
+
+    (sig_handle, combined_rx)
+}
diff --git a/packages/pynumaflow-lite/src/reduce/mod.rs b/packages/pynumaflow-lite/src/reduce/mod.rs
new file mode 100644
index 00000000..f65d7899
--- /dev/null
+++ b/packages/pynumaflow-lite/src/reduce/mod.rs
@@ -0,0 +1,273 @@
+use chrono::{DateTime, Utc};
+use numaflow::reduce;
+use std::collections::HashMap;
+use std::sync::Mutex;
+
+pub mod server;
+
+use pyo3::prelude::*;
+use tokio::sync::mpsc;
+
+/// A message to be sent to the next vertex from a reduce handler.
+#[pyclass(module = "pynumaflow_lite.reducer")]
+#[derive(Clone, Default, Debug)]
+pub struct Message {
+    pub keys: Option<Vec<String>>, // optional keys
+    pub value: Vec<u8>,            // payload
+    pub tags: Option<Vec<String>>, // optional tags (e.g., DROP)
+}
+
+#[pymethods]
+impl Message {
+    #[new]
+    #[pyo3(signature = (value: "bytes", keys: "list[str] | None"=None, tags: "list[str] | None"=None) -> "Message")]
+    fn new(value: Vec<u8>, keys: Option<Vec<String>>, tags: Option<Vec<String>>) -> Self {
+        Self { keys, value, tags }
+    }
+
+    /// Drop a Message, do not forward to the next vertex.
+    #[pyo3(signature = ())]
+    #[staticmethod]
+    fn message_to_drop() -> Self {
+        Self {
+            keys: None,
+            value: vec![],
+            tags: Some(vec![numaflow::shared::DROP.to_string()]),
+        }
+    }
+}
+
+impl From<Message> for reduce::Message {
+    fn from(value: Message) -> Self {
+        Self {
+            keys: value.keys,
+            value: value.value,
+            tags: value.tags,
+        }
+    }
+}
+
+/// A collection of Messages returned by a reducer.
+#[pyclass(module = "pynumaflow_lite.reducer")]
+#[derive(Clone, Debug)]
+pub struct Messages {
+    pub(crate) messages: Vec<Message>,
+}
+
+#[pymethods]
+impl Messages {
+    #[new]
+    #[pyo3(signature = () -> "Messages")]
+    fn new() -> Self {
+        Self { messages: vec![] }
+    }
+
+    /// Append a Message to the collection.
+    #[pyo3(signature = (message: "Message"))]
+    fn append(&mut self, message: Message) {
+        self.messages.push(message);
+    }
+
+    fn __repr__(&self) -> String {
+        format!("Messages({:?})", self.messages)
+    }
+
+    fn __str__(&self) -> String {
+        format!("Messages({:?})", self.messages)
+    }
+}
+
+/// The incoming ReduceRequest accessible in the Python function (streamed).
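+///
+/// A hedged sketch of how a Python reduce handler might consume this type; the handler
+/// name and the counting logic are illustrative, not part of this change:
+///
+/// ```python
+/// async def handler(keys, datums, md):
+///     count = 0
+///     async for d in datums:  # each d is a Datum
+///         count += 1
+///     msgs = Messages()
+///     msgs.append(Message(str(count).encode(), keys=keys))
+///     return msgs
+/// ```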
+#[pyclass(module = "pynumaflow_lite.reducer")]
+pub struct Datum {
+    #[pyo3(get)]
+    pub keys: Vec<String>,
+    #[pyo3(get)]
+    pub value: Vec<u8>,
+    #[pyo3(get)]
+    pub watermark: DateTime<Utc>,
+    #[pyo3(get)]
+    pub eventtime: DateTime<Utc>,
+    #[pyo3(get)]
+    pub headers: HashMap<String, String>,
+}
+
+impl Datum {
+    fn new(
+        keys: Vec<String>,
+        value: Vec<u8>,
+        watermark: DateTime<Utc>,
+        eventtime: DateTime<Utc>,
+        headers: HashMap<String, String>,
+    ) -> Self {
+        Self {
+            keys,
+            value,
+            watermark,
+            eventtime,
+            headers,
+        }
+    }
+}
+
+// The dunder methods live in a #[pymethods] block so they are actually visible to Python.
+#[pymethods]
+impl Datum {
+    fn __repr__(&self) -> String {
+        format!(
+            "Datum(keys={:?}, value={:?}, watermark={}, eventtime={}, headers={:?})",
+            self.keys, self.value, self.watermark, self.eventtime, self.headers
+        )
+    }
+
+    fn __str__(&self) -> String {
+        format!(
+            "Datum(keys={:?}, value={:?}, watermark={}, eventtime={}, headers={:?})",
+            self.keys,
+            String::from_utf8_lossy(&self.value),
+            self.watermark,
+            self.eventtime,
+            self.headers
+        )
+    }
+}
+
+impl From<reduce::ReduceRequest> for Datum {
+    fn from(value: reduce::ReduceRequest) -> Self {
+        Self::new(
+            value.keys,
+            value.value,
+            value.watermark,
+            value.eventtime,
+            value.headers,
+        )
+    }
+}
+
+/// IntervalWindow metadata exposed to Python.
+#[pyclass(module = "pynumaflow_lite.reducer")]
+#[derive(Clone)]
+pub struct IntervalWindow {
+    #[pyo3(get)]
+    pub start: DateTime<Utc>,
+    #[pyo3(get)]
+    pub end: DateTime<Utc>,
+}
+
+impl IntervalWindow {
+    pub(crate) fn new(start: DateTime<Utc>, end: DateTime<Utc>) -> Self {
+        Self { start, end }
+    }
+}
+
+/// Metadata passed to the reducer handler in Python.
+#[pyclass(module = "pynumaflow_lite.reducer")]
+#[derive(Clone)]
+pub struct Metadata {
+    #[pyo3(get)]
+    pub interval_window: IntervalWindow,
+}
+
+impl Metadata {
+    pub(crate) fn new(interval_window: IntervalWindow) -> Self {
+        Self { interval_window }
+    }
+}
+
+/// Python-visible async iterator that yields Datum items from a Tokio mpsc channel.
+/// This is a thin wrapper around the generic AsyncChannelStream implementation.
+#[pyclass(module = "pynumaflow_lite.reducer")]
+pub struct PyAsyncDatumStream {
+    inner: crate::pyiterables::AsyncChannelStream<Datum>,
+}
+
+#[pymethods]
+impl PyAsyncDatumStream {
+    #[new]
+    fn new() -> Self {
+        let (_tx, rx) = mpsc::channel::<Datum>(1);
+        Self {
+            inner: crate::pyiterables::AsyncChannelStream::new(rx),
+        }
+    }
+
+    fn __aiter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
+        slf
+    }
+
+    fn __anext__<'a>(&self, py: Python<'a>) -> PyResult<Bound<'a, PyAny>> {
+        self.inner.py_anext(py)
+    }
+}
+
+impl PyAsyncDatumStream {
+    pub fn new_with(rx: mpsc::Receiver<Datum>) -> Self {
+        Self {
+            inner: crate::pyiterables::AsyncChannelStream::new(rx),
+        }
+    }
+}
+
+/// Async Reduce Server that can be started from Python code, taking a class (creator) or function.
+#[pyclass(module = "pynumaflow_lite.reducer")]
+pub struct ReduceAsyncServer {
+    sock_file: String,
+    info_file: String,
+    shutdown_tx: Mutex<Option<tokio::sync::oneshot::Sender<()>>>,
+}
+
+#[pymethods]
+impl ReduceAsyncServer {
+    #[new]
+    #[pyo3(signature = (sock_file: "str | None"=reduce::SOCK_ADDR.to_string(), info_file: "str | None"=reduce::SERVER_INFO_FILE.to_string()) -> "ReduceAsyncServer")]
+    fn new(sock_file: String, info_file: String) -> Self {
+        Self {
+            sock_file,
+            info_file,
+            shutdown_tx: Mutex::new(None),
+        }
+    }
+
+    /// Start the server with the given Python class (creator) or function.
+    /// - For class-based: pass the class and optionally init_args (tuple).
+    /// - For function-based: pass the async function directly (init_args is ignored).
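+    ///
+    /// A minimal usage sketch from Python, assuming the module layout implied by
+    /// `module = "pynumaflow_lite.reducer"` (the counter function is illustrative):
+    ///
+    /// ```python
+    /// from pynumaflow_lite.reducer import Message, Messages, ReduceAsyncServer
+    ///
+    /// async def counter(keys, datums, md):
+    ///     count = 0
+    ///     async for _ in datums:
+    ///         count += 1
+    ///     msgs = Messages()
+    ///     msgs.append(Message(str(count).encode(), keys=keys))
+    ///     return msgs
+    ///
+    /// server = ReduceAsyncServer()
+    /// await server.start(counter)
+    /// ```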
+    #[pyo3(signature = (py_creator: "object", init_args: "object | None"=None) -> "None")]
+    pub fn start<'a>(
+        &self,
+        py: Python<'a>,
+        py_creator: Py<PyAny>,
+        init_args: Option<Py<PyAny>>,
+    ) -> PyResult<Bound<'a, PyAny>> {
+        let sock_file = self.sock_file.clone();
+        let info_file = self.info_file.clone();
+        let (tx, rx) = tokio::sync::oneshot::channel::<()>();
+        {
+            let mut guard = self.shutdown_tx.lock().unwrap();
+            *guard = Some(tx);
+        }
+
+        pyo3_async_runtimes::tokio::future_into_py(py, async move {
+            crate::reduce::server::start(py_creator, init_args, sock_file, info_file, rx)
+                .await
+                .expect("server failed to start");
+            Ok(())
+        })
+    }
+
+    /// Trigger server shutdown from Python (idempotent).
+    #[pyo3(signature = () -> "None")]
+    pub fn stop(&self) -> PyResult<()> {
+        if let Some(tx) = self.shutdown_tx.lock().unwrap().take() {
+            let _ = tx.send(());
+        }
+        Ok(())
+    }
+}
+
+/// Helper to populate a PyModule with reduce types/functions.
+pub(crate) fn populate_py_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
+    m.add_class::<Message>()?;
+    m.add_class::<Messages>()?;
+    m.add_class::<Datum>()?;
+    m.add_class::<IntervalWindow>()?;
+    m.add_class::<Metadata>()?;
+    m.add_class::<PyAsyncDatumStream>()?;
+    m.add_class::<ReduceAsyncServer>()?;
+    Ok(())
+}
diff --git a/packages/pynumaflow-lite/src/reduce/server.rs b/packages/pynumaflow-lite/src/reduce/server.rs
new file mode 100644
index 00000000..813af1aa
--- /dev/null
+++ b/packages/pynumaflow-lite/src/reduce/server.rs
@@ -0,0 +1,201 @@
+use crate::reduce::{
+    Datum as PyDatum, IntervalWindow as PyIntervalWindow, Message as PyMessage,
+    Messages as PyMessages, Metadata as PyMetadata, PyAsyncDatumStream,
+};
+use numaflow::reduce;
+use numaflow::shared::ServerExtras;
+use pyo3::prelude::*;
+use pyo3::types::PyTuple;
+use std::sync::Arc;
+
+pub(crate) struct PyReduceCreator {
+    /// handle to the Python event loop
+    pub(crate) event_loop: Arc<Py<PyAny>>,
+    /// Python class to instantiate per window OR a function
+    pub(crate) py_creator: Arc<Py<PyAny>>,
+    /// optional tuple of positional args (only for classes)
+    pub(crate) init_args: Option<Arc<Py<PyAny>>>,
+    /// true if py_creator is a function, false if it's a class
+    pub(crate) is_function: bool,
+}
+
+pub(crate) struct PyReduceRunner {
+    /// handle to the Python event loop
+    pub(crate) event_loop: Arc<Py<PyAny>>,
+    /// Per-window instance of the class OR the function itself. A class instance works here
+    /// because the class overloads __call__, making it callable like a function.
+    pub(crate) py_obj: Arc<Py<PyAny>>,
+}
+
+#[tonic::async_trait]
+impl reduce::ReducerCreator for PyReduceCreator {
+    type R = PyReduceRunner;
+
+    fn create(&self) -> Self::R {
+        // If it's a function, just clone the reference; if it's a class, instantiate it.
+        let inst = if self.is_function {
+            // For functions, we don't instantiate; just use the function directly
+            Python::attach(|py| self.py_creator.clone_ref(py))
+        } else {
+            // For classes, instantiate the Python class synchronously under the GIL.
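+            // (Calling back into Python requires being attached to the interpreter,
+            // which is why the constructor call happens inside Python::attach.)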
+            Python::attach(|py| {
+                let class = self.py_creator.as_ref();
+                match &self.init_args {
+                    Some(args) => {
+                        let bound = args.as_ref().bind(py);
+                        let py_tuple = bound.downcast::<PyTuple>()?;
+                        class.call1(py, py_tuple)
+                    }
+                    None => class.call0(py),
+                }
+            })
+            .expect("failed to instantiate Python reducer class")
+        };
+
+        PyReduceRunner {
+            event_loop: self.event_loop.clone(),
+            py_obj: Arc::new(inst),
+        }
+    }
+}
+
+#[tonic::async_trait]
+impl reduce::Reducer for PyReduceRunner {
+    async fn reduce(
+        &self,
+        keys: Vec<String>,
+        mut input: tokio::sync::mpsc::Receiver<reduce::ReduceRequest>,
+        md: &reduce::Metadata,
+    ) -> Vec<reduce::Message> {
+        // Create a channel to stream Datum into Python as an async iterator
+        let (tx, rx) = tokio::sync::mpsc::channel::<PyDatum>(64);
+
+        // Spawn a task forwarding incoming datums to the Python-facing channel
+        let forwarder = tokio::spawn(async move {
+            while let Some(req) = input.recv().await {
+                let datum = PyDatum::from(req);
+                if tx.send(datum).await.is_err() {
+                    break;
+                }
+            }
+        });
+
+        // Call the Python coroutine:
+        // - For class-based: obj.handler(keys, datums: AsyncIterable[Datum], md: Metadata) -> Messages
+        // - For function-based: obj(keys, datums: AsyncIterable[Datum], md: Metadata) -> Messages
+        let fut = Python::attach(|py| {
+            let locals = pyo3_async_runtimes::TaskLocals::new(self.event_loop.bind(py).clone());
+            let obj = self.py_obj.clone();
+
+            // Build metadata and stream objects
+            let stream = PyAsyncDatumStream::new_with(rx);
+            let interval =
+                PyIntervalWindow::new(md.interval_window.start_time, md.interval_window.end_time);
+            let md_py = PyMetadata::new(interval);
+
+            // Whether it is a function or a class instance, obj is callable.
+            let coro = obj
+                .call1(py, (keys.clone(), stream, md_py))
+                .unwrap()
+                .into_bound(py);
+
+            pyo3_async_runtimes::into_future_with_locals(&locals, coro).unwrap()
+        });
+
+        let result = fut.await.unwrap();
+
+        // Ensure the forwarder completes
+        let _ = forwarder.await;
+
+        let messages = Python::attach(|py| {
+            // Expect Messages; also allow a single Message for convenience
+            if let Ok(msgs) = result.extract::<PyMessages>(py) {
+                msgs.messages
+                    .into_iter()
+                    .map(reduce::Message::from)
+                    .collect::<Vec<_>>()
+            } else if let Ok(single) = result.extract::<PyMessage>(py) {
+                vec![single.into()]
+            } else {
+                vec![]
+            }
+        });
+
+        messages
+    }
+}
+
+/// Start the reduce server by spinning up a dedicated Python asyncio loop and wiring shutdown.
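+/// The wiring, as implemented below: a blocking Tokio task hosts the asyncio loop and
+/// hands its handle back over a oneshot channel; `setup_sig_handler` merges OS signals
+/// with the Python-initiated shutdown receiver; once the gRPC server returns, the loop
+/// is stopped via `call_soon_threadsafe` and the blocking thread is joined.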
+pub(super) async fn start(
+    py_creator: Py<PyAny>,
+    init_args: Option<Py<PyAny>>,
+    sock_file: String,
+    info_file: String,
+    shutdown_rx: tokio::sync::oneshot::Receiver<()>,
+) -> Result<(), pyo3::PyErr> {
+    let (tx, rx) = tokio::sync::oneshot::channel();
+    let py_asyncio_loop_handle = tokio::task::spawn_blocking(move || crate::pyrs::run_asyncio(tx));
+    let event_loop = rx.await.unwrap();
+
+    let (sig_handle, combined_rx) = crate::pyrs::setup_sig_handler(shutdown_rx);
+
+    // Detect whether py_creator is a function or a class
+    let is_function = Python::attach(|py| {
+        let obj = py_creator.bind(py);
+        // Check if it's a plain or coroutine function using the inspect module
+        let inspect = py.import("inspect").ok()?;
+        if let Ok(is_func) = inspect.call_method1("isfunction", (obj,)) {
+            if let Ok(result) = is_func.extract::<bool>() {
+                if result {
+                    return Some(true);
+                }
+            }
+        }
+        // Also check for a coroutine function
+        if let Ok(is_coro) = inspect.call_method1("iscoroutinefunction", (obj,)) {
+            if let Ok(result) = is_coro.extract::<bool>() {
+                if result {
+                    return Some(true);
+                }
+            }
+        }
+        Some(false)
+    })
+    .unwrap_or(false);
+
+    let creator = PyReduceCreator {
+        event_loop: event_loop.clone(),
+        py_creator: Arc::new(py_creator),
+        init_args: init_args.map(Arc::new),
+        is_function,
+    };
+
+    let server = reduce::Server::new(creator)
+        .with_socket_file(sock_file)
+        .with_server_info_file(info_file);
+
+    let result = server
+        .start_with_shutdown(combined_rx)
+        .await
+        .map_err(|e| pyo3::PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(e.to_string()));
+
+    // Ensure the event loop is stopped even if shutdown came from elsewhere.
+    Python::attach(|py| {
+        if let Ok(stop_cb) = event_loop.getattr(py, "stop") {
+            let _ = event_loop.call_method1(py, "call_soon_threadsafe", (stop_cb,));
+        }
+    });
+
+    println!("Numaflow Core (reduce) has shut down...");
+
+    // Wait for the blocking asyncio thread to finish.
+    let _ = py_asyncio_loop_handle.await;
+
+    // If the signal handler task hasn't finished, abort it.
+    if !sig_handle.is_finished() {
+        println!("Aborting signal handler");
+        sig_handle.abort();
+    }
+
+    result
+}
diff --git a/packages/pynumaflow-lite/src/session_reduce/mod.rs b/packages/pynumaflow-lite/src/session_reduce/mod.rs
new file mode 100644
index 00000000..36fcbbe3
--- /dev/null
+++ b/packages/pynumaflow-lite/src/session_reduce/mod.rs
@@ -0,0 +1,210 @@
+use chrono::{DateTime, Utc};
+use numaflow::session_reduce;
+use std::collections::HashMap;
+use std::sync::Mutex;
+
+pub mod server;
+
+use pyo3::prelude::*;
+use tokio::sync::mpsc;
+
+/// A message to be sent to the next vertex from a session reduce handler.
+#[pyclass(module = "pynumaflow_lite.session_reducer")]
+#[derive(Clone, Default, Debug)]
+pub struct Message {
+    #[pyo3(get)]
+    pub keys: Option<Vec<String>>, // optional keys
+    #[pyo3(get)]
+    pub value: Vec<u8>, // payload
+    #[pyo3(get)]
+    pub tags: Option<Vec<String>>, // optional tags (e.g., DROP)
+}
+
+#[pymethods]
+impl Message {
+    #[new]
+    #[pyo3(signature = (value: "bytes", keys: "list[str] | None"=None, tags: "list[str] | None"=None) -> "Message")]
+    fn new(value: Vec<u8>, keys: Option<Vec<String>>, tags: Option<Vec<String>>) -> Self {
+        Self { keys, value, tags }
+    }
+
+    /// Drop a Message, do not forward to the next vertex.
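+    ///
+    /// A hedged sketch of its use inside a Python session reduce handler (the
+    /// `should_skip` predicate is hypothetical):
+    ///
+    /// ```python
+    /// if should_skip(datum):  # hypothetical predicate
+    ///     yield Message.message_to_drop()
+    /// ```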
+    #[pyo3(signature = ())]
+    #[staticmethod]
+    fn message_to_drop() -> Self {
+        Self {
+            keys: None,
+            value: vec![],
+            tags: Some(vec![numaflow::shared::DROP.to_string()]),
+        }
+    }
+}
+
+impl From<Message> for session_reduce::Message {
+    fn from(value: Message) -> Self {
+        Self {
+            keys: value.keys,
+            value: value.value,
+            tags: value.tags,
+        }
+    }
+}
+
+/// The incoming SessionReduceRequest accessible in the Python function (streamed).
+#[pyclass(module = "pynumaflow_lite.session_reducer")]
+pub struct Datum {
+    #[pyo3(get)]
+    pub keys: Vec<String>,
+    #[pyo3(get)]
+    pub value: Vec<u8>,
+    #[pyo3(get)]
+    pub watermark: DateTime<Utc>,
+    #[pyo3(get)]
+    pub eventtime: DateTime<Utc>,
+    #[pyo3(get)]
+    pub headers: HashMap<String, String>,
+}
+
+impl Datum {
+    fn new(
+        keys: Vec<String>,
+        value: Vec<u8>,
+        watermark: DateTime<Utc>,
+        eventtime: DateTime<Utc>,
+        headers: HashMap<String, String>,
+    ) -> Self {
+        Self {
+            keys,
+            value,
+            watermark,
+            eventtime,
+            headers,
+        }
+    }
+}
+
+// The dunder methods live in a #[pymethods] block so they are actually visible to Python.
+#[pymethods]
+impl Datum {
+    fn __repr__(&self) -> String {
+        format!(
+            "Datum(keys={:?}, value={:?}, watermark={}, eventtime={}, headers={:?})",
+            self.keys, self.value, self.watermark, self.eventtime, self.headers
+        )
+    }
+
+    fn __str__(&self) -> String {
+        format!(
+            "Datum(keys={:?}, value={:?}, watermark={}, eventtime={}, headers={:?})",
+            self.keys,
+            String::from_utf8_lossy(&self.value),
+            self.watermark,
+            self.eventtime,
+            self.headers
+        )
+    }
+}
+
+impl From<session_reduce::SessionReduceRequest> for Datum {
+    fn from(value: session_reduce::SessionReduceRequest) -> Self {
+        Self::new(
+            value.keys,
+            value.value,
+            value.watermark,
+            value.event_time,
+            value.headers,
+        )
+    }
+}
+
+/// Python-visible async iterator that yields Datum items from a Tokio mpsc channel.
+/// This is a thin wrapper around the generic AsyncChannelStream implementation.
+#[pyclass(module = "pynumaflow_lite.session_reducer")]
+pub struct PyAsyncDatumStream {
+    inner: crate::pyiterables::AsyncChannelStream<Datum>,
+}
+
+#[pymethods]
+impl PyAsyncDatumStream {
+    #[new]
+    fn new() -> Self {
+        let (_tx, rx) = mpsc::channel::<Datum>(1);
+        Self {
+            inner: crate::pyiterables::AsyncChannelStream::new(rx),
+        }
+    }
+
+    fn __aiter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
+        slf
+    }
+
+    fn __anext__<'a>(&self, py: Python<'a>) -> PyResult<Bound<'a, PyAny>> {
+        self.inner.py_anext(py)
+    }
+}
+
+impl PyAsyncDatumStream {
+    pub fn new_with(rx: mpsc::Receiver<Datum>) -> Self {
+        Self {
+            inner: crate::pyiterables::AsyncChannelStream::new(rx),
+        }
+    }
+}
+
+/// Async Session Reduce Server that can be started from Python code, taking a class (creator).
+#[pyclass(module = "pynumaflow_lite.session_reducer")]
+pub struct SessionReduceAsyncServer {
+    sock_file: String,
+    info_file: String,
+    shutdown_tx: Mutex<Option<tokio::sync::oneshot::Sender<()>>>,
+}
+
+#[pymethods]
+impl SessionReduceAsyncServer {
+    #[new]
+    #[pyo3(signature = (sock_file=session_reduce::SOCK_ADDR.to_string(), info_file=session_reduce::SERVER_INFO_FILE.to_string()))]
+    fn new(sock_file: String, info_file: String) -> Self {
+        Self {
+            sock_file,
+            info_file,
+            shutdown_tx: Mutex::new(None),
+        }
+    }
+
+    /// Start the server with the given Python class (creator).
+    /// - For class-based: pass the class and optionally init_args (tuple).
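+    ///
+    /// A minimal usage sketch from Python (the Counter class is illustrative; its
+    /// `session_reduce`, `accumulator`, and `merge_accumulator` methods mirror what the
+    /// Rust runner in server.rs invokes):
+    ///
+    /// ```python
+    /// from pynumaflow_lite.session_reducer import Message, SessionReduceAsyncServer
+    ///
+    /// class Counter:
+    ///     def __init__(self):
+    ///         self.count = 0
+    ///
+    ///     async def session_reduce(self, keys, datums):
+    ///         async for _ in datums:
+    ///             self.count += 1
+    ///         yield Message(str(self.count).encode(), keys=keys)
+    ///
+    ///     async def accumulator(self) -> bytes:
+    ///         return str(self.count).encode()
+    ///
+    ///     async def merge_accumulator(self, acc: bytes):
+    ///         self.count += int(acc)
+    ///
+    /// server = SessionReduceAsyncServer()
+    /// await server.start(Counter)
+    /// ```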
+    #[pyo3(signature = (py_creator, init_args=None))]
+    pub fn start<'a>(
+        &self,
+        py: Python<'a>,
+        py_creator: Py<PyAny>,
+        init_args: Option<Py<PyAny>>,
+    ) -> PyResult<Bound<'a, PyAny>> {
+        let sock_file = self.sock_file.clone();
+        let info_file = self.info_file.clone();
+        let (tx, rx) = tokio::sync::oneshot::channel::<()>();
+        {
+            let mut guard = self.shutdown_tx.lock().unwrap();
+            *guard = Some(tx);
+        }
+
+        pyo3_async_runtimes::tokio::future_into_py(py, async move {
+            crate::session_reduce::server::start(py_creator, init_args, sock_file, info_file, rx)
+                .await?;
+            Ok(())
+        })
+    }
+
+    /// Trigger server shutdown from Python (idempotent).
+    pub fn stop(&self) -> PyResult<()> {
+        if let Some(tx) = self.shutdown_tx.lock().unwrap().take() {
+            let _ = tx.send(());
+        }
+        Ok(())
+    }
+}
+
+/// Helper to populate a PyModule with session_reduce types/functions.
+pub(crate) fn populate_py_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
+    m.add_class::<Message>()?;
+    m.add_class::<Datum>()?;
+    m.add_class::<PyAsyncDatumStream>()?;
+    m.add_class::<SessionReduceAsyncServer>()?;
+    Ok(())
+}
diff --git a/packages/pynumaflow-lite/src/session_reduce/server.rs b/packages/pynumaflow-lite/src/session_reduce/server.rs
new file mode 100644
index 00000000..d9006d63
--- /dev/null
+++ b/packages/pynumaflow-lite/src/session_reduce/server.rs
@@ -0,0 +1,205 @@
+use crate::pyiterables::PyAsyncIterStream;
+use crate::session_reduce::{Datum as PyDatum, Message as PyMessage, PyAsyncDatumStream};
+use numaflow::session_reduce;
+use numaflow::shared::ServerExtras;
+use pyo3::prelude::*;
+use pyo3::types::PyTuple;
+use std::sync::Arc;
+use tokio_stream::StreamExt;
+
+pub(crate) struct PySessionReduceCreator {
+    /// handle to the Python event loop
+    pub(crate) event_loop: Arc<Py<PyAny>>,
+    /// Python class to instantiate per window
+    pub(crate) py_creator: Arc<Py<PyAny>>,
+    /// optional tuple of positional args
+    pub(crate) init_args: Option<Arc<Py<PyAny>>>,
+}
+
+pub(crate) struct PySessionReduceRunner {
+    /// handle to the Python event loop
+    pub(crate) event_loop: Arc<Py<PyAny>>,
+    /// Instance of the class per window
+    pub(crate) py_instance: Arc<Py<PyAny>>,
+}
+
+impl session_reduce::SessionReducerCreator for PySessionReduceCreator {
+    type R = PySessionReduceRunner;
+
+    fn create(&self) -> Self::R {
+        // Instantiate the Python class synchronously under the GIL.
+        let inst = Python::attach(|py| {
+            let class = self.py_creator.as_ref();
+            match &self.init_args {
+                Some(args) => {
+                    let bound = args.as_ref().bind(py);
+                    let py_tuple = bound.downcast::<PyTuple>()?;
+                    class.call1(py, py_tuple)
+                }
+                None => class.call0(py),
+            }
+        })
+        .expect("failed to instantiate Python session reducer class");
+
+        PySessionReduceRunner {
+            event_loop: self.event_loop.clone(),
+            py_instance: Arc::new(inst),
+        }
+    }
+}
+
+#[tonic::async_trait]
+impl session_reduce::SessionReducer for PySessionReduceRunner {
+    async fn session_reduce(
+        &self,
+        keys: Vec<String>,
+        mut input: tokio::sync::mpsc::Receiver<session_reduce::SessionReduceRequest>,
+        output: tokio::sync::mpsc::Sender<session_reduce::Message>,
+    ) {
+        // Create a channel to stream Datum into Python as an async iterator
+        let (tx, rx) = tokio::sync::mpsc::channel::<PyDatum>(64);
+
+        // Spawn a task forwarding incoming datums to the Python-facing channel
+        let forwarder = tokio::spawn(async move {
+            while let Some(req) = input.recv().await {
+                let datum = PyDatum::from(req);
+                if tx.send(datum).await.is_err() {
+                    break;
+                }
+            }
+            // When input ends, dropping tx closes the channel and that is when
+            // the Python async iterable will stop.
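+            // (On the Python side this surfaces as StopAsyncIteration, so the
+            // handler's `async for datum in datums:` loop simply ends.)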
+        });
+
+        // Call the Python coroutine:
+        // obj.session_reduce(keys, datums: AsyncIterable[Datum]) -> AsyncIterator[Message]
+        let agen_obj = Python::attach(|py| {
+            let obj = self.py_instance.clone();
+            let stream = PyAsyncDatumStream::new_with(rx);
+
+            // Call the session_reduce method
+            let agen = obj
+                .call_method1(py, "session_reduce", (keys.clone(), stream))
+                .expect("python session_reduce method raised before returning async iterable");
+
+            // Keep it as a Py<PyAny>
+            agen.extract(py).unwrap_or(agen)
+        });
+
+        // Wrap the Python AsyncIterable in a Rust Stream that yields incrementally
+        let mut stream = PyAsyncIterStream::<PyMessage>::new(agen_obj, self.event_loop.clone())
+            .expect("failed to construct PyAsyncIterStream");
+
+        // Forward each yielded message immediately to the output sender
+        while let Some(item) = stream.next().await {
+            match item {
+                Ok(py_msg) => {
+                    let out: session_reduce::Message = py_msg.into();
+                    if output.send(out).await.is_err() {
+                        break;
+                    }
+                }
+                Err(e) => {
+                    // Non-stop errors are surfaced per-item; log and stop this stream.
+                    eprintln!("Python async iteration error in session_reduce: {:?}", e);
+                    break;
+                }
+            }
+        }
+
+        // Ensure the forwarder completes
+        let _ = forwarder.await;
+    }
+
+    async fn accumulator(&self) -> Vec<u8> {
+        // Call Python's accumulator() async method
+        let fut = Python::attach(|py| {
+            let locals = pyo3_async_runtimes::TaskLocals::new(self.event_loop.bind(py).clone());
+            let obj = self.py_instance.clone();
+
+            let coro = obj
+                .call_method0(py, "accumulator")
+                .expect("python accumulator method raised")
+                .into_bound(py);
+
+            pyo3_async_runtimes::into_future_with_locals(&locals, coro)
+                .expect("failed to create future for accumulator")
+        });
+
+        let result = fut.await.expect("accumulator future failed");
+
+        Python::attach(|py| {
+            result
+                .extract::<Vec<u8>>(py)
+                .expect("accumulator must return bytes")
+        })
+    }
+
+    async fn merge_accumulator(&self, accumulator: Vec<u8>) {
+        // Call Python's merge_accumulator(accumulator: bytes) async method
+        let fut = Python::attach(|py| {
+            let locals = pyo3_async_runtimes::TaskLocals::new(self.event_loop.bind(py).clone());
+            let obj = self.py_instance.clone();
+
+            let coro = obj
+                .call_method1(py, "merge_accumulator", (accumulator,))
+                .expect("python merge_accumulator method raised")
+                .into_bound(py);
+
+            pyo3_async_runtimes::into_future_with_locals(&locals, coro)
+                .expect("failed to create future for merge_accumulator")
+        });
+
+        let _ = fut.await.expect("merge_accumulator future failed");
+    }
+}
+
+/// Start the session reduce server by spinning up a dedicated Python asyncio loop and wiring shutdown.
+pub(super) async fn start(
+    py_creator: Py<PyAny>,
+    init_args: Option<Py<PyAny>>,
+    sock_file: String,
+    info_file: String,
+    shutdown_rx: tokio::sync::oneshot::Receiver<()>,
+) -> Result<(), pyo3::PyErr> {
+    let (tx, rx) = tokio::sync::oneshot::channel();
+    let py_asyncio_loop_handle = tokio::task::spawn_blocking(move || crate::pyrs::run_asyncio(tx));
+    let event_loop = rx.await.unwrap();
+
+    let (sig_handle, combined_rx) = crate::pyrs::setup_sig_handler(shutdown_rx);
+
+    let creator = PySessionReduceCreator {
+        event_loop: event_loop.clone(),
+        py_creator: Arc::new(py_creator),
+        init_args: init_args.map(Arc::new),
+    };
+
+    let server = session_reduce::Server::new(creator)
+        .with_socket_file(sock_file)
+        .with_server_info_file(info_file);
+
+    let result = server
+        .start_with_shutdown(combined_rx)
+        .await
+        .map_err(|e| pyo3::PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(e.to_string()));
+
+    // Ensure the event loop is stopped even if shutdown came from elsewhere.
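+    // (asyncio loops only allow cross-thread scheduling through call_soon_threadsafe,
+    // hence stop() is scheduled through it rather than called directly.)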
+    Python::attach(|py| {
+        if let Ok(stop_cb) = event_loop.getattr(py, "stop") {
+            let _ = event_loop.call_method1(py, "call_soon_threadsafe", (stop_cb,));
+        }
+    });
+
+    println!("Numaflow Core (session_reduce) has shut down...");
+
+    // Wait for the blocking asyncio thread to finish.
+    let _ = py_asyncio_loop_handle.await;
+
+    // If the signal handler task hasn't finished, abort it.
+    if !sig_handle.is_finished() {
+        println!("Aborting signal handler");
+        sig_handle.abort();
+    }
+
+    result
+}
diff --git a/packages/pynumaflow-lite/tests/_test_utils.py b/packages/pynumaflow-lite/tests/_test_utils.py
new file mode 100644
index 00000000..5a3c9150
--- /dev/null
+++ b/packages/pynumaflow-lite/tests/_test_utils.py
@@ -0,0 +1,121 @@
+import os
+import signal
+import socket
+import subprocess
+import sys
+import time
+from pathlib import Path
+from typing import List, Optional
+
+import pytest
+
+
+def _wait_for_socket(path: Path, timeout: float = 10.0) -> None:
+    deadline = time.time() + timeout
+    while time.time() < deadline:
+        if path.exists():
+            try:
+                with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s:
+                    s.settimeout(0.2)
+                    s.connect(str(path))
+                    return
+            except OSError:
+                pass
+        time.sleep(0.1)
+    raise TimeoutError(f"Socket {path} not ready after {timeout}s")
+
+
+def run_python_server_with_rust_client(
+    script: str,
+    sock_path: Path,
+    server_info_path: Path,
+    rust_bin_name: str,
+    rust_bin_args: Optional[List[str]] = None,
+    socket_timeout: float = 20.0,
+    rust_timeout: float = 60.0,
+    server_shutdown_timeout: float = 15.0,
+) -> None:
+    """
+    Generic test runner for Python server + Rust client integration tests.
+
+    Args:
+        script: Name of the Python script to run (e.g., "map_cat.py")
+        sock_path: Path to the Unix socket
+        server_info_path: Path to the server info file
+        rust_bin_name: Name of the Rust binary to run (e.g., "test_map")
+        rust_bin_args: Optional additional arguments to pass to the Rust binary
+        socket_timeout: Timeout for waiting for the socket to be ready
+        rust_timeout: Timeout for Rust client execution
+        server_shutdown_timeout: Timeout for graceful server shutdown
+    """
+    # Ensure clean socket state
+    for p in [sock_path, server_info_path]:
+        try:
+            if p.exists():
+                p.unlink()
+        except FileNotFoundError:
+            pass
+
+    # Start the Python server
+    tests_dir = Path(__file__).resolve().parent
+    examples_dir = tests_dir / "examples"
+    script_path = examples_dir / script
+    assert script_path.exists(), f"Missing script: {script_path}"
+
+    # Cargo needs to run from the pynumaflow-lite root (parent of tests)
+    cargo_root = tests_dir.parent
+
+    env = os.environ.copy()
+    py_cmd = [sys.executable, "-u", str(script_path)]
+    server = subprocess.Popen(
+        py_cmd,
+        cwd=str(cargo_root),
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+        env=env,
+        preexec_fn=os.setsid if hasattr(os, "setsid") else None,
+    )
+
+    try:
+        _wait_for_socket(sock_path, timeout=socket_timeout)
+
+        # Run the Rust client binary
+        rust_cmd = ["cargo", "run", "--quiet", "--bin", rust_bin_name]
+        if rust_bin_args:
+            rust_cmd.extend(["--"] + rust_bin_args)
+
+        rust = subprocess.run(
+            rust_cmd,
+            cwd=str(cargo_root),
+            capture_output=True,
+            text=True,
+            env=env,
+            timeout=rust_timeout,
+        )
+        if rust.returncode != 0:
+            # Dump helpful logs for debugging
+            server_logs = server.stdout.read() if server.stdout else ""
+            pytest.fail(
+                f"Rust client failed: code={rust.returncode}\nStdout:\n{rust.stdout}\nStderr:\n{rust.stderr}\nServer logs so far:\n{server_logs}"
+            )
+
+    finally:
+        # Request graceful shutdown via SIGINT
+        try:
+            if server.poll() is None:
+                if hasattr(os, "killpg") and server.pid:
+                    os.killpg(os.getpgid(server.pid), signal.SIGINT)
+                else:
+                    server.send_signal(signal.SIGINT)
+        except Exception:
+            pass
+
+        # Wait for the server to exit
+        try:
+            server.wait(timeout=server_shutdown_timeout)
+        except subprocess.TimeoutExpired:
+            try:
+                if hasattr(os, "killpg") and server.pid:
+                    os.killpg(os.getpgid(server.pid), signal.SIGKILL)
+                else:
+                    server.kill()
+            except Exception:
+                pass
+
+    assert server.returncode == 0, f"Server did not exit cleanly, code={server.returncode}"
diff --git a/packages/pynumaflow-lite/tests/bin/accumulator.rs b/packages/pynumaflow-lite/tests/bin/accumulator.rs
new file mode 100644
index 00000000..3a7e61bb
--- /dev/null
+++ b/packages/pynumaflow-lite/tests/bin/accumulator.rs
@@ -0,0 +1,202 @@
+use std::env;
+use std::path::PathBuf;
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use tokio::net::UnixStream;
+use tokio::sync::mpsc;
+use tokio_stream::wrappers::ReceiverStream;
+use tonic::{Request, transport::Uri};
+use tower::service_fn;
+
+fn now_ts() -> prost_types::Timestamp {
+    let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
+    prost_types::Timestamp {
+        seconds: now.as_secs() as i64,
+        nanos: now.subsec_nanos() as i32,
+    }
+}
+
+fn ts_from_secs(secs: i64) -> prost_types::Timestamp {
+    prost_types::Timestamp {
+        seconds: secs,
+        nanos: 0,
+    }
+}
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Default socket path with env/arg override
+    let sock_path = env::args()
+        .nth(1)
+        .or_else(|| env::var("NUMAFLOW_ACCUMULATOR_SOCK").ok())
+        .unwrap_or_else(|| "/tmp/var/run/numaflow/accumulator.sock".to_string());
+
+    // Connect over UDS
+    let channel = tonic::transport::Endpoint::try_from("http://[::]:50051")?
+        .connect_with_connector(service_fn(move |_: Uri| {
+            let sock = PathBuf::from(sock_path.clone());
+            async move {
+                Ok::<_, std::io::Error>(hyper_util::rt::TokioIo::new(
+                    UnixStream::connect(sock).await?,
+                ))
+            }
+        }))
+        .await?;
+
+    // Accumulator client from the generated proto
+    let mut client =
+        numaflow::proto::accumulator::accumulator_client::AccumulatorClient::new(channel);
+
+    let (tx, rx) = mpsc::channel(16);
+
+    // Test scenario: send out-of-order messages and verify they are sorted by event_time
+    {
+        use numaflow::proto::accumulator as acc_proto;
+
+        let base_time = now_ts().seconds;
+
+        // We'll send messages with event times: t+30, t+10, t+20, t+40
+        // and watermarks that advance: t+5, t+15, t+25, t+45.
+        // Expected output order after sorting: t+10, t+20, t+30, t+40
+
+        // 1. OPEN operation - create the window with a message at t+30
+        let open_req = acc_proto::AccumulatorRequest {
+            payload: Some(acc_proto::Payload {
+                keys: vec!["key1".into()],
+                value: b"msg_at_t30".to_vec(),
+                watermark: Some(ts_from_secs(base_time + 5)), // WM at t+5
+                event_time: Some(ts_from_secs(base_time + 30)), // Event at t+30
+                headers: Default::default(),
+                id: "msg1".to_string(),
+            }),
+            operation: Some(acc_proto::accumulator_request::WindowOperation {
+                event: acc_proto::accumulator_request::window_operation::Event::Open as i32,
+                keyed_window: Some(acc_proto::KeyedWindow {
+                    start: Some(ts_from_secs(base_time)),
+                    end: Some(ts_from_secs(base_time + 60)),
+                    slot: "slot-0".to_string(),
+                    keys: vec!["key1".into()],
+                }),
+            }),
+        };
+        tx.send(open_req).await.unwrap();
+
+        // 2. APPEND - message at t+10 with WM at t+15 (should flush t+10)
+        let append_req1 = acc_proto::AccumulatorRequest {
+            payload: Some(acc_proto::Payload {
+                keys: vec!["key1".into()],
+                value: b"msg_at_t10".to_vec(),
+                watermark: Some(ts_from_secs(base_time + 15)), // WM advances to t+15
+                event_time: Some(ts_from_secs(base_time + 10)), // Event at t+10
+                headers: Default::default(),
+                id: "msg2".to_string(),
+            }),
+            operation: Some(acc_proto::accumulator_request::WindowOperation {
+                event: acc_proto::accumulator_request::window_operation::Event::Append as i32,
+                keyed_window: Some(acc_proto::KeyedWindow {
+                    start: Some(ts_from_secs(base_time)),
+                    end: Some(ts_from_secs(base_time + 60)),
+                    slot: "slot-0".to_string(),
+                    keys: vec!["key1".into()],
+                }),
+            }),
+        };
+        tx.send(append_req1).await.unwrap();
+
+        // 3. APPEND - message at t+20 with WM at t+25 (should flush t+20)
+        let append_req2 = acc_proto::AccumulatorRequest {
+            payload: Some(acc_proto::Payload {
+                keys: vec!["key1".into()],
+                value: b"msg_at_t20".to_vec(),
+                watermark: Some(ts_from_secs(base_time + 25)), // WM advances to t+25
+                event_time: Some(ts_from_secs(base_time + 20)), // Event at t+20
+                headers: Default::default(),
+                id: "msg3".to_string(),
+            }),
+            operation: Some(acc_proto::accumulator_request::WindowOperation {
+                event: acc_proto::accumulator_request::window_operation::Event::Append as i32,
+                keyed_window: Some(acc_proto::KeyedWindow {
+                    start: Some(ts_from_secs(base_time)),
+                    end: Some(ts_from_secs(base_time + 60)),
+                    slot: "slot-0".to_string(),
+                    keys: vec!["key1".into()],
+                }),
+            }),
+        };
+        tx.send(append_req2).await.unwrap();
+
+        // 4. APPEND - message at t+40 with WM at t+45 (should flush t+30, t+40)
+        let append_req3 = acc_proto::AccumulatorRequest {
+            payload: Some(acc_proto::Payload {
+                keys: vec!["key1".into()],
+                value: b"msg_at_t40".to_vec(),
+                watermark: Some(ts_from_secs(base_time + 45)), // WM advances to t+45
+                event_time: Some(ts_from_secs(base_time + 40)), // Event at t+40
+                headers: Default::default(),
+                id: "msg4".to_string(),
+            }),
+            operation: Some(acc_proto::accumulator_request::WindowOperation {
+                event: acc_proto::accumulator_request::window_operation::Event::Append as i32,
+                keyed_window: Some(acc_proto::KeyedWindow {
+                    start: Some(ts_from_secs(base_time)),
+                    end: Some(ts_from_secs(base_time + 60)),
+                    slot: "slot-0".to_string(),
+                    keys: vec!["key1".into()],
+                }),
+            }),
+        };
+        tx.send(append_req3).await.unwrap();
+
+        // 5. CLOSE operation - close the window
+        let close_req = acc_proto::AccumulatorRequest {
+            payload: None,
+            operation: Some(acc_proto::accumulator_request::WindowOperation {
+                event: acc_proto::accumulator_request::window_operation::Event::Close as i32,
+                keyed_window: Some(acc_proto::KeyedWindow {
+                    start: Some(ts_from_secs(base_time)),
+                    end: Some(ts_from_secs(base_time + 60)),
+                    slot: "slot-0".to_string(),
+                    keys: vec!["key1".into()],
+                }),
+            }),
+        };
+        tx.send(close_req).await.unwrap();
+    }
+
+    // Drop the sender to signal end-of-stream
+    drop(tx);
+
+    // Start the RPC using a Request-wrapped stream
+    let request = Request::new(ReceiverStream::new(rx));
+    let mut resp = client.accumulate_fn(request).await?.into_inner();
+
+    // Read responses and verify they arrive in sorted order
+    let mut messages = Vec::new();
+    let mut found_eof = false;
+
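+    // The server interleaves payload-bearing responses with a trailing EOF marker, so
+    // both are tracked while draining the stream.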
+    while let Some(r) = resp.message().await? {
+        if let Some(payload) = r.payload {
+            let value = String::from_utf8(payload.value.clone())
+                .expect("Payload value should be valid UTF-8");
+            println!("Received message: {}", value);
+            messages.push(value);
+        }
+
+        if r.eof {
+            println!("Received EOF");
+            found_eof = true;
+        }
+    }
+
+    // Verify we got all 4 messages in sorted order
+    assert_eq!(messages.len(), 4, "Expected 4 messages");
+    assert_eq!(messages[0], "msg_at_t10", "First message should be t+10");
+    assert_eq!(messages[1], "msg_at_t20", "Second message should be t+20");
+    assert_eq!(messages[2], "msg_at_t30", "Third message should be t+30");
+    assert_eq!(messages[3], "msg_at_t40", "Fourth message should be t+40");
+    assert!(found_eof, "Should have received EOF");
+
+    println!("All messages received in correct sorted order!");
+
+    Ok(())
+}
diff --git a/packages/pynumaflow-lite/tests/bin/batchmap.rs b/packages/pynumaflow-lite/tests/bin/batchmap.rs
new file mode 100644
index 00000000..aa325aaf
--- /dev/null
+++ b/packages/pynumaflow-lite/tests/bin/batchmap.rs
@@ -0,0 +1,125 @@
+use std::collections::HashMap;
+use std::env;
+use std::path::PathBuf;
+
+use numaflow::proto;
+use numaflow::proto::map::map_client::MapClient;
+use tokio::net::UnixStream;
+use tokio::sync::mpsc;
+use tokio_stream::wrappers::ReceiverStream;
+use tonic::transport::Uri;
+use tower::service_fn;
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Allow overriding the socket path via the first CLI arg or an env var.
+    let sock_path = env::args()
+        .nth(1)
+        .or_else(|| env::var("NUMAFLOW_BATCHMAP_SOCK").ok())
+        .unwrap_or_else(|| "/tmp/var/run/numaflow/batchmap.sock".to_string());
+
+    // Set up a tonic channel over a Unix Domain Socket.
+    let channel = tonic::transport::Endpoint::try_from("http://[::]:50051")?
+        .connect_with_connector(service_fn(move |_: Uri| {
+            let sock = PathBuf::from(sock_path.clone());
+            async move {
+                Ok::<_, std::io::Error>(hyper_util::rt::TokioIo::new(
+                    UnixStream::connect(sock).await?,
+                ))
+            }
+        }))
+        .await?;
+
+    let mut client = MapClient::new(channel);
+
+    let (tx, rx) = mpsc::channel(16);
+
+    // Handshake to initialize the stream
+    let handshake_request = proto::map::MapRequest {
+        request: None,
+        id: "".to_string(),
+        handshake: Some(proto::map::Handshake { sot: true }),
+        status: None,
+    };
+    tx.send(handshake_request).await.unwrap();
+
+    let resp = client.map_fn(ReceiverStream::new(rx)).await.unwrap();
+    let mut resp = resp.into_inner();
+
+    // Expect a handshake response from the server
+    let handshake_response = resp.message().await.unwrap();
+    assert!(handshake_response.is_some());
+    let handshake_response = handshake_response.unwrap();
+    assert!(handshake_response.handshake.is_some());
+
+    // Build three requests with IDs
+    let mk_req = |id: &str, keys: Vec<&str>, value: &str| -> proto::map::MapRequest {
+        proto::map::MapRequest {
+            request: Some(proto::map::map_request::Request {
+                keys: keys.into_iter().map(|s| s.to_string()).collect(),
+                value: value.as_bytes().to_vec(),
+                watermark: Some(prost_types::Timestamp::default()),
+                event_time: Some(prost_types::Timestamp::default()),
+                headers: HashMap::new(),
+            }),
+            id: id.to_string(),
+            handshake: None,
+            status: None,
+        }
+    };
+
+    let req1 = mk_req("id-1", vec!["k1"], "hello-1");
+    let req2 = mk_req("id-2", vec!["k2"], "hello-2");
+    let req3 = mk_req("id-3", vec!["k3"], "hello-3");
+
+    // The sender must live until all requests are sent; rebind it for clarity
+    let tx_ref = tx;
+    tx_ref.send(req1).await.unwrap();
+    tx_ref.send(req2).await.unwrap();
+    tx_ref.send(req3).await.unwrap();
+
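+    // The exchange below completes the batch-map protocol as this test exercises it:
+    // handshake, then N requests, then an EOT marker; the server replies with N
+    // responses keyed by request id.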
+    // Send the End-Of-Batch marker via status.
+    // This uses the proto Status message to indicate an EOT for the batch.
+    let eot = numaflow::proto::map::TransmissionStatus { eot: true };
+    let eot_req = proto::map::MapRequest {
+        request: None,
+        id: "eot".to_string(),
+        handshake: None,
+        status: Some(eot),
+    };
+    tx_ref.send(eot_req).await.unwrap();
+
+    // Collect exactly 3 responses, one per id
+    use std::collections::BTreeMap;
+    let mut got: BTreeMap<String, Vec<u8>> = BTreeMap::new();
+
+    while got.len() < 3 {
+        let maybe = resp.message().await.unwrap();
+        assert!(maybe.is_some());
+        let r = maybe.unwrap();
+        // MapResponse is expected to include an id and a results list;
+        // we take the first result's value.
+        let value = r
+            .results
+            .first()
+            .map(|res| res.value.clone())
+            .unwrap_or_default();
+        let id = r.id.clone();
+        got.insert(id, value);
+    }
+
+    assert_eq!(
+        got.get("id-1").map(|v| v.as_slice()),
+        Some("hello-1".as_bytes())
+    );
+    assert_eq!(
+        got.get("id-2").map(|v| v.as_slice()),
+        Some("hello-2".as_bytes())
+    );
+    assert_eq!(
+        got.get("id-3").map(|v| v.as_slice()),
+        Some("hello-3".as_bytes())
+    );
+
+    Ok(())
+}
diff --git a/packages/pynumaflow-lite/tests/bin/map.rs b/packages/pynumaflow-lite/tests/bin/map.rs
new file mode 100644
index 00000000..42748670
--- /dev/null
+++ b/packages/pynumaflow-lite/tests/bin/map.rs
@@ -0,0 +1,137 @@
+use std::env;
+use std::path::PathBuf;
+
+use numaflow::proto;
+use numaflow::proto::map::map_client::MapClient;
+use tokio::net::UnixStream;
+use tokio::sync::mpsc;
+use tokio_stream::wrappers::ReceiverStream;
+use tonic::transport::Uri;
+use tower::service_fn;
+
+// Simple Rust client binary that exercises the Map server over a Unix Domain Socket.
+// To run it manually, first start the Python server and then run this binary:
+//   maturin develop && python simple_cat.py
+// The server won't exit on its own, so kill it after running the tests.
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Allow overriding the socket path via the first CLI arg or an env var.
+    let sock_path = env::args()
+        .nth(1)
+        .or_else(|| env::var("NUMAFLOW_MAP_SOCK").ok())
+        .unwrap_or_else(|| "/tmp/var/run/numaflow/map.sock".to_string());
+
+    // Set up a tonic channel over a Unix Domain Socket.
+    let channel = tonic::transport::Endpoint::try_from("http://[::]:50051")?
+        .connect_with_connector(service_fn(move |_: Uri| {
+            let sock = PathBuf::from(sock_path.clone());
+            async move {
+                Ok::<_, std::io::Error>(hyper_util::rt::TokioIo::new(
+                    UnixStream::connect(sock).await?,
+                ))
+            }
+        }))
+        .await?;
+
+    let mut client = MapClient::new(channel);
+
+    let (tx, rx) = mpsc::channel(8);
+
+    // Handshake
+    let handshake_request = proto::map::MapRequest {
+        request: None,
+        id: "".to_string(),
+        handshake: Some(proto::map::Handshake { sot: true }),
+        status: None,
+    };
+    tx.send(handshake_request).await.unwrap();
+
+    let resp = client.map_fn(ReceiverStream::new(rx)).await.unwrap();
+    let mut resp = resp.into_inner();
+
+    let handshake_response = resp.message().await.unwrap();
+    assert!(handshake_response.is_some());
+    let handshake_response = handshake_response.unwrap();
+    assert!(handshake_response.handshake.is_some());
+
+    // Request 1
+    let request_1 = proto::map::MapRequest {
+        request: Some(proto::map::map_request::Request {
+            keys: vec!["first".into(), "second".into()],
+            value: "hello".into(),
+            watermark: Some(prost_types::Timestamp::default()),
+            event_time: Some(prost_types::Timestamp::default()),
+            headers: Default::default(),
+        }),
+        id: "".to_string(),
+        handshake: None,
+        status: None,
+    };
+
+    // The request stream was created from `rx` before map_fn was called, but `tx` is
+    // still in scope here, so we keep sending requests on it. `tx` must stay alive
+    // until all messages are sent; dropping it closes the request stream.
+    let tx_ref = tx;
+    tx_ref.send(request_1).await.unwrap();
+
+    let actual_response = resp.message().await.unwrap();
+    assert!(actual_response.is_some());
+    let r = actual_response.unwrap();
+    let msg = &r.results[0];
+    assert_eq!(msg.keys.first(), Some(&"first".to_owned()));
+    assert_eq!(msg.value, "hello".as_bytes());
+
+    // Request 2
+    let request_2 = proto::map::MapRequest {
+        request: Some(proto::map::map_request::Request {
+            keys: vec!["third".into(), "fourth".into()],
+            value: "world".into(),
+            watermark: Some(prost_types::Timestamp::default()),
+            event_time: Some(prost_types::Timestamp::default()),
+            headers: Default::default(),
+        }),
+        id: "".to_string(),
+        handshake: None,
+        status: None,
+    };
+    tx_ref.send(request_2).await.unwrap();
+
+    let actual_response = resp.message().await.unwrap();
+    assert!(actual_response.is_some());
+    let msg = &actual_response.unwrap().results[0];
+    assert_eq!(msg.keys.first(), Some(&"third".to_owned()));
+    assert_eq!(msg.value, "world".as_bytes());
+
+    // Request 3 (drop)
+    let request_3 = proto::map::MapRequest {
+        request: Some(proto::map::map_request::Request {
+            keys: vec!["third".into(), "fourth".into()],
+            value: "bad world".into(),
+            watermark: Some(prost_types::Timestamp::default()),
+            event_time: Some(prost_types::Timestamp::default()),
+            headers: Default::default(),
+        }),
+        id: "".to_string(),
+        handshake: None,
+        status: None,
+    };
+    tx_ref.send(request_3).await.unwrap();
+
+    let actual_response = resp.message().await.unwrap();
+    assert!(actual_response.is_some());
+    let msg = &actual_response.unwrap().results[0];
+    assert_eq!(msg.tags, vec![numaflow::shared::DROP.to_string()]);
+
+    // Close the request stream
+    drop(tx_ref);
+
+    Ok(())
+}
diff --git a/packages/pynumaflow-lite/tests/bin/mapstream.rs b/packages/pynumaflow-lite/tests/bin/mapstream.rs
new file mode 100644
index 00000000..981e2d70
--- /dev/null
+++ b/packages/pynumaflow-lite/tests/bin/mapstream.rs
@@ -0,0 +1,90 @@
+use std::env;
+use std::path::PathBuf;
+
+use numaflow::proto;
+use numaflow::proto::map::map_client::MapClient;
+use tokio::net::UnixStream;
+use tokio::sync::mpsc;
+use tokio_stream::wrappers::ReceiverStream;
+use tonic::transport::Uri;
+use tower::service_fn;
+
+// Rust client binary to exercise the MapStream server (streaming outputs).
+// It connects over a Unix Domain Socket to the mapstream server, sends a single
+// request with comma-separated values, and asserts that it receives multiple
+// MapResponse messages, each containing one split as the value.
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Allow overriding the socket path via the first CLI arg or an env var.
+    let sock_path = env::args()
+        .nth(1)
+        .or_else(|| env::var("NUMAFLOW_MAPSTREAM_SOCK").ok())
+        .unwrap_or_else(|| "/tmp/var/run/numaflow/mapstream.sock".to_string());
+
+    // Set up a tonic channel over a Unix Domain Socket.
+    let channel = tonic::transport::Endpoint::try_from("http://[::]:50051")?
+        .connect_with_connector(service_fn(move |_: Uri| {
+            let sock = PathBuf::from(sock_path.clone());
+            async move {
+                Ok::<_, std::io::Error>(hyper_util::rt::TokioIo::new(
+                    UnixStream::connect(sock).await?,
+                ))
+            }
+        }))
+        .await?;
+
+    let mut client = MapClient::new(channel);
+
+    // Build one request with a comma-separated payload
+    let request = proto::map::MapRequest {
+        request: Some(proto::map::map_request::Request {
+            keys: vec!["k".into()],
+            value: "a,b,c".as_bytes().to_vec(),
+            watermark: Some(prost_types::Timestamp::default()),
+            event_time: Some(prost_types::Timestamp::default()),
+            headers: Default::default(),
+        }),
+        id: "".to_string(),
+        handshake: None,
+        status: None,
+    };
+
+    // Create the request stream with the handshake first
+    let (tx, rx) = mpsc::channel(8);
+    let handshake_request = proto::map::MapRequest {
+        request: None,
+        id: "".to_string(),
+        handshake: Some(proto::map::Handshake { sot: true }),
+        status: None,
+    };
+    tx.send(handshake_request).await.unwrap();
+
+    // Start the RPC
+    let response_stream = client.map_fn(ReceiverStream::new(rx)).await.unwrap();
+    let mut response_stream = response_stream.into_inner();
+
+    // After the handshake, send the real request
+    let tx_ref = tx;
+    tx_ref.send(request).await.unwrap();
+
+    // Expect three MapResponse messages, each carrying one result value
+    let mut got: Vec<Vec<u8>> = Vec::new();
+    while got.len() < 3 {
+        let maybe = response_stream.message().await.unwrap();
+        assert!(maybe.is_some());
+        let resp = maybe.unwrap();
+        // Each MapResponse carries results; we take the first
+        if let Some(first) = resp.results.first() {
+            got.push(first.value.clone());
+        }
+    }
+
+    assert_eq!(got[0], b"a".to_vec());
+    assert_eq!(got[1], b"b".to_vec());
+    assert_eq!(got[2], b"c".to_vec());
+
+    // Close the request stream
+    drop(tx_ref);
+
+    Ok(())
+}
diff --git a/packages/pynumaflow-lite/tests/bin/reduce.rs b/packages/pynumaflow-lite/tests/bin/reduce.rs
new file mode 100644
index 00000000..9174da8a
--- /dev/null
+++ b/packages/pynumaflow-lite/tests/bin/reduce.rs
@@ -0,0 +1,87 @@
+use std::env;
+use std::path::PathBuf;
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use tokio::net::UnixStream;
+use tokio::sync::mpsc;
+use tokio_stream::wrappers::ReceiverStream;
+use tonic::{Request, transport::Uri};
+use tower::service_fn;
+
+fn now_ts() -> prost_types::Timestamp {
+    let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
+    prost_types::Timestamp {
+        seconds: now.as_secs() as i64,
+        nanos: now.subsec_nanos() as i32,
+    }
+}
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Default socket path with env/arg override
+    let sock_path = env::args()
+        .nth(1)
+        .or_else(|| env::var("NUMAFLOW_REDUCE_SOCK").ok())
+        .unwrap_or_else(|| "/tmp/var/run/numaflow/reduce.sock".to_string());
+
+    // Connect over UDS
+    let channel = tonic::transport::Endpoint::try_from("http://[::]:50051")?
+        .connect_with_connector(service_fn(move |_: Uri| {
+            let sock = PathBuf::from(sock_path.clone());
+            async move {
+                Ok::<_, std::io::Error>(hyper_util::rt::TokioIo::new(
+                    UnixStream::connect(sock).await?,
+                ))
+            }
+        }))
+        .await?;
+
+    // Reduce client from the generated proto
+    let mut client = numaflow::proto::reduce::reduce_client::ReduceClient::new(channel);
+
+    let (tx, rx) = mpsc::channel(16);
+
+    // Build a single valid ReduceRequest that includes BOTH the payload and the window operation
+    {
+        use numaflow::proto::reduce as reduce_proto;
+        let rr = reduce_proto::ReduceRequest {
+            payload: Some(reduce_proto::reduce_request::Payload {
+                keys: vec!["k".into()],
+                value: b"1".to_vec(),
+                watermark: Some(now_ts()),
+                event_time: Some(now_ts()),
+                headers: Default::default(),
+            }),
+            operation: Some(reduce_proto::reduce_request::WindowOperation {
+                event: 0, // not used by the server logic currently
+                windows: vec![reduce_proto::Window {
+                    start: Some(now_ts()),
+                    end: Some(now_ts()),
+                    slot: "slot-0".to_string(),
+                }],
+            }),
+        };
+        tx.send(rr).await.unwrap();
+    }
+
+    // Drop the sender to signal end-of-stream (COB)
+    drop(tx);
+
+    // Start the RPC using a Request-wrapped stream
+    let request = Request::new(ReceiverStream::new(rx));
+    let mut resp = client.reduce_fn(request).await?.into_inner();
+
+    // Read responses until we see a result; the server will also send an EOF after tasks close
+    loop {
+        if let Some(r) = resp.message().await? {
+            if let Some(res) = r.result {
+                assert!(!res.value.is_empty());
+                break;
+            }
+        } else {
+            panic!("Stream ended without a result");
+        }
+    }
+
+    Ok(())
+}
diff --git a/packages/pynumaflow-lite/tests/bin/session_reduce.rs b/packages/pynumaflow-lite/tests/bin/session_reduce.rs
new file mode 100644
index 00000000..bdaf58cb
--- /dev/null
+++ b/packages/pynumaflow-lite/tests/bin/session_reduce.rs
@@ -0,0 +1,174 @@
+use std::env;
+use std::path::PathBuf;
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use tokio::net::UnixStream;
+use tokio::sync::mpsc;
+use tokio_stream::wrappers::ReceiverStream;
+use tonic::{Request, transport::Uri};
+use tower::service_fn;
+
+fn now_ts() -> prost_types::Timestamp {
+    let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
+    prost_types::Timestamp {
+        seconds: now.as_secs() as i64,
+        nanos: now.subsec_nanos() as i32,
+    }
+}
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Default socket path with env/arg override
+    let sock_path = env::args()
+        .nth(1)
+        .or_else(|| env::var("NUMAFLOW_SESSION_REDUCE_SOCK").ok())
+        .unwrap_or_else(|| "/tmp/var/run/numaflow/sessionreduce.sock".to_string());
+
+    // Connect over UDS
+    let channel = tonic::transport::Endpoint::try_from("http://[::]:50051")?
+        .connect_with_connector(service_fn(move |_: Uri| {
+            let sock = PathBuf::from(sock_path.clone());
+            async move {
+                Ok::<_, std::io::Error>(hyper_util::rt::TokioIo::new(
+                    UnixStream::connect(sock).await?,
+                ))
+            }
+        }))
+        .await?;
+
+    // SessionReduce client from the generated proto
+    let mut client =
+        numaflow::proto::session_reduce::session_reduce_client::SessionReduceClient::new(channel);
+
+    let (tx, rx) = mpsc::channel(16);
+
+    // Test scenario: create a session window, append some data, then close it
+    {
+        use numaflow::proto::session_reduce as sr_proto;
+
OPEN operation - create a new session window with first message + let open_req = sr_proto::SessionReduceRequest { + payload: Some(sr_proto::session_reduce_request::Payload { + keys: vec!["key1".into()], + value: b"1".to_vec(), + watermark: Some(now_ts()), + event_time: Some(now_ts()), + headers: Default::default(), + }), + operation: Some(sr_proto::session_reduce_request::WindowOperation { + event: sr_proto::session_reduce_request::window_operation::Event::Open as i32, + keyed_windows: vec![sr_proto::KeyedWindow { + start: Some(now_ts()), + end: Some(prost_types::Timestamp { + seconds: now_ts().seconds + 60, + nanos: 0, + }), + slot: "slot-0".to_string(), + keys: vec!["key1".into()], + }], + }), + }; + tx.send(open_req).await.unwrap(); + + // 2. APPEND operation - add more data to the same window + let append_req = sr_proto::SessionReduceRequest { + payload: Some(sr_proto::session_reduce_request::Payload { + keys: vec!["key1".into()], + value: b"2".to_vec(), + watermark: Some(now_ts()), + event_time: Some(now_ts()), + headers: Default::default(), + }), + operation: Some(sr_proto::session_reduce_request::WindowOperation { + event: sr_proto::session_reduce_request::window_operation::Event::Append as i32, + keyed_windows: vec![sr_proto::KeyedWindow { + start: Some(now_ts()), + end: Some(prost_types::Timestamp { + seconds: now_ts().seconds + 60, + nanos: 0, + }), + slot: "slot-0".to_string(), + keys: vec!["key1".into()], + }], + }), + }; + tx.send(append_req).await.unwrap(); + + // 3. APPEND another message + let append_req2 = sr_proto::SessionReduceRequest { + payload: Some(sr_proto::session_reduce_request::Payload { + keys: vec!["key1".into()], + value: b"3".to_vec(), + watermark: Some(now_ts()), + event_time: Some(now_ts()), + headers: Default::default(), + }), + operation: Some(sr_proto::session_reduce_request::WindowOperation { + event: sr_proto::session_reduce_request::window_operation::Event::Append as i32, + keyed_windows: vec![sr_proto::KeyedWindow { + start: Some(now_ts()), + end: Some(prost_types::Timestamp { + seconds: now_ts().seconds + 60, + nanos: 0, + }), + slot: "slot-0".to_string(), + keys: vec!["key1".into()], + }], + }), + }; + tx.send(append_req2).await.unwrap(); + + // 4. CLOSE operation - close the window + let close_req = sr_proto::SessionReduceRequest { + payload: None, + operation: Some(sr_proto::session_reduce_request::WindowOperation { + event: sr_proto::session_reduce_request::window_operation::Event::Close as i32, + keyed_windows: vec![sr_proto::KeyedWindow { + start: Some(now_ts()), + end: Some(prost_types::Timestamp { + seconds: now_ts().seconds + 60, + nanos: 0, + }), + slot: "slot-0".to_string(), + keys: vec!["key1".into()], + }], + }), + }; + tx.send(close_req).await.unwrap(); + } + + // Drop sender to signal end-of-stream + drop(tx); + + // Start the RPC using a Request-wrapped stream + let request = Request::new(ReceiverStream::new(rx)); + let mut resp = client.session_reduce_fn(request).await?.into_inner(); + + // Read responses until we see a result and EOF + let mut found_result = false; + let mut found_eof = false; + + while let Some(r) = resp.message().await? 
{ + if let Some(res) = r.result { + // We should get a count of 3 (three messages appended) + let count_str = + String::from_utf8(res.value.clone()).expect("Result value should be valid UTF-8"); + let count: i32 = count_str.parse().expect("Result should be a number"); + + println!("Received result: count={}", count); + assert_eq!(count, 3, "Expected count of 3 messages"); + assert_eq!(res.keys, vec!["key1"], "Expected keys to match"); + found_result = true; + } + + if r.eof { + println!("Received EOF"); + found_eof = true; + } + } + + assert!(found_result, "Should have received a result"); + assert!(found_eof, "Should have received EOF"); + + Ok(()) +} diff --git a/packages/pynumaflow-lite/tests/examples/accumulator_stream_sorter.py b/packages/pynumaflow-lite/tests/examples/accumulator_stream_sorter.py new file mode 100644 index 00000000..484cb371 --- /dev/null +++ b/packages/pynumaflow-lite/tests/examples/accumulator_stream_sorter.py @@ -0,0 +1,135 @@ +""" +Stream sorter accumulator example. + +This accumulator buffers incoming data and sorts it by event time, +flushing sorted data when the watermark advances. +""" +import asyncio +from datetime import datetime +from typing import AsyncIterator + +from pynumaflow_lite.accumulator import Datum, Message, AccumulatorAsyncServer, Accumulator + + +class StreamSorter(Accumulator): + """ + A stream sorter that buffers and sorts data by event time, + flushing when watermark advances. + """ + + def __init__(self): + from datetime import timezone + # Initialize with a very old timestamp (timezone-aware) + self.latest_wm = datetime.fromtimestamp(-1, tz=timezone.utc) + self.sorted_buffer: list[Datum] = [] + print("StreamSorter initialized") + + async def handler(self, datums: AsyncIterator[Datum]) -> AsyncIterator[Message]: + """ + Buffer and sort datums, yielding sorted messages when watermark advances. + """ + print("Handler started, waiting for datums...") + datum_count = 0 + + async for datum in datums: + datum_count += 1 + print(f"Received datum #{datum_count}: event_time={datum.event_time}, " + f"watermark={datum.watermark}, value={datum.value}") + + # If watermark has moved forward + if datum.watermark and datum.watermark > self.latest_wm: + old_wm = self.latest_wm + self.latest_wm = datum.watermark + print(f"Watermark advanced from {old_wm} to {self.latest_wm}") + + # Flush buffer + flushed = 0 + async for msg in self.flush_buffer(): + flushed += 1 + yield msg + + if flushed > 0: + print(f"Flushed {flushed} messages from buffer") + + # Insert into sorted buffer + self.insert_sorted(datum) + print(f"Buffer size: {len(self.sorted_buffer)}") + + print(f"Handler finished. 
Total datums processed: {datum_count}") + print(f"Remaining in buffer: {len(self.sorted_buffer)}") + + # Flush any remaining items in the buffer at the end + if self.sorted_buffer: + print("Flushing remaining buffer at end...") + for datum in self.sorted_buffer: + print(f" Flushing: event_time={datum.event_time}, value={datum.value}") + # Use Message.from_datum to preserve all metadata + yield Message.from_datum(datum) + self.sorted_buffer = [] + + def insert_sorted(self, datum: Datum): + """Binary insert to keep sorted buffer in order by event_time.""" + left, right = 0, len(self.sorted_buffer) + while left < right: + mid = (left + right) // 2 + if self.sorted_buffer[mid].event_time > datum.event_time: + right = mid + else: + left = mid + 1 + self.sorted_buffer.insert(left, datum) + + async def flush_buffer(self) -> AsyncIterator[Message]: + """Flush all items from buffer that are before or at the watermark.""" + i = 0 + for datum in self.sorted_buffer: + if datum.event_time > self.latest_wm: + break + print(f" Flushing: event_time={datum.event_time}, value={datum.value}") + # Use Message.from_datum to preserve all metadata (id, headers, event_time, watermark) + yield Message.from_datum(datum) + i += 1 + + # Remove flushed items + self.sorted_buffer = self.sorted_buffer[i:] + + +async def main(): + """ + Start the accumulator server. + """ + import signal + + sock_file = "/tmp/var/run/numaflow/accumulator.sock" + server_info_file = "/tmp/var/run/numaflow/accumulator-server-info" + server = AccumulatorAsyncServer(sock_file, server_info_file) + + # Set up signal handlers for graceful shutdown + loop = asyncio.get_running_loop() + try: + loop.add_signal_handler(signal.SIGINT, lambda: server.stop()) + loop.add_signal_handler(signal.SIGTERM, lambda: server.stop()) + except (NotImplementedError, RuntimeError): + pass + + try: + print("Starting Stream Sorter Accumulator Server...") + await server.start(StreamSorter) + print("Shutting down gracefully...") + except asyncio.CancelledError: + try: + server.stop() + except Exception: + pass + return + + +# Optional: ensure default signal handlers are in place so asyncio.run can handle them cleanly. 
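+# SIGINT's default_int_handler raises KeyboardInterrupt, which asyncio.run unwinds
+# cleanly; leaving SIGTERM at SIG_DFL simply terminates the process if the
+# loop-level handlers in main() could not be registered.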
+import signal
+signal.signal(signal.SIGINT, signal.default_int_handler)
+try:
+    signal.signal(signal.SIGTERM, signal.SIG_DFL)
+except AttributeError:
+    pass
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/packages/pynumaflow-lite/tests/examples/batchmap_cat.py b/packages/pynumaflow-lite/tests/examples/batchmap_cat.py
new file mode 100644
index 00000000..388926d7
--- /dev/null
+++ b/packages/pynumaflow-lite/tests/examples/batchmap_cat.py
@@ -0,0 +1,47 @@
+import asyncio
+import collections.abc
+
+import signal
+
+from pynumaflow_lite import batchmapper
+
+
+async def async_handler(batch: collections.abc.AsyncIterator[batchmapper.Datum]) -> batchmapper.BatchResponses:
+    responses = batchmapper.BatchResponses()
+    async for d in batch:
+        resp = batchmapper.BatchResponse.from_id(d.id)
+        # Every datum id must get a response, even when its message is dropped,
+        # so the BatchResponse is always appended to the collection.
+        if d.value == b"bad world":
+            resp.append(batchmapper.Message.message_to_drop())
+        else:
+            resp.append(batchmapper.Message(d.value, d.keys))
+        responses.append(resp)
+    return responses
+
+
+async def start(f: callable):
+    sock_file = "/tmp/var/run/numaflow/batchmap.sock"
+    server_info_file = "/tmp/var/run/numaflow/mapper-server-info"
+    server = batchmapper.BatchMapAsyncServer(sock_file, server_info_file)
+
+    # Register loop-level signal handlers to request graceful shutdown
+    loop = asyncio.get_running_loop()
+    try:
+        loop.add_signal_handler(signal.SIGINT, lambda: server.stop())
+        loop.add_signal_handler(signal.SIGTERM, lambda: server.stop())
+    except (NotImplementedError, RuntimeError):
+        pass
+
+    try:
+        await server.start(f)
+        print("Shutting down gracefully...")
+    except asyncio.CancelledError:
+        try:
+            server.stop()
+        except Exception:
+            pass
+        return
+
+
+if __name__ == "__main__":
+    asyncio.run(start(async_handler))
diff --git a/packages/pynumaflow-lite/tests/examples/batchmap_cat_class.py b/packages/pynumaflow-lite/tests/examples/batchmap_cat_class.py
new file mode 100644
index 00000000..245ebe1e
--- /dev/null
+++ b/packages/pynumaflow-lite/tests/examples/batchmap_cat_class.py
@@ -0,0 +1,57 @@
+import asyncio
+import signal
+from collections.abc import AsyncIterator
+
+from pynumaflow_lite import batchmapper
+from pynumaflow_lite.batchmapper import Message
+
+
+class SimpleBatchCat(batchmapper.BatchMapper):
+    async def handler(self, batch: AsyncIterator[batchmapper.Datum]) -> batchmapper.BatchResponses:
+        responses = batchmapper.BatchResponses()
+        async for d in batch:
+            resp = batchmapper.BatchResponse(d.id)
+            # Append a response for every datum id, including dropped messages.
+            if d.value == b"bad world":
+                resp.append(Message.message_to_drop())
+            else:
+                resp.append(Message(d.value, d.keys))
+            responses.append(resp)
+        return responses
+
+
+# Optional: ensure default signal handlers are in place so asyncio.run can handle them cleanly.
+signal.signal(signal.SIGINT, signal.default_int_handler)
+try:
+    signal.signal(signal.SIGTERM, signal.SIG_DFL)
+except AttributeError:
+    pass
+
+
+async def start(f: callable):
+    sock_file = "/tmp/var/run/numaflow/batchmap.sock"
+    server_info_file = "/tmp/var/run/numaflow/mapper-server-info"
+    server = batchmapper.BatchMapAsyncServer(sock_file, server_info_file)
+
+    # Register loop-level signal handlers so we control shutdown and avoid asyncio.run noise.
+    loop = asyncio.get_running_loop()
+    try:
+        loop.add_signal_handler(signal.SIGINT, lambda: server.stop())
+        loop.add_signal_handler(signal.SIGTERM, lambda: server.stop())
+    except (NotImplementedError, RuntimeError):
+        pass
+
+    try:
+        await server.start(f)
+        print("Shutting down gracefully...")
+    except asyncio.CancelledError:
+        try:
+            server.stop()
+        except Exception:
+            pass
+        return
+
+
+if __name__ == "__main__":
+    async_handler = SimpleBatchCat()
+    asyncio.run(start(async_handler))
diff --git a/packages/pynumaflow-lite/tests/examples/map_cat.py b/packages/pynumaflow-lite/tests/examples/map_cat.py
new file mode 100644
index 00000000..4d4463a3
--- /dev/null
+++ b/packages/pynumaflow-lite/tests/examples/map_cat.py
@@ -0,0 +1,45 @@
+import asyncio
+import signal
+
+from pynumaflow_lite import mapper
+
+
+async def async_handler(
+    keys: list[str], payload: mapper.Datum
+) -> mapper.Messages:
+    messages = mapper.Messages()
+
+    if payload.value == b"bad world":
+        messages.append(mapper.Message.message_to_drop())
+    else:
+        messages.append(mapper.Message(payload.value, keys))
+
+    return messages
+
+
+async def start(f: callable):
+    sock_file = "/tmp/var/run/numaflow/map.sock"
+    server_info_file = "/tmp/var/run/numaflow/mapper-server-info"
+    server = mapper.MapAsyncServer(sock_file, server_info_file)
+
+    # Register loop-level signal handlers to request graceful shutdown
+    loop = asyncio.get_running_loop()
+    try:
+        loop.add_signal_handler(signal.SIGINT, lambda: server.stop())
+        loop.add_signal_handler(signal.SIGTERM, lambda: server.stop())
+    except (NotImplementedError, RuntimeError):
+        pass
+
+    try:
+        await server.start(f)
+        print("Shutting down gracefully...")
+    except asyncio.CancelledError:
+        try:
+            server.stop()
+        except Exception:
+            pass
+        return
+
+
+if __name__ == "__main__":
+    asyncio.run(start(async_handler))
diff --git a/packages/pynumaflow-lite/tests/examples/map_cat_class.py b/packages/pynumaflow-lite/tests/examples/map_cat_class.py
new file mode 100644
index 00000000..2d96e086
--- /dev/null
+++ b/packages/pynumaflow-lite/tests/examples/map_cat_class.py
@@ -0,0 +1,61 @@
+import asyncio
+import signal
+from pynumaflow_lite import mapper
+
+
+class SimpleCat(mapper.Mapper):
+    async def handler(
+        self, keys: list[str], payload: mapper.Datum
+    ) -> mapper.Messages:
+        messages = mapper.Messages()
+
+        if payload.value == b"bad world":
+            messages.append(mapper.Message.message_to_drop())
+        else:
+            messages.append(mapper.Message(payload.value, keys))
+
+        return messages
+
+
+# Optional: ensure default signal handlers are in place so asyncio.run can handle them cleanly.
+signal.signal(signal.SIGINT, signal.default_int_handler)
+try:
+    signal.signal(signal.SIGTERM, signal.SIG_DFL)
+except AttributeError:
+    pass
+
+
+async def start(f: callable):
+    sock_file = "/tmp/var/run/numaflow/map.sock"
+    server_info_file = "/tmp/var/run/numaflow/mapper-server-info"
+    server = mapper.MapAsyncServer(sock_file, server_info_file)
+
+    # Register loop-level signal handlers so we control shutdown and avoid asyncio.run
+    # converting it into KeyboardInterrupt/CancelledError traces.
+    loop = asyncio.get_running_loop()
+    try:
+        loop.add_signal_handler(signal.SIGINT, lambda: server.stop())
+        loop.add_signal_handler(signal.SIGTERM, lambda: server.stop())
+    except (NotImplementedError, RuntimeError):
+        # add_signal_handler may not be available on some platforms/contexts; fallback below.
+        pass
+
+    try:
+        await server.start(f)
+        print("Shutting down gracefully...")
+    except asyncio.CancelledError:
+        # Fallback in case the task was cancelled by the runner
+        try:
+            server.stop()
+        except Exception:
+            pass
+        return
+
+
+if __name__ == "__main__":
+    async_handler = SimpleCat()
+    asyncio.run(start(async_handler))
diff --git a/packages/pynumaflow-lite/tests/examples/mapstream_cat.py b/packages/pynumaflow-lite/tests/examples/mapstream_cat.py
new file mode 100644
index 00000000..ef6ddce6
--- /dev/null
+++ b/packages/pynumaflow-lite/tests/examples/mapstream_cat.py
@@ -0,0 +1,48 @@
+import asyncio
+import signal
+from collections.abc import AsyncIterator
+
+from pynumaflow_lite import mapstreamer
+from pynumaflow_lite.mapstreamer import Message
+
+
+async def async_handler(keys: list[str], datum: mapstreamer.Datum) -> AsyncIterator[Message]:
+    """
+    A handler that splits the input datum value into multiple strings by the `,` separator and
+    emits them as a stream.
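+
+    For example, a datum whose value is b"a,b,c" is emitted as three messages
+    carrying b"a", b"b", and b"c", which is exactly what the Rust end-to-end
+    mapstream client asserts.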
+ """ + parts = datum.value.decode("utf-8").split(",") + if not parts: + yield Message.to_drop() + return + for s in parts: + yield Message(s.encode(), keys) + + +async def start(f: callable): + sock_file = "/tmp/var/run/numaflow/mapstream.sock" + server_info_file = "/tmp/var/run/numaflow/mapper-server-info" + server = mapstreamer.MapStreamAsyncServer(sock_file, server_info_file) + + # Register loop-level signal handlers to request graceful shutdown + loop = asyncio.get_running_loop() + try: + loop.add_signal_handler(signal.SIGINT, lambda: server.stop()) + loop.add_signal_handler(signal.SIGTERM, lambda: server.stop()) + except (NotImplementedError, RuntimeError): + pass + + try: + await server.start(f) + print("Shutting down gracefully...") + except asyncio.CancelledError: + try: + server.stop() + except Exception: + pass + return + + +if __name__ == "__main__": + asyncio.run(start(async_handler)) + diff --git a/packages/pynumaflow-lite/tests/examples/mapstream_cat_class.py b/packages/pynumaflow-lite/tests/examples/mapstream_cat_class.py new file mode 100644 index 00000000..0929b522 --- /dev/null +++ b/packages/pynumaflow-lite/tests/examples/mapstream_cat_class.py @@ -0,0 +1,54 @@ +import asyncio +import signal +from collections.abc import AsyncIterator + +from pynumaflow_lite import mapstreamer +from pynumaflow_lite.mapstreamer import Message + + +class SimpleStreamCat(mapstreamer.MapStreamer): + async def handler(self, keys: list[str], datum: mapstreamer.Datum) -> AsyncIterator[Message]: + parts = datum.value.decode("utf-8").split(",") + if not parts: + yield Message.to_drop() + return + for s in parts: + yield Message(s.encode(), keys) + + +# Optional: ensure default signal handlers are in place so asyncio.run can handle them cleanly. +signal.signal(signal.SIGINT, signal.default_int_handler) +try: + signal.signal(signal.SIGTERM, signal.SIG_DFL) +except AttributeError: + pass + + +async def start(f: callable): + sock_file = "/tmp/var/run/numaflow/mapstream.sock" + server_info_file = "/tmp/var/run/numaflow/mapper-server-info" + server = mapstreamer.MapStreamAsyncServer(sock_file, server_info_file) + + # Register loop-level signal handlers so we control shutdown and avoid asyncio.run noise. 
+ loop = asyncio.get_running_loop() + try: + loop.add_signal_handler(signal.SIGINT, lambda: server.stop()) + loop.add_signal_handler(signal.SIGTERM, lambda: server.stop()) + except (NotImplementedError, RuntimeError): + pass + + try: + await server.start(f) + print("Shutting down gracefully...") + except asyncio.CancelledError: + try: + server.stop() + except Exception: + pass + return + + +if __name__ == "__main__": + async_handler = SimpleStreamCat() + asyncio.run(start(async_handler)) + diff --git a/packages/pynumaflow-lite/tests/examples/reduce_counter_class.py b/packages/pynumaflow-lite/tests/examples/reduce_counter_class.py new file mode 100644 index 00000000..020c49d9 --- /dev/null +++ b/packages/pynumaflow-lite/tests/examples/reduce_counter_class.py @@ -0,0 +1,60 @@ +import asyncio +import signal +from collections.abc import AsyncIterable + +from pynumaflow_lite import reducer + + +class ReduceCounter(reducer.Reducer): + def __init__(self, initial: int = 0) -> None: + self.counter = initial + + async def handler( + self, keys: list[str], datums: AsyncIterable[reducer.Datum], md: reducer.Metadata + ) -> reducer.Messages: + iw = md.interval_window + self.counter = 0 + async for _ in datums: + self.counter += 1 + msg = ( + f"counter:{self.counter} interval_window_start:{iw.start} interval_window_end:{iw.end}" + ).encode() + out = reducer.Messages() + out.append(reducer.Message(msg, keys)) + return out + + +# Optional: ensure default signal handlers are in place so asyncio.run can handle them cleanly. +signal.signal(signal.SIGINT, signal.default_int_handler) +try: + signal.signal(signal.SIGTERM, signal.SIG_DFL) +except AttributeError: + pass + + +async def start(creator: type, init_args: tuple): + sock_file = "/tmp/var/run/numaflow/reduce.sock" + server_info_file = "/tmp/var/run/numaflow/reducer-server-info" + server = reducer.ReduceAsyncServer(sock_file, server_info_file) + + loop = asyncio.get_running_loop() + try: + loop.add_signal_handler(signal.SIGINT, lambda: server.stop()) + loop.add_signal_handler(signal.SIGTERM, lambda: server.stop()) + except (NotImplementedError, RuntimeError): + pass + + try: + await server.start(creator, init_args) + print("Shutting down gracefully...") + except asyncio.CancelledError: + try: + server.stop() + except Exception: + pass + return + + +if __name__ == "__main__": + asyncio.run(start(ReduceCounter, (0,))) + diff --git a/packages/pynumaflow-lite/tests/examples/reduce_counter_func.py b/packages/pynumaflow-lite/tests/examples/reduce_counter_func.py new file mode 100644 index 00000000..f893c965 --- /dev/null +++ b/packages/pynumaflow-lite/tests/examples/reduce_counter_func.py @@ -0,0 +1,56 @@ +import asyncio +import signal +from collections.abc import AsyncIterable + +from pynumaflow_lite import reducer + + +async def reduce_handler( + keys: list[str], datums: AsyncIterable[reducer.Datum], md: reducer.Metadata +) -> reducer.Messages: + interval_window = md.interval_window + counter = 0 + async for _ in datums: + counter += 1 + msg = ( + f"counter:{counter} interval_window_start:{interval_window.start} " + f"interval_window_end:{interval_window.end}" + ) + out = reducer.Messages() + out.append(reducer.Message(str.encode(msg), keys=keys)) + return out + + +# Optional: ensure default signal handlers are in place so asyncio.run can handle them cleanly. 
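+# (Not every platform exposes every signal constant, hence the AttributeError
+# guard around SIGTERM below.)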
+signal.signal(signal.SIGINT, signal.default_int_handler) +try: + signal.signal(signal.SIGTERM, signal.SIG_DFL) +except AttributeError: + pass + + +async def start(handler: callable): + sock_file = "/tmp/var/run/numaflow/reduce.sock" + server_info_file = "/tmp/var/run/numaflow/reducer-server-info" + server = reducer.ReduceAsyncServer(sock_file, server_info_file) + + loop = asyncio.get_running_loop() + try: + loop.add_signal_handler(signal.SIGINT, lambda: server.stop()) + loop.add_signal_handler(signal.SIGTERM, lambda: server.stop()) + except (NotImplementedError, RuntimeError): + pass + + try: + await server.start(handler) + print("Shutting down gracefully...") + except asyncio.CancelledError: + try: + server.stop() + except Exception: + pass + return + + +if __name__ == "__main__": + asyncio.run(start(reduce_handler)) diff --git a/packages/pynumaflow-lite/tests/examples/session_reduce_counter_class.py b/packages/pynumaflow-lite/tests/examples/session_reduce_counter_class.py new file mode 100644 index 00000000..003c52a7 --- /dev/null +++ b/packages/pynumaflow-lite/tests/examples/session_reduce_counter_class.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 +""" +Example session reduce handler that counts messages per session. + +This demonstrates: +- session_reduce: counts incoming messages and yields the count +- accumulator: returns current count as bytes +- merge_accumulator: merges counts from another session +""" + +import asyncio +import signal +from collections.abc import AsyncIterable, AsyncIterator + +from pynumaflow_lite import session_reducer + + +class SessionReduceCounter(session_reducer.SessionReducer): + """ + A session reducer that counts all messages in a session. + When sessions are merged, the counts are added together. + """ + + def __init__(self, initial: int = 0) -> None: + self.counter = initial + + async def session_reduce( + self, keys: list[str], datums: AsyncIterable[session_reducer.Datum] + ) -> AsyncIterator[session_reducer.Message]: + """ + Count all incoming messages in this session and yield the count. + """ + # Count all incoming messages in this session + async for _ in datums: + self.counter += 1 + + # Emit the current count as bytes with the same keys + yield session_reducer.Message(str(self.counter).encode(), keys) + + async def accumulator(self) -> bytes: + """ + Return current count as bytes for merging with other sessions. + """ + return str(self.counter).encode() + + async def merge_accumulator(self, accumulator: bytes) -> None: + """ + Parse serialized accumulator and add to our count. + """ + try: + self.counter += int(accumulator.decode("utf-8"), 10) + except Exception as e: + import sys + + print(f"Failed to parse accumulator value: {e}", file=sys.stderr) + + +async def main(): + """ + Start the session reduce server. 
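+
+    The socket and server-info paths below are the ones the end-to-end test
+    (tests/test_session_reduce.py) points the Rust test client at.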
+ """ + sock_file = "/tmp/var/run/numaflow/sessionreduce.sock" + server_info_file = "/tmp/var/run/numaflow/sessionreducer-server-info" + server = session_reducer.SessionReduceAsyncServer(sock_file, server_info_file) + + # Set up signal handlers for graceful shutdown + loop = asyncio.get_running_loop() + try: + loop.add_signal_handler(signal.SIGINT, lambda: server.stop()) + loop.add_signal_handler(signal.SIGTERM, lambda: server.stop()) + except (NotImplementedError, RuntimeError): + pass + + try: + await server.start(SessionReduceCounter) + print("Shutting down gracefully...") + except asyncio.CancelledError: + try: + server.stop() + except Exception: + pass + return + + +# Optional: ensure default signal handlers are in place so asyncio.run can handle them cleanly. +signal.signal(signal.SIGINT, signal.default_int_handler) +try: + signal.signal(signal.SIGTERM, signal.SIG_DFL) +except AttributeError: + pass + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/packages/pynumaflow-lite/tests/test_accumulator.py b/packages/pynumaflow-lite/tests/test_accumulator.py new file mode 100644 index 00000000..eeaa9a5b --- /dev/null +++ b/packages/pynumaflow-lite/tests/test_accumulator.py @@ -0,0 +1,23 @@ +from pathlib import Path + +import pytest + +from _test_utils import run_python_server_with_rust_client + +SOCK_PATH = Path("/tmp/var/run/numaflow/accumulator.sock") +SERVER_INFO = Path("/tmp/var/run/numaflow/accumulator-server-info") + +SCRIPTS = [ + "accumulator_stream_sorter.py", +] + + +@pytest.mark.parametrize("script", SCRIPTS) +def test_python_accumulator_server_and_rust_client(script: str, tmp_path: Path): + run_python_server_with_rust_client( + script=script, + sock_path=SOCK_PATH, + server_info_path=SERVER_INFO, + rust_bin_name="test_accumulator", + rust_bin_args=[str(SOCK_PATH)], + ) diff --git a/packages/pynumaflow-lite/tests/test_batchmap.py b/packages/pynumaflow-lite/tests/test_batchmap.py new file mode 100644 index 00000000..36201d1c --- /dev/null +++ b/packages/pynumaflow-lite/tests/test_batchmap.py @@ -0,0 +1,23 @@ +from pathlib import Path + +import pytest + +from _test_utils import run_python_server_with_rust_client + +SOCK_PATH = Path("/tmp/var/run/numaflow/batchmap.sock") +SERVER_INFO = Path("/tmp/var/run/numaflow/mapper-server-info") + +SCRIPTS = [ + "batchmap_cat.py", + "batchmap_cat_class.py", +] + + +@pytest.mark.parametrize("script", SCRIPTS) +def test_python_batch_server_and_rust_client(script: str, tmp_path: Path): + run_python_server_with_rust_client( + script=script, + sock_path=SOCK_PATH, + server_info_path=SERVER_INFO, + rust_bin_name="test_batchmap", + ) diff --git a/packages/pynumaflow-lite/tests/test_map.py b/packages/pynumaflow-lite/tests/test_map.py new file mode 100644 index 00000000..869f3f49 --- /dev/null +++ b/packages/pynumaflow-lite/tests/test_map.py @@ -0,0 +1,23 @@ +from pathlib import Path + +import pytest + +from _test_utils import run_python_server_with_rust_client + +SOCK_PATH = Path("/tmp/var/run/numaflow/map.sock") +SERVER_INFO = Path("/tmp/var/run/numaflow/mapper-server-info") + +SCRIPTS = [ + "map_cat.py", + "map_cat_class.py", +] + + +@pytest.mark.parametrize("script", SCRIPTS) +def test_python_server_and_rust_client(script: str, tmp_path: Path): + run_python_server_with_rust_client( + script=script, + sock_path=SOCK_PATH, + server_info_path=SERVER_INFO, + rust_bin_name="test_map", + ) diff --git a/packages/pynumaflow-lite/tests/test_mapstream.py b/packages/pynumaflow-lite/tests/test_mapstream.py new file mode 100644 index 
00000000..7181f95a --- /dev/null +++ b/packages/pynumaflow-lite/tests/test_mapstream.py @@ -0,0 +1,23 @@ +from pathlib import Path + +import pytest + +from _test_utils import run_python_server_with_rust_client + +SOCK_PATH = Path("/tmp/var/run/numaflow/mapstream.sock") +SERVER_INFO = Path("/tmp/var/run/numaflow/mapper-server-info") + +SCRIPTS = [ + "mapstream_cat.py", + "mapstream_cat_class.py", +] + + +@pytest.mark.parametrize("script", SCRIPTS) +def test_python_stream_server_and_rust_client(script: str, tmp_path: Path): + run_python_server_with_rust_client( + script=script, + sock_path=SOCK_PATH, + server_info_path=SERVER_INFO, + rust_bin_name="test_mapstream", + ) diff --git a/packages/pynumaflow-lite/tests/test_reduce.py b/packages/pynumaflow-lite/tests/test_reduce.py new file mode 100644 index 00000000..8bc2ecca --- /dev/null +++ b/packages/pynumaflow-lite/tests/test_reduce.py @@ -0,0 +1,23 @@ +from pathlib import Path + +import pytest + +from _test_utils import run_python_server_with_rust_client + +SOCK_PATH = Path("/tmp/var/run/numaflow/reduce.sock") +SERVER_INFO = Path("/tmp/var/run/numaflow/reducer-server-info") + +SCRIPTS = [ + "reduce_counter_class.py", + "reduce_counter_func.py", +] + + +@pytest.mark.parametrize("script", SCRIPTS) +def test_python_reduce_server_and_rust_client(script: str, tmp_path: Path): + run_python_server_with_rust_client( + script=script, + sock_path=SOCK_PATH, + server_info_path=SERVER_INFO, + rust_bin_name="test_reduce", + ) diff --git a/packages/pynumaflow-lite/tests/test_session_reduce.py b/packages/pynumaflow-lite/tests/test_session_reduce.py new file mode 100644 index 00000000..a258bb5e --- /dev/null +++ b/packages/pynumaflow-lite/tests/test_session_reduce.py @@ -0,0 +1,23 @@ +from pathlib import Path + +import pytest + +from _test_utils import run_python_server_with_rust_client + +SOCK_PATH = Path("/tmp/var/run/numaflow/sessionreduce.sock") +SERVER_INFO = Path("/tmp/var/run/numaflow/sessionreducer-server-info") + +SCRIPTS = [ + "session_reduce_counter_class.py", +] + + +@pytest.mark.parametrize("script", SCRIPTS) +def test_python_session_reduce_server_and_rust_client(script: str, tmp_path: Path): + run_python_server_with_rust_client( + script=script, + sock_path=SOCK_PATH, + server_info_path=SERVER_INFO, + rust_bin_name="test_session_reduce", + rust_bin_args=[str(SOCK_PATH)], + ) diff --git a/packages/pynumaflow-lite/uv.lock b/packages/pynumaflow-lite/uv.lock new file mode 100644 index 00000000..ef06e0bf --- /dev/null +++ b/packages/pynumaflow-lite/uv.lock @@ -0,0 +1,199 @@ +version = 1 +revision = 1 +requires-python = ">=3.8" +resolution-markers = [ + "python_full_version >= '3.9'", + "python_full_version < '3.9'", +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = "exceptiongroup" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", version = "4.13.2", source = { registry = "https://pypi.org/simple" 
}, marker = "python_full_version < '3.9'" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9' and python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10", size = 16674 }, +] + +[[package]] +name = "iniconfig" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050 }, +] + +[[package]] +name = "packaging" +version = "25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469 }, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217 }, +] + +[[package]] +name = "pynumaflow-lite" +source = { editable = "." } + +[package.dev-dependencies] +dev = [ + { name = "pytest", version = "8.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, +] + +[package.metadata] + +[package.metadata.requires-dev] +dev = [{ name = "pytest", specifier = ">=8.3.5" }] + +[[package]] +name = "pytest" +version = "8.3.5" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +dependencies = [ + { name = "colorama", marker = "python_full_version < '3.9' and sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.9'" }, + { name = "iniconfig", marker = "python_full_version < '3.9'" }, + { name = "packaging", marker = "python_full_version < '3.9'" }, + { name = "pluggy", version = "1.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "tomli", marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634 }, +] + +[[package]] +name = "pytest" +version = "8.4.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.9'", +] +dependencies = [ + { name = "colorama", marker = "python_full_version >= '3.9' and sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version >= '3.9' and python_full_version < '3.11'" }, + { name = "iniconfig", marker = "python_full_version >= '3.9'" }, + { name = "packaging", marker = "python_full_version >= '3.9'" }, + { name = "pluggy", version = "1.6.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "pygments", marker = "python_full_version >= '3.9'" }, + { name = "tomli", marker = "python_full_version >= '3.9' and python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750 }, +] + +[[package]] +name = "tomli" +version = "2.2.1" +source = 
{ registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/18/87/302344fed471e44a87289cf4967697d07e532f2421fdaf868a303cbae4ff/tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff", size = 17175 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/ca/75707e6efa2b37c77dadb324ae7d9571cb424e61ea73fad7c56c2d14527f/tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249", size = 131077 }, + { url = "https://files.pythonhosted.org/packages/c7/16/51ae563a8615d472fdbffc43a3f3d46588c264ac4f024f63f01283becfbb/tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6", size = 123429 }, + { url = "https://files.pythonhosted.org/packages/f1/dd/4f6cd1e7b160041db83c694abc78e100473c15d54620083dbd5aae7b990e/tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a", size = 226067 }, + { url = "https://files.pythonhosted.org/packages/a9/6b/c54ede5dc70d648cc6361eaf429304b02f2871a345bbdd51e993d6cdf550/tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee", size = 236030 }, + { url = "https://files.pythonhosted.org/packages/1f/47/999514fa49cfaf7a92c805a86c3c43f4215621855d151b61c602abb38091/tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e", size = 240898 }, + { url = "https://files.pythonhosted.org/packages/73/41/0a01279a7ae09ee1573b423318e7934674ce06eb33f50936655071d81a24/tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4", size = 229894 }, + { url = "https://files.pythonhosted.org/packages/55/18/5d8bc5b0a0362311ce4d18830a5d28943667599a60d20118074ea1b01bb7/tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106", size = 245319 }, + { url = "https://files.pythonhosted.org/packages/92/a3/7ade0576d17f3cdf5ff44d61390d4b3febb8a9fc2b480c75c47ea048c646/tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8", size = 238273 }, + { url = "https://files.pythonhosted.org/packages/72/6f/fa64ef058ac1446a1e51110c375339b3ec6be245af9d14c87c4a6412dd32/tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff", size = 98310 }, + { url = "https://files.pythonhosted.org/packages/6a/1c/4a2dcde4a51b81be3530565e92eda625d94dafb46dbeb15069df4caffc34/tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b", size = 108309 }, + { url = "https://files.pythonhosted.org/packages/52/e1/f8af4c2fcde17500422858155aeb0d7e93477a0d59a98e56cbfe75070fd0/tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea", size = 132762 }, + { url = "https://files.pythonhosted.org/packages/03/b8/152c68bb84fc00396b83e7bbddd5ec0bd3dd409db4195e2a9b3e398ad2e3/tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8", size = 123453 }, + { url = "https://files.pythonhosted.org/packages/c8/d6/fc9267af9166f79ac528ff7e8c55c8181ded34eb4b0e93daa767b8841573/tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192", size = 233486 }, + { url = "https://files.pythonhosted.org/packages/5c/51/51c3f2884d7bab89af25f678447ea7d297b53b5a3b5730a7cb2ef6069f07/tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222", size = 242349 }, + { url = "https://files.pythonhosted.org/packages/ab/df/bfa89627d13a5cc22402e441e8a931ef2108403db390ff3345c05253935e/tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77", size = 252159 }, + { url = "https://files.pythonhosted.org/packages/9e/6e/fa2b916dced65763a5168c6ccb91066f7639bdc88b48adda990db10c8c0b/tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6", size = 237243 }, + { url = "https://files.pythonhosted.org/packages/b4/04/885d3b1f650e1153cbb93a6a9782c58a972b94ea4483ae4ac5cedd5e4a09/tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd", size = 259645 }, + { url = "https://files.pythonhosted.org/packages/9c/de/6b432d66e986e501586da298e28ebeefd3edc2c780f3ad73d22566034239/tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e", size = 244584 }, + { url = "https://files.pythonhosted.org/packages/1c/9a/47c0449b98e6e7d1be6cbac02f93dd79003234ddc4aaab6ba07a9a7482e2/tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98", size = 98875 }, + { url = "https://files.pythonhosted.org/packages/ef/60/9b9638f081c6f1261e2688bd487625cd1e660d0a85bd469e91d8db969734/tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4", size = 109418 }, + { url = "https://files.pythonhosted.org/packages/04/90/2ee5f2e0362cb8a0b6499dc44f4d7d48f8fff06d28ba46e6f1eaa61a1388/tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7", size = 132708 }, + { url = "https://files.pythonhosted.org/packages/c0/ec/46b4108816de6b385141f082ba99e315501ccd0a2ea23db4a100dd3990ea/tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c", size = 123582 }, + { url = "https://files.pythonhosted.org/packages/a0/bd/b470466d0137b37b68d24556c38a0cc819e8febe392d5b199dcd7f578365/tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13", size = 232543 }, + { url = "https://files.pythonhosted.org/packages/d9/e5/82e80ff3b751373f7cead2815bcbe2d51c895b3c990686741a8e56ec42ab/tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281", size = 241691 }, + { url = 
"https://files.pythonhosted.org/packages/05/7e/2a110bc2713557d6a1bfb06af23dd01e7dde52b6ee7dadc589868f9abfac/tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272", size = 251170 }, + { url = "https://files.pythonhosted.org/packages/64/7b/22d713946efe00e0adbcdfd6d1aa119ae03fd0b60ebed51ebb3fa9f5a2e5/tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140", size = 236530 }, + { url = "https://files.pythonhosted.org/packages/38/31/3a76f67da4b0cf37b742ca76beaf819dca0ebef26d78fc794a576e08accf/tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2", size = 258666 }, + { url = "https://files.pythonhosted.org/packages/07/10/5af1293da642aded87e8a988753945d0cf7e00a9452d3911dd3bb354c9e2/tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744", size = 243954 }, + { url = "https://files.pythonhosted.org/packages/5b/b9/1ed31d167be802da0fc95020d04cd27b7d7065cc6fbefdd2f9186f60d7bd/tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec", size = 98724 }, + { url = "https://files.pythonhosted.org/packages/c7/32/b0963458706accd9afcfeb867c0f9175a741bf7b19cd424230714d722198/tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69", size = 109383 }, + { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257 }, +] + +[[package]] +name = "typing-extensions" +version = "4.13.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806 }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614 }, +]