ray-project · raulchen · Feb 28, 2024 · Feb 12, 2024 · Feb 13, 2024 · Feb 14, 2024
@@ -1,3 +1,4 @@
+import time
 from dataclasses import dataclass, field, fields
 from typing import TYPE_CHECKING, Any, Dict, Optional
 
@@ -112,6 +113,16 @@ class OpRuntimeMetrics:
         default=0, metadata={"map_only": True, "export_metric": True}
     )
 
+    # Total time the operator has spent in task submission backpressure,
+    # accumulated as differences of time.perf_counter() readings (seconds).
+    task_submission_backpressure_time: float = field(
+        default=0, metadata={"export": False}
+    )
+
+    # time.perf_counter() reading taken when the current task submission
+    # backpressure pause began, or -1 when no pause is in progress.
+    _task_submission_backpressure_start_time: float = field(
+        default=-1, metadata={"export": False}
+    )
+
     def __init__(self, op: "PhysicalOperator"):
         from ray.data._internal.execution.operators.map_operator import MapOperator
 
@@ -266,6 +277,17 @@ def on_output_dequeued(self, output: RefBundle):
         """Callback when an output is dequeued by the operator."""
         self.obj_store_mem_internal_outqueue -= output.size_bytes()
 
+    def on_toggle_task_submission_backpressure(self, in_backpressure):
+        """Callback when task submission backpressure starts or stops.
+
+        Starts a timer when backpressure begins and, when it ends, adds the
+        elapsed time to ``task_submission_backpressure_time``. Calls that
+        repeat the current state are no-ops, so a running timer is never
+        reset or stopped by a redundant ``True``.
+
+        Args:
+            in_backpressure: True if the operator is now in task submission
+                backpressure, False if it no longer is.
+        """
+        timer_running = self._task_submission_backpressure_start_time != -1
+        if in_backpressure and not timer_running:
+            # Backpressure starting: start the timer.
+            self._task_submission_backpressure_start_time = time.perf_counter()
+        elif not in_backpressure and timer_running:
+            # Backpressure stopping: record the elapsed pause, reset the timer.
+            self.task_submission_backpressure_time += (
+                time.perf_counter() - self._task_submission_backpressure_start_time
+            )
+            self._task_submission_backpressure_start_time = -1
+
     def on_output_taken(self, output: RefBundle):
         """Callback when an output is taken from the operator."""
         self.num_outputs_taken += 1

@@ -179,6 +179,7 @@ def __init__(
         self._inputs_complete = not input_dependencies
         self._target_max_block_size = target_max_block_size
         self._started = False
+        self._in_task_submission_backpressure = False
         self._metrics = OpRuntimeMetrics(self)
         self._estimated_output_blocks = None
         self._execution_completed = False
@@ -404,3 +405,15 @@ def notify_resource_usage(
             under_resource_limits: Whether this operator is under resource limits.
         """
         pass
+
+    def notify_in_task_submission_backpressure(self, in_backpressure: bool) -> None:
+        """Record whether this operator is in task submission backpressure.
+
+        Called periodically from the executor, for stats collection purposes.
+        The metrics object is only notified when the status actually changes.
+
+        Args:
+            in_backpressure: Value this operator's in_backpressure should be set to.
+        """
+        if self._in_task_submission_backpressure == in_backpressure:
+            # Status unchanged since the last call; nothing to record.
+            return
+        self._metrics.on_toggle_task_submission_backpressure(in_backpressure)
+        self._in_task_submission_backpressure = in_backpressure
@@ -523,14 +523,19 @@ def select_operator_to_run(
     ops = []
     for op, state in topology.items():
         under_resource_limits = _execution_allowed(op, resource_manager)
+        in_backpressure = (
+            any(not p.can_add_input(op) for p in backpressure_policies)
+            or not under_resource_limits
+        )
         if (
-            under_resource_limits
+            not in_backpressure
             and not op.completed()
             and state.num_queued() > 0
             and op.should_add_input()
-            and all(p.can_add_input(op) for p in backpressure_policies)
         ):
             ops.append(op)
+        # Signal whether the op is in backpressure, for stats collection.
+        op.notify_in_task_submission_backpressure(in_backpressure)
         # Update the op in all cases to enable internal autoscaling, etc.
         op.notify_resource_usage(state.num_queued(), under_resource_limits)
 

@@ -120,7 +120,7 @@ def test_e2e_normal(self):
         map_func1 = self._get_map_func(actor, 1)
         map_func2 = self._get_map_func(actor, 2)
 
-        # Creat a dataset with 2 map ops. Each map op has N tasks, where N is
+        # Create a dataset with 2 map ops. Each map op has N tasks, where N is
         # the number of cluster CPUs.
         N = self.__class__._cluster_cpus
         ds = ray.data.range(N, parallelism=N)
@@ -138,6 +138,36 @@ def test_e2e_normal(self):
         start2, end2 = ray.get(actor.get_start_and_end_time_for_op.remote(2))
         assert start1 < start2 < end1 < end2, (start1, start2, end1, end2)
 
+    def test_e2e_time_backpressure(self):
+        """E2E check that task submission backpressure time gets recorded."""
+        # TODO: fold this into the test above once backpressure_time is
+        # exported from op_runtime_metrics.py. Until then, this test patches
+        # the OpRuntimeMetrics dataclass field so that the backpressure time
+        # can be observed.
+        from ray.data._internal.execution.interfaces.op_runtime_metrics import (
+            OpRuntimeMetrics,
+        )
+
+        metric_name = "task_submission_backpressure_time"
+        metric_field = OpRuntimeMetrics.__dataclass_fields__[metric_name]
+        with patch.object(metric_field, "metadata", {"export": True}):
+            actor = self._create_record_time_actor()
+            map_fn1 = self._get_map_func(actor, 1)
+            map_fn2 = self._get_map_func(actor, 2)
+
+            # Build a dataset with 2 map ops, each with one task per cluster CPU.
+            cpu_count = self.__class__._cluster_cpus
+            ds = ray.data.range(cpu_count, parallelism=cpu_count)
+            # Distinct `num_cpus` values keep the two map ops from fusing.
+            ds = ds.map_batches(map_fn1, batch_size=None, num_cpus=1, concurrency=1)
+            ds = ds.map_batches(map_fn2, batch_size=None, num_cpus=1.1, concurrency=1)
+            ds.take_all()
+
+            backpressure_time = ds._plan.stats().extra_metrics[metric_name]
+            assert backpressure_time > 0
+
 
 class TestStreamOutputBackpressurePolicy(unittest.TestCase):
     """Tests for StreamOutputBackpressurePolicy."""