[data] Enable per-op resource reservation #43171

Merged: Feb 27, 2024
Commits (83)
a454087 integrate streaming output backpressure (raulchen, Feb 14, 2024)
a90c82d integrate scheduling (raulchen, Feb 14, 2024)
45aace2 default streaming gen buffer (raulchen, Feb 14, 2024)
ac4ee7a fix remote args (raulchen, Feb 14, 2024)
c8e8235 enable (raulchen, Feb 14, 2024)
7eef1c3 fix (raulchen, Feb 14, 2024)
0c57088 streaming backpressure based on size (raulchen, Feb 14, 2024)
d2b16cd fix (raulchen, Feb 14, 2024)
276c7c6 comment out (raulchen, Feb 14, 2024)
21e80d0 reduce streaming gen buffer to 2 blocks (raulchen, Feb 17, 2024)
b00f347 fix obj_store_mem_max_pending_output_per_task (raulchen, Feb 17, 2024)
35d0e24 increase default obj memory to 50% (raulchen, Feb 17, 2024)
3a05243 print usage in progress bar (raulchen, Feb 17, 2024)
6e69a89 separate budgets (raulchen, Feb 17, 2024)
df2e4be simplify (raulchen, Feb 17, 2024)
a380f72 refine code (raulchen, Feb 21, 2024)
8fab9b7 Merge branch 'master' into enable-memory-reservation (raulchen, Feb 21, 2024)
cbf29fd only assign running tasks (raulchen, Feb 21, 2024)
d555014 fix (raulchen, Feb 21, 2024)
85eec48 handle fractional remaining (raulchen, Feb 21, 2024)
99d778f Reserve enough memory for GPU actor tasks (bveeramani, Feb 21, 2024)
56256fb minor fix (raulchen, Feb 21, 2024)
922440d clean up debug logging (raulchen, Feb 21, 2024)
0a536b7 refine implementation & add comment (raulchen, Feb 21, 2024)
fe6e822 rename (raulchen, Feb 21, 2024)
0c0c4b0 remove StreamingOutputBackpressurePolicy (raulchen, Feb 21, 2024)
a492da7 Merge branch 'master' into enable-memory-reservation (raulchen, Feb 21, 2024)
70a75b3 fix merge (raulchen, Feb 21, 2024)
c756713 clean up max_bytes_to_read (raulchen, Feb 21, 2024)
04fdc30 refine resource usage string (raulchen, Feb 21, 2024)
b39a741 refine comments (raulchen, Feb 21, 2024)
18744ab fix (raulchen, Feb 22, 2024)
d9af986 clear reservation when op finishes (raulchen, Feb 22, 2024)
4b6139c do not consider autoscaling (raulchen, Feb 22, 2024)
29ef8ea fix (raulchen, Feb 22, 2024)
081f501 comment (raulchen, Feb 22, 2024)
217b546 comment (raulchen, Feb 22, 2024)
a122f29 remove use_runtime_metrics_scheduling (raulchen, Feb 22, 2024)
186aa62 minor renames (raulchen, Feb 22, 2024)
e431d31 Merge branch 'master' into enable-memory-reservation (raulchen, Feb 22, 2024)
678bf23 remove python/ray/data/tests/test_runtime_metrics_scheduling.py (raulchen, Feb 22, 2024)
1e60ea9 lint (raulchen, Feb 22, 2024)
4c04e8d refine (raulchen, Feb 22, 2024)
5cc517f debug usage_str (raulchen, Feb 22, 2024)
c3e39c7 remove streaming output backpressure test (raulchen, Feb 22, 2024)
0018462 fix deadlock when global limit not enough (raulchen, Feb 23, 2024)
15a9892 idle detection (raulchen, Feb 23, 2024)
5636ff8 refine e2e tests (raulchen, Feb 23, 2024)
790b400 lint (raulchen, Feb 23, 2024)
df3b003 refine (raulchen, Feb 23, 2024)
c9d0b68 refine (raulchen, Feb 23, 2024)
135c6d0 fix process_completed_tasks (raulchen, Feb 23, 2024)
bb5273c fix tests (raulchen, Feb 23, 2024)
d1b0857 comments (raulchen, Feb 23, 2024)
837575c fix python/ray/data/tests/test_executor_resource_management.py (raulchen, Feb 23, 2024)
bbbc9be update TestResourceManager (raulchen, Feb 23, 2024)
cbd76f6 rename (raulchen, Feb 23, 2024)
97024cb fix (raulchen, Feb 23, 2024)
1a6af68 rename (raulchen, Feb 23, 2024)
be2e93e fix (raulchen, Feb 24, 2024)
f5199ac test reserve min (raulchen, Feb 24, 2024)
39a13b0 util func for mock map op (raulchen, Feb 24, 2024)
1c2b777 lint (raulchen, Feb 24, 2024)
3c93338 refactor idle detection (raulchen, Feb 24, 2024)
7745bff add table explanation (raulchen, Feb 24, 2024)
b7fbd18 lint (raulchen, Feb 24, 2024)
47e121e comment (raulchen, Feb 24, 2024)
a372102 comment (raulchen, Feb 26, 2024)
a3aba51 fix test_size_estimation.py (raulchen, Feb 26, 2024)
2f8c77d fix (raulchen, Feb 26, 2024)
8c13be9 account non-map downstream ops to upstream map op (raulchen, Feb 26, 2024)
125506d refine (raulchen, Feb 26, 2024)
73efa2b fix (raulchen, Feb 26, 2024)
1ffab5d comment (raulchen, Feb 26, 2024)
92a515d resnet test (raulchen, Feb 26, 2024)
bae892e update comments (raulchen, Feb 26, 2024)
5358ff5 reserve min memory (raulchen, Feb 26, 2024)
699e711 lint (raulchen, Feb 27, 2024)
6c72db1 move e2e tests (raulchen, Feb 27, 2024)
bef8277 loosen condition (raulchen, Feb 27, 2024)
a45edd9 lint (raulchen, Feb 27, 2024)
275ab66 lint (raulchen, Feb 27, 2024)
e2c2bdd fix (raulchen, Feb 27, 2024)
24 changes: 8 additions & 16 deletions python/ray/data/BUILD

@@ -450,14 +450,6 @@ py_test(
     deps = ["//:ray_lib", ":conftest"],
 )
 
-py_test(
-    name = "test_runtime_metrics_scheduling",
-    size = "small",
-    srcs = ["tests/test_runtime_metrics_scheduling.py"],
-    tags = ["team:data", "exclusive"],
-    deps = ["//:ray_lib", ":conftest"],
-)
-
 py_test(
     name = "test_size_estimation",
     size = "medium",
@@ -522,14 +514,6 @@ py_test(
     deps = ["//:ray_lib", ":conftest"],
 )
 
-py_test(
-    name = "test_streaming_backpressure_edge_case",
-    size = "medium",
-    srcs = ["tests/test_streaming_backpressure_edge_case.py"],
-    tags = ["team:data", "exclusive"],
-    deps = ["//:ray_lib", ":conftest"],
-)
-
 py_test(
     name = "test_transform_pyarrow",
     size = "small",
@@ -561,3 +545,11 @@ py_test(
     tags = ["team:data", "exclusive"],
     deps = ["//:ray_lib", ":conftest"],
 )
+
+py_test(
+    name = "test_backpressure_e2e",
+    size = "medium",
+    srcs = ["tests/test_backpressure_e2e.py"],
+    tags = ["team:data", "exclusive"],
+    deps = ["//:ray_lib", ":conftest"],
+)
@@ -3,15 +3,15 @@
 import ray
 from .backpressure_policy import BackpressurePolicy
 from .concurrency_cap_backpressure_policy import ConcurrencyCapBackpressurePolicy
-from .streaming_output_backpressure_policy import StreamingOutputBackpressurePolicy
 
 if TYPE_CHECKING:
     from ray.data._internal.execution.streaming_executor_state import Topology
 
 # Default enabled backpressure policies and its config key.
 # Use `DataContext.set_config` to config it.
-# TODO(hchen): Enable StreamingOutputBackpressurePolicy by default.
-ENABLED_BACKPRESSURE_POLICIES = [ConcurrencyCapBackpressurePolicy]
+ENABLED_BACKPRESSURE_POLICIES = [
+    ConcurrencyCapBackpressurePolicy,
+]
 ENABLED_BACKPRESSURE_POLICIES_CONFIG_KEY = "backpressure_policies.enabled"
 
 
@@ -27,7 +27,6 @@ def get_backpressure_policies(topology: "Topology"):
 __all__ = [
     "BackpressurePolicy",
     "ConcurrencyCapBackpressurePolicy",
-    "StreamingOutputBackpressurePolicy",
    "ENABLED_BACKPRESSURE_POLICIES_CONFIG_KEY",
    "get_backpressure_policies",
 ]
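
Note: the comment in the hunk above points at `DataContext.set_config` and the "backpressure_policies.enabled" key as the way to change which policies are enabled. A minimal sketch of overriding the default, assuming the module path implied by this package's own imports (verify against your Ray version):

from ray.data.context import DataContext
from ray.data._internal.execution.backpressure_policy import (
    ENABLED_BACKPRESSURE_POLICIES_CONFIG_KEY,
    ConcurrencyCapBackpressurePolicy,
)

# Replace the default policy list for the current process.
DataContext.get_current().set_config(
    ENABLED_BACKPRESSURE_POLICIES_CONFIG_KEY,
    [ConcurrencyCapBackpressurePolicy],
)
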
@@ -1,11 +1,11 @@
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Dict
+from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
     from ray.data._internal.execution.interfaces.physical_operator import (
         PhysicalOperator,
     )
-    from ray.data._internal.execution.streaming_executor_state import OpState, Topology
+    from ray.data._internal.execution.streaming_executor_state import Topology
 
 
 class BackpressurePolicy(ABC):
@@ -15,24 +15,6 @@ class BackpressurePolicy(ABC):
     def __init__(self, topology: "Topology"):
         ...
 
-    def calculate_max_blocks_to_read_per_op(
-        self, topology: "Topology"
-    ) -> Dict["OpState", int]:
-        """Determine how many blocks of data we can read from each operator.
-        The `DataOpTask`s of the operators will stop reading blocks when the limit is
-        reached. Then the execution of these tasks will be paused when the streaming
-        generator backpressure threshold is reached.
-        Used in `streaming_executor_state.py::process_completed_tasks()`.
-
-        Returns: A dict mapping from each operator's OpState to the desired number of
-        blocks to read. For operators that are not in the dict, all available blocks
-        will be read.
-
-        Note: Only one backpressure policy that implements this method can be enabled
-        at a time.
-        """
-        return {}
-
     def can_add_input(self, op: "PhysicalOperator") -> bool:
         """Determine if we can add a new input to the operator. If returns False, the
         operator will be backpressured and will not be able to run new tasks.

This file was deleted.
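
With `calculate_max_blocks_to_read_per_op` removed from the base class above, `can_add_input` is the only hook left for a policy to implement. As a hypothetical illustration (this subclass is invented for this note, not part of the PR; the import path is assumed from the package layout shown in this diff), a policy that never backpressures would look like:

from ray.data._internal.execution.backpressure_policy.backpressure_policy import (
    BackpressurePolicy,
)

class NoOpBackpressurePolicy(BackpressurePolicy):
    """Hypothetical policy that never backpressures any operator."""

    def __init__(self, topology):
        self._topology = topology

    def can_add_input(self, op) -> bool:
        # True means the operator may continue to launch new tasks.
        return True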

@@ -33,6 +33,18 @@ def inf(cls) -> "ExecutionResources":
         """Returns an ExecutionResources object with infinite resources."""
         return ExecutionResources(float("inf"), float("inf"), float("inf"))
 
+    def is_zero(self) -> bool:
+        """Returns True if all resources are zero."""
+        return self.cpu == 0.0 and self.gpu == 0.0 and self.object_store_memory == 0
+
+    def is_non_negative(self) -> bool:
+        """Returns True if all resources are non-negative."""
+        return (
+            (self.cpu is None or self.cpu >= 0)
+            and (self.gpu is None or self.gpu >= 0)
+            and (self.object_store_memory is None or self.object_store_memory >= 0)
+        )
+
     def object_store_memory_str(self) -> str:
         """Returns a human-readable string for the object store memory field."""
         if self.object_store_memory is None:
@@ -92,13 +104,24 @@ def max(self, other: "ExecutionResources") -> "ExecutionResources":
 
     def min(self, other: "ExecutionResources") -> "ExecutionResources":
         """Returns the minimum for each resource type."""
+        cpu1 = self.cpu if self.cpu is not None else float("inf")
+        cpu2 = other.cpu if other.cpu is not None else float("inf")
+        gpu1 = self.gpu if self.gpu is not None else float("inf")
+        gpu2 = other.gpu if other.gpu is not None else float("inf")
+        object_store_memory1 = (
+            self.object_store_memory
+            if self.object_store_memory is not None
+            else float("inf")
+        )
+        object_store_memory2 = (
+            other.object_store_memory
+            if other.object_store_memory is not None
+            else float("inf")
+        )
         return ExecutionResources(
-            cpu=min(self.cpu or float("inf"), other.cpu or float("inf")),
-            gpu=min(self.gpu or float("inf"), other.gpu or float("inf")),
-            object_store_memory=min(
-                self.object_store_memory or float("inf"),
-                other.object_store_memory or float("inf"),
-            ),
+            cpu=min(cpu1, cpu2),
+            gpu=min(gpu1, gpu2),
+            object_store_memory=min(object_store_memory1, object_store_memory2),
         )
 
     def satisfies_limit(self, limit: "ExecutionResources") -> bool:
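
The `min` rewrite above is a behavior fix, not just a cleanup: `x or float("inf")` coalesces every falsy value, so a resource limit of 0 was silently treated like None and replaced with infinity. A self-contained sketch of the difference:

INF = float("inf")

def old_min(a, b):
    # Falsy-coalescing: 0 and 0.0 are swallowed along with None.
    return min(a or INF, b or INF)

def new_min(a, b):
    # Explicit None checks keep 0 as a real value.
    a = a if a is not None else INF
    b = b if b is not None else INF
    return min(a, b)

print(old_min(0.0, 2.0))  # 2.0 -- the zero limit is lost
print(new_min(0.0, 2.0))  # 0.0 -- the zero limit is respected
print(old_min(None, 2.0))  # 2.0 -- None handling is unchanged
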
@@ -65,16 +65,16 @@ def __init__(
     def get_waitable(self) -> ObjectRefGenerator:
         return self._streaming_gen
 
-    def on_data_ready(self, max_blocks_to_read: Optional[int]) -> int:
+    def on_data_ready(self, max_bytes_to_read: Optional[int]) -> int:
         """Callback when data is ready to be read from the streaming generator.
 
         Args:
-            max_blocks_to_read: Max number of blocks to read. If None, all available
+            max_bytes_to_read: Max bytes of blocks to read. If None, all available
                 will be read.
         Returns: The number of blocks read.
         """
-        num_blocks_read = 0
-        while max_blocks_to_read is None or num_blocks_read < max_blocks_to_read:
+        bytes_read = 0
+        while max_bytes_to_read is None or bytes_read < max_bytes_to_read:
             try:
                 block_ref = self._streaming_gen._next_sync(0)
                 if block_ref.is_nil():
@@ -103,8 +103,8 @@ def on_data_ready(self, max_blocks_to_read: Optional[int]) -> int:
                 self._output_ready_callback(
                     RefBundle([(block_ref, meta)], owns_blocks=True)
                 )
-                num_blocks_read += 1
-        return num_blocks_read
+                bytes_read += meta.size_bytes
+        return bytes_read
 
 
 class MetadataOpTask(OpTask):
@@ -386,11 +386,16 @@ def base_resource_usage(self) -> ExecutionResources:
         """
         return ExecutionResources()
 
-    def incremental_resource_usage(self) -> ExecutionResources:
+    def incremental_resource_usage(
+        self, consider_autoscaling=True
+    ) -> ExecutionResources:
         """Returns the incremental resources required for processing another input.
 
         For example, an operator that launches a task per input could return
         ExecutionResources(cpu=1) as its incremental usage.
+
+        Args:
+            consider_autoscaling: Whether to consider the possibility of autoscaling.
         """
         return ExecutionResources()
 
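
Switching `on_data_ready` from a block budget to a byte budget makes the backpressure signal track actual object-store usage rather than block counts, since block sizes can vary widely. A simplified, self-contained sketch of the same loop shape (the generator here is a stand-in for Ray's streaming generator, not the real implementation):

from typing import Iterator, Optional, Tuple

def read_ready(
    gen: Iterator[Tuple[bytes, int]],  # yields (block payload, size in bytes)
    max_bytes_to_read: Optional[int],
    out: list,
) -> int:
    """Read blocks until the byte budget is exhausted; return bytes read."""
    bytes_read = 0
    while max_bytes_to_read is None or bytes_read < max_bytes_to_read:
        try:
            block, size_bytes = next(gen)
        except StopIteration:
            break
        out.append(block)  # hand the block to downstream processing
        bytes_read += size_bytes
    return bytes_read
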
@@ -326,10 +326,12 @@ def current_processor_usage(self) -> ExecutionResources:
             gpu=self._ray_remote_args.get("num_gpus", 0) * num_active_workers,
         )
 
-    def incremental_resource_usage(self) -> ExecutionResources:
+    def incremental_resource_usage(
+        self, consider_autoscaling=True
+    ) -> ExecutionResources:
         # We would only have nonzero incremental CPU/GPU resources if a new task would
         # require scale-up to run.
-        if self._autoscaling_policy.should_scale_up(
+        if consider_autoscaling and self._autoscaling_policy.should_scale_up(
             num_total_workers=self._actor_pool.num_total_actors(),
             num_running_workers=self._actor_pool.num_running_actors(),
         ):
@@ -345,7 +347,8 @@ def incremental_resource_usage(self) -> ExecutionResources:
         return ExecutionResources(
             cpu=num_cpus,
             gpu=num_gpus,
-            object_store_memory=self._metrics.average_bytes_outputs_per_task,
+            object_store_memory=self._metrics.obj_store_mem_max_pending_output_per_task
+            or 0,
         )
 
     def _extra_metrics(self) -> Dict[str, Any]:
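
The new `consider_autoscaling` flag lets a caller ask for an operator's marginal cost without assuming the actor pool will scale up (see the "do not consider autoscaling" commit above). A usage sketch, with the operator's construction elided:

# `op` is an ActorPoolMapOperator (construction elided).
# Default: per the comment in the diff, CPU/GPU may be nonzero only if
# admitting another input would force the actor pool to scale up.
usage_with_scaleup = op.incremental_resource_usage()
# Ignore potential scale-up; report only the cost against the current pool.
usage_current_pool = op.incremental_resource_usage(consider_autoscaling=False)
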
@@ -396,7 +396,9 @@ def base_resource_usage(self) -> ExecutionResources:
         raise NotImplementedError
 
     @abstractmethod
-    def incremental_resource_usage(self) -> ExecutionResources:
+    def incremental_resource_usage(
+        self, consider_autoscaling=True
+    ) -> ExecutionResources:
         raise NotImplementedError