1. More comprehensive after-all handling. Specifically, the pass now …

…handles after-alls with non-zero operands. 2. Add a case for merging two TupleOrToken labels. This case can arise when merging labels for multiple conditional branch computations as they often return tuples. Added tests for both these fixes. PiperOrigin-RevId: 620859190
tensorflow · Apr 2, 2024 · 23b5b50 · 23b5b50
1 parent fb58bab
commit 23b5b50
Show file tree

Hide file tree

Showing 30 changed files with 484 additions and 43 deletions.
diff --git a/tensorflow/compiler/mlir/quantization/tensorflow/passes/convert_tpu_model_to_cpu.td b/tensorflow/compiler/mlir/quantization/tensorflow/passes/convert_tpu_model_to_cpu.td
@@ -26,7 +26,7 @@ def GetBatchFunctionOpArgOperands:
 // because `TF_BatchFunctionOp` doesn't have the `CallOpInterface` trait.
 def ReplaceBatchFunctionOpToPartitionedCallOp : Pat<
   (TF_BatchFunctionOp:$src_op_res
-      $_, $_, $f, $_, $_, $_, $_, $_, $_, $_, $_, $_, $_, $_, $_, $_),
+      $_, $_, $f, $_, $_, $_, $_, $_, $_, $_, $_, $_, $_, $_, $_, $_, $_),
   (TF_PartitionedCallOp
       (GetBatchFunctionOpArgOperands $src_op_res),
       $f,

diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td
@@ -1136,6 +1136,7 @@ to be batched.}]>:$captured_tensors,
     DefaultValuedOptionalAttr<I64Attr, "0">:$low_priority_batch_timeout_micros,
     DefaultValuedOptionalAttr<I64ArrayAttr, "{}">:$low_priority_allowed_batch_sizes,
     DefaultValuedOptionalAttr<I64Attr, "0">:$low_priority_max_enqueued_batches,
+    DefaultValuedOptionalAttr<TF_AnyStrAttrOf<["low_priority_padding_with_max_batch_size", "low_priority_padding_with_next_allowed_batch_size"]>, "\"low_priority_padding_with_max_batch_size\"">:$mixed_priority_policy,
     DefaultValuedOptionalAttr<BoolAttr, "false">:$enable_large_batch_splitting
   );
 

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
@@ -703,11 +703,13 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core/kernels/batching_util:adaptive_shared_batch_scheduler",
         "//tensorflow/core/kernels/batching_util:batch_resource_base",
+        "//tensorflow/core/kernels/batching_util:batch_scheduler_hdrs",
         "//tensorflow/core/kernels/batching_util:bounded_executor",
         "//tensorflow/core/kernels/batching_util:concat_split_util",
         "//tensorflow/core/kernels/batching_util:periodic_function_dynamic",
         "//tensorflow/core/kernels/batching_util:warmup",
         "//tensorflow/core/platform:numbers",
+        "@com_google_absl//absl/status",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
         "@local_tsl//tsl/platform:types",
@@ -1662,8 +1664,10 @@ tf_cc_test(
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
         "//tensorflow/core/framework:types_proto_cc",
+        "//tensorflow/core/kernels/batching_util:batch_scheduler_hdrs",
         "//tensorflow/core/kernels/batching_util:warmup",
         "//tensorflow/core/platform:status",
+        "//tensorflow/core/platform:status_matchers",
         "//tensorflow/core/protobuf:for_core_protos_cc",
         "//tensorflow/core/public:version",
         "@com_google_absl//absl/status",

diff --git a/tensorflow/core/kernels/batch_kernels.cc b/tensorflow/core/kernels/batch_kernels.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <string>
 #include <utility>
 
+#include "absl/status/status.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
@@ -36,6 +37,7 @@ limitations under the License.
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h"
 #include "tensorflow/core/kernels/batching_util/batch_resource_base.h"
+#include "tensorflow/core/kernels/batching_util/batch_scheduler.h"
 #include "tensorflow/core/kernels/batching_util/bounded_executor.h"
 #include "tensorflow/core/kernels/batching_util/concat_split_util.h"
 #include "tensorflow/core/kernels/batching_util/periodic_function.h"
@@ -49,6 +51,8 @@ limitations under the License.
 #include "tensorflow/core/platform/numbers.h"
 #include "tensorflow/core/platform/status.h"
 #include "tensorflow/core/platform/threadpool.h"
+#include "tsl/platform/errors.h"
+#include "tsl/platform/statusor.h"
 
 namespace tensorflow {
 namespace {
@@ -295,6 +299,8 @@ BatchFunctionKernel::BatchFunctionKernel(OpKernelConstruction* c)
                                &low_priority_allowed_batch_sizes_));
   OP_REQUIRES_OK(c, c->GetAttr("low_priority_max_enqueued_batches",
                                &low_priority_max_enqueued_batches_));
+  OP_REQUIRES_OK(c,
+                 c->GetAttr("mixed_priority_policy", &mixed_priority_policy_));
 
   OP_REQUIRES_OK(c, c->GetAttr("f", &func_));
 
@@ -416,6 +422,16 @@ void BatchFunctionKernel::ComputeAsync(OpKernelContext* c, DoneCallback done) {
   } else {
     creator = [this,
                session_metadata = c->session_metadata()](BatchResource** r) {
+      // TODO(b/316379576): Remove this check when batching policy is used.
+      TF_ASSIGN_OR_RETURN(
+          serving::MixedPriorityBatchingPolicy batching_policy,
+          serving::GetMixedPriorityBatchingPolicy(mixed_priority_policy_));
+      if (batching_policy != serving::MixedPriorityBatchingPolicy::
+                                 kLowPriorityPaddingWithMaxBatchSize)
+        return absl::InvalidArgumentError(
+            "mixed_priority_policy must be "
+            "low_priority_padding_with_max_batch_size");
+
       std::unique_ptr<BatchResource> new_resource;
       TF_RETURN_IF_ERROR(BatchResource::Create(
           /*has_process_batch_function=*/true, num_batch_threads_,

diff --git a/tensorflow/core/kernels/batch_kernels.h b/tensorflow/core/kernels/batch_kernels.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_CORE_KERNELS_BATCH_KERNELS_H_
 
 #include <cstdint>
+#include <string>
 
 #include "absl/strings/string_view.h"
 #include "absl/types/optional.h"
@@ -109,6 +110,7 @@ class BatchFunctionKernel : public AsyncOpKernel {
   int32 low_priority_batch_timeout_micros_;
   int32 low_priority_max_enqueued_batches_;
   std::vector<int32> low_priority_allowed_batch_sizes_;
+  std::string mixed_priority_policy_;
   NameAttrList func_;
   absl::optional<FunctionLibraryRuntime::Handle> fhandle_ TF_GUARDED_BY(mu_);
   bool enable_large_batch_splitting_ = false;

diff --git a/tensorflow/core/kernels/batch_kernels_test.cc b/tensorflow/core/kernels/batch_kernels_test.cc
@@ -20,9 +20,11 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include <gmock/gmock.h>
 #include <gtest/gtest.h>
 #include "absl/status/status.h"
 #include "absl/strings/match.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/rendezvous_mgr.h"
 #include "tensorflow/core/framework/device_factory.h"
 #include "tensorflow/core/framework/function.h"
@@ -31,10 +33,12 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_testutil.h"
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/kernels/batch_kernel_test_util.h"
+#include "tensorflow/core/kernels/batching_util/batch_scheduler.h"
 #include "tensorflow/core/kernels/batching_util/warmup.h"
 #include "tensorflow/core/kernels/ops_testutil.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/status.h"
+#include "tensorflow/core/platform/status_matchers.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/protobuf/config.pb.h"
 #include "tensorflow/core/public/version.h"
@@ -90,6 +94,7 @@ class BatchFunctionTestState : public SharedBatchFunctionTestState {
   // Init test fixture with a batch kernel instance. The caller guarantees that
   // the device pointer is valid throughout the life of this class.
   absl::Status Init(Device *device, bool enable_low_priority_queue,
+                    absl::string_view mixed_priority_policy,
                     int64_t expected_batch_size) {
     // Override the per-test/per-op device with a given device so that it can
     // be shared between ops.
@@ -134,6 +139,7 @@ class BatchFunctionTestState : public SharedBatchFunctionTestState {
                                                            : std::vector<int>())
                            .Attr("low_priority_max_enqueued_batches",
                                  enable_low_priority_queue ? 2 : 0)
+                           .Attr("mixed_priority_policy", mixed_priority_policy)
                            .Attr("Tin", {DataType::DT_INT64})
                            .Input(inputs)
                            .Attr("Tcaptured", std::vector<DataType>{})
@@ -176,9 +182,10 @@ TEST_P(BatchFunctionTest, BatchingWorksWithoutCriticality) {
 
         BatchFunctionTestState test_state;
         test_state.set_session_metadata(session_metadata);
-        TF_ASSERT_OK(test_state.Init(cpu_device_.get(),
-                                     enable_low_priority_queue,
-                                     /*expected_batch_size=*/4));
+        TF_ASSERT_OK(test_state.Init(
+            cpu_device_.get(), enable_low_priority_queue,
+            serving::kLowPriorityPaddingWithMaxBatchSizeAttrValue,
+            /*expected_batch_size=*/4));
         test_state.AddInputFromList<int64_t>(TensorShape({1, 2}), {123, 456});
         TF_EXPECT_OK(test_state.RunOpKernel());
 
@@ -211,9 +218,10 @@ TEST_P(BatchFunctionTest, PaddingWorksWithoutCriticality) {
 
         BatchFunctionTestState test_state;
         test_state.set_session_metadata(session_metadata);
-        TF_ASSERT_OK(test_state.Init(cpu_device_.get(),
-                                     enable_low_priority_queue,
-                                     /*expected_batch_size=*/4));
+        TF_ASSERT_OK(test_state.Init(
+            cpu_device_.get(), enable_low_priority_queue,
+            serving::kLowPriorityPaddingWithMaxBatchSizeAttrValue,
+            /*expected_batch_size=*/4));
         test_state.AddInputFromList<int64_t>(TensorShape({1, 2}), {123, 456});
         TF_EXPECT_OK(test_state.RunOpKernel());
 
@@ -249,9 +257,10 @@ TEST_P(BatchFunctionTest, LowPriorityTaskPaddingHighPriorityBatch) {
 
         BatchFunctionTestState test_state;
         test_state.set_session_metadata(session_metadata);
-        TF_ASSERT_OK(test_state.Init(cpu_device_.get(),
-                                     enable_low_priority_queue,
-                                     /*expected_batch_size=*/4));
+        TF_ASSERT_OK(test_state.Init(
+            cpu_device_.get(), enable_low_priority_queue,
+            serving::kLowPriorityPaddingWithMaxBatchSizeAttrValue,
+            /*expected_batch_size=*/4));
         test_state.AddInputFromList<int64_t>(TensorShape({1, 2}), {123, 456});
         TF_EXPECT_OK(test_state.RunOpKernel());
 
@@ -271,9 +280,10 @@ TEST_P(BatchFunctionTest, LowPriorityTaskPaddingHighPriorityBatch) {
 
         BatchFunctionTestState test_state;
         test_state.set_session_metadata(session_metadata);
-        TF_ASSERT_OK(test_state.Init(cpu_device_.get(),
-                                     enable_low_priority_queue,
-                                     /*expected_batch_size=*/4));
+        TF_ASSERT_OK(test_state.Init(
+            cpu_device_.get(), enable_low_priority_queue,
+            serving::kLowPriorityPaddingWithMaxBatchSizeAttrValue,
+            /*expected_batch_size=*/4));
         test_state.AddInputFromList<int64_t>(TensorShape({1, 2}), {234, 567});
         TF_EXPECT_OK(test_state.RunOpKernel());
 
@@ -308,8 +318,10 @@ TEST_P(BatchFunctionTest,
 
       BatchFunctionTestState test_state;
       test_state.set_session_metadata(session_metadata);
-      TF_ASSERT_OK(test_state.Init(cpu_device_.get(), enable_low_priority_queue,
-                                   /*expected_batch_size=*/4));
+      TF_ASSERT_OK(
+          test_state.Init(cpu_device_.get(), enable_low_priority_queue,
+                          serving::kLowPriorityPaddingWithMaxBatchSizeAttrValue,
+                          /*expected_batch_size=*/4));
       test_state.AddInputFromList<int64_t>(TensorShape({1, 2}), {123, 456});
       TF_EXPECT_OK(test_state.RunOpKernel());
 
@@ -327,8 +339,10 @@ TEST_P(BatchFunctionTest,
 
       BatchFunctionTestState test_state;
       test_state.set_session_metadata(session_metadata);
-      TF_ASSERT_OK(test_state.Init(cpu_device_.get(), enable_low_priority_queue,
-                                   /*expected_batch_size=*/4));
+      TF_ASSERT_OK(
+          test_state.Init(cpu_device_.get(), enable_low_priority_queue,
+                          serving::kLowPriorityPaddingWithMaxBatchSizeAttrValue,
+                          /*expected_batch_size=*/4));
       test_state.AddInputFromList<int64_t>(TensorShape({1, 2}), {234, 567});
       TF_EXPECT_OK(test_state.RunOpKernel());
 
@@ -366,9 +380,11 @@ TEST_F(BatchFunctionTest, LowPriorityOnlyBatchAtMaxLowPriorityBatchSize) {
 
         BatchFunctionTestState test_state;
         test_state.set_session_metadata(session_metadata);
-        TF_ASSERT_OK(test_state.Init(cpu_device_.get(),
-                                     /*enable_low_priority_queue=*/true,
-                                     /*expected_batch_size=*/8));
+        TF_ASSERT_OK(test_state.Init(
+            cpu_device_.get(),
+            /*enable_low_priority_queue=*/true,
+            serving::kLowPriorityPaddingWithMaxBatchSizeAttrValue,
+            /*expected_batch_size=*/8));
         test_state.AddInputFromList<int64_t>(TensorShape({1, 2}), {234, 567});
         TF_EXPECT_OK(test_state.RunOpKernel());
 
@@ -402,9 +418,11 @@ TEST_F(BatchFunctionTest, LowPriorityBatchPaddedToLowPriorityAllowedBatchSize) {
 
         BatchFunctionTestState test_state;
         test_state.set_session_metadata(session_metadata);
-        TF_ASSERT_OK(test_state.Init(cpu_device_.get(),
-                                     /*enable_low_priority_queue=*/true,
-                                     /*expected_batch_size=*/8));
+        TF_ASSERT_OK(test_state.Init(
+            cpu_device_.get(),
+            /*enable_low_priority_queue=*/true,
+            serving::kLowPriorityPaddingWithMaxBatchSizeAttrValue,
+            /*expected_batch_size=*/8));
         test_state.AddInputFromList<int64_t>(TensorShape({1, 2}), {234, 567});
         TF_EXPECT_OK(test_state.RunOpKernel());
 
@@ -420,6 +438,27 @@ TEST_F(BatchFunctionTest, LowPriorityBatchPaddedToLowPriorityAllowedBatchSize) {
 }
 #endif
 
+// Checks that a kernel configured with the non-default
+// `kLowPriorityPaddingWithNextAllowedBatchSizeAttrValue` policy initializes
+// successfully but reports InvalidArgument once the op is actually run.
+TEST_F(BatchFunctionTest, NonDefaultBatchingPolicyNotAllowed) {
+  SessionMetadata metadata;
+  metadata.set_name("test_model");
+  metadata.set_version(123);
+
+  BatchFunctionTestState state;
+  state.set_session_metadata(metadata);
+
+  // Building the kernel must succeed; the policy is only rejected at run time.
+  const absl::Status init_status = state.Init(
+      cpu_device_.get(),
+      /*enable_low_priority_queue=*/true,
+      serving::kLowPriorityPaddingWithNextAllowedBatchSizeAttrValue,
+      /*expected_batch_size=*/1);
+  TF_ASSERT_OK(init_status);
+
+  state.AddInputFromList<int64_t>(TensorShape({1, 2}), {234, 567});
+  const auto run_status = state.RunOpKernel();
+  EXPECT_THAT(
+      run_status,
+      testing::StatusIs(
+          absl::StatusCode::kInvalidArgument,
+          ::testing::HasSubstr("mixed_priority_policy must be "
+                               "low_priority_padding_with_max_batch_size")));
+}
+
 class BatchFunctionKernelParallelWarmupTestState
     : public SharedBatchFunctionTestState {
  public:

diff --git a/tensorflow/core/kernels/batching_util/BUILD b/tensorflow/core/kernels/batching_util/BUILD
@@ -100,16 +100,23 @@ cc_library(
         "//tensorflow/core/lib/core:status",
         "//tensorflow/core/platform:thread_annotations",
         "//tensorflow/core/profiler/lib:traceme",
+        "@com_google_absl//absl/status:statusor",
+        "@com_google_absl//absl/strings:string_view",
         "@local_tsl//tsl/platform:criticality",
     ],
 )
 
 cc_library(
     name = "batch_scheduler",
+    srcs = ["batch_scheduler.cc"],  # Defines GetMixedPriorityBatchingPolicy().
     hdrs = ["batch_scheduler.h"],
     deps = [
         "//tensorflow/core:lib",
         "//tensorflow/core/profiler/lib:traceme",
+        "@com_google_absl//absl/status",  # absl::InvalidArgumentError in batch_scheduler.cc.
+        "@com_google_absl//absl/status:statusor",  # absl::StatusOr return type.
+        "@com_google_absl//absl/strings:str_format",  # absl::StrFormat in batch_scheduler.cc.
+        "@com_google_absl//absl/strings:string_view",  # absl::string_view attr values.
         "@local_tsl//tsl/platform:criticality",
     ],
 )
@@ -134,6 +141,8 @@ tf_cc_test(
         "//tensorflow/core:lib",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "//tensorflow/core/platform:status_matchers",
+        "@com_google_absl//absl/status",
         "@com_google_googletest//:gtest_main",
     ],
 )

diff --git a/tensorflow/core/kernels/batching_util/batch_scheduler.cc b/tensorflow/core/kernels/batching_util/batch_scheduler.cc
@@ -0,0 +1,40 @@
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/batching_util/batch_scheduler.h"
+
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/string_view.h"
+
+namespace tensorflow {
+namespace serving {
+
+// Translates the string form of a mixed-priority batching policy into its
+// `MixedPriorityBatchingPolicy` enumerator. Any string other than the two
+// known attribute values yields an InvalidArgument error naming the input.
+absl::StatusOr<MixedPriorityBatchingPolicy> GetMixedPriorityBatchingPolicy(
+    absl::string_view attr_value) {
+  using Policy = MixedPriorityBatchingPolicy;
+
+  if (attr_value == kLowPriorityPaddingWithMaxBatchSizeAttrValue)
+    return Policy::kLowPriorityPaddingWithMaxBatchSize;
+  if (attr_value == kLowPriorityPaddingWithNextAllowedBatchSizeAttrValue)
+    return Policy::kLowPriorityPaddingWithNextAllowedBatchSize;
+
+  // Unrecognized value: report it back to the caller verbatim.
+  return absl::InvalidArgumentError(absl::StrFormat(
+      "Unknown mixed priority batching policy: %s", attr_value));
+}
+
+}  // namespace serving
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/batching_util/batch_scheduler.h b/tensorflow/core/kernels/batching_util/batch_scheduler.h
@@ -38,6 +38,8 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/status/statusor.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/notification.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/logging.h"
@@ -51,6 +53,19 @@ limitations under the License.
 namespace tensorflow {
 namespace serving {
 
+// String values accepted for a mixed-priority batching policy. Each one maps
+// to the like-named `MixedPriorityBatchingPolicy` enumerator through
+// `GetMixedPriorityBatchingPolicy()`.
+//
+// These are `inline constexpr` (rather than plain `const`) so that every
+// translation unit including this header shares one definition that is
+// guaranteed to be initialized at compile time.
+inline constexpr absl::string_view
+    kLowPriorityPaddingWithMaxBatchSizeAttrValue =
+        "low_priority_padding_with_max_batch_size";
+inline constexpr absl::string_view
+    kLowPriorityPaddingWithNextAllowedBatchSizeAttrValue =
+        "low_priority_padding_with_next_allowed_batch_size";
+
+// Enumerated form of the mixed-priority batching policy strings.
+enum class MixedPriorityBatchingPolicy {
+  // Spelled "low_priority_padding_with_max_batch_size" as a string.
+  kLowPriorityPaddingWithMaxBatchSize,
+  // Spelled "low_priority_padding_with_next_allowed_batch_size" as a string.
+  kLowPriorityPaddingWithNextAllowedBatchSize
+};
+
+// Converts `attr_value` into the matching `MixedPriorityBatchingPolicy`.
+// Returns an InvalidArgument error when `attr_value` is not one of the
+// `k...AttrValue` strings declared above.
+absl::StatusOr<MixedPriorityBatchingPolicy> GetMixedPriorityBatchingPolicy(
+    absl::string_view attr_value);
+
 // The abstract superclass for a unit of work to be done as part of a batch.
 //
 // An implementing subclass typically contains (or points to):