From 9f03195cecadbd85f0c9a7b2afcb63befe80c327 Mon Sep 17 00:00:00 2001
From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com>
Date: Wed, 30 Jul 2025 15:09:25 -0700
Subject: [PATCH] [ET-VK] 6/n Split dispatches between multiple command
 buffers. Replaced the `encode_execute()` function with
 `invalidate_execute_encoding` and moved the encoding logic into `execute()`.

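This diff removes the `encode_execute()` function. Command buffer encoding
now happens lazily inside `ComputeGraph::execute()`: if the deferred command
list is empty, the execute nodes are encoded into a reusable command buffer
before the deferred commands are submitted. `propagate_resize()` no longer
re-encodes eagerly; when dynamic shapes are expected it calls
`clear_deferred_cmds()` so that the next `execute()` call re-encodes.
Callers (`VulkanBackend` and the tests) therefore no longer call
`encode_execute()` explicitly.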

Differential Revision: [D78581574](https://our.internmc.facebook.com/intern/diff/D78581574/)

[ghstack-poisoned]
---
 backends/vulkan/runtime/VulkanBackend.cpp     |  7 -----
 .../vulkan/runtime/graph/ComputeGraph.cpp     | 23 +++++++-------
 backends/vulkan/runtime/graph/ComputeGraph.h  |  1 -
 .../test/op_tests/choose_qparams_test.cpp     |  2 --
 .../vulkan/test/op_tests/dequantize_test.cpp  |  3 --
 .../vulkan/test/op_tests/quantize_test.cpp    |  3 --
 .../test/op_tests/quantized_linear_test.cpp   |  5 +---
 .../test/op_tests/rotary_embedding_test.cpp   |  1 -
 backends/vulkan/test/op_tests/sdpa_test.cpp   |  1 -
 .../test/op_tests/utils/gen_computegraph.py   |  1 -
 .../vulkan/test/vulkan_compute_api_test.cpp   | 30 ++-----------------
 11 files changed, 15 insertions(+), 62 deletions(-)
diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp
index ecdd4f6d2d4..4ff0f9e93d6 100644
--- a/backends/vulkan/runtime/VulkanBackend.cpp
+++ b/backends/vulkan/runtime/VulkanBackend.cpp
@@ -509,13 +509,6 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
 
     compute_graph->prepack();
 
-    // If dynamic shapes are not expected, then the command buffer only needs to
-    // be encoded once. Otherwise, wait until the first inference to encode the
-    // the command buffer, when actual input shapes are known.
-    if (!compute_graph->graphconfig().expect_dynamic_shapes) {
-      compute_graph->encode_execute();
-    }
-
     return Error::Ok;
   }
 
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp
index ee5621d9c12..a1dd4a287c1 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.cpp
+++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp
@@ -860,21 +860,20 @@ void ComputeGraph::prepack() {
   staging_nbytes_in_cmd_ = 0;
 }
 
-void ComputeGraph::encode_execute() {
-  clear_deferred_cmds();
-  context_->flush();
-  context_->set_cmd(/*reusable = */ true);
+void ComputeGraph::execute() {
+  if (deferred_cmd_list_.empty()) {
+    context_->flush();
+    context_->set_cmd(/*reusable = */ true);
 
-  context_->cmd_reset_querypool();
+    context_->cmd_reset_querypool();
 
-  for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
-    node->encode(this);
-  }
+    for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
+      node->encode(this);
+    }
 
-  deferred_cmd_list_.emplace_back(std::move(context_->extract_cmd()));
-}
+    deferred_cmd_list_.emplace_back(std::move(context_->extract_cmd()));
+  }
 
-void ComputeGraph::execute() {
   submit_deferred_cmds_and_wait();
   execute_count_++;
 }
@@ -898,7 +897,7 @@ void ComputeGraph::propagate_resize() {
   }
   // Only re-encode on resize if dynamic shapes are expected
   if (config_.expect_dynamic_shapes) {
-    encode_execute();
+    clear_deferred_cmds();
   }
 }
 
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h
index 4b1089b0de8..7bac9bf92db 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.h
+++ b/backends/vulkan/runtime/graph/ComputeGraph.h
@@ -892,7 +892,6 @@ class ComputeGraph final {
   // Graph Execution
   //
 
-  void encode_execute();
   void execute();
 
   //
diff --git a/backends/vulkan/test/op_tests/choose_qparams_test.cpp b/backends/vulkan/test/op_tests/choose_qparams_test.cpp
index f45d4f82448..3b1094a1e84 100644
--- a/backends/vulkan/test/op_tests/choose_qparams_test.cpp
+++ b/backends/vulkan/test/op_tests/choose_qparams_test.cpp
@@ -458,7 +458,6 @@ void test_vulkan_choose_qparams_tensor_impl(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   // Run Vulkan choose_qparams_tensor
   graph.copy_into_staging(
@@ -678,7 +677,6 @@ void test_vulkan_choose_qparams_per_token_asymmetric_impl(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   // Run Vulkan choose_qparams_per_token_asymmetric
   graph.copy_into_staging(
diff --git a/backends/vulkan/test/op_tests/dequantize_test.cpp b/backends/vulkan/test/op_tests/dequantize_test.cpp
index 91d49406fbb..9fca2c632d3 100644
--- a/backends/vulkan/test/op_tests/dequantize_test.cpp
+++ b/backends/vulkan/test/op_tests/dequantize_test.cpp
@@ -1140,7 +1140,6 @@ void test_vulkan_dequantize_per_token_impl(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   // Copy input data to GPU
   graph.copy_into_staging(
@@ -1671,7 +1670,6 @@ void test_vulkan_dequantize_per_channel_impl(
 
   graph.prepare();
   graph.prepack();
-  graph.encode_execute();
 
   // Copy input data to GPU
   graph.copy_into_staging(
@@ -2345,7 +2343,6 @@ void test_vulkan_dequantize_per_tensor_tensor_impl(
 
   graph.prepare();
   graph.prepack();
-  graph.encode_execute();
 
   // Run Vulkan dequantize_per_tensor.tensor
   graph.copy_into_staging(
diff --git a/backends/vulkan/test/op_tests/quantize_test.cpp b/backends/vulkan/test/op_tests/quantize_test.cpp
index 43c97071874..86eebcf9b14 100644
--- a/backends/vulkan/test/op_tests/quantize_test.cpp
+++ b/backends/vulkan/test/op_tests/quantize_test.cpp
@@ -931,7 +931,6 @@ void test_vulkan_quantize_per_token_impl(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   // Copy input data to GPU
   graph.copy_into_staging(
@@ -1413,7 +1412,6 @@ void test_vulkan_quantize_per_channel_impl(
 
   graph.prepare();
   graph.prepack();
-  graph.encode_execute();
 
   // Copy input data to GPU
   graph.copy_into_staging(
@@ -2042,7 +2040,6 @@ void test_vulkan_quantize_per_tensor_tensor_impl(
 
   graph.prepare();
   graph.prepack();
-  graph.encode_execute();
 
   // Run Vulkan quantize_per_tensor.tensor
   graph.copy_into_staging(
diff --git a/backends/vulkan/test/op_tests/quantized_linear_test.cpp b/backends/vulkan/test/op_tests/quantized_linear_test.cpp
index 26316344b0e..db95f4a793f 100644
--- a/backends/vulkan/test/op_tests/quantized_linear_test.cpp
+++ b/backends/vulkan/test/op_tests/quantized_linear_test.cpp
@@ -456,7 +456,6 @@ void test_vulkan_linear_qga4w_impl(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   //
   // Run model
@@ -551,7 +550,6 @@ void test_vulkan_linear_qcs4w_impl(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   //
   // Run model
@@ -685,7 +683,6 @@ void test_vulkan_linear_qta8a_qga4w_impl(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   //
   // Run model
@@ -900,4 +897,4 @@ TEST_F(VulkanLinearQTA8AQGA4WTest, test_vulkan_linear_quant_gemv) {
       /*M = */ 1,
       /*K = */ 256,
       /*N = */ 256);
-}
\ No newline at end of file
+}
diff --git a/backends/vulkan/test/op_tests/rotary_embedding_test.cpp b/backends/vulkan/test/op_tests/rotary_embedding_test.cpp
index 2955a54e5f3..9f9bdef24aa 100644
--- a/backends/vulkan/test/op_tests/rotary_embedding_test.cpp
+++ b/backends/vulkan/test/op_tests/rotary_embedding_test.cpp
@@ -114,7 +114,6 @@ void test_reference(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   //
   // Run model
diff --git a/backends/vulkan/test/op_tests/sdpa_test.cpp b/backends/vulkan/test/op_tests/sdpa_test.cpp
index 303dc9c85ec..1c5a3ea7b62 100644
--- a/backends/vulkan/test/op_tests/sdpa_test.cpp
+++ b/backends/vulkan/test/op_tests/sdpa_test.cpp
@@ -352,7 +352,6 @@ void test_vulkan_sdpa(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   //
   // Run model
diff --git a/backends/vulkan/test/op_tests/utils/gen_computegraph.py b/backends/vulkan/test/op_tests/utils/gen_computegraph.py
index 08eb3b61c36..4fba14ca16e 100644
--- a/backends/vulkan/test/op_tests/utils/gen_computegraph.py
+++ b/backends/vulkan/test/op_tests/utils/gen_computegraph.py
@@ -682,7 +682,6 @@ def gen_graph_build_code(self, include_declarations: bool = True) -> str:
 
         graph_build += f"{self.graph}{self.dot}prepare();\n"
         graph_build += f"{self.graph}{self.dot}prepack();\n"
-        graph_build += f"{self.graph}{self.dot}encode_execute();\n"
 
         graph_build += "\n"
         return graph_build
diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp
index f3fed8b6622..82df7e7d96f 100644
--- a/backends/vulkan/test/vulkan_compute_api_test.cpp
+++ b/backends/vulkan/test/vulkan_compute_api_test.cpp
@@ -1153,7 +1153,6 @@ TEST(VulkanComputeGraphTest, empty_init_graphnode_test) {
   // Encode an empty ExecuteNode and check that command buffer encoding does not
   // crash.
   graph.execute_nodes().emplace_back(new ExecuteNode(nullptr, {}));
-  EXPECT_NO_FATAL_FAILURE(graph.encode_execute());
 }
 
 TEST(VulkanComputeGraphTest, test_zero_dim_tensor) {
@@ -1178,7 +1177,6 @@ TEST(VulkanComputeGraphTest, test_zero_dim_tensor) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
-  graph.encode_execute();
 
   // Run graph
 
@@ -1221,7 +1219,6 @@ TEST(VulkanComputeGraphTest, test_simple_graph_with_buffer) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
-  graph.encode_execute();
 
   // Run graph
 
@@ -1307,7 +1304,6 @@ TEST(VulkanComputeGraphTest, test_simple_graph) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
-  graph.encode_execute();
 
   // Run graph
 
@@ -1366,7 +1362,6 @@ TEST(VulkanComputeGraphTest, test_simple_graph_with_symint) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
-  graph.encode_execute();
 
   // Run graph
 
@@ -1437,8 +1432,6 @@ TEST(VulkanComputeGraphTest, test_simple_prepacked_graph) {
 
   graph.prepack();
 
-  graph.encode_execute();
-
   // Run graph
 
   for (float i = 5.0f; i < 30.0f; i += 10.0f) {
@@ -1465,6 +1458,7 @@ TEST(VulkanComputeGraphTest, test_simple_prepacked_graph) {
 
 TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) {
   GraphConfig config;
+  config.expect_dynamic_shapes = true;
   ComputeGraph graph(config);
   size_t expected_vma_allocation_count = 0;
 
@@ -1526,7 +1520,6 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) {
   EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count);
 
   graph.prepare();
-  graph.encode_execute();
 
   // +3: shared memory allocations for tensors
   expected_vma_allocation_count += 3;
@@ -1667,7 +1660,6 @@ TEST(VulkanComputeGraphTest, test_simple_graph_with_tmp_tensors) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
-  graph.encode_execute();
 
   // Run graph
 
@@ -1698,6 +1690,7 @@ TEST(VulkanComputeGraphTest, test_simple_graph_with_tmp_tensors) {
 TEST(VulkanComputeGraphTest, test_large_graph) {
   auto build_start_time = std::chrono::system_clock::now();
   GraphConfig config;
+  config.expect_dynamic_shapes = true;
   ComputeGraph graph(config);
 
   int64_t input_w = 256;
@@ -1733,7 +1726,6 @@ TEST(VulkanComputeGraphTest, test_large_graph) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
-  graph.encode_execute();
 
   auto build_end_time = std::chrono::system_clock::now();
 
@@ -1810,7 +1802,6 @@ void test_clone(
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
-  graph.encode_execute();
 
   fill_vtensor(graph, a, 0.0f, /*iota = */ true);
 
@@ -1895,7 +1886,6 @@ TEST(VulkanComputeGraphTest, test_etvk_copy_offset_node) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
-  graph.encode_execute();
 
   fill_vtensor(graph, a, 0.0f, /*iota = */ true);
 
@@ -1959,7 +1949,6 @@ TEST(VulkanComputeGraphTest, DISABLED_test_etvk_copy_channel_offset_node) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
-  graph.encode_execute();
 
   fill_vtensor(graph, a, 0.0f, true);
 
@@ -2050,7 +2039,6 @@ TEST(
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
-  graph.encode_execute();
 
   float a_value = 1.0f;
   float b_value = 2.0f;
@@ -2163,7 +2151,6 @@ TEST(VulkanComputeGraphTest, test_etvk_copy_offset_int_node) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
-  graph.encode_execute();
 
   fill_vtensor(graph, a, 0, /*iota = */ true);
 
@@ -2227,7 +2214,6 @@ TEST(VulkanComputeGraphTest, DISABLED_test_etvk_copy_channel_offset_int_node) {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
-  graph.encode_execute();
 
   fill_vtensor(graph, a, 0.0f, true);
 
@@ -2287,7 +2273,6 @@ TEST(VulkanComputeGraphTest, test_view_change_packing) {
     out.staging = graph.set_output_tensor(out.value);
 
     graph.prepare();
-    graph.encode_execute();
 
     fill_vtensor(graph, in, 0.0, true);
 
@@ -2446,7 +2431,6 @@ void compute_graph_round_trip_test(
   ValueRef r_staging_out = graph.set_output_tensor(r_tensor);
 
   graph.prepare();
-  graph.encode_execute();
 
   vTensorPtr tensor = graph.get_tensor(r_tensor);
 
@@ -2569,7 +2553,6 @@ void test_binary_op(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   for (int i = 1; i < 4; i++) {
     float val_arg1 = i + 1.5;
@@ -2644,7 +2627,6 @@ void test_mm(
   graph.prepack();
 
   for (int i = 1; i < 4; i++) {
-    graph.encode_execute();
     if (prepack) {
       float val_mat1 = i;
       float val_out = K * (val_mat1 * 2.0f);
@@ -2723,7 +2705,6 @@ void test_mm_with_resize_reencode(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   for (int i = 1; i < 4; i++) {
     float val_mat1 = i;
@@ -2801,7 +2782,6 @@ void test_max_pool2d(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   // Run graph
 
@@ -2880,7 +2860,6 @@ void test_grid_priors(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   vTensorPtr t_in = graph.get_tensor(in.value);
   vTensorPtr t_out = graph.get_tensor(out.value);
@@ -3050,7 +3029,6 @@ void test_to_copy() {
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
   graph.propagate_resize();
   graph.execute();
 
@@ -3204,6 +3182,7 @@ void add_dynamic_dispatch_test_node(
 vkcompute::ComputeGraph build_dynamic_dispatch_test_graph(int M, int N) {
   using namespace vkcompute;
   GraphConfig config;
+  config.expect_dynamic_shapes = true;
   ComputeGraph graph(config);
 
   vkapi::ScalarType dtype = vkapi::kFloat;
@@ -3237,7 +3216,6 @@ void test_dynamic_dispatch(int M, int N) {
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   for (int i = 1; i < 4; i++) {
     float val_mat1 = i;
@@ -3255,8 +3233,6 @@ void test_dynamic_dispatch(int M, int N) {
   graph.resize_input(1, new_mat2_size);
   graph.propagate_resize();
 
-  graph.encode_execute();
-
   for (int i = 1; i < 4; i++) {
     float val_mat1 = i;
     float val_mat2 = i + 1;