From 9f03195cecadbd85f0c9a7b2afcb63befe80c327 Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Wed, 30 Jul 2025 15:09:25 -0700 Subject: [PATCH] [ET-VK] 6/n Split dispatches between multiple command buffers. Replaced the `encode_execute()` function with lazy encoding: the encoding logic moves into `execute()`, which encodes the command buffer whenever the deferred command list is empty, and `propagate_resize()` now invalidates the existing encoding through `clear_deferred_cmds()` (when dynamic shapes are expected) rather than re-encoding immediately. This diff removes the `encode_execute()` function. Differential Revision: [D78581574](https://our.internmc.facebook.com/intern/diff/D78581574/) [ghstack-poisoned] --- backends/vulkan/runtime/VulkanBackend.cpp | 7 ----- .../vulkan/runtime/graph/ComputeGraph.cpp | 23 +++++++------- backends/vulkan/runtime/graph/ComputeGraph.h | 1 - .../test/op_tests/choose_qparams_test.cpp | 2 -- .../vulkan/test/op_tests/dequantize_test.cpp | 3 -- .../vulkan/test/op_tests/quantize_test.cpp | 3 -- .../test/op_tests/quantized_linear_test.cpp | 5 +--- .../test/op_tests/rotary_embedding_test.cpp | 1 - backends/vulkan/test/op_tests/sdpa_test.cpp | 1 - .../test/op_tests/utils/gen_computegraph.py | 1 - .../vulkan/test/vulkan_compute_api_test.cpp | 30 ++----------------- 11 files changed, 15 insertions(+), 62 deletions(-) diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp index ecdd4f6d2d4..4ff0f9e93d6 100644 --- a/backends/vulkan/runtime/VulkanBackend.cpp +++ b/backends/vulkan/runtime/VulkanBackend.cpp @@ -509,13 +509,6 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface { compute_graph->prepack(); - // If dynamic shapes are not expected, then the command buffer only needs to - // be encoded once. Otherwise, wait until the first inference to encode the - // the command buffer, when actual input shapes are known. 
- if (!compute_graph->graphconfig().expect_dynamic_shapes) { - compute_graph->encode_execute(); - } - return Error::Ok; } diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index ee5621d9c12..a1dd4a287c1 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -860,21 +860,20 @@ void ComputeGraph::prepack() { staging_nbytes_in_cmd_ = 0; } -void ComputeGraph::encode_execute() { - clear_deferred_cmds(); - context_->flush(); - context_->set_cmd(/*reusable = */ true); +void ComputeGraph::execute() { + if (deferred_cmd_list_.empty()) { + context_->flush(); + context_->set_cmd(/*reusable = */ true); - context_->cmd_reset_querypool(); + context_->cmd_reset_querypool(); - for (std::unique_ptr& node : execute_nodes_) { - node->encode(this); - } + for (std::unique_ptr& node : execute_nodes_) { + node->encode(this); + } - deferred_cmd_list_.emplace_back(std::move(context_->extract_cmd())); -} + deferred_cmd_list_.emplace_back(std::move(context_->extract_cmd())); + } -void ComputeGraph::execute() { submit_deferred_cmds_and_wait(); execute_count_++; } @@ -898,7 +897,7 @@ void ComputeGraph::propagate_resize() { } // Only re-encode on resize if dynamic shapes are expected if (config_.expect_dynamic_shapes) { - encode_execute(); + clear_deferred_cmds(); } } diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h index 4b1089b0de8..7bac9bf92db 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.h +++ b/backends/vulkan/runtime/graph/ComputeGraph.h @@ -892,7 +892,6 @@ class ComputeGraph final { // Graph Execution // - void encode_execute(); void execute(); // diff --git a/backends/vulkan/test/op_tests/choose_qparams_test.cpp b/backends/vulkan/test/op_tests/choose_qparams_test.cpp index f45d4f82448..3b1094a1e84 100644 --- a/backends/vulkan/test/op_tests/choose_qparams_test.cpp +++ 
b/backends/vulkan/test/op_tests/choose_qparams_test.cpp @@ -458,7 +458,6 @@ void test_vulkan_choose_qparams_tensor_impl( graph.prepare(); graph.prepack(); - graph.encode_execute(); // Run Vulkan choose_qparams_tensor graph.copy_into_staging( @@ -678,7 +677,6 @@ void test_vulkan_choose_qparams_per_token_asymmetric_impl( graph.prepare(); graph.prepack(); - graph.encode_execute(); // Run Vulkan choose_qparams_per_token_asymmetric graph.copy_into_staging( diff --git a/backends/vulkan/test/op_tests/dequantize_test.cpp b/backends/vulkan/test/op_tests/dequantize_test.cpp index 91d49406fbb..9fca2c632d3 100644 --- a/backends/vulkan/test/op_tests/dequantize_test.cpp +++ b/backends/vulkan/test/op_tests/dequantize_test.cpp @@ -1140,7 +1140,6 @@ void test_vulkan_dequantize_per_token_impl( graph.prepare(); graph.prepack(); - graph.encode_execute(); // Copy input data to GPU graph.copy_into_staging( @@ -1671,7 +1670,6 @@ void test_vulkan_dequantize_per_channel_impl( graph.prepare(); graph.prepack(); - graph.encode_execute(); // Copy input data to GPU graph.copy_into_staging( @@ -2345,7 +2343,6 @@ void test_vulkan_dequantize_per_tensor_tensor_impl( graph.prepare(); graph.prepack(); - graph.encode_execute(); // Run Vulkan dequantize_per_tensor.tensor graph.copy_into_staging( diff --git a/backends/vulkan/test/op_tests/quantize_test.cpp b/backends/vulkan/test/op_tests/quantize_test.cpp index 43c97071874..86eebcf9b14 100644 --- a/backends/vulkan/test/op_tests/quantize_test.cpp +++ b/backends/vulkan/test/op_tests/quantize_test.cpp @@ -931,7 +931,6 @@ void test_vulkan_quantize_per_token_impl( graph.prepare(); graph.prepack(); - graph.encode_execute(); // Copy input data to GPU graph.copy_into_staging( @@ -1413,7 +1412,6 @@ void test_vulkan_quantize_per_channel_impl( graph.prepare(); graph.prepack(); - graph.encode_execute(); // Copy input data to GPU graph.copy_into_staging( @@ -2042,7 +2040,6 @@ void test_vulkan_quantize_per_tensor_tensor_impl( graph.prepare(); graph.prepack(); - 
graph.encode_execute(); // Run Vulkan quantize_per_tensor.tensor graph.copy_into_staging( diff --git a/backends/vulkan/test/op_tests/quantized_linear_test.cpp b/backends/vulkan/test/op_tests/quantized_linear_test.cpp index 26316344b0e..db95f4a793f 100644 --- a/backends/vulkan/test/op_tests/quantized_linear_test.cpp +++ b/backends/vulkan/test/op_tests/quantized_linear_test.cpp @@ -456,7 +456,6 @@ void test_vulkan_linear_qga4w_impl( graph.prepare(); graph.prepack(); - graph.encode_execute(); // // Run model @@ -551,7 +550,6 @@ void test_vulkan_linear_qcs4w_impl( graph.prepare(); graph.prepack(); - graph.encode_execute(); // // Run model @@ -685,7 +683,6 @@ void test_vulkan_linear_qta8a_qga4w_impl( graph.prepare(); graph.prepack(); - graph.encode_execute(); // // Run model @@ -900,4 +897,4 @@ TEST_F(VulkanLinearQTA8AQGA4WTest, test_vulkan_linear_quant_gemv) { /*M = */ 1, /*K = */ 256, /*N = */ 256); -} \ No newline at end of file +} diff --git a/backends/vulkan/test/op_tests/rotary_embedding_test.cpp b/backends/vulkan/test/op_tests/rotary_embedding_test.cpp index 2955a54e5f3..9f9bdef24aa 100644 --- a/backends/vulkan/test/op_tests/rotary_embedding_test.cpp +++ b/backends/vulkan/test/op_tests/rotary_embedding_test.cpp @@ -114,7 +114,6 @@ void test_reference( graph.prepare(); graph.prepack(); - graph.encode_execute(); // // Run model diff --git a/backends/vulkan/test/op_tests/sdpa_test.cpp b/backends/vulkan/test/op_tests/sdpa_test.cpp index 303dc9c85ec..1c5a3ea7b62 100644 --- a/backends/vulkan/test/op_tests/sdpa_test.cpp +++ b/backends/vulkan/test/op_tests/sdpa_test.cpp @@ -352,7 +352,6 @@ void test_vulkan_sdpa( graph.prepare(); graph.prepack(); - graph.encode_execute(); // // Run model diff --git a/backends/vulkan/test/op_tests/utils/gen_computegraph.py b/backends/vulkan/test/op_tests/utils/gen_computegraph.py index 08eb3b61c36..4fba14ca16e 100644 --- a/backends/vulkan/test/op_tests/utils/gen_computegraph.py +++ b/backends/vulkan/test/op_tests/utils/gen_computegraph.py 
@@ -682,7 +682,6 @@ def gen_graph_build_code(self, include_declarations: bool = True) -> str: graph_build += f"{self.graph}{self.dot}prepare();\n" graph_build += f"{self.graph}{self.dot}prepack();\n" - graph_build += f"{self.graph}{self.dot}encode_execute();\n" graph_build += "\n" return graph_build diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index f3fed8b6622..82df7e7d96f 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -1153,7 +1153,6 @@ TEST(VulkanComputeGraphTest, empty_init_graphnode_test) { // Encode an empty ExecuteNode and check that command buffer encoding does not // crash. graph.execute_nodes().emplace_back(new ExecuteNode(nullptr, {})); - EXPECT_NO_FATAL_FAILURE(graph.encode_execute()); } TEST(VulkanComputeGraphTest, test_zero_dim_tensor) { @@ -1178,7 +1177,6 @@ TEST(VulkanComputeGraphTest, test_zero_dim_tensor) { out.staging = graph.set_output_tensor(out.value); graph.prepare(); - graph.encode_execute(); // Run graph @@ -1221,7 +1219,6 @@ TEST(VulkanComputeGraphTest, test_simple_graph_with_buffer) { out.staging = graph.set_output_tensor(out.value); graph.prepare(); - graph.encode_execute(); // Run graph @@ -1307,7 +1304,6 @@ TEST(VulkanComputeGraphTest, test_simple_graph) { out.staging = graph.set_output_tensor(out.value); graph.prepare(); - graph.encode_execute(); // Run graph @@ -1366,7 +1362,6 @@ TEST(VulkanComputeGraphTest, test_simple_graph_with_symint) { out.staging = graph.set_output_tensor(out.value); graph.prepare(); - graph.encode_execute(); // Run graph @@ -1437,8 +1432,6 @@ TEST(VulkanComputeGraphTest, test_simple_prepacked_graph) { graph.prepack(); - graph.encode_execute(); - // Run graph for (float i = 5.0f; i < 30.0f; i += 10.0f) { @@ -1465,6 +1458,7 @@ TEST(VulkanComputeGraphTest, test_simple_prepacked_graph) { TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { GraphConfig 
config; + config.expect_dynamic_shapes = true; ComputeGraph graph(config); size_t expected_vma_allocation_count = 0; @@ -1526,7 +1520,6 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count); graph.prepare(); - graph.encode_execute(); // +3: shared memory allocations for tensors expected_vma_allocation_count += 3; @@ -1667,7 +1660,6 @@ TEST(VulkanComputeGraphTest, test_simple_graph_with_tmp_tensors) { out.staging = graph.set_output_tensor(out.value); graph.prepare(); - graph.encode_execute(); // Run graph @@ -1698,6 +1690,7 @@ TEST(VulkanComputeGraphTest, test_simple_graph_with_tmp_tensors) { TEST(VulkanComputeGraphTest, test_large_graph) { auto build_start_time = std::chrono::system_clock::now(); GraphConfig config; + config.expect_dynamic_shapes = true; ComputeGraph graph(config); int64_t input_w = 256; @@ -1733,7 +1726,6 @@ TEST(VulkanComputeGraphTest, test_large_graph) { out.staging = graph.set_output_tensor(out.value); graph.prepare(); - graph.encode_execute(); auto build_end_time = std::chrono::system_clock::now(); @@ -1810,7 +1802,6 @@ void test_clone( out.staging = graph.set_output_tensor(out.value); graph.prepare(); - graph.encode_execute(); fill_vtensor(graph, a, 0.0f, /*iota = */ true); @@ -1895,7 +1886,6 @@ TEST(VulkanComputeGraphTest, test_etvk_copy_offset_node) { out.staging = graph.set_output_tensor(out.value); graph.prepare(); - graph.encode_execute(); fill_vtensor(graph, a, 0.0f, /*iota = */ true); @@ -1959,7 +1949,6 @@ TEST(VulkanComputeGraphTest, DISABLED_test_etvk_copy_channel_offset_node) { out.staging = graph.set_output_tensor(out.value); graph.prepare(); - graph.encode_execute(); fill_vtensor(graph, a, 0.0f, true); @@ -2050,7 +2039,6 @@ TEST( out.staging = graph.set_output_tensor(out.value); graph.prepare(); - graph.encode_execute(); float a_value = 1.0f; float b_value = 2.0f; @@ -2163,7 +2151,6 @@ TEST(VulkanComputeGraphTest, 
test_etvk_copy_offset_int_node) { out.staging = graph.set_output_tensor(out.value); graph.prepare(); - graph.encode_execute(); fill_vtensor(graph, a, 0, /*iota = */ true); @@ -2227,7 +2214,6 @@ TEST(VulkanComputeGraphTest, DISABLED_test_etvk_copy_channel_offset_int_node) { out.staging = graph.set_output_tensor(out.value); graph.prepare(); - graph.encode_execute(); fill_vtensor(graph, a, 0.0f, true); @@ -2287,7 +2273,6 @@ TEST(VulkanComputeGraphTest, test_view_change_packing) { out.staging = graph.set_output_tensor(out.value); graph.prepare(); - graph.encode_execute(); fill_vtensor(graph, in, 0.0, true); @@ -2446,7 +2431,6 @@ void compute_graph_round_trip_test( ValueRef r_staging_out = graph.set_output_tensor(r_tensor); graph.prepare(); - graph.encode_execute(); vTensorPtr tensor = graph.get_tensor(r_tensor); @@ -2569,7 +2553,6 @@ void test_binary_op( graph.prepare(); graph.prepack(); - graph.encode_execute(); for (int i = 1; i < 4; i++) { float val_arg1 = i + 1.5; @@ -2644,7 +2627,6 @@ void test_mm( graph.prepack(); for (int i = 1; i < 4; i++) { - graph.encode_execute(); if (prepack) { float val_mat1 = i; float val_out = K * (val_mat1 * 2.0f); @@ -2723,7 +2705,6 @@ void test_mm_with_resize_reencode( graph.prepare(); graph.prepack(); - graph.encode_execute(); for (int i = 1; i < 4; i++) { float val_mat1 = i; @@ -2801,7 +2782,6 @@ void test_max_pool2d( graph.prepare(); graph.prepack(); - graph.encode_execute(); // Run graph @@ -2880,7 +2860,6 @@ void test_grid_priors( graph.prepare(); graph.prepack(); - graph.encode_execute(); vTensorPtr t_in = graph.get_tensor(in.value); vTensorPtr t_out = graph.get_tensor(out.value); @@ -3050,7 +3029,6 @@ void test_to_copy() { graph.prepare(); graph.prepack(); - graph.encode_execute(); graph.propagate_resize(); graph.execute(); @@ -3204,6 +3182,7 @@ void add_dynamic_dispatch_test_node( vkcompute::ComputeGraph build_dynamic_dispatch_test_graph(int M, int N) { using namespace vkcompute; GraphConfig config; + 
config.expect_dynamic_shapes = true; ComputeGraph graph(config); vkapi::ScalarType dtype = vkapi::kFloat; @@ -3237,7 +3216,6 @@ void test_dynamic_dispatch(int M, int N) { graph.prepare(); graph.prepack(); - graph.encode_execute(); for (int i = 1; i < 4; i++) { float val_mat1 = i; @@ -3255,8 +3233,6 @@ void test_dynamic_dispatch(int M, int N) { graph.resize_input(1, new_mat2_size); graph.propagate_resize(); - graph.encode_execute(); - for (int i = 1; i < 4; i++) { float val_mat1 = i; float val_mat2 = i + 1;