pytorch · facebook-github-bot · Aug 1, 2025 · Jul 30, 2025 · Jul 31, 2025 · Jul 31, 2025
@@ -509,13 +509,6 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
 
     compute_graph->prepack();
 
-    // If dynamic shapes are not expected, then the command buffer only needs to
-    // be encoded once. Otherwise, wait until the first inference to encode the
-    // the command buffer, when actual input shapes are known.
-    if (!compute_graph->graphconfig().expect_dynamic_shapes) {
-      compute_graph->encode_execute();
-    }
-
     return Error::Ok;
   }
 

@@ -860,21 +860,20 @@ void ComputeGraph::prepack() {
   staging_nbytes_in_cmd_ = 0;
 }
 
-void ComputeGraph::encode_execute() {
-  clear_deferred_cmds();
-  context_->flush();
-  context_->set_cmd(/*reusable = */ true);
+void ComputeGraph::execute() {
+  if (deferred_cmd_list_.empty()) {
+    context_->flush();
+    context_->set_cmd(/*reusable = */ true);
 
-  context_->cmd_reset_querypool();
+    context_->cmd_reset_querypool();
 
-  for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
-    node->encode(this);
-  }
+    for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
+      node->encode(this);
+    }
 
-  deferred_cmd_list_.emplace_back(std::move(context_->extract_cmd()));
-}
+    deferred_cmd_list_.emplace_back(std::move(context_->extract_cmd()));
+  }
 
-void ComputeGraph::execute() {
   submit_deferred_cmds_and_wait();
   execute_count_++;
 }
@@ -898,7 +897,7 @@ void ComputeGraph::propagate_resize() {
   }
   // Only re-encode on resize if dynamic shapes are expected
   if (config_.expect_dynamic_shapes) {
-    encode_execute();
+    clear_deferred_cmds();
   }
 }
 

@@ -892,7 +892,6 @@ class ComputeGraph final {
   // Graph Execution
   //
 
-  void encode_execute();
   void execute();
 
   //

@@ -458,7 +458,6 @@ void test_vulkan_choose_qparams_tensor_impl(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   // Run Vulkan choose_qparams_tensor
   graph.copy_into_staging(
@@ -678,7 +677,6 @@ void test_vulkan_choose_qparams_per_token_asymmetric_impl(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   // Run Vulkan choose_qparams_per_token_asymmetric
   graph.copy_into_staging(

@@ -1140,7 +1140,6 @@ void test_vulkan_dequantize_per_token_impl(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   // Copy input data to GPU
   graph.copy_into_staging(
@@ -1671,7 +1670,6 @@ void test_vulkan_dequantize_per_channel_impl(
 
   graph.prepare();
   graph.prepack();
-  graph.encode_execute();
 
   // Copy input data to GPU
   graph.copy_into_staging(
@@ -2345,7 +2343,6 @@ void test_vulkan_dequantize_per_tensor_tensor_impl(
 
   graph.prepare();
   graph.prepack();
-  graph.encode_execute();
 
   // Run Vulkan dequantize_per_tensor.tensor
   graph.copy_into_staging(

@@ -491,7 +491,6 @@ void test_vulkan_quantize_affine_impl(
 
   graph.prepare();
   graph.prepack();
-  graph.encode_execute();
 
   // Copy input data to GPU
   graph.copy_into_staging(
@@ -789,7 +788,6 @@ void test_vulkan_dequantize_affine_impl(
 
   graph.prepare();
   graph.prepack();
-  graph.encode_execute();
 
   // Copy input data to GPU
   graph.copy_into_staging(
@@ -1079,7 +1077,6 @@ void test_vulkan_choose_qparams_affine_impl(
 
   graph.prepare();
   graph.prepack();
-  graph.encode_execute();
 
   // Copy input data to GPU
   graph.copy_into_staging(

@@ -931,7 +931,6 @@ void test_vulkan_quantize_per_token_impl(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   // Copy input data to GPU
   graph.copy_into_staging(
@@ -1413,7 +1412,6 @@ void test_vulkan_quantize_per_channel_impl(
 
   graph.prepare();
   graph.prepack();
-  graph.encode_execute();
 
   // Copy input data to GPU
   graph.copy_into_staging(
@@ -2042,7 +2040,6 @@ void test_vulkan_quantize_per_tensor_tensor_impl(
 
   graph.prepare();
   graph.prepack();
-  graph.encode_execute();
 
   // Run Vulkan quantize_per_tensor.tensor
   graph.copy_into_staging(

@@ -456,7 +456,6 @@ void test_vulkan_linear_qga4w_impl(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   //
   // Run model
@@ -551,7 +550,6 @@ void test_vulkan_linear_qcs4w_impl(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   //
   // Run model
@@ -685,7 +683,6 @@ void test_vulkan_linear_qta8a_qga4w_impl(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   //
   // Run model
@@ -900,4 +897,4 @@ TEST_F(VulkanLinearQTA8AQGA4WTest, test_vulkan_linear_quant_gemv) {
       /*M = */ 1,
       /*K = */ 256,
       /*N = */ 256);
-}
+}
@@ -114,7 +114,6 @@ void test_reference(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   //
   // Run model

@@ -352,7 +352,6 @@ void test_vulkan_sdpa(
   graph.prepare();
 
   graph.prepack();
-  graph.encode_execute();
 
   //
   // Run model
@@ -586,7 +585,6 @@ void test_vulkan_flash_attention(
   graph.prepare();
   graph.encode_prepack();
   graph.prepack();
-  graph.encode_execute();
 
   // Copy inputs and run
   graph.copy_into_staging(r_q.staging, q.const_data_ptr(), q.numel());
@@ -845,7 +843,6 @@ void test_reference_flash_attention(
   graph.prepare();
   graph.encode_prepack();
   graph.prepack();
-  graph.encode_execute();
 
   graph.copy_into_staging(r_q.staging, q.const_data_ptr(), q.numel());
   graph.copy_into_staging(r_k.staging, k.const_data_ptr(), k.numel());

@@ -682,7 +682,6 @@ def gen_graph_build_code(self, include_declarations: bool = True) -> str:
 
         graph_build += f"{self.graph}{self.dot}prepare();\n"
         graph_build += f"{self.graph}{self.dot}prepack();\n"
-        graph_build += f"{self.graph}{self.dot}encode_execute();\n"
 
         graph_build += "\n"
         return graph_build
-Original file line number
+Diff line change
@@ Expand Up / @@ -892,7 +892,6 @@ class ComputeGraph final { @@
       // Graph Execution
       //
-      void encode_execute();
       void execute();
       //
@@ Expand Down @@