Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 25 additions & 61 deletions backends/test/multi_method_delegate_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,13 @@
#include <thread>
#include <vector>

#include <executorch/backends/xnnpack/runtime/XNNPACKBackend.h>

#include <executorch/runtime/backend/interface.h>
#include <executorch/runtime/backend/options.h>
#include <executorch/runtime/executor/program.h>
#include <executorch/runtime/platform/runtime.h>

#include <executorch/extension/data_loader/file_data_loader.h>
#include <executorch/extension/memory_allocator/malloc_memory_allocator.h>
#include <executorch/extension/runner_util/inputs.h>

using executorch::backends::xnnpack::workspace_sharing_mode_option_key;
using executorch::backends::xnnpack::WorkspaceSharingMode;
using executorch::backends::xnnpack::xnnpack_backend_key;

using executorch::runtime::BackendOptions;
using executorch::runtime::Error;
using executorch::runtime::EValue;
using executorch::runtime::HierarchicalAllocator;
Expand Down Expand Up @@ -135,61 +126,34 @@ class XNNPACKMultiDelegateTest : public ETPTEMethodRunBaseTest {
num_threads = 40;
kMethodName = "forward";
}
};

// This test is to validate the assumption that the delegate is thread safe.
// That includes the following:
// 1. The delegate can be initialized by multiple threads in parallel.
// 2. The delegate can be executed by multiple threads in parallel.
// 3. The delegate can be destroyed by multiple threads in parallel.
// Regardless of the underlying implementation of the delegate.
// This is particularly important when we have shared resources across
// delegate instances through a singleton backend instance.
void runStressTest() {
ASSERT_NE(kTestPTE1Path.size(), 0);
ASSERT_NE(kTestPTE2Path.size(), 0);
ASSERT_NE(num_threads, 0);
ASSERT_NE(kMethodName.size(), 0);

std::vector<std::thread> threads(num_threads);
std::atomic<size_t> count{0};

for (int i = 0; i < num_threads; i++) {
threads[i] = std::thread([&, i]() {
run(i, i % 7 ? kTestPTE1Path : kTestPTE2Path, kMethodName, count);
});
}
for (int i = 0; i < num_threads; i++) {
threads[i].join();
}
ASSERT_EQ(count, num_threads);
// This test is to validate the assumption that the delegate is thread safe.
// That includes the following:
// 1. The delegate can be initialized by multiple threads in parallel.
// 2. The delegate can be executed by multiple threads in parallel.
// 3. The delegate can be destroyed by multiple threads in parallel.
// Regardless of the underlying implementation of the delegate.
// This is particularly important when we have shared resources across
// delegate instances through a singleton backend instance.
TEST_F(XNNPACKMultiDelegateTest, MultipleThreads) {
ASSERT_NE(kTestPTE1Path.size(), 0);
ASSERT_NE(kTestPTE2Path.size(), 0);
ASSERT_NE(num_threads, 0);
ASSERT_NE(kMethodName.size(), 0);

std::vector<std::thread> threads(num_threads);
std::atomic<size_t> count{0};

for (int i = 0; i < num_threads; i++) {
threads[i] = std::thread([&, i]() {
run(i, i % 7 ? kTestPTE1Path : kTestPTE2Path, kMethodName, count);
});
}

void setWorkspaceSharingMode(WorkspaceSharingMode mode) {
executorch::runtime::runtime_init();

BackendOptions<1> backend_options;
backend_options.set_option(
workspace_sharing_mode_option_key, static_cast<int>(mode));

auto status = executorch::runtime::set_option(
xnnpack_backend_key, backend_options.view());
ASSERT_EQ(status, Error::Ok);
for (int i = 0; i < num_threads; i++) {
threads[i].join();
}
};

TEST_F(XNNPACKMultiDelegateTest, MultipleThreadsSharingDisabled) {
setWorkspaceSharingMode(WorkspaceSharingMode::Disabled);
runStressTest();
}

TEST_F(XNNPACKMultiDelegateTest, MultipleThreadsPerModelSharing) {
setWorkspaceSharingMode(WorkspaceSharingMode::PerModel);
runStressTest();
}

TEST_F(XNNPACKMultiDelegateTest, MultipleThreadsGlobalSharing) {
setWorkspaceSharingMode(WorkspaceSharingMode::Global);
runStressTest();
ASSERT_EQ(count, num_threads);
}

// TODO(T208989291): Add more tests here. For example,
Expand Down
14 changes: 11 additions & 3 deletions backends/xnnpack/runtime/XNNCompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1895,16 +1895,24 @@ ET_NODISCARD Error XNNCompiler::compileModel(
xnn_weights_cache_t weights_cache_ptr = nullptr;
#endif

// NOLINTBEGIN(facebook-hte-NullableDereference) - weights cache is allowed to
// be null
#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
ET_CHECK_OR_RETURN_ERROR(
workspace != nullptr, Internal, "Failed to initialize XNNPACK workspace");
status = xnn_create_runtime_v4(
subgraph.get(),
weights_cache_ptr,
workspace,
::executorch::extension::threadpool::get_pthreadpool(),
runtime_flags,
&runtime_ptr);
// NOLINTEND(facebook-hte-NullableDereference)
#else
status = xnn_create_runtime_v3(
subgraph.get(),
weights_cache_ptr,
::executorch::extension::threadpool::get_pthreadpool(),
runtime_flags,
&runtime_ptr);
#endif

ET_CHECK_OR_RETURN_ERROR(
xnn_status_success == status,
Expand Down
10 changes: 2 additions & 8 deletions backends/xnnpack/runtime/XNNExecutor.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
#pragma once

#include <executorch/backends/xnnpack/runtime/XNNStatus.h>
#include <executorch/backends/xnnpack/runtime/XNNWorkspace.h>
#include <executorch/backends/xnnpack/runtime/profiling/XNNProfiler.h>
#include <executorch/runtime/backend/interface.h>
#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>

#include <xnnpack.h>
#include <map>
#include <memory>
#include <vector>

Expand All @@ -35,11 +35,9 @@ class XNNExecutor {
std::vector<uint32_t> output_ids_;
std::vector<xnn_external_value> externals_;
std::vector<std::string> packed_data_names_;
std::shared_ptr<XNNWorkspace> workspace_;

public:
XNNExecutor(std::shared_ptr<XNNWorkspace> workspace)
: workspace_(workspace) {}
XNNExecutor() = default;

inline size_t getNumInputs() {
return input_ids_.size();
Expand All @@ -53,10 +51,6 @@ class XNNExecutor {
return packed_data_names_;
}

inline XNNWorkspace& get_workspace() {
return *workspace_;
}

/**
* Initialize the XNNExecutor with a given runtime and input/output ids.
* The input/output ids are expected to be sorted in order of their
Expand Down
Loading
Loading