Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 25 additions & 61 deletions backends/test/multi_method_delegate_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,13 @@
#include <thread>
#include <vector>

#include <executorch/backends/xnnpack/runtime/XNNPACKBackend.h>

#include <executorch/runtime/backend/interface.h>
#include <executorch/runtime/backend/options.h>
#include <executorch/runtime/executor/program.h>
#include <executorch/runtime/platform/runtime.h>

#include <executorch/extension/data_loader/file_data_loader.h>
#include <executorch/extension/memory_allocator/malloc_memory_allocator.h>
#include <executorch/extension/runner_util/inputs.h>

using executorch::backends::xnnpack::workspace_sharing_mode_option_key;
using executorch::backends::xnnpack::WorkspaceSharingMode;
using executorch::backends::xnnpack::xnnpack_backend_key;

using executorch::runtime::BackendOptions;
using executorch::runtime::Error;
using executorch::runtime::EValue;
using executorch::runtime::HierarchicalAllocator;
Expand Down Expand Up @@ -135,61 +126,34 @@ class XNNPACKMultiDelegateTest : public ETPTEMethodRunBaseTest {
num_threads = 40;
kMethodName = "forward";
}
};

// This test is to validate the assumption that the delegate is thread safe.
// That includes the following:
// 1. The delegate can be initialized by multiple threads in parallel.
// 2. The delegate can be executed by multiple threads in parallel.
// 3. The delegate can be destroyed by multiple threads in parallel.
// Regardless of the underlying implementation of the delegate.
// This is particularly important when we have shared resources across
// delegate instances through a singleton backend instance.
void runStressTest() {
ASSERT_NE(kTestPTE1Path.size(), 0);
ASSERT_NE(kTestPTE2Path.size(), 0);
ASSERT_NE(num_threads, 0);
ASSERT_NE(kMethodName.size(), 0);

std::vector<std::thread> threads(num_threads);
std::atomic<size_t> count{0};

for (int i = 0; i < num_threads; i++) {
threads[i] = std::thread([&, i]() {
run(i, i % 7 ? kTestPTE1Path : kTestPTE2Path, kMethodName, count);
});
}
for (int i = 0; i < num_threads; i++) {
threads[i].join();
}
ASSERT_EQ(count, num_threads);
// This test is to validate the assumption that the delegate is thread safe.
// That includes the following:
// 1. The delegate can be initialized by multiple threads in parallel.
// 2. The delegate can be executed by multiple threads in parallel.
// 3. The delegate can be destroyed by multiple threads in parallel.
// Regardless of the underlying implementation of the delegate.
// This is particularly important when we have shared resources across
// delegate instances through a singleton backend instance.
TEST_F(XNNPACKMultiDelegateTest, MultipleThreads) {
ASSERT_NE(kTestPTE1Path.size(), 0);
ASSERT_NE(kTestPTE2Path.size(), 0);
ASSERT_NE(num_threads, 0);
ASSERT_NE(kMethodName.size(), 0);

std::vector<std::thread> threads(num_threads);
std::atomic<size_t> count{0};

for (int i = 0; i < num_threads; i++) {
threads[i] = std::thread([&, i]() {
run(i, i % 7 ? kTestPTE1Path : kTestPTE2Path, kMethodName, count);
});
}

void setWorkspaceSharingMode(WorkspaceSharingMode mode) {
executorch::runtime::runtime_init();

BackendOptions<1> backend_options;
backend_options.set_option(
workspace_sharing_mode_option_key, static_cast<int>(mode));

auto status = executorch::runtime::set_option(
xnnpack_backend_key, backend_options.view());
ASSERT_EQ(status, Error::Ok);
for (int i = 0; i < num_threads; i++) {
threads[i].join();
}
};

TEST_F(XNNPACKMultiDelegateTest, MultipleThreadsSharingDisabled) {
setWorkspaceSharingMode(WorkspaceSharingMode::Disabled);
runStressTest();
}

TEST_F(XNNPACKMultiDelegateTest, MultipleThreadsPerModelSharing) {
setWorkspaceSharingMode(WorkspaceSharingMode::PerModel);
runStressTest();
}

TEST_F(XNNPACKMultiDelegateTest, MultipleThreadsGlobalSharing) {
setWorkspaceSharingMode(WorkspaceSharingMode::Global);
runStressTest();
ASSERT_EQ(count, num_threads);
}

// TODO(T208989291): Add more tests here. For example,
Expand Down
14 changes: 11 additions & 3 deletions backends/xnnpack/runtime/XNNCompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1895,16 +1895,24 @@ ET_NODISCARD Error XNNCompiler::compileModel(
xnn_weights_cache_t weights_cache_ptr = nullptr;
#endif

// NOLINTBEGIN(facebook-hte-NullableDereference) - weights cache is allowed to
// be null
#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
ET_CHECK_OR_RETURN_ERROR(
workspace != nullptr, Internal, "Failed to initialize XNNPACK workspace");
status = xnn_create_runtime_v4(
subgraph.get(),
weights_cache_ptr,
workspace,
::executorch::extension::threadpool::get_pthreadpool(),
runtime_flags,
&runtime_ptr);
// NOLINTEND(facebook-hte-NullableDereference)
#else
status = xnn_create_runtime_v3(
subgraph.get(),
weights_cache_ptr,
::executorch::extension::threadpool::get_pthreadpool(),
runtime_flags,
&runtime_ptr);
#endif

ET_CHECK_OR_RETURN_ERROR(
xnn_status_success == status,
Expand Down
10 changes: 2 additions & 8 deletions backends/xnnpack/runtime/XNNExecutor.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
#pragma once

#include <executorch/backends/xnnpack/runtime/XNNStatus.h>
#include <executorch/backends/xnnpack/runtime/XNNWorkspace.h>
#include <executorch/backends/xnnpack/runtime/profiling/XNNProfiler.h>
#include <executorch/runtime/backend/interface.h>
#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>

#include <xnnpack.h>
#include <map>
#include <memory>
#include <vector>

Expand All @@ -35,11 +35,9 @@ class XNNExecutor {
std::vector<uint32_t> output_ids_;
std::vector<xnn_external_value> externals_;
std::vector<std::string> packed_data_names_;
std::shared_ptr<XNNWorkspace> workspace_;

public:
XNNExecutor(std::shared_ptr<XNNWorkspace> workspace)
: workspace_(workspace) {}
XNNExecutor() = default;

inline size_t getNumInputs() {
return input_ids_.size();
Expand All @@ -53,10 +51,6 @@ class XNNExecutor {
return packed_data_names_;
}

inline XNNWorkspace& get_workspace() {
return *workspace_;
}

/**
* Initialize the XNNExecutor with a given runtime and input/output ids.
* The input/output ids are expected to be sorted in order of their
Expand Down
Loading
Loading