From 9473642464cc876aac45d52d766e134aa3819f49 Mon Sep 17 00:00:00 2001
From: shewu-quic
Date: Thu, 14 Aug 2025 11:38:03 +0800
Subject: [PATCH] Qualcomm AI Engine Direct - Fix mem_handle register-twice issue

Summary:
- Insert the registered handle into the pre_registered_handles_ map to
  avoid registering the same data_ptr multiple times

Background:
When running llama in lookahead mode, the same AR-N model serves as both
the prompt processor and the token generator. The input and output are
the same, and the KV cache is shared between both components. This causes
a "register twice" error message from QNN when a shared buffer (Smart
Mask) is used.
---
 backends/qualcomm/runtime/QnnManager.cpp             | 3 ++-
 backends/qualcomm/runtime/backends/QnnMemManager.cpp | 6 +++++-
 backends/qualcomm/runtime/backends/QnnMemManager.h   | 5 ++++-
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/backends/qualcomm/runtime/QnnManager.cpp b/backends/qualcomm/runtime/QnnManager.cpp
index be9e5fcd58f..5e3220f25d9 100644
--- a/backends/qualcomm/runtime/QnnManager.cpp
+++ b/backends/qualcomm/runtime/QnnManager.cpp
@@ -291,7 +291,8 @@ Error QnnManager::RegisterCustomMem(
           data_ptr,
           unaligned_custom_mem_base,
           total_custom_mem_size,
-          tensor_offset) == Error::Ok,
+          tensor_offset,
+          info) == Error::Ok,
       Internal,
       "Fail to register to shared memory.");
 
diff --git a/backends/qualcomm/runtime/backends/QnnMemManager.cpp b/backends/qualcomm/runtime/backends/QnnMemManager.cpp
index e09d071075b..3b99dd10868 100644
--- a/backends/qualcomm/runtime/backends/QnnMemManager.cpp
+++ b/backends/qualcomm/runtime/backends/QnnMemManager.cpp
@@ -56,13 +56,16 @@ Error QnnMemManager::RegisterIonMem(
   return Error::Ok;
 }
 
+// TODO: Find a better way to unify RegisterCustomMem and
+// PreRegisterCustomMemHandle
 Error QnnMemManager::RegisterCustomMem(
     const std::shared_ptr<TensorWrapper>& tensor_wrapper,
     int32_t mem_fd,
     void* mem_ptr,
     void* unaligned_custom_mem_base,
     size_t total_custom_mem_size,
-    size_t tensor_offset) {
+    size_t tensor_offset,
+    const CustomMemTensorInfo& info) {
   const QnnInterface& qnn_interface = implementation_.GetQnnInterface();
   Qnn_MemDescriptor_t descriptor = {
       {tensor_wrapper->GetRank(), tensor_wrapper->GetDims(), nullptr},
@@ -94,6 +97,7 @@ Error QnnMemManager::RegisterCustomMem(
     return Error::Internal;
   }
   tensor_wrapper->SetMemHandle(handle);
+  pre_registered_handles_.insert({info, handle});
   registered_map_.insert({handle, mem_ptr});
   if (log_level_ >= QnnExecuTorchLogLevel::kLogLevelInfo) {
     QNN_EXECUTORCH_LOG_INFO(
diff --git a/backends/qualcomm/runtime/backends/QnnMemManager.h b/backends/qualcomm/runtime/backends/QnnMemManager.h
index 30bb64d78ad..6a7f00b016a 100644
--- a/backends/qualcomm/runtime/backends/QnnMemManager.h
+++ b/backends/qualcomm/runtime/backends/QnnMemManager.h
@@ -41,7 +41,8 @@ class QnnMemManager {
       void* mem_ptr,
       void* unaligned_custom_mem_base,
       size_t total_custom_mem_size,
-      size_t tensor_offset);
+      size_t tensor_offset,
+      const CustomMemTensorInfo& info);
 
   // Pre-register custom mem handle from SharedBuffer. Bring forward the
   // memHandle creating time from execution to initialization.
@@ -67,7 +68,9 @@ class QnnMemManager {
   const QnnImplementation& implementation_;
   QnnContext* context_;
   QnnExecuTorchLogLevel log_level_;
+  // Store the registered Qnn_MemHandle_t for de-registration
   std::unordered_map<Qnn_MemHandle_t, void*> registered_map_;
+  // Store the pre-registered custom mem handles
   std::unordered_map<CustomMemTensorInfo, Qnn_MemHandle_t> pre_registered_handles_;
   std::unordered_map<executorch::aten::ScalarType, Qnn_DataType_t>
       scalar_type_to_qnn_dtype_ = {
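
The patch relies on the standard lookup-before-register idiom: once RegisterCustomMem records {info, handle} in pre_registered_handles_, a later request for the same CustomMemTensorInfo can find and reuse the existing handle instead of asking QNN to register the buffer a second time. The sketch below is a minimal, self-contained illustration of that idiom, not the actual ExecuTorch/QNN code; MemManagerSketch, RegisterOnce, InfoHash, the simplified CustomMemTensorInfo, and the MemHandle typedef are all hypothetical stand-ins.

// Minimal sketch of the dedup pattern applied by this patch. All names here
// are simplified stand-ins, not ExecuTorch or QNN types and APIs.
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <unordered_map>

using MemHandle = std::uint64_t; // stand-in for Qnn_MemHandle_t

struct CustomMemTensorInfo {
  void* data_ptr;
  std::size_t offset;
  bool operator==(const CustomMemTensorInfo& other) const {
    return data_ptr == other.data_ptr && offset == other.offset;
  }
};

// Hash functor so the info struct can key an unordered_map.
struct InfoHash {
  std::size_t operator()(const CustomMemTensorInfo& info) const {
    return std::hash<void*>()(info.data_ptr) ^ (info.offset << 1);
  }
};

class MemManagerSketch {
 public:
  // Returns the existing handle when the same info was registered before,
  // so the backend never sees a second registration for the same data_ptr.
  MemHandle RegisterOnce(const CustomMemTensorInfo& info) {
    auto it = pre_registered_handles_.find(info);
    if (it != pre_registered_handles_.end()) {
      return it->second; // reuse: avoids the "register twice" error
    }
    MemHandle handle = next_handle_++; // stand-in for the QNN registration call
    pre_registered_handles_.insert({info, handle});
    return handle;
  }

 private:
  std::unordered_map<CustomMemTensorInfo, MemHandle, InfoHash>
      pre_registered_handles_;
  MemHandle next_handle_ = 1;
};

int main() {
  MemManagerSketch manager;
  int buffer = 0; // buffer shared by prompt processor and token generator
  CustomMemTensorInfo info{&buffer, 0};
  MemHandle first = manager.RegisterOnce(info);
  MemHandle second = manager.RegisterOnce(info); // same info: handle is reused
  std::cout << (first == second) << "\n"; // prints 1
}

This mirrors the lookahead-mode scenario from the commit message: when the prompt processor and the token generator share the same buffer, the second registration resolves to the handle recorded during the first, so QNN is only asked to register each data_ptr once.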