From 3cd186896ffd98a83f73ff396e0f2298ed22789a Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Wed, 20 May 2026 12:17:44 -0700 Subject: [PATCH] =?UTF-8?q?Revert=20"Qualcomm=20AI=20Engine=20Direct=20-?= =?UTF-8?q?=20heap=20profiling=20at=20runtime=20with=20HTP=20backen?= =?UTF-8?q?=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 885ebb9588bf297f85964e0a1a14d68e21ab81df. --- backends/qualcomm/debugger/README.md | 78 +------ backends/qualcomm/export_utils.py | 5 - .../qualcomm/runtime/QnnBackendOptions.cpp | 8 - backends/qualcomm/runtime/QnnBackendOptions.h | 13 -- backends/qualcomm/runtime/QnnExecuTorch.h | 1 - .../qualcomm/runtime/QnnExecuTorchBackend.cpp | 12 -- .../qualcomm/runtime/QnnExecuTorchBackend.h | 1 - .../runtime/backends/QnnBackendFactory.cpp | 3 +- .../runtime/backends/QnnContextCommon.cpp | 73 +------ .../runtime/backends/QnnContextCommon.h | 23 +- .../qualcomm/runtime/backends/QnnProfiler.cpp | 196 +++++------------- .../qualcomm/runtime/backends/QnnProfiler.h | 8 - .../runtime/backends/gpu/GpuContext.cpp | 8 +- .../runtime/backends/htp/HtpContext.h | 15 +- .../backends/htp/HtpContextCustomConfig.h | 11 +- .../htp/host/HtpContextCustomConfig.cpp | 1 - .../htp/target/HtpContextCustomConfig.cpp | 11 - .../backends/ir/host/QnnDlcManager.cpp | 3 +- .../runtime/backends/lpai/LpaiContext.cpp | 8 +- backends/qualcomm/tests/test_qnn_delegate.py | 73 ------- backends/qualcomm/tests/utils.py | 37 ---- .../executor_runner/qnn_executor_runner.cpp | 21 +- 22 files changed, 70 insertions(+), 539 deletions(-) diff --git a/backends/qualcomm/debugger/README.md b/backends/qualcomm/debugger/README.md index 8300920d1d5..bda3937bc53 100644 --- a/backends/qualcomm/debugger/README.md +++ b/backends/qualcomm/debugger/README.md @@ -78,7 +78,7 @@ qairt_visualizer.view(reports=[optrace, qhas]) - `model`: Path to your QNN model file (e.g., `path_to_your_model.dlc`). - **`reports`**: List of report file paths, including the optrace (`optrace.json`) and QHAS (`optrace_qnn_htp_analysis_summary.json`). -Note: Files ending with `.bin` do not support graph visualization in qairt_visualizer. +Note: Files ending with `.bin ` do not support graph visualization in qairt_visualizer. ## Demo @@ -266,79 +266,3 @@ python -m examples.qualcomm.util_scripts.qnn_intermediate_debugger_demo -b build 3. Does not support graphs with partitions (partial delegation). 4. Does not support LLM models. 5. Does not support graphs with multiple methods. - - -## ExecuTorch QNN HTP Heap Profiling - -Measures DSP memory usage when using context binary models on the HTP backend. - -### Introduction - -DSP heap profiling is available for `QnnContext_createFromBinary` use-cases. It captures total DSP heap usage at two checkpoints: - -- **Before the first context is created** (`before_context_created`) -- **After the last context is freed** (`after_context_freed`) - -The difference between the two values represents heap consumed during context execution. The value after freeing is typically equal to or greater than before creation. - -### Instructions - -#### Run the example test - -```bash -python backends/qualcomm/tests/test_qnn_delegate.py \ - TestQNNQuantizedUtils.test_qnn_backend_runtime_option_heap_profile \ - -b build-android -H ${HOST} -s ${SN} -m ${SOC_MODEL} -``` - -See [test_qnn_delegate.py](../tests/test_qnn_delegate.py) for the full test implementation. - -#### Setting - -```python -from executorch.backends.qualcomm.utils.utils import generate_htp_compiler_spec -from executorch.backends.qualcomm.utils.utils import generate_qnn_executorch_compiler_spec - -backend_options = generate_htp_compiler_spec( - use_multi_contexts=True, -) - -compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.chipset_table[TestQNN.soc_model], - backend_options=backend_options, - profile_level=2, -) - -# ... - -self.verify_output( - module, - sample_input, - exec_prog, - save_heap_result=True, -) -``` - -#### Output file format - -The result is written to a text file (default: `htp_heap_usage.txt`) with two lines: - -``` -DSP:before_context_created (bytes), -DSP:after_context_freed (bytes), -``` - -#### Reference result - -Measured on SM8850. A difference of 0 means no additional heap is consumed during context binary execution. - -```console -First value (before_context_created): 928212 bytes -Second value (after_context_freed): 928212 bytes -difference: 0.00 bytes -``` - -### Limitations - -1. Only supported HTP backend on Android and QNX platforms. -2. By enabling this feature, initialization and cleanup time might be impacted. diff --git a/backends/qualcomm/export_utils.py b/backends/qualcomm/export_utils.py index 1bca168ad3f..9c86d2d6640 100644 --- a/backends/qualcomm/export_utils.py +++ b/backends/qualcomm/export_utils.py @@ -493,11 +493,6 @@ def pull_debug_output(self, etdump_path, debug_ouput_path, callback=None): if callback: callback() - def pull_heap_output(self, src_file_path, dst_folder, callback=None): - self._adb(["pull", src_file_path, dst_folder]) - if callback: - callback() - def build_executorch_binary( model: torch.nn.Module, # noqa: B006 diff --git a/backends/qualcomm/runtime/QnnBackendOptions.cpp b/backends/qualcomm/runtime/QnnBackendOptions.cpp index 2117932bddc..0eb678b45e2 100644 --- a/backends/qualcomm/runtime/QnnBackendOptions.cpp +++ b/backends/qualcomm/runtime/QnnBackendOptions.cpp @@ -52,14 +52,6 @@ template QnnExecuTorchProfileLevel get_option( QnnExecuTorchProfileLevel, const char*); -executorch::runtime::Error get_runtime_option( - const char* key, - executorch::runtime::BackendOption& backend_option) { - std::strncpy(backend_option.key, key, runtime::kMaxOptionKeyLength); - backend_option.key[runtime::kMaxOptionKeyLength - 1] = '\0'; - return get_option(QNN_BACKEND, backend_option); -} - } // namespace qnn } // namespace backends } // namespace executorch diff --git a/backends/qualcomm/runtime/QnnBackendOptions.h b/backends/qualcomm/runtime/QnnBackendOptions.h index 93e0de1fb61..c366755edd0 100644 --- a/backends/qualcomm/runtime/QnnBackendOptions.h +++ b/backends/qualcomm/runtime/QnnBackendOptions.h @@ -37,19 +37,6 @@ struct RuntimeOption { template T get_option(T aot_option, const char* aot_key); -/** - * @brief - * Get the backend option. - * This method checks runtime option only. - * - * @param key The key of runtime option. - * @param backend_option The backend_option to be restored in runtime. - */ - -executorch::runtime::Error get_runtime_option( - const char* key, - executorch::runtime::BackendOption& backend_option); - } // namespace qnn } // namespace backends } // namespace executorch diff --git a/backends/qualcomm/runtime/QnnExecuTorch.h b/backends/qualcomm/runtime/QnnExecuTorch.h index 9699e5b4735..8a0ee3fed4b 100644 --- a/backends/qualcomm/runtime/QnnExecuTorch.h +++ b/backends/qualcomm/runtime/QnnExecuTorch.h @@ -25,7 +25,6 @@ #define QNN_RUNTIME_LPAI_CLIENT_PERF_TYPE "qnn_runtime_lpai_client_perf_type" #define QNN_RUNTIME_LPAI_AFFINITY "qnn_runtime_lpai_affinity" #define QNN_RUNTIME_LPAI_CORE_SELECTION "qnn_runtime_lpai_core_selection" -#define QNN_RUNTIME_HEAP_PROFILING_PATH "qnn_runtime_heap_profiling_path" #ifdef __cplusplus extern "C" { diff --git a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp index fdd70c0a8db..33cca5350d9 100644 --- a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp +++ b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp @@ -245,13 +245,6 @@ executorch::runtime::Error QnnExecuTorchBackend::set_option( qnn_runtime_lpai_core_selection_.value = *val; qnn_runtime_lpai_core_selection_.is_set = true; } - } else if (strcmp(option.key, QNN_RUNTIME_HEAP_PROFILING_PATH) == 0) { - if (auto* val = - std::get_if>( - &option.value)) { - qnn_runtime_heap_profiling_path_.value = *val; - qnn_runtime_heap_profiling_path_.is_set = true; - } } else { ET_LOG( Error, @@ -275,7 +268,6 @@ executorch::runtime::Error QnnExecuTorchBackend::get_option( executorch::runtime::BackendOptionContext& context, executorch::runtime::Span& backend_options) { - std::lock_guard guard(runtime_option_mutex_); size_t matches = backend_options.size(); for (size_t i = 0; i < backend_options.size(); ++i) { // Set the value to what was stored by set_option @@ -311,10 +303,6 @@ executorch::runtime::Error QnnExecuTorchBackend::get_option( strcmp(backend_options[i].key, QNN_RUNTIME_LPAI_CORE_SELECTION) == 0 && qnn_runtime_lpai_core_selection_.is_set) { backend_options[i].value = qnn_runtime_lpai_core_selection_.value; - } else if ( - strcmp(backend_options[i].key, QNN_RUNTIME_HEAP_PROFILING_PATH) == 0 && - qnn_runtime_heap_profiling_path_.is_set) { - backend_options[i].value = qnn_runtime_heap_profiling_path_.value; } else { // either runtime never called set_option or key does not exist matches--; diff --git a/backends/qualcomm/runtime/QnnExecuTorchBackend.h b/backends/qualcomm/runtime/QnnExecuTorchBackend.h index e3548c8752b..942e61e2267 100644 --- a/backends/qualcomm/runtime/QnnExecuTorchBackend.h +++ b/backends/qualcomm/runtime/QnnExecuTorchBackend.h @@ -71,7 +71,6 @@ class QnnExecuTorchBackend final RuntimeOption qnn_runtime_lpai_client_perf_type_{false, 0}; RuntimeOption qnn_runtime_lpai_affinity_{false, 0}; RuntimeOption qnn_runtime_lpai_core_selection_{false, 0}; - RuntimeOption qnn_runtime_heap_profiling_path_{false, {}}; }; } // namespace qnn diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp index 4e819a43121..fa2008befd5 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp @@ -71,8 +71,7 @@ std::unique_ptr QnnBackendFactory::Create( qnn_device_ptr, backend_params->qnn_backend_cache_ptr_.get(), htp_options, - qnn_dlc_manager, - get_option(options->profile_level(), QNN_RUNTIME_PROFILE_LEVEL)); + qnn_dlc_manager); backend_params->qnn_graph_ptr_ = std::make_unique( implementation_ptr, diff --git a/backends/qualcomm/runtime/backends/QnnContextCommon.cpp b/backends/qualcomm/runtime/backends/QnnContextCommon.cpp index e81f92a8003..e16a173db6c 100644 --- a/backends/qualcomm/runtime/backends/QnnContextCommon.cpp +++ b/backends/qualcomm/runtime/backends/QnnContextCommon.cpp @@ -6,7 +6,6 @@ * LICENSE file in the root directory of this source tree. */ -#include #include #include @@ -14,46 +13,12 @@ namespace executorch { namespace backends { namespace qnn { -std::mutex QnnContext::htp_context_mutex_; -int QnnContext::htp_context_count_{0}; - -void QnnContext::WriteHeapProfile() { - executorch::runtime::BackendOption backend_option; - std::string heap_profiling_path; - if (get_runtime_option(QNN_RUNTIME_HEAP_PROFILING_PATH, backend_option) == - Error::Ok) { - auto* arr = std::get_if>( - &backend_option.value); - if (arr) { - heap_profiling_path = arr->data(); - } - } - Qnn_ErrorHandle_t error_profile = - qnn_profiler_->ProfileDataToFile(heap_profiling_path); - if (error_profile != QNN_SUCCESS) { - QNN_EXECUTORCH_LOG_ERROR( - "Failed to profile. Cannot get profile from handle. Error %d", - QNN_GET_ERROR_CODE(error_profile)); - } -} - QnnContext::~QnnContext() { const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; - if (handle_ != nullptr) { QNN_EXECUTORCH_LOG_INFO("Destroy Qnn context"); - - bool do_heap_profile = false; - { - std::lock_guard lock(htp_context_mutex_); - if (is_htp_backend_ && htp_context_count_ > 0 && need_to_profile_) { - --htp_context_count_; - do_heap_profile = (htp_context_count_ == 0); - } - } - error = qnn_interface.qnn_context_free( - handle_, do_heap_profile ? qnn_profiler_->GetHandle() : nullptr); + error = qnn_interface.qnn_context_free(handle_, /*profile=*/nullptr); if (error != QNN_SUCCESS) { QNN_EXECUTORCH_LOG_ERROR( "Failed to free QNN " @@ -61,8 +26,6 @@ QnnContext::~QnnContext() { "ID %u, error %d", qnn_interface.GetBackendId(), QNN_GET_ERROR_CODE(error)); - } else if (do_heap_profile) { - WriteHeapProfile(); } handle_ = nullptr; } @@ -82,51 +45,21 @@ Error QnnContext::Configure() { if (cache_->GetCacheState() == QnnBackendCache::DESERIALIZE) { const QnnExecuTorchContextBinary& qnn_context_blob = cache_->GetQnnContextBlob(); - /* - Total DSP heap usage can be measured in two conditions, first context - creation and last context free. By the QNN documentation, we need to insert - profileHandle in qnn_context_create_from_binary when creating first context - and closing last context. - - Limitations are two: - 1.Only supported on Android and QNX platforms. - 2.By enabling this feature initialization and cleanup time might be - impacted. - */ - - bool do_heap_profile = false; - { - std::lock_guard lock(htp_context_mutex_); - do_heap_profile = - is_htp_backend_ && (htp_context_count_ == 0) && need_to_profile_; - if (is_htp_backend_) { - ++htp_context_count_; - } - } error = qnn_interface.qnn_context_create_from_binary( backend_->GetHandle(), device_->GetHandle(), - (temp_context_config.empty() ? nullptr : temp_context_config.data()), + temp_context_config.empty() ? nullptr : temp_context_config.data(), static_cast(qnn_context_blob.buffer), qnn_context_blob.nbytes, &handle_, - do_heap_profile ? qnn_profiler_->GetHandle() : nullptr); + /*profile=*/nullptr); if (error != QNN_SUCCESS) { QNN_EXECUTORCH_LOG_ERROR( "Can't create context from " "binary. Error %d.", QNN_GET_ERROR_CODE(error)); - // Rollback the count since context creation failed - { - std::lock_guard lock(htp_context_mutex_); - if (is_htp_backend_ && htp_context_count_ > 0) { - --htp_context_count_; - } - } return Error::Internal; - } else if (do_heap_profile) { - WriteHeapProfile(); } } else if ( cache_->GetCacheState() == QnnBackendCache::SERIALIZE || diff --git a/backends/qualcomm/runtime/backends/QnnContextCommon.h b/backends/qualcomm/runtime/backends/QnnContextCommon.h index c0351b857b7..7d507a4a50c 100644 --- a/backends/qualcomm/runtime/backends/QnnContextCommon.h +++ b/backends/qualcomm/runtime/backends/QnnContextCommon.h @@ -13,10 +13,7 @@ #include #include -#include - #include -#include namespace executorch { namespace backends { @@ -31,22 +28,13 @@ class QnnContext { QnnBackend* backend, QnnDevice* device, QnnBackendCache* cache, - QnnDlcManager* qnn_dlc_manager, - const QnnExecuTorchProfileLevel& profile_level) + QnnDlcManager* qnn_dlc_manager) : handle_(nullptr), implementation_(implementation), backend_(backend), device_(device), cache_(cache), - qnn_dlc_manager_(qnn_dlc_manager), - is_htp_backend_( - implementation->GetQnnInterface().GetBackendId() == - QNN_BACKEND_ID_HTP), - need_to_profile_( - profile_level != QnnExecuTorchProfileLevel::kProfileOff) { - qnn_profiler_ = - std::make_unique(implementation_, backend_, profile_level); - } + qnn_dlc_manager_(qnn_dlc_manager) {} virtual ~QnnContext(); @@ -85,7 +73,6 @@ class QnnContext { }; private: - void WriteHeapProfile(); Qnn_ContextHandle_t handle_; QnnImplementation* implementation_; QnnBackend* backend_; @@ -93,12 +80,6 @@ class QnnContext { QnnBackendCache* cache_; QnnContextCustomProtocol qnn_context_custom_protocol_; QnnDlcManager* qnn_dlc_manager_; - - std::unique_ptr qnn_profiler_; - bool is_htp_backend_; - bool need_to_profile_; - static std::mutex htp_context_mutex_; - static int htp_context_count_; }; } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/runtime/backends/QnnProfiler.cpp b/backends/qualcomm/runtime/backends/QnnProfiler.cpp index 195c967a674..b4650b30796 100644 --- a/backends/qualcomm/runtime/backends/QnnProfiler.cpp +++ b/backends/qualcomm/runtime/backends/QnnProfiler.cpp @@ -8,34 +8,10 @@ #include -#include -#include - namespace executorch { namespace backends { namespace qnn { -#define DEFINE_HEAP_BEFORE_CREATION "DSP:before_context_created" -#define DEFINE_HEAP_AFTER_FREED "DSP:after_context_freed" - -namespace { -const char* get_event_unit(QnnProfile_EventUnit_t unit) { - switch (unit) { - case QNN_PROFILE_EVENTUNIT_MICROSEC: - return " (us)"; - case QNN_PROFILE_EVENTUNIT_BYTES: - return " (bytes)"; - case QNN_PROFILE_EVENTUNIT_COUNT: - return " (count)"; - case QNN_PROFILE_EVENTUNIT_BACKEND: - // cycle unit is default appeared - case QNN_PROFILE_EVENTUNIT_CYCLES: - default: - return ""; - } -} -} // namespace - QnnProfile::QnnProfile( QnnImplementation* implementation, QnnBackend* backend, @@ -95,36 +71,36 @@ QnnProfile::QnnProfile( } } -Qnn_ErrorHandle_t QnnProfile::FetchEvents( - const QnnProfile_EventId_t** events_ptr, - std::uint32_t* num_events) { - if (handle_ == nullptr) { - QNN_EXECUTORCH_LOG_WARN("Profile handle is null, skipping FetchEvents"); - *num_events = 0; - return QNN_SUCCESS; - } - const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); - Qnn_ErrorHandle_t error = - qnn_interface.qnn_profile_get_events(handle_, events_ptr, num_events); - if (error != QNN_SUCCESS) { - QNN_EXECUTORCH_LOG_ERROR( - "Failed to get profile events: %d", QNN_GET_ERROR_CODE(error)); - } - return error; -} - Qnn_ErrorHandle_t QnnProfile::ProfileData( executorch::runtime::EventTracer* event_tracer) { + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); const QnnProfile_EventId_t* events_ptr = nullptr; + const QnnProfile_EventId_t* sub_events_ptr = nullptr; std::uint32_t num_events = 0; - Qnn_ErrorHandle_t error = FetchEvents(&events_ptr, &num_events); + std::uint32_t num_sub_events = 0; + Qnn_ErrorHandle_t error = + qnn_interface.qnn_profile_get_events(handle_, &events_ptr, &num_events); if (error != QNN_SUCCESS) { QNN_EXECUTORCH_LOG_ERROR( - "Failed to profile data in function FetchEvents: %d", - QNN_GET_ERROR_CODE(error)); + "ProfileData failed to get events: %d", QNN_GET_ERROR_CODE(error)); return error; } - const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); + + auto get_unit = [](QnnProfile_EventUnit_t unit) { + switch (unit) { + case QNN_PROFILE_EVENTUNIT_MICROSEC: + return " (us)"; + case QNN_PROFILE_EVENTUNIT_BYTES: + return " (bytes)"; + case QNN_PROFILE_EVENTUNIT_COUNT: + return " (count)"; + case QNN_PROFILE_EVENTUNIT_BACKEND: + // cycle unit is default appeared + case QNN_PROFILE_EVENTUNIT_CYCLES: + default: + return ""; + } + }; QnnProfile_EventData_t event_data; for (std::uint32_t i = 0; i < num_events; ++i) { error = @@ -139,7 +115,7 @@ Qnn_ErrorHandle_t QnnProfile::ProfileData( } // add events for other important metrics, e.g. RPC execution time std::string identifier = - std::string(event_data.identifier) + get_event_unit(event_data.unit); + std::string(event_data.identifier) + get_unit(event_data.unit); executorch::runtime::event_tracer_log_profiling_delegate( event_tracer, identifier.c_str(), @@ -149,114 +125,48 @@ Qnn_ErrorHandle_t QnnProfile::ProfileData( event_data.value); // Check an event's sub events only if it relates to graph execution time // (and its sub events are the individual op executions): - if (!backend_->IsProfileEventTypeParentOfNodeTime(event_data.type)) { - continue; - } - const QnnProfile_EventId_t* sub_events_ptr = nullptr; - std::uint32_t num_sub_events = 0; - error = qnn_interface.qnn_profile_get_sub_events( - events_ptr[i], &sub_events_ptr, &num_sub_events); - if (error != QNN_SUCCESS) { - QNN_EXECUTORCH_LOG_ERROR( - "ProfileData failed to get sub events " - "for event %d: %d", - i, - QNN_GET_ERROR_CODE(error)); - return error; - } - - QnnProfile_EventData_t sub_event_data; - for (std::uint32_t j = 0; j < num_sub_events; ++j) { - error = qnn_interface.qnn_profile_get_event_data( - sub_events_ptr[j], &sub_event_data); + if (backend_->IsProfileEventTypeParentOfNodeTime(event_data.type)) { + error = qnn_interface.qnn_profile_get_sub_events( + events_ptr[i], &sub_events_ptr, &num_sub_events); if (error != QNN_SUCCESS) { QNN_EXECUTORCH_LOG_ERROR( - "ProfileData failed to get sub " - "event data for sub event %d of event %d: %d", - j, + "ProfileData failed to get sub events " + "for event %d: %d", i, QNN_GET_ERROR_CODE(error)); return error; } - if (sub_event_data.type == QNN_PROFILE_EVENTTYPE_NODE && - (sub_event_data.unit == QNN_PROFILE_EVENTUNIT_MICROSEC || - sub_event_data.unit == QNN_PROFILE_EVENTUNIT_CYCLES)) { - executorch::runtime::event_tracer_log_profiling_delegate( - event_tracer, - sub_event_data.identifier, - /*delegate_debug_id=*/ - static_cast(-1), - 0, - sub_event_data.value); + + QnnProfile_EventData_t sub_event_data; + for (std::uint32_t j = 0; j < num_sub_events; ++j) { + error = qnn_interface.qnn_profile_get_event_data( + sub_events_ptr[j], &sub_event_data); + if (error != QNN_SUCCESS) { + QNN_EXECUTORCH_LOG_ERROR( + "ProfileData failed to get sub " + "event data for sub event %d of event %d: %d", + j, + i, + QNN_GET_ERROR_CODE(error)); + return error; + } + if (sub_event_data.type == QNN_PROFILE_EVENTTYPE_NODE && + (sub_event_data.unit == QNN_PROFILE_EVENTUNIT_MICROSEC || + sub_event_data.unit == QNN_PROFILE_EVENTUNIT_CYCLES)) { + executorch::runtime::event_tracer_log_profiling_delegate( + event_tracer, + sub_event_data.identifier, + /*delegate_debug_id=*/ + static_cast(-1), + 0, + sub_event_data.value); + } } } } return error; } -Qnn_ErrorHandle_t QnnProfile::ProfileDataToFile( - const std::string& profile_filename) { - if (handle_ == nullptr) { - QNN_EXECUTORCH_LOG_WARN( - "Profile handle is null, skipping ProfileDataToFile"); - return QNN_SUCCESS; - } - if (profile_filename.empty()) { - QNN_EXECUTORCH_LOG_WARN( - "Heap profiling path is empty. Please provide profiling filename from runtime option."); - return QNN_SUCCESS; - } - const QnnProfile_EventId_t* events_ptr = nullptr; - std::uint32_t num_events = 0; - Qnn_ErrorHandle_t error = FetchEvents(&events_ptr, &num_events); - if (error != QNN_SUCCESS) { - QNN_EXECUTORCH_LOG_ERROR( - "Failed to profile data in function FetchEvents: %d", - QNN_GET_ERROR_CODE(error)); - return error; - } - const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); - QnnProfile_EventData_t event_data; - std::uint32_t count_num_events = 0; - for (std::uint32_t i = 0; i < num_events; ++i) { - error = - qnn_interface.qnn_profile_get_event_data(events_ptr[i], &event_data); - if (error != QNN_SUCCESS) { - QNN_EXECUTORCH_LOG_ERROR( - "ProfileData failed to get event data " - "for event %d: %d", - i, - QNN_GET_ERROR_CODE(error)); - return error; - } - - std::ios_base::openmode open_mode = std::ios::app; - if (strcmp(event_data.identifier, DEFINE_HEAP_BEFORE_CREATION) == 0) { - open_mode = std::ios::trunc; - } else if (strcmp(event_data.identifier, DEFINE_HEAP_AFTER_FREED) == 0) { - open_mode = std::ios::app; - } else { - count_num_events++; - continue; - } - std::string identifier = - std::string(event_data.identifier) + get_event_unit(event_data.unit); - std::ofstream ofs(profile_filename, open_mode); - if (!ofs) { - QNN_EXECUTORCH_LOG_ERROR( - "Error when opening profile file: %s", profile_filename.c_str()); - return QNN_COMMON_ERROR_GENERAL; - } - ofs << identifier << ", " << event_data.value << "\n"; - } - if (count_num_events == num_events) { - QNN_EXECUTORCH_LOG_WARN( - "Not HTP backend but enable htp profiling. Please check setting."); - return QNN_SUCCESS; - } - return QNN_SUCCESS; -} - QnnProfile::~QnnProfile() { const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); if (handle_ != nullptr) { diff --git a/backends/qualcomm/runtime/backends/QnnProfiler.h b/backends/qualcomm/runtime/backends/QnnProfiler.h index e8f2a3c0502..de8fbd1d9d5 100644 --- a/backends/qualcomm/runtime/backends/QnnProfiler.h +++ b/backends/qualcomm/runtime/backends/QnnProfiler.h @@ -12,9 +12,6 @@ #include #include #include "QnnProfile.h" - -#include - namespace executorch { namespace backends { namespace qnn { @@ -27,7 +24,6 @@ class QnnProfile { const QnnExecuTorchProfileLevel& profile_level); ~QnnProfile(); Qnn_ErrorHandle_t ProfileData(executorch::runtime::EventTracer* event_tracer); - Qnn_ErrorHandle_t ProfileDataToFile(const std::string& profile_filename); Qnn_ProfileHandle_t GetHandle() { return handle_; @@ -37,10 +33,6 @@ class QnnProfile { Qnn_ProfileHandle_t handle_; QnnImplementation* implementation_; QnnBackend* backend_; - - Qnn_ErrorHandle_t FetchEvents( - const QnnProfile_EventId_t** events_ptr, - std::uint32_t* num_events); }; } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/runtime/backends/gpu/GpuContext.cpp b/backends/qualcomm/runtime/backends/gpu/GpuContext.cpp index c6c6ace2bdf..07952e77eef 100644 --- a/backends/qualcomm/runtime/backends/gpu/GpuContext.cpp +++ b/backends/qualcomm/runtime/backends/gpu/GpuContext.cpp @@ -21,13 +21,7 @@ GpuContext::GpuContext( QnnBackendCache* cache, QnnDlcManager* qnn_dlc_manager, const QnnExecuTorchGpuBackendOptions* gpu_options) - : QnnContext( - implementation, - backend, - device, - cache, - qnn_dlc_manager, - QnnExecuTorchProfileLevel::kProfileOff) { + : QnnContext(implementation, backend, device, cache, qnn_dlc_manager) { gpu_context_custom_config_ = std::make_unique(gpu_options); } diff --git a/backends/qualcomm/runtime/backends/htp/HtpContext.h b/backends/qualcomm/runtime/backends/htp/HtpContext.h index f00b709f607..a0389ea5983 100644 --- a/backends/qualcomm/runtime/backends/htp/HtpContext.h +++ b/backends/qualcomm/runtime/backends/htp/HtpContext.h @@ -25,17 +25,10 @@ class HtpContext : public QnnContext { QnnDevice* device, QnnBackendCache* cache, const QnnExecuTorchHtpBackendOptions* htp_options, - QnnDlcManager* qnn_dlc_manager, - const QnnExecuTorchProfileLevel& profile_level) - : QnnContext( - implementation, - backend, - device, - cache, - qnn_dlc_manager, - profile_level) { - htp_context_custom_config_ = std::make_unique( - this, htp_options, profile_level); + QnnDlcManager* qnn_dlc_manager) + : QnnContext(implementation, backend, device, cache, qnn_dlc_manager) { + htp_context_custom_config_ = + std::make_unique(this, htp_options); } ~HtpContext() {} diff --git a/backends/qualcomm/runtime/backends/htp/HtpContextCustomConfig.h b/backends/qualcomm/runtime/backends/htp/HtpContextCustomConfig.h index 61a395fcb5b..f0d4873b0d2 100644 --- a/backends/qualcomm/runtime/backends/htp/HtpContextCustomConfig.h +++ b/backends/qualcomm/runtime/backends/htp/HtpContextCustomConfig.h @@ -26,19 +26,12 @@ class HtpContextCustomConfig { public: explicit HtpContextCustomConfig( const QnnContext* context, - const QnnExecuTorchHtpBackendOptions* htp_options, - const QnnExecuTorchProfileLevel& profile_level) - : profile_level_(profile_level), - context_(context), - htp_options_(htp_options) {} + const QnnExecuTorchHtpBackendOptions* htp_options) + : context_(context), htp_options_(htp_options) {} std::vector CreateContextCustomConfig(); private: - // profile_level_ is consumed only by the target build; the host build never - // reads it. Marked [[maybe_unused]] so the host build doesn't warn while the - // field stays available for the target side. - [[maybe_unused]] QnnExecuTorchProfileLevel profile_level_; QnnHtpContext_CustomConfig_t* AllocContextCustomConfig() { htp_context_config_.emplace_back( std::make_unique()); diff --git a/backends/qualcomm/runtime/backends/htp/host/HtpContextCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/host/HtpContextCustomConfig.cpp index 037998132a8..4850afa14a2 100644 --- a/backends/qualcomm/runtime/backends/htp/host/HtpContextCustomConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/host/HtpContextCustomConfig.cpp @@ -6,7 +6,6 @@ * LICENSE file in the root directory of this source tree. */ -#include #include namespace executorch { diff --git a/backends/qualcomm/runtime/backends/htp/target/HtpContextCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/target/HtpContextCustomConfig.cpp index 8488bf21e79..676795797f8 100644 --- a/backends/qualcomm/runtime/backends/htp/target/HtpContextCustomConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/target/HtpContextCustomConfig.cpp @@ -19,17 +19,6 @@ HtpContextCustomConfig::CreateContextCustomConfig() { QnnHtpContext_CustomConfig_t* p_custom_config = nullptr; const HtpContext* htp_ctx = static_cast(context_); - // TODO: Verify heap profile works with kProfileBasic once enabled. - if (profile_level_ != QnnExecuTorchProfileLevel::kProfileOff) { - QnnHtpContext_CustomConfig_t* p_custom_config_profile = nullptr; - p_custom_config_profile = AllocContextCustomConfig(); - p_custom_config_profile->option = - QNN_HTP_CONTEXT_CONFIG_OPTION_DSP_MEMORY_PROFILING_ENABLED; - p_custom_config_profile->dspMemoryProfilingEnabled = true; - ret.push_back( - static_cast(p_custom_config_profile)); - } - if (htp_options_->use_multi_contexts() && htp_options_->max_sf_buf_size() != 0) { p_custom_config = AllocContextCustomConfig(); diff --git a/backends/qualcomm/runtime/backends/ir/host/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/ir/host/QnnDlcManager.cpp index 62d01c78706..47d583b5c15 100644 --- a/backends/qualcomm/runtime/backends/ir/host/QnnDlcManager.cpp +++ b/backends/qualcomm/runtime/backends/ir/host/QnnDlcManager.cpp @@ -47,8 +47,7 @@ Error QnnDlcManager::Create() { backend_bundle_ptr_->qnn_backend_ptr.get(), backend_bundle_ptr_->qnn_device_ptr.get(), backend_params_ptr_->qnn_backend_cache_ptr_.get(), - nullptr, - QnnExecuTorchProfileLevel::kProfileOff); + nullptr); backend_params_ptr_->qnn_graph_ptr_ = std::make_unique( backend_bundle_ptr_->implementation.get(), diff --git a/backends/qualcomm/runtime/backends/lpai/LpaiContext.cpp b/backends/qualcomm/runtime/backends/lpai/LpaiContext.cpp index e0c9d3ed3d8..d5203898f6b 100644 --- a/backends/qualcomm/runtime/backends/lpai/LpaiContext.cpp +++ b/backends/qualcomm/runtime/backends/lpai/LpaiContext.cpp @@ -20,13 +20,7 @@ LpaiContext::LpaiContext( QnnDevice* device, QnnBackendCache* cache, QnnDlcManager* qnn_dlc_manager) - : QnnContext( - implementation, - backend, - device, - cache, - qnn_dlc_manager, - QnnExecuTorchProfileLevel::kProfileOff) { + : QnnContext(implementation, backend, device, cache, qnn_dlc_manager) { lpai_context_custom_config_ = std::make_unique(); } diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index 940c54c2f8d..0d96cce2e06 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -5819,42 +5819,6 @@ def test_qnn_backend_profile_op(self): ) TestQNN.profile_level = 0 - def test_qnn_backend_runtime_option_heap_profile(self): - module = SimpleModel() # noqa: F405 - sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28)) - - backend_options = generate_htp_compiler_spec( - use_fp16=True, - use_multi_contexts=True, - ) - - compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.chipset_table[TestQNN.soc_model], - backend_options=backend_options, - profile_level=2, # if 0 for closing heap profiling - ) - - pass_jobs = get_capture_program_passes() - split_graph_pass, setting = self.split_graph(4) - pass_jobs[split_graph_pass] = setting - dep_table = get_passes_dependency_for_capture_program() - dep_table[split_graph_pass] = [FoldQDQ] - - edge_prog_mgr = to_edge_transform_and_lower_to_qnn( - module=module, - inputs=sample_input, - compiler_specs=compiler_specs, - dep_table=dep_table, - passes_job=pass_jobs, - ) - exec_prog = edge_prog_mgr.to_executorch() - self.verify_output( - module, - sample_input, - exec_prog, - save_heap_result=True, - ) - def test_qnn_backend_runtime_option_htp_performance(self): backend_options = generate_htp_compiler_spec(use_fp16=True) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( @@ -6754,43 +6718,6 @@ def test_qnn_backend_profile_op(self): ) TestQNN.profile_level = 0 - def test_qnn_backend_runtime_option_heap_profile(self): - module = SimpleModel() # noqa: F405 - sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28)) - module1 = self.get_qdq_module(module, sample_input) - - backend_options = generate_htp_compiler_spec( - use_fp16=False, - use_multi_contexts=True, - ) - - compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.chipset_table[TestQNN.soc_model], - backend_options=backend_options, - profile_level=2, # if 0 for closing heap profiling - ) - - pass_jobs = get_capture_program_passes() - split_graph_pass, setting = self.split_graph(4) - pass_jobs[split_graph_pass] = setting - dep_table = get_passes_dependency_for_capture_program() - dep_table[split_graph_pass] = [FoldQDQ] - - edge_prog_mgr = to_edge_transform_and_lower_to_qnn( - module=module1, - inputs=sample_input, - compiler_specs=compiler_specs, - dep_table=dep_table, - passes_job=pass_jobs, - ) - exec_prog = edge_prog_mgr.to_executorch() - self.verify_output( - module1, - sample_input, - exec_prog, - save_heap_result=True, - ) - def test_qnn_backend_runtime_option_htp_performance(self): backend_options = generate_htp_compiler_spec(use_fp16=False) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py index d8802f74e68..5827ff050aa 100644 --- a/backends/qualcomm/tests/utils.py +++ b/backends/qualcomm/tests/utils.py @@ -8,7 +8,6 @@ import subprocess import tempfile import unittest -from pathlib import Path from typing import Callable, Dict, List, Optional, OrderedDict, Tuple import numpy as np @@ -220,7 +219,6 @@ class TestQNN(unittest.TestCase): inference_speed_output_path = "outputs/inference_speed.txt" static_llm_eval_method = "" direct_build_folder: str = "" - dsp_heap_profile_filename = "htp_heap_usage.txt" @classmethod def setUpClass(cls): @@ -361,7 +359,6 @@ def verify_output( # noqa: C901 save_inference_speed: bool = False, expected_compared_events: int = -1, qnn_intermediate_debugger: QNNIntermediateDebugger = None, - save_heap_result: bool = False, ): with tempfile.TemporaryDirectory() as tmp_dir: ( @@ -415,29 +412,6 @@ def validate_profile(): len(inspector.to_dataframe().index) >= expected_profile_events ) - def validate_heap_profile(): - file_path = f"{tmp_dir}/{self.dsp_heap_profile_filename}" - self.assertTrue( - Path(file_path).exists(), f"File not found: {file_path}" - ) - with open(file_path, "r") as f: - values = [ - int(line.split(",")[1].strip()) for line in f if line.strip() - ] - self.assertEqual(len(values), 2, f"Expected 2 entries, got {values}") - before, after = values - difference = after - before - - print(f"before_context_created: {before} bytes") - print(f"after_context_freed: {after} bytes") - print(f"difference: {difference:.2f} bytes") - - self.assertGreaterEqual( - after, - before, - "after_context_freed should be >= before_context_created", - ) - def validate_intermediate_tensor(): qnn_intermediate_debugger.setup_inspector( etdump_path=etdump_path, @@ -627,11 +601,6 @@ def validate_intermediate_tensor(): adb.extra_cmds += ( f" --performance_output_path {self.inference_speed_output_path}" ) - - if save_heap_result: - adb.extra_cmds += ( - f" --heap_profiling_path {self.dsp_heap_profile_filename}" - ) adb.execute(custom_runner_cmd=f"rm -rf {adb.output_folder}") adb.execute(method_index=method_index, output_callback=output_callback) adb.pull(host_output_path=tmp_dir, callback=post_process) @@ -651,12 +620,6 @@ def validate_intermediate_tensor(): f"{tmp_dir}/{self.inference_speed_output_path}", "r" ) as f: self.inference_speed = float(f.read()) - if save_heap_result: - adb.pull_heap_output( - f"{adb.workspace}/{self.dsp_heap_profile_filename}", - f"{tmp_dir}/{self.dsp_heap_profile_filename}", - callback=validate_heap_profile, - ) def lower_module_and_test_output( self, diff --git a/examples/qualcomm/executor_runner/qnn_executor_runner.cpp b/examples/qualcomm/executor_runner/qnn_executor_runner.cpp index a35a496f22b..df6a8ecbd79 100644 --- a/examples/qualcomm/executor_runner/qnn_executor_runner.cpp +++ b/examples/qualcomm/executor_runner/qnn_executor_runner.cpp @@ -143,13 +143,6 @@ DEFINE_int32( "This is a runtime option and will override the profile level set during AOT. " "Refer to QnnExecuTorchProfileLevel under qc_compiler_spec.fbs for more info."); -DEFINE_string( - heap_profiling_path, - "", - "Output path for QNN heap-profiling dump. " - "Empty disables heap profiling. " - "This is a runtime option and will override the path set during AOT."); - using executorch::aten::Tensor; using executorch::aten::TensorImpl; using executorch::etdump::ETDumpGen; @@ -220,7 +213,7 @@ int main(int argc, char** argv) { } // Set runtime options - executorch::runtime::BackendOptions<9> backend_options; + executorch::runtime::BackendOptions<8> backend_options; if (!gflags::GetCommandLineFlagInfoOrDie("log_level").is_default) { ET_LOG(Info, "Setting runtime log level: %d", FLAGS_log_level); ET_CHECK_MSG( @@ -298,18 +291,6 @@ int main(int argc, char** argv) { "Failed to set backend options: %s", QNN_RUNTIME_LPAI_CORE_SELECTION); } - if (!gflags::GetCommandLineFlagInfoOrDie("heap_profiling_path").is_default) { - ET_LOG( - Info, - "Setting runtime heap_profiling_path: %s", - FLAGS_heap_profiling_path.c_str()); - ET_CHECK_MSG( - backend_options.set_option( - QNN_RUNTIME_HEAP_PROFILING_PATH, - FLAGS_heap_profiling_path.c_str()) == Error::Ok, - "Failed to set backend options: %s", - QNN_RUNTIME_HEAP_PROFILING_PATH); - } ET_CHECK_MSG( set_option(QNN_BACKEND, backend_options.view()) == Error::Ok, "Failed to set runtime options.");