diff --git a/.ci/scripts/build-qnn-sdk.sh b/.ci/scripts/build-qnn-sdk.sh index deeaed34ac3..ff42164dcf4 100644 --- a/.ci/scripts/build-qnn-sdk.sh +++ b/.ci/scripts/build-qnn-sdk.sh @@ -11,7 +11,7 @@ set -o xtrace build_qnn_backend() { echo "Start building qnn backend." export ANDROID_NDK_ROOT=/opt/ndk - export QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728 + export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)" bash backends/qualcomm/scripts/build.sh --skip_aarch64 --job_number 2 --release diff --git a/.ci/scripts/setup-qnn-deps.sh b/.ci/scripts/setup-qnn-deps.sh index 12809748129..45588e291e9 100644 --- a/.ci/scripts/setup-qnn-deps.sh +++ b/.ci/scripts/setup-qnn-deps.sh @@ -16,9 +16,9 @@ install_qnn() { QNN_INSTALLATION_DIR=/tmp/qnn mkdir -p "${QNN_INSTALLATION_DIR}" - curl -Lo /tmp/v2.25.0.24.07.28.zip "https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.25.0.240728.zip" + curl -Lo /tmp/v2.28.0.24.10.29.zip "https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.28.0.241029.zip" echo "Finishing downloading qnn sdk." - unzip -qo /tmp/v2.25.0.24.07.28.zip -d /tmp + unzip -qo /tmp/v2.28.0.24.10.29.zip -d /tmp echo "Finishing unzip qnn sdk." diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh index ddc7ad46185..550a09e4c6f 100644 --- a/.ci/scripts/test_llama.sh +++ b/.ci/scripts/test_llama.sh @@ -121,7 +121,7 @@ echo "COREML option ${COREML}" if [[ "${MODE}" =~ .*qnn.* ]]; then QNN=ON export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)" - export QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728 + export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang" export PYTHONPATH=".." 
cp schema/program.fbs exir/_serialize/program.fbs diff --git a/backends/qualcomm/runtime/backends/QnnBackendCache.cpp b/backends/qualcomm/runtime/backends/QnnBackendCache.cpp index 3d5a432431c..699e0646697 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendCache.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendCache.cpp @@ -51,6 +51,11 @@ Error QnnBackendCache::GetQnnGraphInfoFromBinary( } else if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_2) { num_graphs = binaryinfo->contextBinaryInfoV2.numGraphs; graphs = binaryinfo->contextBinaryInfoV2.graphs; +#if (QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR >= 21) + } else if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3) { + num_graphs = binaryinfo->contextBinaryInfoV3.numGraphs; + graphs = binaryinfo->contextBinaryInfoV3.graphs; +#endif } else { QNN_EXECUTORCH_LOG_WARN( "Unknown QNN BinaryInfo version %d.", binaryinfo->version); @@ -62,6 +67,10 @@ Error QnnBackendCache::GetQnnGraphInfoFromBinary( RetrieveGraphInfo(graphs[i].graphInfoV1); } else if (graphs->version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_2) { RetrieveGraphInfo(graphs[i].graphInfoV2); +#if (QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR >= 21) + } else if (graphs->version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3) { + RetrieveGraphInfo(graphs[i].graphInfoV3); +#endif } else { QNN_EXECUTORCH_LOG_WARN( "Unknown QNN GraphInfo version %d.", binaryinfo->version); diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.cpp b/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.cpp index 757034baa8e..030b5666daf 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.cpp +++ b/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.cpp @@ -17,6 +17,9 @@ using executorch::runtime::Error; Error HtpBackendCache::RetrieveBackendBinaryInfo( const QnnSystemContext_BinaryInfo_t* binaryinfo) { QnnHtpSystemContext_HwBlobInfo_t* htp_hwblobinfo = nullptr; 
+#if (QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR >= 21) + QnnHtpSystemContext_GraphBlobInfo_t* htp_graphblobinfo = nullptr; +#endif if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_1) { htp_hwblobinfo = static_cast<QnnHtpSystemContext_HwBlobInfo_t*>( @@ -24,27 +27,43 @@ Error HtpBackendCache::RetrieveBackendBinaryInfo( } else if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_2) { htp_hwblobinfo = static_cast<QnnHtpSystemContext_HwBlobInfo_t*>( binaryinfo->contextBinaryInfoV2.hwInfoBlob); +#if (QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR >= 21) + } else if (binaryinfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3) { + htp_graphblobinfo = static_cast<QnnHtpSystemContext_GraphBlobInfo_t*>( + binaryinfo->contextBinaryInfoV3.graphs->graphInfoV3.graphBlobInfo); +#endif } else { QNN_EXECUTORCH_LOG_WARN( "Unknown QNN BinaryInfo version %d.", binaryinfo->version); return Error::Internal; } - if (htp_hwblobinfo == nullptr) { - QNN_EXECUTORCH_LOG_WARN( - "Htp hardware blob information is not found in binary information."); - return Error::Ok; + if (htp_hwblobinfo) { + if (htp_hwblobinfo->version == + QNN_SYSTEM_CONTEXT_HTP_HW_INFO_BLOB_VERSION_V1) { + spill_fill_buf_ = + (*htp_hwblobinfo).contextBinaryHwInfoBlobV1_t.spillFillBufferSize; + } else { + QNN_EXECUTORCH_LOG_WARN( + "Unknown QNN Htp hw blob info version %d.", htp_hwblobinfo->version); + return Error::Internal; + } } - if (htp_hwblobinfo->version == - QNN_SYSTEM_CONTEXT_HTP_HW_INFO_BLOB_VERSION_V1) { - spill_fill_buf_ = - (*htp_hwblobinfo).contextBinaryHwInfoBlobV1_t.spillFillBufferSize; - } else { - QNN_EXECUTORCH_LOG_WARN( - "Unknown QNN Htp hw blob info version %d.", htp_hwblobinfo->version); - return Error::Internal; +#if (QNN_API_VERSION_MAJOR >= 2 && QNN_API_VERSION_MINOR >= 21) + if (htp_graphblobinfo) { + if (htp_graphblobinfo->version == + QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1) { + spill_fill_buf_ = + (*htp_graphblobinfo).contextBinaryGraphBlobInfoV1.spillFillBufferSize; + } else { + QNN_EXECUTORCH_LOG_WARN( + "Unknown QNN Htp graph blob info 
version %d.", + htp_graphblobinfo->version); + return Error::Internal; + } } +#endif return Error::Ok; } diff --git a/docs/source/build-run-qualcomm-ai-engine-direct-backend.md b/docs/source/build-run-qualcomm-ai-engine-direct-backend.md index 5e43a63c760..acfede66e66 100644 --- a/docs/source/build-run-qualcomm-ai-engine-direct-backend.md +++ b/docs/source/build-run-qualcomm-ai-engine-direct-backend.md @@ -59,7 +59,7 @@ This example is verified with SM8550 and SM8450. - Click the "Get Software" button to download a version of QNN SDK. - However, at the moment of updating this tutorial, the above website doesn't provide QNN SDK newer than 2.22.6. - The below is public links to download various QNN versions. Hope they can be publicly discoverable soon. - - [QNN 2.26.0](https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.26.0.240828.zip) + - [QNN 2.28.0](https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.28.0.241029.zip) The directory with installed Qualcomm AI Engine Direct SDK looks like: ``` diff --git a/docs/source/llm/build-run-llama3-qualcomm-ai-engine-direct-backend.md b/docs/source/llm/build-run-llama3-qualcomm-ai-engine-direct-backend.md index 133f9ec50bb..7ed768baf23 100644 --- a/docs/source/llm/build-run-llama3-qualcomm-ai-engine-direct-backend.md +++ b/docs/source/llm/build-run-llama3-qualcomm-ai-engine-direct-backend.md @@ -9,7 +9,7 @@ This tutorial demonstrates how to export Llama 3 8B Instruct for Qualcomm AI Eng - Follow [the README for executorch llama](https://github.com/pytorch/executorch/tree/main/examples/models/llama) to know how to run a llama model on mobile via ExecuTorch. - A Qualcomm device with 16GB RAM - We are continuing to optimize our memory usage to ensure compatibility with lower memory devices. -- The version of [Qualcomm AI Engine Direct SDK](https://developer.qualcomm.com/software/qualcomm-ai-engine-direct-sdk) is 2.26.0 or above. 
+- The version of [Qualcomm AI Engine Direct SDK](https://developer.qualcomm.com/software/qualcomm-ai-engine-direct-sdk) is 2.28.0 or above. ## Instructions diff --git a/shim/xplat/executorch/backends/qualcomm/qnn_version.bzl b/shim/xplat/executorch/backends/qualcomm/qnn_version.bzl index 75019982af2..5cb801489ed 100644 --- a/shim/xplat/executorch/backends/qualcomm/qnn_version.bzl +++ b/shim/xplat/executorch/backends/qualcomm/qnn_version.bzl @@ -1,2 +1,2 @@ def get_qnn_library_verision(): - return "2.26" + return "2.28"