From caa484ffef11bf776125e28e51c5d5715b442fdf Mon Sep 17 00:00:00 2001
From: chiwwang
Date: Wed, 7 Aug 2024 17:23:23 +0800
Subject: [PATCH 1/4] Qualcomm AI Engine Direct -- update documents

1. Updated QNN download links.
2. Unified setup.md and build-run-qualcomm-ai-engine-direct-backend.md
3. Fixed build commands in build-run-qualcomm-ai-engine-direct-backend.md
---
 backends/qualcomm/README.md                   |   9 +-
 backends/qualcomm/scripts/build.sh            |   4 +-
 backends/qualcomm/setup.md                    | 187 +-----------------
 ...d-run-qualcomm-ai-engine-direct-backend.md | 131 +++++++-----
 examples/qualcomm/README.md                   |   4 +-
 5 files changed, 100 insertions(+), 235 deletions(-)

diff --git a/backends/qualcomm/README.md b/backends/qualcomm/README.md
index 618a1f3e321..8fa87caf899 100644
--- a/backends/qualcomm/README.md
+++ b/backends/qualcomm/README.md
@@ -1,12 +1,14 @@
# Qualcomm AI Engine Direct Backend

Disclaimer: At present, we do not offer any backward compatibility guarantees
-for any APIs. We are currently in a pre-alpha development phase, and as such,
+for any APIs. We are currently in a development phase, and as such,
we reserve the right to modify interfaces and implementations.

This backend is implemented on top of
[Qualcomm AI Engine Direct SDK](https://developer.qualcomm.com/software/qualcomm-ai-engine-direct-sdk).
-Please follow [tutorial](https://pytorch.org/executorch/stable/build-run-qualcomm-ai-engine-direct-backend.html) to setup environment, build, and run executorch models by this backend (Qualcomm AI Engine Direct is also referred to as QNN in the source and documentation).
+Please follow the [tutorial](../../docs/source/build-run-qualcomm-ai-engine-direct-backend.md) to set up the environment, build, and run ExecuTorch models with this backend (Qualcomm AI Engine Direct is also referred to as QNN in the source and documentation).
+
+A website version of the tutorial is [here](https://pytorch.org/executorch/stable/build-run-qualcomm-ai-engine-direct-backend.html).

## Delegate Options

@@ -29,7 +31,7 @@ Add SoC model into QcomChipset enum in [schema](./serialization/schema.fbs) and
Insert new SoC information into _soc_info_table in [qnn_compile_spec_schema](./serialization/qnn_compile_spec_schema.py).

#### Step 3: Recompile the .pte file
-Follow [setup](setup.md) to setup environment and build runtime with new schema header.
+Follow [setup](../../docs/source/build-run-qualcomm-ai-engine-direct-backend.md) to set up the environment and build the runtime with the new schema header.

### Supported Inference Type
- Quantized

@@ -46,6 +48,7 @@ backends/qualcomm
├── partition # QNN Partitioner (AoT Part).
├── passes # Various passes helping lower models to QNN backend (AoT Part).
├── python # Places to put pybind artifacts for accessing QNN APIs, structures, etc (AoT Part).
+├── quantizer # QNN Quantizer
├── runtime # Here is QNN runtime responsible for compiling a model on x64.
|   |       # Meanwhile, this is also the runtime responsible for executing compiled
|   |       # models on a device.
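For quick orientation, the end-to-end flow behind the tutorial links above can be condensed as below. This is a minimal sketch assembled from the tutorial's own commands; the paths are placeholders for your installation, and the authoritative steps live in the tutorial and in `backends/qualcomm/scripts/build.sh`:

```bash
# Conventions used by the tutorial -- adjust to your setup.
export QNN_SDK_ROOT=/opt/qcom/aistack/qnn
export ANDROID_NDK_ROOT=/path/to/android_ndk
export EXECUTORCH_ROOT=/path/to/executorch

# Let the x64 dynamic linker find the QNN libraries, and make the
# ExecuTorch Python APIs importable.
export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/:$LD_LIBRARY_PATH
export PYTHONPATH=$EXECUTORCH_ROOT/..

# Build the AOT python bindings and the Android runner, then compile an
# example model (MobileNetV2) into a .pte file.
cd $EXECUTORCH_ROOT
./backends/qualcomm/scripts/build.sh
python -m examples.qualcomm.scripts.export_example --model_name mv2
```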
diff --git a/backends/qualcomm/scripts/build.sh b/backends/qualcomm/scripts/build.sh
index d6b1da62fcd..9576da8dea1 100755
--- a/backends/qualcomm/scripts/build.sh
+++ b/backends/qualcomm/scripts/build.sh
@@ -16,7 +16,7 @@ usage() {
  echo "Usage: Build the aarch64 version of executor runner or the python interface of Qnn Manager"
  echo "First, you need to set the environment variable for QNN_SDK_ROOT"
  echo ", and if you want to build the aarch64 version of executor runner"
-  echo ", you need to set ANDROID_NDK_ROOT"
+  echo ", you need to export ANDROID_NDK_ROOT=/path/to/android_ndkXX"
  echo "e.g.: executorch$ ./backends/qualcomm/scripts/build.sh --skip_x86_64"
  exit 1
}
@@ -59,7 +59,7 @@ PRJ_ROOT="$( cd "$(dirname "$0")/../../.." ; pwd -P)"

if [ "$BUILD_AARCH64" = true ]; then
    if [[ -z ${ANDROID_NDK_ROOT} ]]; then
-        echo "Please export ANDROID_NDK_ROOT=/path/to/android_ndk"
+        echo "Please export ANDROID_NDK_ROOT=/path/to/android_ndkXX"
        exit -1
    fi
    BUILD_ROOT=$PRJ_ROOT/$CMAKE_AARCH64
diff --git a/backends/qualcomm/setup.md b/backends/qualcomm/setup.md
index b4b0f2ea72d..b3bac83771d 100644
--- a/backends/qualcomm/setup.md
+++ b/backends/qualcomm/setup.md
@@ -1,189 +1,8 @@
# Setting up QNN Backend

-This is a tutorial for building and running Qualcomm AI Engine Direct backend,
+Please refer to [Building and Running ExecuTorch with Qualcomm AI Engine Direct Backend](../../docs/source/build-run-qualcomm-ai-engine-direct-backend.md).
+
+That is a tutorial for building and running the Qualcomm AI Engine Direct backend,
including compiling a model on an x64 host and running inference on an
Android device.
-
-## Prerequisite
-
-Please finish tutorial [Setting up executorch](../../docs/source/getting-started-setup.md).
-
-
-## Conventions
-
-`$QNN_SDK_ROOT` refers to the root of Qualcomm AI Engine Direct SDK,
-i.e., the directory containing `QNN_README.txt`.
-
-`$ANDROID_NDK_ROOT` refers to the root of Android NDK.
-
-`$EXECUTORCH_ROOT` refers to the root of executorch git repository.
-
-
-## Environment Setup
-
-### Download Qualcomm AI Engine Direct SDK
-
-Navigate to [Qualcomm AI Engine Direct SDK](https://developer.qualcomm.com/software/qualcomm-ai-engine-direct-sdk) and follow the download button.
-
-You might need to apply for a Qualcomm account to download the SDK.
-
-After logging in, search Qualcomm AI Stack at the *Tool* panel.
-You can find Qualcomm AI Engine Direct SDK under the AI Stack group.
-
-Please download the Linux version, and follow instructions on the page to
-extract the file.
-
-The SDK should be installed to somewhere `/opt/qcom/aistack/qnn` by default.
-
-### Download Android NDK
-
-Please navigate to [Android NDK](https://developer.android.com/ndk) and download
-a version of NDK. We recommend LTS version, currently r25c.
-
-### Setup environment variables
-
-We need to make sure Qualcomm AI Engine Direct libraries can be found by
-the dynamic linker on x64. Hence we set `LD_LIBRARY_PATH`. In production,
-we recommend users to put libraries in default search path or use `rpath`
-to indicate the location of libraries.
-
-Further, we set up `$PYTHONPATH` because it's easier to develop and import executorch Python APIs. Users might also build and install executorch package as usual python package.
-
-```bash
-export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/:$LD_LIBRARY_PATH
-export PYTHONPATH=$EXECUTORCH_ROOT/..
-``` - -Note: Since we set `PYTHONPATH`, we may have issue with finding `program.fbs` -and `scalar_type.fbs` when we export a model, because they are installed into -`pip-out` directory with the same package name pattern. A workaround is that -we copy `$EXECUTORCH_ROOT/pip-out/lib.linux-x86_64-cpython-310/executorch/exir/_serialize/program.fbs` -and `$EXECUTORCH_ROOT/pip-out/lib.linux-x86_64-cpython-310/executorch/exir/_serialize/scalar_type.fbs` -to `$EXECUTORCH_ROOT/exir/_serialize/`. - - -## End to End Inference - -### Step 1: Build Python APIs for AOT compilation on x64 - -Python APIs on x64 are required to compile models to Qualcomm AI Engine Direct binary. -Make sure `buck2` is under a directory in `PATH`. - -```bash -cd $EXECUTORCH_ROOT -mkdir build_x86_64 -cd build_x86_64 -cmake .. -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=${QNN_SDK_ROOT} -cmake --build . -t "PyQnnManagerAdaptor" "PyQnnWrapperAdaptor" -j8 - -# install Python APIs to correct import path -# The filename might vary depending on your Python and host version. -cp -f backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so $EXECUTORCH_ROOT/backends/qualcomm/python -cp -f backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so $EXECUTORCH_ROOT/backends/qualcomm/python -``` - - -### Step 2: Build `qnn_executor_runner` for Android - -`qnn_executor_runner` is an executable running the compiled model. - -You might want to ensure the correct `flatc`. `flatc` can be built along with the above step. For example, we can find `flatc` in `build_x86_64/third-party/flatbuffers/`. - -We can prepend `$EXECUTORCH_ROOT/build_x86_64/third-party/flatbuffers` to `PATH`. Then below cross-compiling can find the correct flatbuffer compiler. - -Commands to build `qnn_executor_runner` for Android: - -```bash -cd $EXECUTORCH_ROOT -mkdir build_android -cd build_android -# build executorch & qnn_executorch_backend -cmake .. \ - -DCMAKE_INSTALL_PREFIX=$PWD \ - -DEXECUTORCH_BUILD_QNN=ON \ - -DEXECUTORCH_BUILD_SDK=ON \ - -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ - -DQNN_SDK_ROOT=$QNN_SDK_ROOT \ - -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \ - -DANDROID_ABI='arm64-v8a' \ - -DANDROID_NATIVE_API_LEVEL=23 \ - -B$PWD - -cmake --build $PWD -j16 --target install - -cmake ../examples/qualcomm \ - -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \ - -DANDROID_ABI='arm64-v8a' \ - -DANDROID_NATIVE_API_LEVEL=23 \ - -DCMAKE_PREFIX_PATH="$PWD/lib/cmake/ExecuTorch;$PWD/third-party/gflags;" \ - -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \ - -Bexamples/qualcomm - -cmake --build examples/qualcomm -j16 -``` -**Note:** If you want to build for release, add `-DCMAKE_BUILD_TYPE=Release` to the `cmake` command options. - -You can find `qnn_executor_runner` under `build_android/examples/qualcomm/`. - - -### Step 3: Compile a model - -``` -python -m examples.qualcomm.scripts.export_example --model_name mv2 -``` - -Then the generated `mv2.pte` can be run on the device by -`build_android/backends/qualcomm/qnn_executor_runner` with Qualcomm AI Engine -Direct backend. - -[**Note**] To get proper accuracy, please apply calibrations with representative -dataset, which could be learnt more from examples under `examples/qualcomm/`. - - -### Step 4: Model Inference - -The backend rely on Qualcomm AI Engine Direct SDK libraries. - -You might want to follow docs in Qualcomm AI Engine Direct SDK to setup the device environment. 
-Or see below for a quick setup for testing:
-
-```bash
-# make sure you have write-permission on below path.
-DEVICE_DIR=/data/local/tmp/executorch_test/
-adb shell "mkdir -p ${DEVICE_DIR}"
-adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtp.so ${DEVICE_DIR}
-adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV69Stub.so ${DEVICE_DIR}
-adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV73Stub.so ${DEVICE_DIR}
-adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV75Stub.so ${DEVICE_DIR}
-adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnSystem.so ${DEVICE_DIR}
-adb push ${QNN_SDK_ROOT}/lib/hexagon-v69/unsigned/libQnnHtpV69Skel.so ${DEVICE_DIR}
-adb push ${QNN_SDK_ROOT}/lib/hexagon-v73/unsigned/libQnnHtpV73Skel.so ${DEVICE_DIR}
-adb push ${QNN_SDK_ROOT}/lib/hexagon-v75/unsigned/libQnnHtpV75Skel.so ${DEVICE_DIR}
-```
-
-We also need to indicate dynamic linkers on Android and Hexagon where to find these libraries
-by setting `ADSP_LIBRARY_PATH` and `LD_LIBRARY_PATH`.
-
-So, we can run `qnn_executor_runner` like
-```bash
-adb push mv2.pte ${DEVICE_DIR}
-adb push ${EXECUTORCH_ROOT}/build_android/examples/qualcomm/qnn_executor_runner ${DEVICE_DIR}
-adb shell "cd ${DEVICE_DIR} \
-           && export LD_LIBRARY_PATH=${DEVICE_DIR} \
-           && export ADSP_LIBRARY_PATH=${DEVICE_DIR} \
-           && ./qnn_executor_runner --model_path ./mv2_qnn.pte"
-```
-
-You should see the following result.
-Note that no output file will be generated in this example.
-```
-I 00:00:00.133366 executorch:qnn_executor_runner.cpp:156] Method loaded.
-I 00:00:00.133590 executorch:util.h:104] input already initialized, refilling.
-I 00:00:00.135162 executorch:qnn_executor_runner.cpp:161] Inputs prepared.
-I 00:00:00.136768 executorch:qnn_executor_runner.cpp:278] Model executed successfully.
-[INFO][Qnn ExecuTorch] Destroy Qnn backend parameters
-[INFO][Qnn ExecuTorch] Destroy Qnn context
-[INFO][Qnn ExecuTorch] Destroy Qnn device
-[INFO][Qnn ExecuTorch] Destroy Qnn backend
-```
diff --git a/docs/source/build-run-qualcomm-ai-engine-direct-backend.md b/docs/source/build-run-qualcomm-ai-engine-direct-backend.md
index ff5cb51595c..9623a87bcc4 100644
--- a/docs/source/build-run-qualcomm-ai-engine-direct-backend.md
+++ b/docs/source/build-run-qualcomm-ai-engine-direct-backend.md
@@ -5,6 +5,7 @@ build ExecuTorch for Qualcomm AI Engine Direct and running a model on it.
Qualcomm AI Engine Direct is also referred to as QNN in the source and documentation.

+
::::{grid} 2
:::{grid-item-card}  What you will learn in this tutorial:
@@ -35,11 +36,10 @@ Currently, this ExecuTorch Backend can delegate AI computations to Hexagon proce

### Host OS

-The Linux host operating system that QNN Backend is verified with is Ubuntu 20.04 LTS x64.
-
-However, because Qualcomm Package Manager(QPM) used to download necessary SDK (see below)
-only support Ubuntu, we recommend users to exercise this tutorial exacly
-on Ubuntu 20.04.
+As of this update, the Linux host operating system that the QNN backend is verified
+with is Ubuntu 22.04 LTS x64.
+In general, we verify the backend on the same OS version that the QNN SDK itself is
+verified with; that version is documented in the QNN SDK.

### Hardware:
You will need an Android smartphone with adb connected, running on one of the below Qualcomm SoCs:
@@ -53,20 +53,18 @@ This example is verified with SM8550 and SM8450.
### Software:

 - Follow ExecuTorch recommended Python version.
- - A compiler to compile AOT parts. GCC 9.4 come with Ubuntu20.04 is verified.
- - [Android NDK](https://developer.android.com/ndk).
This example is verified with NDK 25c.
+ - A compiler to compile AOT parts, e.g., the GCC compiler that comes with Ubuntu LTS.
+ - [Android NDK](https://developer.android.com/ndk). This example is verified with NDK 26c.
  - [Qualcomm AI Engine Direct SDK](https://developer.qualcomm.com/software/qualcomm-ai-engine-direct-sdk)
-   - Follow the download button. After logging in, search Qualcomm AI Stack at the *Tool* panel.
-   - You can find Qualcomm AI Engine Direct SDK under the AI Stack group.
-   - Please download the Linux version, and follow instructions on the page to extract the file.
-   - The SDK should be installed to somewhere `/opt/qcom/aistack/qnn` by default.
-   - It's also OK to place it somewhere else. We don't have assumption about the absolute path of the SDK.
-   - This example is verified with version 2.12.0.
+   - Click the "Get Software" button to download a version of QNN SDK.
+   - However, at the moment of updating this tutorial, the above website does not provide a QNN SDK newer than 2.22.6.
+   - Below are public links to various QNN versions. We hope they become directly discoverable on the website soon.
+     - [QNN 2.25.0](https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.25.0.240728.zip)
+     - [QNN 2.24.0](https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.24.0.240626.zip)
+     - [QNN 2.23.0](https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.23.0.24.06.24.zip)

The directory with installed Qualcomm AI Engine Direct SDK looks like:
```
-$ tree -L 1 /opt/qcom/aistack/qnn//
-/opt/qcom/aistack/qnn//
├── benchmarks
├── bin
├── docs
├── examples
├── include
├── lib
├── LICENSE.pdf
+├── NOTICE.txt
+├── NOTICE_WINDOWS.txt
├── QNN_NOTICE.txt
├── QNN_README.txt
├── QNN_ReleaseNotes.txt
-├── share
-└── Uninstall
+├── ReleaseNotes.txt
+├── ReleaseNotesWindows.txt
+├── sdk.yaml
+└── share
```


@@ -89,7 +91,7 @@ $ tree -L 1 /opt/qcom/aistack/qnn//
`$QNN_SDK_ROOT` refers to the root of Qualcomm AI Engine Direct SDK,
i.e., the directory containing `QNN_README.txt`.

-`$ANDROID_NDK` refers to the root of Android NDK.
+`$ANDROID_NDK_ROOT` refers to the root of Android NDK.

`$EXECUTORCH_ROOT` refers to the root of executorch git repository.

@@ -107,7 +109,16 @@ export PYTHONPATH=$EXECUTORCH_ROOT/..

## Build

-An example script for below building instructions is [here](https://github.com/pytorch/executorch/blob/main/backends/qualcomm/scripts/build.sh).
+An example script for the below building instructions is [here](https://github.com/pytorch/executorch/blob/main/backends/qualcomm/scripts/build.sh).
+We recommend using the script because the ExecuTorch build commands can change from time to time.
+The above script is actively used and is updated more frequently than this tutorial.
+Example usage:
+```bash
+cd $EXECUTORCH_ROOT
+./backends/qualcomm/scripts/build.sh
+# or
+./backends/qualcomm/scripts/build.sh --release
+```

### AOT (Ahead-of-time) components:

@@ -115,14 +126,21 @@ Python APIs on x64 are required to compile models to Qualcomm AI Engine Direct b

```bash
cd $EXECUTORCH_ROOT
-# Workaround for fbs files in exir/_serialize
-cp schema/program.fbs exir/_serialize/program.fbs
-cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
-
mkdir build_x86_64
cd build_x86_64
-cmake .. -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=${QNN_SDK_ROOT}
-cmake --build . -t "PyQnnManagerAdaptor" "PyQnnWrapperAdaptor" -j8
+# Note that the below command might change.
+# Please refer to the above build.sh for the latest working commands.
+cmake .. \
+  -DCMAKE_INSTALL_PREFIX=$PWD \
+  -DEXECUTORCH_BUILD_QNN=ON \
+  -DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
+  -DEXECUTORCH_BUILD_SDK=ON \
+  -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+  -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+  -DPYTHON_EXECUTABLE=python3 \
+  -DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF
+
+cmake --build $PWD -t "PyQnnManagerAdaptor" "PyQnnWrapperAdaptor" -j8

# install Python APIs to correct import path
# The filename might vary depending on your Python and host version.
@@ -143,43 +161,50 @@ cd build_android
# build executorch & qnn_executorch_backend
cmake .. \
    -DCMAKE_INSTALL_PREFIX=$PWD \
-    -DEXECUTORCH_BUILD_SDK=ON \
    -DEXECUTORCH_BUILD_QNN=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
    -DQNN_SDK_ROOT=$QNN_SDK_ROOT \
-    -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
+    -DEXECUTORCH_BUILD_SDK=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+    -DPYTHON_EXECUTABLE=python3 \
+    -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
    -DANDROID_ABI='arm64-v8a' \
-    -DANDROID_NATIVE_API_LEVEL=23 \
-    -B$PWD
+    -DANDROID_NATIVE_API_LEVEL=23

cmake --build $PWD -j16 --target install

cmake ../examples/qualcomm \
-    -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
+    -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
    -DANDROID_ABI='arm64-v8a' \
    -DANDROID_NATIVE_API_LEVEL=23 \
    -DCMAKE_PREFIX_PATH="$PWD/lib/cmake/ExecuTorch;$PWD/third-party/gflags;" \
    -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
+    -DPYTHON_EXECUTABLE=python3 \
    -Bexamples/qualcomm

cmake --build examples/qualcomm -j16
+
+# qnn_executor_runner can be found under examples/qualcomm
+# The full path is $EXECUTORCH_ROOT/build_android/examples/qualcomm/qnn_executor_runner
+ls examples/qualcomm
```
**Note:** If you want to build for release, add `-DCMAKE_BUILD_TYPE=Release` to the `cmake` command options.

-You can find `qnn_executor_runner` under `build_android/examples/qualcomm/`.
-
-The build script is also available [here](https://github.com/pytorch/executorch/blob/main/backends/qualcomm/scripts/build.sh).

## Deploying and running on device

### AOT compile a model

-You can refer to [this script](https://github.com/pytorch/executorch/blob/main/examples/qualcomm/scripts/deeplab_v3.py) for the exact flow.
+Refer to [this script](https://github.com/pytorch/executorch/blob/main/examples/qualcomm/scripts/deeplab_v3.py) for the exact flow.
We use deeplab-v3-resnet101 as an example in this tutorial. Run the below commands to compile:

-```
+```bash
cd $EXECUTORCH_ROOT
+
+# Workaround for fbs files in exir/_serialize
+cp schema/program.fbs exir/_serialize/program.fbs
+cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
+
python -m examples.qualcomm.scripts.deeplab_v3 -b build_android -m SM8550 --compile_only --download
```
@@ -212,11 +237,13 @@ The compiled model is `./deeplab_v3/dlv3_qnn.pte`.
DEVICE_DIR=/data/local/tmp/executorch_qualcomm_tutorial/
adb shell "mkdir -p ${DEVICE_DIR}"
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtp.so ${DEVICE_DIR}
+adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnSystem.so ${DEVICE_DIR}
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV69Stub.so ${DEVICE_DIR}
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV73Stub.so ${DEVICE_DIR}
-adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnSystem.so ${DEVICE_DIR}
+adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV75Stub.so ${DEVICE_DIR}
adb push ${QNN_SDK_ROOT}/lib/hexagon-v69/unsigned/libQnnHtpV69Skel.so ${DEVICE_DIR}
adb push ${QNN_SDK_ROOT}/lib/hexagon-v73/unsigned/libQnnHtpV73Skel.so ${DEVICE_DIR}
+adb push ${QNN_SDK_ROOT}/lib/hexagon-v75/unsigned/libQnnHtpV75Skel.so ${DEVICE_DIR}
```

***Step 2***. We also need to indicate dynamic linkers on Android and Hexagon
@@ -236,12 +263,28 @@ adb shell "cd ${DEVICE_DIR} \

You should see something like below:
```
-I 00:00:01.835706 executorch:qnn_executor_runner.cpp:298] 100 inference took 1096.626000 ms, avg 10.966260 ms
-[INFO][Qnn ExecuTorch] Destroy Qnn backend parameters
-[INFO][Qnn ExecuTorch] Destroy Qnn context
-[INFO][Qnn ExecuTorch] Destroy Qnn device
-[INFO][Qnn ExecuTorch] Destroy Qnn backend
+I 00:00:00.257354 executorch:qnn_executor_runner.cpp:213] Method loaded.
+I 00:00:00.323502 executorch:qnn_executor_runner.cpp:262] ignoring error from set_output_data_ptr(): 0x2
+I 00:00:00.357496 executorch:qnn_executor_runner.cpp:262] ignoring error from set_output_data_ptr(): 0x2
+I 00:00:00.357555 executorch:qnn_executor_runner.cpp:265] Inputs prepared.
+I 00:00:00.364824 executorch:qnn_executor_runner.cpp:414] Model executed successfully.
+I 00:00:00.364875 executorch:qnn_executor_runner.cpp:425] Write etdump to etdump.etdp, Size = 424
+[INFO] [Qnn ExecuTorch]: Destroy Qnn backend parameters
+[INFO] [Qnn ExecuTorch]: Destroy Qnn context
+[INFO] [Qnn ExecuTorch]: Destroy Qnn backend
+```
+
+The model is merely executed. If we want to feed real inputs and get model outputs, we can use
+```bash
+cd <path_to_executorch>
+python -m examples.qualcomm.scripts.deeplab_v3 -b build_android -m SM8550 --download -s <device_serial>
+```
+The `<device_serial>` can be found by the `adb devices` command.
+
+After the above command, pre-processed inputs and outputs are put in the `$EXECUTORCH_ROOT/deeplab_v3` and `$EXECUTORCH_ROOT/deeplab_v3/outputs` folders.
+
+The command-line arguments are written in [utils.py](https://github.com/pytorch/executorch/blob/main/examples/qualcomm/scripts/utils.py#L127).
+The model, inputs, and output location are passed to `qnn_executor_runner` by `--model_path`, `--input_list_path`, and `--output_folder_path`.

### Running a model via ExecuTorch's android demo-app

@@ -252,8 +295,8 @@ An Android demo-app using Qualcomm AI Engine Direct Backend can be found in

## What is coming?

- - [An example using quantized mobilebert](https://github.com/pytorch/executorch/pull/1043) to solve multi-class text classification.
- - More Qualcomm AI Engine Direct accelerators, e.g., GPU.
+ - [llama2 and llama3](https://github.com/pytorch/executorch/pull/4030). Note that at the moment of writing, we still suffer from a quantization issue in the llama2-7B and llama3-8B cases. Only storiesllama works well.
+ - We will support pre-compiled binaries from [Qualcomm AI Hub](https://aihub.qualcomm.com/).
## FAQ

diff --git a/examples/qualcomm/README.md b/examples/qualcomm/README.md
index d41ad80ecfb..e2ae7406cd0 100644
--- a/examples/qualcomm/README.md
+++ b/examples/qualcomm/README.md
@@ -10,13 +10,13 @@ Here are some general information and limitations.

Please finish tutorial [Setting up executorch](https://pytorch.org/executorch/stable/getting-started-setup).

-Please finish [setup QNN backend](../../backends/qualcomm/setup.md).
+Please finish [setup QNN backend](../../docs/source/build-run-qualcomm-ai-engine-direct-backend.md).

## Environment

Please set up `QNN_SDK_ROOT` environment variable.
Note that this version should be exactly the same as the one used to build the QNN backend.
-Please check [setup](../../backends/qualcomm/setup.md).
+Please check [setup](../../docs/source/build-run-qualcomm-ai-engine-direct-backend.md).

Please set up `LD_LIBRARY_PATH` to `$QNN_SDK_ROOT/lib/x86_64-linux-clang`.
Or, you could put QNN libraries to default search path of the dynamic linker.

From 0d1443eed11faedc4bd4ebd8df04550bc62fa7c3 Mon Sep 17 00:00:00 2001
From: chiwwang
Date: Thu, 8 Aug 2024 16:17:23 +0800
Subject: [PATCH 2/4] Address feedback. Add emulator section. Add supported
 model list

---
 backends/qualcomm/scripts/build.sh            |  4 +-
 backends/qualcomm/setup.md                    |  1 -
 backends/qualcomm/tests/utils.py              |  2 +-
 ...d-run-qualcomm-ai-engine-direct-backend.md | 77 ++++++++++++++---
 examples/qualcomm/README.md                   |  4 +-
 examples/qualcomm/llama2/README.md            |  4 +-
 examples/qualcomm/scripts/utils.py            |  2 +-
 7 files changed, 76 insertions(+), 18 deletions(-)

diff --git a/backends/qualcomm/scripts/build.sh b/backends/qualcomm/scripts/build.sh
index 9576da8dea1..fc862f56c9f 100755
--- a/backends/qualcomm/scripts/build.sh
+++ b/backends/qualcomm/scripts/build.sh
@@ -25,9 +25,9 @@ usage() {
[ "$1" = -h ] && usage

BUILD_X86_64="true"
-CMAKE_X86_64="build_x86_64"
+CMAKE_X86_64="cmake-out"
BUILD_AARCH64="true"
-CMAKE_AARCH64="build_android"
+CMAKE_AARCH64="cmake-out-android"
CLEAN="true"
BUILD_TYPE="Debug"

diff --git a/backends/qualcomm/setup.md b/backends/qualcomm/setup.md
index b3bac83771d..37d8e04c210 100644
--- a/backends/qualcomm/setup.md
+++ b/backends/qualcomm/setup.md
@@ -5,4 +5,3 @@ Please refer to [Building and Running ExecuTorch with Qualcomm AI Engine Direct
That is a tutorial for building and running the Qualcomm AI Engine Direct backend,
including compiling a model on an x64 host and running inference on an
Android device.
-
diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
index ef0ac0f202f..b6e7d3540ca 100644
--- a/backends/qualcomm/tests/utils.py
+++ b/backends/qualcomm/tests/utils.py
@@ -231,7 +231,7 @@ def validate_profile():
        qnn_sdk = os.environ.get("QNN_SDK_ROOT", None)
        assert qnn_sdk, "QNN_SDK_ROOT was not found in environment variable"

-        build_path = "build_x86_64"
+        build_path = "cmake-out"
        cmds = [
            # export LD_LIBRARY_PATH to QNN_SDK_ROOT
            f"export LD_LIBRARY_PATH={qnn_sdk}/lib/{target}/:{self.executorch_root}/{build_path}/lib && "
diff --git a/docs/source/build-run-qualcomm-ai-engine-direct-backend.md b/docs/source/build-run-qualcomm-ai-engine-direct-backend.md
index 9623a87bcc4..3d0a67153e3 100644
--- a/docs/source/build-run-qualcomm-ai-engine-direct-backend.md
+++ b/docs/source/build-run-qualcomm-ai-engine-direct-backend.md
@@ -126,8 +126,8 @@ Python APIs on x64 are required to compile models to Qualcomm AI Engine Direct b

```bash
cd $EXECUTORCH_ROOT
-mkdir build_x86_64
-cd build_x86_64
+mkdir cmake-out
+cd cmake-out
# Note that the below command might change.
# Please refer to the above build.sh for the latest working commands.
cmake .. \
  -DCMAKE_INSTALL_PREFIX=$PWD \
  -DEXECUTORCH_BUILD_QNN=ON \
  -DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
  -DEXECUTORCH_BUILD_SDK=ON \
  -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
  -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
  -DPYTHON_EXECUTABLE=python3 \
  -DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF

-cmake --build $PWD -t "PyQnnManagerAdaptor" "PyQnnWrapperAdaptor" -j8
+# nproc is used to detect the number of available CPUs.
+# If it is not applicable, please feel free to use the number you want.
+cmake --build $PWD --target "PyQnnManagerAdaptor" "PyQnnWrapperAdaptor" -j$(nproc)

# install Python APIs to correct import path
# The filename might vary depending on your Python and host version.
@@ -156,8 +158,8 @@ Commands to build `qnn_executor_runner` for Android:

```bash
cd $EXECUTORCH_ROOT
-mkdir build_android
-cd build_android
+mkdir cmake-out-android
+cd cmake-out-android
# build executorch & qnn_executorch_backend
cmake .. \
  -DCMAKE_INSTALL_PREFIX=$PWD \
@@ -171,7 +173,9 @@ cmake .. \
  -DANDROID_ABI='arm64-v8a' \
  -DANDROID_NATIVE_API_LEVEL=23

-cmake --build $PWD -j16 --target install
+# nproc is used to detect the number of available CPUs.
+# If it is not applicable, please feel free to use the number you want.
+cmake --build $PWD --target install -j$(nproc)

cmake ../examples/qualcomm \
  -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
@@ -182,10 +186,10 @@ cmake ../examples/qualcomm \
  -DPYTHON_EXECUTABLE=python3 \
  -Bexamples/qualcomm

-cmake --build examples/qualcomm -j16
+cmake --build examples/qualcomm -j$(nproc)

# qnn_executor_runner can be found under examples/qualcomm
-# The full path is $EXECUTORCH_ROOT/build_android/examples/qualcomm/qnn_executor_runner
+# The full path is $EXECUTORCH_ROOT/cmake-out-android/examples/qualcomm/qnn_executor_runner
ls examples/qualcomm
```
@@ -228,6 +232,58 @@ output output output ([getitem_

The compiled model is `./deeplab_v3/dlv3_qnn.pte`.

+### Test model inference on QNN HTP emulator
+
+We can test model inference on the HTP emulator before deploying the model to a device.
+
+Let's build `qnn_executor_runner` for an x64 host:
+```bash
+# assuming the AOT component is built.
+cd $EXECUTORCH_ROOT/cmake-out
+cmake ../examples/qualcomm \
+  -DCMAKE_PREFIX_PATH="$PWD/lib/cmake/ExecuTorch;$PWD/third-party/gflags;" \
+  -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
+  -DPYTHON_EXECUTABLE=python3 \
+  -Bexamples/qualcomm
+
+cmake --build examples/qualcomm -j$(nproc)
+
+# qnn_executor_runner can be found under examples/qualcomm
+# The full path is $EXECUTORCH_ROOT/cmake-out/examples/qualcomm/qnn_executor_runner
+ls examples/qualcomm/
+```
+
+To run the HTP emulator, the dynamic linker needs to access QNN libraries and `libqnn_executorch_backend.so`.
+We add the below two paths to the `LD_LIBRARY_PATH` environment variable:
+ 1. `$QNN_SDK_ROOT/lib/x86_64-linux-clang/`
+ 2. `$EXECUTORCH_ROOT/cmake-out/lib/`
+
+The first path is for the QNN libraries, including the HTP emulator. It was already configured in the AOT compilation section.
+
+The second path is for `libqnn_executorch_backend.so`.
+
+So, we can run `./deeplab_v3/dlv3_qnn.pte` by:
+```bash
+cd $EXECUTORCH_ROOT/cmake-out
+export LD_LIBRARY_PATH=$EXECUTORCH_ROOT/cmake-out/lib/:$LD_LIBRARY_PATH
+examples/qualcomm/qnn_executor_runner --model_path ../deeplab_v3/dlv3_qnn.pte
+```
+
+We should see output like the below. Note that the emulator can take some time to finish.
+```bash
+I 00:00:00.354662 executorch:qnn_executor_runner.cpp:213] Method loaded.
+I 00:00:00.356460 executorch:qnn_executor_runner.cpp:261] ignoring error from set_output_data_ptr(): 0x2
+I 00:00:00.357991 executorch:qnn_executor_runner.cpp:261] ignoring error from set_output_data_ptr(): 0x2
+I 00:00:00.357996 executorch:qnn_executor_runner.cpp:265] Inputs prepared.
+
+I 00:01:09.328144 executorch:qnn_executor_runner.cpp:414] Model executed successfully.
+I 00:01:09.328159 executorch:qnn_executor_runner.cpp:421] Write etdump to etdump.etdp, Size = 424
+[INFO] [Qnn ExecuTorch]: Destroy Qnn backend parameters
+[INFO] [Qnn ExecuTorch]: Destroy Qnn context
+[INFO] [Qnn ExecuTorch]: Destroy Qnn device
+[INFO] [Qnn ExecuTorch]: Destroy Qnn backend
+```
+
### Run model inference on an Android smartphone with Qualcomm SoCs

***Step 1***. We need to push required QNN libraries to the device.
@@ -276,7 +332,7 @@ I 00:00:00.364875 executorch:qnn_executor_runner.cpp:425] Write etdump to etdump

The model is merely executed. If we want to feed real inputs and get model outputs, we can use
```bash
-cd <path_to_executorch>
+cd $EXECUTORCH_ROOT
python -m examples.qualcomm.scripts.deeplab_v3 -b build_android -m SM8550 --download -s <device_serial>
```
The `<device_serial>` can be found by the `adb devices` command.
@@ -292,6 +348,9 @@ The model, inputs, and output location are passed to `qnn_executor_runner` by

An Android demo-app using Qualcomm AI Engine Direct Backend can be found in
`examples`. Please refer to android demo app [tutorial](https://pytorch.org/executorch/stable/demo-apps-android.html).

+## Supported model list
+
+Please refer to `$EXECUTORCH_ROOT/examples/qualcomm/scripts/` and `$EXECUTORCH_ROOT/examples/qualcomm/oss_scripts/` for the list of supported models.

## What is coming?

diff --git a/examples/qualcomm/README.md b/examples/qualcomm/README.md
index e2ae7406cd0..dd5c614fd75 100644
--- a/examples/qualcomm/README.md
+++ b/examples/qualcomm/README.md
@@ -39,12 +39,12 @@ cd $EXECUTORCH_ROOT/examples/qualcomm/scripts

#### For MobileNet_v2
```bash
-python mobilenet_v2.py -s <device_serial> -m "SM8550" -b path/to/build_android/ -d /path/to/imagenet-mini/val
+python mobilenet_v2.py -s <device_serial> -m "SM8550" -b path/to/cmake-out-android/ -d /path/to/imagenet-mini/val
```

#### For DeepLab_v3
```bash
-python deeplab_v3.py -s <device_serial> -m "SM8550" -b path/to/build_android/ --download
+python deeplab_v3.py -s <device_serial> -m "SM8550" -b path/to/cmake-out-android/ --download
```

## Additional Dependency
diff --git a/examples/qualcomm/llama2/README.md b/examples/qualcomm/llama2/README.md
index 4670f742514..2186d98745d 100644
--- a/examples/qualcomm/llama2/README.md
+++ b/examples/qualcomm/llama2/README.md
@@ -34,7 +34,7 @@ echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps":
Default example generates the story based on the given prompt, "Once".
```bash
# 16a4w quant:
-python examples/qualcomm/llama2/llama.py -a ${ARTIFACTS} -b build_android -s ${SERIAL_NUM} -m ${SOC_MODEL} --ptq 16a4w --checkpoint stories110M --params params.json --tokenizer_model tokenizer.model --tokenizer_bin tokenizer.bin --prompt "Once"
+python examples/qualcomm/llama2/llama.py -a ${ARTIFACTS} -b cmake-out-android -s ${SERIAL_NUM} -m ${SOC_MODEL} --ptq 16a4w --checkpoint stories110M --params params.json --tokenizer_model tokenizer.model --tokenizer_bin tokenizer.bin --prompt "Once"
```

#### (Note) Customized PTQ data set
@@ -63,5 +63,5 @@ python -m examples.models.llama2.tokenizer.tokenizer -t tokenizer.model -o token
#### Step3: Run default examples
```bash
# AIHUB_CONTEXT_BINARIES: ${PATH_TO_AIHUB_WORKSPACE}/build/llama_v2_7b_chat_quantized
-python examples/qualcomm/llama2/llama_qaihub.py -a ${ARTIFACTS} -b build_android -s ${SERIAL_NUM} -m ${SOC_MODEL} --context_binaries ${AIHUB_CONTEXT_BINARIES} --tokenizer_bin tokenizer.bin --prompt "What is Python?"
+python examples/qualcomm/llama2/llama_qaihub.py -a ${ARTIFACTS} -b cmake-out-android -s ${SERIAL_NUM} -m ${SOC_MODEL} --context_binaries ${AIHUB_CONTEXT_BINARIES} --tokenizer_bin tokenizer.bin --prompt "What is Python?"
```
diff --git a/examples/qualcomm/scripts/utils.py b/examples/qualcomm/scripts/utils.py
index 8211dc45810..9c33cf4445b 100755
--- a/examples/qualcomm/scripts/utils.py
+++ b/examples/qualcomm/scripts/utils.py
@@ -336,7 +336,7 @@ def setup_common_args_and_variables():
    parser.add_argument(
        "-b",
        "--build_folder",
-        help="path to cmake binary directory for android, e.g., /path/to/build_android",
+        help="path to cmake binary directory for android, e.g., /path/to/cmake-out-android",
        type=str,
        required=True,
    )

From c899736758d5c0f19f0027d78128fc9cc69665dd Mon Sep 17 00:00:00 2001
From: chiwwang
Date: Fri, 9 Aug 2024 11:49:04 +0800
Subject: [PATCH 3/4] Refine build_folder and subprocess.run() in x64 UT

---
 backends/qualcomm/tests/utils.py | 44 ++++++++++++++++++++++----------
 1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
index b6e7d3540ca..d069a4506b0 100644
--- a/backends/qualcomm/tests/utils.py
+++ b/backends/qualcomm/tests/utils.py
@@ -231,25 +231,43 @@ def validate_profile():
        qnn_sdk = os.environ.get("QNN_SDK_ROOT", None)
        assert qnn_sdk, "QNN_SDK_ROOT was not found in environment variable"

-        build_path = "cmake-out"
-        cmds = [
-            # export LD_LIBRARY_PATH to QNN_SDK_ROOT
-            f"export LD_LIBRARY_PATH={qnn_sdk}/lib/{target}/:{self.executorch_root}/{build_path}/lib && "
+        build_folder = self.build_folder
+        if os.path.isabs(self.build_folder):
+            # the user gave an absolute path; use it as-is
+            pass
+        else:
+            # otherwise, assume the path is relative to the current working directory
+            build_folder = os.path.join(os.getcwd(), self.build_folder)
+
+        cmd = [
            # qnn_executor_runner
-            f"{self.executorch_root}/{build_path}/examples/qualcomm/qnn_executor_runner",
-            f"--model_path {pte_fname}",
-            f"--input_list_path {tmp_dir}/input_list.txt",
-            f"--output_folder_path {output_dir}",
+            f"{build_folder}/examples/qualcomm/qnn_executor_runner",
+            "--model_path",
+            f"{pte_fname}",
+            "--input_list_path",
+            f"{tmp_dir}/input_list.txt",
+            "--output_folder_path",
+            f"{output_dir}",
        ]

-        subprocess.run(
-            " ".join(cmds),
-            shell=True,
-            executable="/bin/bash",
-            capture_output=True,
+        env = dict(os.environ)
+        env["LD_LIBRARY_PATH"] = f"{qnn_sdk}/lib/{target}/:{build_folder}/lib"
+        proc = subprocess.run(
+            cmd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            env=env,
            cwd=tmp_dir,
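+            # stderr is folded into stdout above, so the assertion below can
+            # print one combined log on failure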
        )
+        self.assertEqual(
+            proc.returncode,
+            0,
+            f"The process running qnn_executorch_runner reutrn {proc.returncode}, "
+            "STDOUT=\n"
+            f"{proc.stdout.decode('utf-8')}",
+        )
+
+        # Verify the outputs
        post_process()
        self._assert_outputs_equal(outputs, ref_outputs)

From dbb452f8f0a383bc6c918742ac168f604ff03419 Mon Sep 17 00:00:00 2001
From: chiwwang
Date: Fri, 9 Aug 2024 13:11:11 +0800
Subject: [PATCH 4/4] oops, fix typo

---
 backends/qualcomm/tests/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
index d069a4506b0..7a7c6ee3d93 100644
--- a/backends/qualcomm/tests/utils.py
+++ b/backends/qualcomm/tests/utils.py
@@ -263,7 +263,7 @@ def validate_profile():
        self.assertEqual(
            proc.returncode,
            0,
-            f"The process running qnn_executorch_runner reutrn {proc.returncode}, "
+            f"The process running qnn_executorch_runner returned {proc.returncode}, "
            "STDOUT=\n"
            f"{proc.stdout.decode('utf-8')}",
        )
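For reference, the refactored test helper above launches `qnn_executor_runner` directly instead of going through a shell string. A rough shell equivalent of the command and environment it assembles -- a sketch only, with `model.pte`, `input_list.txt`, and `outputs` used as illustrative placeholders:

```bash
# Mirrors env["LD_LIBRARY_PATH"] and cmd from the refactored helper,
# for an x64 target ({target} = x86_64-linux-clang).
export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/:$EXECUTORCH_ROOT/cmake-out/lib
$EXECUTORCH_ROOT/cmake-out/examples/qualcomm/qnn_executor_runner \
    --model_path ./model.pte \
    --input_list_path ./input_list.txt \
    --output_folder_path ./outputs
```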