From caa484ffef11bf776125e28e51c5d5715b442fdf Mon Sep 17 00:00:00 2001
From: chiwwang
Date: Wed, 7 Aug 2024 17:23:23 +0800
Subject: [PATCH 1/4] Qualcomm AI Engine Direct -- update documents

1. Updated QNN download links.
2. Unified setup.md and build-run-qualcomm-ai-engine-direct-backend.md
3. Fixed build commands in build-run-qualcomm-ai-engine-direct-backend.md
---
 backends/qualcomm/README.md                   |   9 +-
 backends/qualcomm/scripts/build.sh            |   4 +-
 backends/qualcomm/setup.md                    | 187 +-----------------
 ...d-run-qualcomm-ai-engine-direct-backend.md | 131 +++++++-----
 examples/qualcomm/README.md                   |   4 +-
 5 files changed, 100 insertions(+), 235 deletions(-)

diff --git a/backends/qualcomm/README.md b/backends/qualcomm/README.md
index 618a1f3e321..8fa87caf899 100644
--- a/backends/qualcomm/README.md
+++ b/backends/qualcomm/README.md
@@ -1,12 +1,14 @@
# Qualcomm AI Engine Direct Backend

Disclaimer: At present, we do not offer any backward compatibility guarantees
-for any APIs. We are currently in a pre-alpha development phase, and as such,
+for any APIs. We are currently in a development phase, and as such,
we reserve the right to modify interfaces and implementations.

This backend is implemented on top of
[Qualcomm AI Engine Direct SDK](https://developer.qualcomm.com/software/qualcomm-ai-engine-direct-sdk).
-Please follow [tutorial](https://pytorch.org/executorch/stable/build-run-qualcomm-ai-engine-direct-backend.html) to setup environment, build, and run executorch models by this backend (Qualcomm AI Engine Direct is also referred to as QNN in the source and documentation).
+Please follow the [tutorial](../../docs/source/build-run-qualcomm-ai-engine-direct-backend.md) to set up the environment, build, and run ExecuTorch models with this backend (Qualcomm AI Engine Direct is also referred to as QNN in the source and documentation).
+
+A website version of the tutorial is [here](https://pytorch.org/executorch/stable/build-run-qualcomm-ai-engine-direct-backend.html).

## Delegate Options

@@ -29,7 +31,7 @@ Add SoC model into QcomChipset enum in [schema](./serialization/schema.fbs) and
Insert new SoC information into _soc_info_table in [qnn_compile_spec_schema](./serialization/qnn_compile_spec_schema.py).

#### Step 3: Recompile the .pte file
-Follow [setup](setup.md) to setup environment and build runtime with new schema header.
+Follow [setup](../../docs/source/build-run-qualcomm-ai-engine-direct-backend.md) to set up the environment and build the runtime with the new schema header.

### Supported Inference Type
- Quantized

@@ -46,6 +48,7 @@ backends/qualcomm
├── partition # QNN Partitioner (AoT Part).
├── passes # Various passes helping lower models to QNN backend (AoT Part).
├── python # Places to put pybind artifacts for accessing QNN APIs, structures, etc (AoT Part).
+├── quantizer # QNN Quantizer
├── runtime # Here is QNN runtime responsible for compiling a model on x64.
|   |       # Meanwhile, this is also the runtime responsible for executing compiled
|   |       # models on a device.
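For quick orientation, the end-to-end flow behind the tutorial links above can be condensed as below. This is a minimal sketch assembled from the tutorial's own commands; the paths are placeholders for your installation, and the authoritative steps live in the tutorial and in `backends/qualcomm/scripts/build.sh`:

```bash
# Conventions used by the tutorial -- adjust to your setup.
export QNN_SDK_ROOT=/opt/qcom/aistack/qnn
export ANDROID_NDK_ROOT=/path/to/android_ndk
export EXECUTORCH_ROOT=/path/to/executorch

# Let the x64 dynamic linker find the QNN libraries, and make the
# ExecuTorch Python APIs importable.
export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/:$LD_LIBRARY_PATH
export PYTHONPATH=$EXECUTORCH_ROOT/..

# Build the AOT python bindings and the Android runner, then compile an
# example model (MobileNetV2) into a .pte file.
cd $EXECUTORCH_ROOT
./backends/qualcomm/scripts/build.sh
python -m examples.qualcomm.scripts.export_example --model_name mv2
```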
diff --git a/backends/qualcomm/scripts/build.sh b/backends/qualcomm/scripts/build.sh
index d6b1da62fcd..9576da8dea1 100755
--- a/backends/qualcomm/scripts/build.sh
+++ b/backends/qualcomm/scripts/build.sh
@@ -16,7 +16,7 @@ usage() {
  echo "Usage: Build the aarch64 version of executor runner or the python interface of Qnn Manager"
  echo "First, you need to set the environment variable for QNN_SDK_ROOT"
  echo ", and if you want to build the aarch64 version of executor runner"
-  echo ", you need to set ANDROID_NDK_ROOT"
+  echo ", you need to export ANDROID_NDK_ROOT=/path/to/android_ndkXX"
  echo "e.g.: executorch$ ./backends/qualcomm/scripts/build.sh --skip_x86_64"
  exit 1
}
@@ -59,7 +59,7 @@ PRJ_ROOT="$( cd "$(dirname "$0")/../../.." ; pwd -P)"

if [ "$BUILD_AARCH64" = true ]; then
    if [[ -z ${ANDROID_NDK_ROOT} ]]; then
-        echo "Please export ANDROID_NDK_ROOT=/path/to/android_ndk"
+        echo "Please export ANDROID_NDK_ROOT=/path/to/android_ndkXX"
        exit -1
    fi
    BUILD_ROOT=$PRJ_ROOT/$CMAKE_AARCH64
diff --git a/backends/qualcomm/setup.md b/backends/qualcomm/setup.md
index b4b0f2ea72d..b3bac83771d 100644
--- a/backends/qualcomm/setup.md
+++ b/backends/qualcomm/setup.md
@@ -1,189 +1,8 @@
# Setting up QNN Backend

-This is a tutorial for building and running Qualcomm AI Engine Direct backend,
+Please refer to [Building and Running ExecuTorch with Qualcomm AI Engine Direct Backend](../../docs/source/build-run-qualcomm-ai-engine-direct-backend.md).
+
+That is a tutorial for building and running the Qualcomm AI Engine Direct backend,
including compiling a model on an x64 host and running inference on an
Android device.
-
-## Prerequisite
-
-Please finish tutorial [Setting up executorch](../../docs/source/getting-started-setup.md).
-
-
-## Conventions
-
-`$QNN_SDK_ROOT` refers to the root of Qualcomm AI Engine Direct SDK,
-i.e., the directory containing `QNN_README.txt`.
-
-`$ANDROID_NDK_ROOT` refers to the root of Android NDK.
-
-`$EXECUTORCH_ROOT` refers to the root of executorch git repository.
-
-
-## Environment Setup
-
-### Download Qualcomm AI Engine Direct SDK
-
-Navigate to [Qualcomm AI Engine Direct SDK](https://developer.qualcomm.com/software/qualcomm-ai-engine-direct-sdk) and follow the download button.
-
-You might need to apply for a Qualcomm account to download the SDK.
-
-After logging in, search Qualcomm AI Stack at the *Tool* panel.
-You can find Qualcomm AI Engine Direct SDK under the AI Stack group.
-
-Please download the Linux version, and follow instructions on the page to
-extract the file.
-
-The SDK should be installed to somewhere `/opt/qcom/aistack/qnn` by default.
-
-### Download Android NDK
-
-Please navigate to [Android NDK](https://developer.android.com/ndk) and download
-a version of NDK. We recommend LTS version, currently r25c.
-
-### Setup environment variables
-
-We need to make sure Qualcomm AI Engine Direct libraries can be found by
-the dynamic linker on x64. Hence we set `LD_LIBRARY_PATH`. In production,
-we recommend users to put libraries in default search path or use `rpath`
-to indicate the location of libraries.
-
-Further, we set up `$PYTHONPATH` because it's easier to develop and import executorch Python APIs. Users might also build and install executorch package as usual python package.
-
-```bash
-export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/:$LD_LIBRARY_PATH
-export PYTHONPATH=$EXECUTORCH_ROOT/..
-``` - -Note: Since we set `PYTHONPATH`, we may have issue with finding `program.fbs` -and `scalar_type.fbs` when we export a model, because they are installed into -`pip-out` directory with the same package name pattern. A workaround is that -we copy `$EXECUTORCH_ROOT/pip-out/lib.linux-x86_64-cpython-310/executorch/exir/_serialize/program.fbs` -and `$EXECUTORCH_ROOT/pip-out/lib.linux-x86_64-cpython-310/executorch/exir/_serialize/scalar_type.fbs` -to `$EXECUTORCH_ROOT/exir/_serialize/`. - - -## End to End Inference - -### Step 1: Build Python APIs for AOT compilation on x64 - -Python APIs on x64 are required to compile models to Qualcomm AI Engine Direct binary. -Make sure `buck2` is under a directory in `PATH`. - -```bash -cd $EXECUTORCH_ROOT -mkdir build_x86_64 -cd build_x86_64 -cmake .. -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=${QNN_SDK_ROOT} -cmake --build . -t "PyQnnManagerAdaptor" "PyQnnWrapperAdaptor" -j8 - -# install Python APIs to correct import path -# The filename might vary depending on your Python and host version. -cp -f backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so $EXECUTORCH_ROOT/backends/qualcomm/python -cp -f backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so $EXECUTORCH_ROOT/backends/qualcomm/python -``` - - -### Step 2: Build `qnn_executor_runner` for Android - -`qnn_executor_runner` is an executable running the compiled model. - -You might want to ensure the correct `flatc`. `flatc` can be built along with the above step. For example, we can find `flatc` in `build_x86_64/third-party/flatbuffers/`. - -We can prepend `$EXECUTORCH_ROOT/build_x86_64/third-party/flatbuffers` to `PATH`. Then below cross-compiling can find the correct flatbuffer compiler. - -Commands to build `qnn_executor_runner` for Android: - -```bash -cd $EXECUTORCH_ROOT -mkdir build_android -cd build_android -# build executorch & qnn_executorch_backend -cmake .. \ - -DCMAKE_INSTALL_PREFIX=$PWD \ - -DEXECUTORCH_BUILD_QNN=ON \ - -DEXECUTORCH_BUILD_SDK=ON \ - -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ - -DQNN_SDK_ROOT=$QNN_SDK_ROOT \ - -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \ - -DANDROID_ABI='arm64-v8a' \ - -DANDROID_NATIVE_API_LEVEL=23 \ - -B$PWD - -cmake --build $PWD -j16 --target install - -cmake ../examples/qualcomm \ - -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \ - -DANDROID_ABI='arm64-v8a' \ - -DANDROID_NATIVE_API_LEVEL=23 \ - -DCMAKE_PREFIX_PATH="$PWD/lib/cmake/ExecuTorch;$PWD/third-party/gflags;" \ - -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \ - -Bexamples/qualcomm - -cmake --build examples/qualcomm -j16 -``` -**Note:** If you want to build for release, add `-DCMAKE_BUILD_TYPE=Release` to the `cmake` command options. - -You can find `qnn_executor_runner` under `build_android/examples/qualcomm/`. - - -### Step 3: Compile a model - -``` -python -m examples.qualcomm.scripts.export_example --model_name mv2 -``` - -Then the generated `mv2.pte` can be run on the device by -`build_android/backends/qualcomm/qnn_executor_runner` with Qualcomm AI Engine -Direct backend. - -[**Note**] To get proper accuracy, please apply calibrations with representative -dataset, which could be learnt more from examples under `examples/qualcomm/`. - - -### Step 4: Model Inference - -The backend rely on Qualcomm AI Engine Direct SDK libraries. - -You might want to follow docs in Qualcomm AI Engine Direct SDK to setup the device environment. 
-Or see below for a quick setup for testing:
-
-```bash
-# make sure you have write-permission on below path.
-DEVICE_DIR=/data/local/tmp/executorch_test/
-adb shell "mkdir -p ${DEVICE_DIR}"
-adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtp.so ${DEVICE_DIR}
-adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV69Stub.so ${DEVICE_DIR}
-adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV73Stub.so ${DEVICE_DIR}
-adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV75Stub.so ${DEVICE_DIR}
-adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnSystem.so ${DEVICE_DIR}
-adb push ${QNN_SDK_ROOT}/lib/hexagon-v69/unsigned/libQnnHtpV69Skel.so ${DEVICE_DIR}
-adb push ${QNN_SDK_ROOT}/lib/hexagon-v73/unsigned/libQnnHtpV73Skel.so ${DEVICE_DIR}
-adb push ${QNN_SDK_ROOT}/lib/hexagon-v75/unsigned/libQnnHtpV75Skel.so ${DEVICE_DIR}
-```
-
-We also need to indicate dynamic linkers on Android and Hexagon where to find these libraries
-by setting `ADSP_LIBRARY_PATH` and `LD_LIBRARY_PATH`.
-
-So, we can run `qnn_executor_runner` like
-```bash
-adb push mv2.pte ${DEVICE_DIR}
-adb push ${EXECUTORCH_ROOT}/build_android/examples/qualcomm/qnn_executor_runner ${DEVICE_DIR}
-adb shell "cd ${DEVICE_DIR} \
-           && export LD_LIBRARY_PATH=${DEVICE_DIR} \
-           && export ADSP_LIBRARY_PATH=${DEVICE_DIR} \
-           && ./qnn_executor_runner --model_path ./mv2_qnn.pte"
-```
-
-You should see the following result.
-Note that no output file will be generated in this example.
-```
-I 00:00:00.133366 executorch:qnn_executor_runner.cpp:156] Method loaded.
-I 00:00:00.133590 executorch:util.h:104] input already initialized, refilling.
-I 00:00:00.135162 executorch:qnn_executor_runner.cpp:161] Inputs prepared.
-I 00:00:00.136768 executorch:qnn_executor_runner.cpp:278] Model executed successfully.
-[INFO][Qnn ExecuTorch] Destroy Qnn backend parameters
-[INFO][Qnn ExecuTorch] Destroy Qnn context
-[INFO][Qnn ExecuTorch] Destroy Qnn device
-[INFO][Qnn ExecuTorch] Destroy Qnn backend
-```
diff --git a/docs/source/build-run-qualcomm-ai-engine-direct-backend.md b/docs/source/build-run-qualcomm-ai-engine-direct-backend.md
index ff5cb51595c..9623a87bcc4 100644
--- a/docs/source/build-run-qualcomm-ai-engine-direct-backend.md
+++ b/docs/source/build-run-qualcomm-ai-engine-direct-backend.md
@@ -5,6 +5,7 @@ build ExecuTorch for Qualcomm AI Engine Direct and running a model on it.
Qualcomm AI Engine Direct is also referred to as QNN in the source and documentation.

+
::::{grid} 2
:::{grid-item-card}  What you will learn in this tutorial:
@@ -35,11 +36,10 @@ Currently, this ExecuTorch Backend can delegate AI computations to Hexagon proce

### Host OS

-The Linux host operating system that QNN Backend is verified with is Ubuntu 20.04 LTS x64.
-
-However, because Qualcomm Package Manager(QPM) used to download necessary SDK (see below)
-only support Ubuntu, we recommend users to exercise this tutorial exacly
-on Ubuntu 20.04.
+As of this update, the Linux host operating system that the QNN backend is verified
+with is Ubuntu 22.04 LTS x64.
+In general, we verify the backend on the same OS version that the QNN SDK itself is
+verified with; that version is documented in the QNN SDK.

### Hardware:
You will need an Android smartphone with adb connected, running on one of the below Qualcomm SoCs:
@@ -53,20 +53,18 @@ This example is verified with SM8550 and SM8450.
### Software:

 - Follow ExecuTorch recommended Python version.
- - A compiler to compile AOT parts. GCC 9.4 come with Ubuntu20.04 is verified.
- - [Android NDK](https://developer.android.com/ndk).
This example is verified with NDK 25c.
+ - A compiler to compile AOT parts, e.g., the GCC compiler that comes with Ubuntu LTS.
+ - [Android NDK](https://developer.android.com/ndk). This example is verified with NDK 26c.
  - [Qualcomm AI Engine Direct SDK](https://developer.qualcomm.com/software/qualcomm-ai-engine-direct-sdk)
-   - Follow the download button. After logging in, search Qualcomm AI Stack at the *Tool* panel.
-   - You can find Qualcomm AI Engine Direct SDK under the AI Stack group.
-   - Please download the Linux version, and follow instructions on the page to extract the file.
-   - The SDK should be installed to somewhere `/opt/qcom/aistack/qnn` by default.
-   - It's also OK to place it somewhere else. We don't have assumption about the absolute path of the SDK.
-   - This example is verified with version 2.12.0.
+   - Click the "Get Software" button to download a version of QNN SDK.
+   - However, at the moment of updating this tutorial, the above website does not provide a QNN SDK newer than 2.22.6.
+   - Below are public links to various QNN versions. We hope they become directly discoverable on the website soon.
+     - [QNN 2.25.0](https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.25.0.240728.zip)
+     - [QNN 2.24.0](https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.24.0.240626.zip)
+     - [QNN 2.23.0](https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.23.0.24.06.24.zip)

The directory with installed Qualcomm AI Engine Direct SDK looks like:
```
-$ tree -L 1 /opt/qcom/aistack/qnn//
-/opt/qcom/aistack/qnn//
├── benchmarks
├── bin
├── docs
├── examples
├── include
├── lib
├── LICENSE.pdf
+├── NOTICE.txt
+├── NOTICE_WINDOWS.txt
├── QNN_NOTICE.txt
├── QNN_README.txt
├── QNN_ReleaseNotes.txt
-├── share
-└── Uninstall
+├── ReleaseNotes.txt
+├── ReleaseNotesWindows.txt
+├── sdk.yaml
+└── share
```


@@ -89,7 +91,7 @@ $ tree -L 1 /opt/qcom/aistack/qnn//
`$QNN_SDK_ROOT` refers to the root of Qualcomm AI Engine Direct SDK,
i.e., the directory containing `QNN_README.txt`.

-`$ANDROID_NDK` refers to the root of Android NDK.
+`$ANDROID_NDK_ROOT` refers to the root of Android NDK.

`$EXECUTORCH_ROOT` refers to the root of executorch git repository.

@@ -107,7 +109,16 @@ export PYTHONPATH=$EXECUTORCH_ROOT/..

## Build

-An example script for below building instructions is [here](https://github.com/pytorch/executorch/blob/main/backends/qualcomm/scripts/build.sh).
+An example script for the below building instructions is [here](https://github.com/pytorch/executorch/blob/main/backends/qualcomm/scripts/build.sh).
+We recommend using the script because the ExecuTorch build commands can change from time to time.
+The above script is actively used and is updated more frequently than this tutorial.
+Example usage:
+```bash
+cd $EXECUTORCH_ROOT
+./backends/qualcomm/scripts/build.sh
+# or
+./backends/qualcomm/scripts/build.sh --release
+```

### AOT (Ahead-of-time) components:

@@ -115,14 +126,21 @@ Python APIs on x64 are required to compile models to Qualcomm AI Engine Direct b

```bash
cd $EXECUTORCH_ROOT
-# Workaround for fbs files in exir/_serialize
-cp schema/program.fbs exir/_serialize/program.fbs
-cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
-
mkdir build_x86_64
cd build_x86_64
-cmake .. -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=${QNN_SDK_ROOT}
-cmake --build . -t "PyQnnManagerAdaptor" "PyQnnWrapperAdaptor" -j8
+# Note that the below command might change.
+# Please refer to the above build.sh for the latest working commands.
+cmake .. \
+  -DCMAKE_INSTALL_PREFIX=$PWD \
+  -DEXECUTORCH_BUILD_QNN=ON \
+  -DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
+  -DEXECUTORCH_BUILD_SDK=ON \
+  -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+  -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+  -DPYTHON_EXECUTABLE=python3 \
+  -DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF
+
+cmake --build $PWD -t "PyQnnManagerAdaptor" "PyQnnWrapperAdaptor" -j8

# install Python APIs to correct import path
# The filename might vary depending on your Python and host version.
@@ -143,43 +161,50 @@ cd build_android
# build executorch & qnn_executorch_backend
cmake .. \
    -DCMAKE_INSTALL_PREFIX=$PWD \
-    -DEXECUTORCH_BUILD_SDK=ON \
    -DEXECUTORCH_BUILD_QNN=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
    -DQNN_SDK_ROOT=$QNN_SDK_ROOT \
-    -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
+    -DEXECUTORCH_BUILD_SDK=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+    -DPYTHON_EXECUTABLE=python3 \
+    -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
    -DANDROID_ABI='arm64-v8a' \
-    -DANDROID_NATIVE_API_LEVEL=23 \
-    -B$PWD
+    -DANDROID_NATIVE_API_LEVEL=23

cmake --build $PWD -j16 --target install

cmake ../examples/qualcomm \
-    -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
+    -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
    -DANDROID_ABI='arm64-v8a' \
    -DANDROID_NATIVE_API_LEVEL=23 \
    -DCMAKE_PREFIX_PATH="$PWD/lib/cmake/ExecuTorch;$PWD/third-party/gflags;" \
    -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
+    -DPYTHON_EXECUTABLE=python3 \
    -Bexamples/qualcomm

cmake --build examples/qualcomm -j16
+
+# qnn_executor_runner can be found under examples/qualcomm
+# The full path is $EXECUTORCH_ROOT/build_android/examples/qualcomm/qnn_executor_runner
+ls examples/qualcomm
```
**Note:** If you want to build for release, add `-DCMAKE_BUILD_TYPE=Release` to the `cmake` command options.

-You can find `qnn_executor_runner` under `build_android/examples/qualcomm/`.
-
-The build script is also available [here](https://github.com/pytorch/executorch/blob/main/backends/qualcomm/scripts/build.sh).

## Deploying and running on device

### AOT compile a model

-You can refer to [this script](https://github.com/pytorch/executorch/blob/main/examples/qualcomm/scripts/deeplab_v3.py) for the exact flow.
+Refer to [this script](https://github.com/pytorch/executorch/blob/main/examples/qualcomm/scripts/deeplab_v3.py) for the exact flow.
We use deeplab-v3-resnet101 as an example in this tutorial. Run the below commands to compile:

-```
+```bash
cd $EXECUTORCH_ROOT
+
+# Workaround for fbs files in exir/_serialize
+cp schema/program.fbs exir/_serialize/program.fbs
+cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
+
python -m examples.qualcomm.scripts.deeplab_v3 -b build_android -m SM8550 --compile_only --download
```
@@ -212,11 +237,13 @@ The compiled model is `./deeplab_v3/dlv3_qnn.pte`.
DEVICE_DIR=/data/local/tmp/executorch_qualcomm_tutorial/
adb shell "mkdir -p ${DEVICE_DIR}"
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtp.so ${DEVICE_DIR}
+adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnSystem.so ${DEVICE_DIR}
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV69Stub.so ${DEVICE_DIR}
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV73Stub.so ${DEVICE_DIR}
-adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnSystem.so ${DEVICE_DIR}
+adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV75Stub.so ${DEVICE_DIR}
adb push ${QNN_SDK_ROOT}/lib/hexagon-v69/unsigned/libQnnHtpV69Skel.so ${DEVICE_DIR}
adb push ${QNN_SDK_ROOT}/lib/hexagon-v73/unsigned/libQnnHtpV73Skel.so ${DEVICE_DIR}
+adb push ${QNN_SDK_ROOT}/lib/hexagon-v75/unsigned/libQnnHtpV75Skel.so ${DEVICE_DIR}
```

***Step 2***. We also need to indicate dynamic linkers on Android and Hexagon
@@ -236,12 +263,28 @@ adb shell "cd ${DEVICE_DIR} \

You should see something like below:
```
-I 00:00:01.835706 executorch:qnn_executor_runner.cpp:298] 100 inference took 1096.626000 ms, avg 10.966260 ms
-[INFO][Qnn ExecuTorch] Destroy Qnn backend parameters
-[INFO][Qnn ExecuTorch] Destroy Qnn context
-[INFO][Qnn ExecuTorch] Destroy Qnn device
-[INFO][Qnn ExecuTorch] Destroy Qnn backend
+I 00:00:00.257354 executorch:qnn_executor_runner.cpp:213] Method loaded.
+I 00:00:00.323502 executorch:qnn_executor_runner.cpp:262] ignoring error from set_output_data_ptr(): 0x2
+I 00:00:00.357496 executorch:qnn_executor_runner.cpp:262] ignoring error from set_output_data_ptr(): 0x2
+I 00:00:00.357555 executorch:qnn_executor_runner.cpp:265] Inputs prepared.
+I 00:00:00.364824 executorch:qnn_executor_runner.cpp:414] Model executed successfully.
+I 00:00:00.364875 executorch:qnn_executor_runner.cpp:425] Write etdump to etdump.etdp, Size = 424
+[INFO] [Qnn ExecuTorch]: Destroy Qnn backend parameters
+[INFO] [Qnn ExecuTorch]: Destroy Qnn context
+[INFO] [Qnn ExecuTorch]: Destroy Qnn backend
+```
+
+The model is merely executed. If we want to feed real inputs and get model outputs, we can use
+```bash
+cd <path_to_executorch>
+python -m examples.qualcomm.scripts.deeplab_v3 -b build_android -m SM8550 --download -s <device_serial>
+```
+The `<device_serial>` can be found by the `adb devices` command.
+
+After the above command, pre-processed inputs and outputs are put in the `$EXECUTORCH_ROOT/deeplab_v3` and `$EXECUTORCH_ROOT/deeplab_v3/outputs` folders.
+
+The command-line arguments are written in [utils.py](https://github.com/pytorch/executorch/blob/main/examples/qualcomm/scripts/utils.py#L127).
+The model, inputs, and output location are passed to `qnn_executor_runner` by `--model_path`, `--input_list_path`, and `--output_folder_path`.

### Running a model via ExecuTorch's android demo-app

@@ -252,8 +295,8 @@ An Android demo-app using Qualcomm AI Engine Direct Backend can be found in

## What is coming?

- - [An example using quantized mobilebert](https://github.com/pytorch/executorch/pull/1043) to solve multi-class text classification.
- - More Qualcomm AI Engine Direct accelerators, e.g., GPU.
+ - [llama2 and llama3](https://github.com/pytorch/executorch/pull/4030). Note that at the moment of writing, we still suffer from a quantization issue in the llama2-7B and llama3-8B cases. Only storiesllama works well.
+ - We will support pre-compiled binaries from [Qualcomm AI Hub](https://aihub.qualcomm.com/).
## FAQ

diff --git a/examples/qualcomm/README.md b/examples/qualcomm/README.md
index d41ad80ecfb..e2ae7406cd0 100644
--- a/examples/qualcomm/README.md
+++ b/examples/qualcomm/README.md
@@ -10,13 +10,13 @@ Here are some general information and limitations.

Please finish tutorial [Setting up executorch](https://pytorch.org/executorch/stable/getting-started-setup).

-Please finish [setup QNN backend](../../backends/qualcomm/setup.md).
+Please finish [setup QNN backend](../../docs/source/build-run-qualcomm-ai-engine-direct-backend.md).

## Environment

Please set up `QNN_SDK_ROOT` environment variable.
Note that this version should be exactly the same as the one used to build the QNN backend.
-Please check [setup](../../backends/qualcomm/setup.md).
+Please check [setup](../../docs/source/build-run-qualcomm-ai-engine-direct-backend.md).

Please set up `LD_LIBRARY_PATH` to `$QNN_SDK_ROOT/lib/x86_64-linux-clang`.
Or, you could put QNN libraries to default search path of the dynamic linker.

From 0d1443eed11faedc4bd4ebd8df04550bc62fa7c3 Mon Sep 17 00:00:00 2001
From: chiwwang
Date: Thu, 8 Aug 2024 16:17:23 +0800
Subject: [PATCH 2/4] Address feedback. Add emulator section. Add supported
 model list

---
 backends/qualcomm/scripts/build.sh            |  4 +-
 backends/qualcomm/setup.md                    |  1 -
 backends/qualcomm/tests/utils.py              |  2 +-
 ...d-run-qualcomm-ai-engine-direct-backend.md | 77 ++++++++++++++---
 examples/qualcomm/README.md                   |  4 +-
 examples/qualcomm/llama2/README.md            |  4 +-
 examples/qualcomm/scripts/utils.py            |  2 +-
 7 files changed, 76 insertions(+), 18 deletions(-)

diff --git a/backends/qualcomm/scripts/build.sh b/backends/qualcomm/scripts/build.sh
index 9576da8dea1..fc862f56c9f 100755
--- a/backends/qualcomm/scripts/build.sh
+++ b/backends/qualcomm/scripts/build.sh
@@ -25,9 +25,9 @@ usage() {
[ "$1" = -h ] && usage

BUILD_X86_64="true"
-CMAKE_X86_64="build_x86_64"
+CMAKE_X86_64="cmake-out"
BUILD_AARCH64="true"
-CMAKE_AARCH64="build_android"
+CMAKE_AARCH64="cmake-out-android"
CLEAN="true"
BUILD_TYPE="Debug"

diff --git a/backends/qualcomm/setup.md b/backends/qualcomm/setup.md
index b3bac83771d..37d8e04c210 100644
--- a/backends/qualcomm/setup.md
+++ b/backends/qualcomm/setup.md
@@ -5,4 +5,3 @@ Please refer to [Building and Running ExecuTorch with Qualcomm AI Engine Direct
That is a tutorial for building and running the Qualcomm AI Engine Direct backend,
including compiling a model on an x64 host and running inference on an
Android device.
-
diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
index ef0ac0f202f..b6e7d3540ca 100644
--- a/backends/qualcomm/tests/utils.py
+++ b/backends/qualcomm/tests/utils.py
@@ -231,7 +231,7 @@ def validate_profile():
        qnn_sdk = os.environ.get("QNN_SDK_ROOT", None)
        assert qnn_sdk, "QNN_SDK_ROOT was not found in environment variable"

-        build_path = "build_x86_64"
+        build_path = "cmake-out"
        cmds = [
            # export LD_LIBRARY_PATH to QNN_SDK_ROOT
            f"export LD_LIBRARY_PATH={qnn_sdk}/lib/{target}/:{self.executorch_root}/{build_path}/lib && "
diff --git a/docs/source/build-run-qualcomm-ai-engine-direct-backend.md b/docs/source/build-run-qualcomm-ai-engine-direct-backend.md
index 9623a87bcc4..3d0a67153e3 100644
--- a/docs/source/build-run-qualcomm-ai-engine-direct-backend.md
+++ b/docs/source/build-run-qualcomm-ai-engine-direct-backend.md
@@ -126,8 +126,8 @@ Python APIs on x64 are required to compile models to Qualcomm AI Engine Direct b

```bash
cd $EXECUTORCH_ROOT
-mkdir build_x86_64
-cd build_x86_64
+mkdir cmake-out
+cd cmake-out
# Note that the below command might change.
# Please refer to the above build.sh for the latest working commands.
cmake .. \
  -DCMAKE_INSTALL_PREFIX=$PWD \
  -DEXECUTORCH_BUILD_QNN=ON \
  -DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
  -DEXECUTORCH_BUILD_SDK=ON \
  -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
  -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
  -DPYTHON_EXECUTABLE=python3 \
  -DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF

-cmake --build $PWD -t "PyQnnManagerAdaptor" "PyQnnWrapperAdaptor" -j8
+# nproc is used to detect the number of available CPUs.
+# If it is not applicable, please feel free to use the number you want.
+cmake --build $PWD --target "PyQnnManagerAdaptor" "PyQnnWrapperAdaptor" -j$(nproc)

# install Python APIs to correct import path
# The filename might vary depending on your Python and host version.
@@ -156,8 +158,8 @@ Commands to build `qnn_executor_runner` for Android:

```bash
cd $EXECUTORCH_ROOT
-mkdir build_android
-cd build_android
+mkdir cmake-out-android
+cd cmake-out-android
# build executorch & qnn_executorch_backend
cmake .. \
  -DCMAKE_INSTALL_PREFIX=$PWD \
@@ -171,7 +173,9 @@ cmake .. \
  -DANDROID_ABI='arm64-v8a' \
  -DANDROID_NATIVE_API_LEVEL=23

-cmake --build $PWD -j16 --target install
+# nproc is used to detect the number of available CPUs.
+# If it is not applicable, please feel free to use the number you want.
+cmake --build $PWD --target install -j$(nproc)

cmake ../examples/qualcomm \
  -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
@@ -182,10 +186,10 @@ cmake ../examples/qualcomm \
  -DPYTHON_EXECUTABLE=python3 \
  -Bexamples/qualcomm

-cmake --build examples/qualcomm -j16
+cmake --build examples/qualcomm -j$(nproc)

# qnn_executor_runner can be found under examples/qualcomm
-# The full path is $EXECUTORCH_ROOT/build_android/examples/qualcomm/qnn_executor_runner
+# The full path is $EXECUTORCH_ROOT/cmake-out-android/examples/qualcomm/qnn_executor_runner
ls examples/qualcomm
```
@@ -228,6 +232,58 @@ output output output ([getitem_

The compiled model is `./deeplab_v3/dlv3_qnn.pte`.

+### Test model inference on QNN HTP emulator
+
+We can test model inference on the HTP emulator before deploying the model to a device.
+
+Let's build `qnn_executor_runner` for an x64 host:
+```bash
+# assuming the AOT component is built.
+cd $EXECUTORCH_ROOT/cmake-out
+cmake ../examples/qualcomm \
+  -DCMAKE_PREFIX_PATH="$PWD/lib/cmake/ExecuTorch;$PWD/third-party/gflags;" \
+  -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
+  -DPYTHON_EXECUTABLE=python3 \
+  -Bexamples/qualcomm
+
+cmake --build examples/qualcomm -j$(nproc)
+
+# qnn_executor_runner can be found under examples/qualcomm
+# The full path is $EXECUTORCH_ROOT/cmake-out/examples/qualcomm/qnn_executor_runner
+ls examples/qualcomm/
+```
+
+To run the HTP emulator, the dynamic linker needs to access QNN libraries and `libqnn_executorch_backend.so`.
+We add the below two paths to the `LD_LIBRARY_PATH` environment variable:
+ 1. `$QNN_SDK_ROOT/lib/x86_64-linux-clang/`
+ 2. `$EXECUTORCH_ROOT/cmake-out/lib/`
+
+The first path is for the QNN libraries, including the HTP emulator. It was already configured in the AOT compilation section.
+
+The second path is for `libqnn_executorch_backend.so`.
+
+So, we can run `./deeplab_v3/dlv3_qnn.pte` by:
+```bash
+cd $EXECUTORCH_ROOT/cmake-out
+export LD_LIBRARY_PATH=$EXECUTORCH_ROOT/cmake-out/lib/:$LD_LIBRARY_PATH
+examples/qualcomm/qnn_executor_runner --model_path ../deeplab_v3/dlv3_qnn.pte
+```
+
+We should see output like the below. Note that the emulator can take some time to finish.
+```bash
+I 00:00:00.354662 executorch:qnn_executor_runner.cpp:213] Method loaded.
+I 00:00:00.356460 executorch:qnn_executor_runner.cpp:261] ignoring error from set_output_data_ptr(): 0x2
+I 00:00:00.357991 executorch:qnn_executor_runner.cpp:261] ignoring error from set_output_data_ptr(): 0x2
+I 00:00:00.357996 executorch:qnn_executor_runner.cpp:265] Inputs prepared.
+
+I 00:01:09.328144 executorch:qnn_executor_runner.cpp:414] Model executed successfully.
+I 00:01:09.328159 executorch:qnn_executor_runner.cpp:421] Write etdump to etdump.etdp, Size = 424
+[INFO] [Qnn ExecuTorch]: Destroy Qnn backend parameters
+[INFO] [Qnn ExecuTorch]: Destroy Qnn context
+[INFO] [Qnn ExecuTorch]: Destroy Qnn device
+[INFO] [Qnn ExecuTorch]: Destroy Qnn backend
+```
+
### Run model inference on an Android smartphone with Qualcomm SoCs

***Step 1***. We need to push required QNN libraries to the device.
@@ -276,7 +332,7 @@ I 00:00:00.364875 executorch:qnn_executor_runner.cpp:425] Write etdump to etdump

The model is merely executed. If we want to feed real inputs and get model outputs, we can use
```bash
-cd <path_to_executorch>
+cd $EXECUTORCH_ROOT
python -m examples.qualcomm.scripts.deeplab_v3 -b build_android -m SM8550 --download -s <device_serial>
```
The `<device_serial>` can be found by the `adb devices` command.
@@ -292,6 +348,9 @@ The model, inputs, and output location are passed to `qnn_executor_runner` by

An Android demo-app using Qualcomm AI Engine Direct Backend can be found in
`examples`. Please refer to android demo app [tutorial](https://pytorch.org/executorch/stable/demo-apps-android.html).

+## Supported model list
+
+Please refer to `$EXECUTORCH_ROOT/examples/qualcomm/scripts/` and `$EXECUTORCH_ROOT/examples/qualcomm/oss_scripts/` for the list of supported models.

## What is coming?

diff --git a/examples/qualcomm/README.md b/examples/qualcomm/README.md
index e2ae7406cd0..dd5c614fd75 100644
--- a/examples/qualcomm/README.md
+++ b/examples/qualcomm/README.md
@@ -39,12 +39,12 @@ cd $EXECUTORCH_ROOT/examples/qualcomm/scripts

#### For MobileNet_v2
```bash
-python mobilenet_v2.py -s <device_serial> -m "SM8550" -b path/to/build_android/ -d /path/to/imagenet-mini/val
+python mobilenet_v2.py -s <device_serial> -m "SM8550" -b path/to/cmake-out-android/ -d /path/to/imagenet-mini/val
```

#### For DeepLab_v3
```bash
-python deeplab_v3.py -s <device_serial> -m "SM8550" -b path/to/build_android/ --download
+python deeplab_v3.py -s <device_serial> -m "SM8550" -b path/to/cmake-out-android/ --download
```

## Additional Dependency
diff --git a/examples/qualcomm/llama2/README.md b/examples/qualcomm/llama2/README.md
index 4670f742514..2186d98745d 100644
--- a/examples/qualcomm/llama2/README.md
+++ b/examples/qualcomm/llama2/README.md
@@ -34,7 +34,7 @@ echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps":
Default example generates the story based on the given prompt, "Once".
```bash
# 16a4w quant:
-python examples/qualcomm/llama2/llama.py -a ${ARTIFACTS} -b build_android -s ${SERIAL_NUM} -m ${SOC_MODEL} --ptq 16a4w --checkpoint stories110M --params params.json --tokenizer_model tokenizer.model --tokenizer_bin tokenizer.bin --prompt "Once"
+python examples/qualcomm/llama2/llama.py -a ${ARTIFACTS} -b cmake-out-android -s ${SERIAL_NUM} -m ${SOC_MODEL} --ptq 16a4w --checkpoint stories110M --params params.json --tokenizer_model tokenizer.model --tokenizer_bin tokenizer.bin --prompt "Once"
```

#### (Note) Customized PTQ data set
@@ -63,5 +63,5 @@ python -m examples.models.llama2.tokenizer.tokenizer -t tokenizer.model -o token
#### Step3: Run default examples
```bash
# AIHUB_CONTEXT_BINARIES: ${PATH_TO_AIHUB_WORKSPACE}/build/llama_v2_7b_chat_quantized
-python examples/qualcomm/llama2/llama_qaihub.py -a ${ARTIFACTS} -b build_android -s ${SERIAL_NUM} -m ${SOC_MODEL} --context_binaries ${AIHUB_CONTEXT_BINARIES} --tokenizer_bin tokenizer.bin --prompt "What is Python?"
+python examples/qualcomm/llama2/llama_qaihub.py -a ${ARTIFACTS} -b cmake-out-android -s ${SERIAL_NUM} -m ${SOC_MODEL} --context_binaries ${AIHUB_CONTEXT_BINARIES} --tokenizer_bin tokenizer.bin --prompt "What is Python?"
```
diff --git a/examples/qualcomm/scripts/utils.py b/examples/qualcomm/scripts/utils.py
index 8211dc45810..9c33cf4445b 100755
--- a/examples/qualcomm/scripts/utils.py
+++ b/examples/qualcomm/scripts/utils.py
@@ -336,7 +336,7 @@ def setup_common_args_and_variables():
    parser.add_argument(
        "-b",
        "--build_folder",
-        help="path to cmake binary directory for android, e.g., /path/to/build_android",
+        help="path to cmake binary directory for android, e.g., /path/to/cmake-out-android",
        type=str,
        required=True,
    )

From c899736758d5c0f19f0027d78128fc9cc69665dd Mon Sep 17 00:00:00 2001
From: chiwwang
Date: Fri, 9 Aug 2024 11:49:04 +0800
Subject: [PATCH 3/4] Refine build_folder and subprocess.run() in x64 UT

---
 backends/qualcomm/tests/utils.py | 44 ++++++++++++++++++++++----------
 1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
index b6e7d3540ca..d069a4506b0 100644
--- a/backends/qualcomm/tests/utils.py
+++ b/backends/qualcomm/tests/utils.py
@@ -231,25 +231,43 @@ def validate_profile():
        qnn_sdk = os.environ.get("QNN_SDK_ROOT", None)
        assert qnn_sdk, "QNN_SDK_ROOT was not found in environment variable"

-        build_path = "cmake-out"
-        cmds = [
-            # export LD_LIBRARY_PATH to QNN_SDK_ROOT
-            f"export LD_LIBRARY_PATH={qnn_sdk}/lib/{target}/:{self.executorch_root}/{build_path}/lib && "
+        build_folder = self.build_folder
+        if os.path.isabs(self.build_folder):
+            # the user gave an absolute path; use it as-is
+            pass
+        else:
+            # otherwise, assume the path is relative to the current working directory
+            build_folder = os.path.join(os.getcwd(), self.build_folder)
+
+        cmd = [
            # qnn_executor_runner
-            f"{self.executorch_root}/{build_path}/examples/qualcomm/qnn_executor_runner",
-            f"--model_path {pte_fname}",
-            f"--input_list_path {tmp_dir}/input_list.txt",
-            f"--output_folder_path {output_dir}",
+            f"{build_folder}/examples/qualcomm/qnn_executor_runner",
+            "--model_path",
+            f"{pte_fname}",
+            "--input_list_path",
+            f"{tmp_dir}/input_list.txt",
+            "--output_folder_path",
+            f"{output_dir}",
        ]

-        subprocess.run(
-            " ".join(cmds),
-            shell=True,
-            executable="/bin/bash",
-            capture_output=True,
+        env = dict(os.environ)
+        env["LD_LIBRARY_PATH"] = f"{qnn_sdk}/lib/{target}/:{build_folder}/lib"
+        proc = subprocess.run(
+            cmd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            env=env,
            cwd=tmp_dir,
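+            # stderr is folded into stdout above, so the assertion below can
+            # print one combined log on failure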
        )
+        self.assertEqual(
+            proc.returncode,
+            0,
+            f"The process running qnn_executorch_runner reutrn {proc.returncode}, "
+            "STDOUT=\n"
+            f"{proc.stdout.decode('utf-8')}",
+        )
+
+        # Verify the outputs
        post_process()
        self._assert_outputs_equal(outputs, ref_outputs)

From dbb452f8f0a383bc6c918742ac168f604ff03419 Mon Sep 17 00:00:00 2001
From: chiwwang
Date: Fri, 9 Aug 2024 13:11:11 +0800
Subject: [PATCH 4/4] oops, fix typo

---
 backends/qualcomm/tests/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
index d069a4506b0..7a7c6ee3d93 100644
--- a/backends/qualcomm/tests/utils.py
+++ b/backends/qualcomm/tests/utils.py
@@ -263,7 +263,7 @@ def validate_profile():
        self.assertEqual(
            proc.returncode,
            0,
-            f"The process running qnn_executorch_runner reutrn {proc.returncode}, "
+            f"The process running qnn_executorch_runner returned {proc.returncode}, "
            "STDOUT=\n"
            f"{proc.stdout.decode('utf-8')}",
        )
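For reference, the refactored test helper above launches `qnn_executor_runner` directly instead of going through a shell string. A rough shell equivalent of the command and environment it assembles -- a sketch only, with `model.pte`, `input_list.txt`, and `outputs` used as illustrative placeholders:

```bash
# Mirrors env["LD_LIBRARY_PATH"] and cmd from the refactored helper,
# for an x64 target ({target} = x86_64-linux-clang).
export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/:$EXECUTORCH_ROOT/cmake-out/lib
$EXECUTORCH_ROOT/cmake-out/examples/qualcomm/qnn_executor_runner \
    --model_path ./model.pte \
    --input_list_path ./input_list.txt \
    --output_folder_path ./outputs
```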