7 changes: 5 additions & 2 deletions backends/qualcomm/README.md
@@ -61,8 +61,11 @@ backends/qualcomm
├── tests # Unit tests and model tests go here.
└── utils # Miscellaneous utilities.

examples
└── qualcomm # Examples to run QNN backends.
examples/qualcomm
├── executor_runner # A general runner capable of running most basic models.
├── oss_scripts # Scripts for OSS (Open Source Software) models and customized runners for some specific models.
├── qaihub_scripts # Scripts for QAIHub models and corresponding customized runners for these models.
└── scripts # Scripts for models provided by ExecuTorch.
```

## Examples
58 changes: 56 additions & 2 deletions backends/qualcomm/tests/test_qnn_delegate.py
@@ -34,7 +34,7 @@
generate_qnn_executorch_compiler_spec,
)

from executorch.examples.qualcomm.scripts.utils import setup_common_args_and_variables
from executorch.examples.qualcomm.utils import setup_common_args_and_variables

from executorch.backends.qualcomm.tests.models import * # noqa: F403

@@ -1803,6 +1803,60 @@ def test_squeezenet(self):
self.assertGreaterEqual(msg["top_5"], 70)


class TestExampleQaihubScript(TestQNN):

def required_envs(self, conditions=None) -> bool:
conditions = [] if conditions is None else conditions
return all(
[
self.executorch_root,
self.artifact_dir,
*conditions,
]
)

def test_llama2_7b(self):
if not self.required_envs():
self.skipTest("missing required envs")

prompt = "Explain the rules of baseball"
cmds = [
"python",
f"{self.executorch_root}/examples/qualcomm/qaihub_scripts/llama2/qaihub_llama2_7b.py",
"--artifact",
self.artifact_dir,
"--build_folder",
self.build_folder,
"--device",
self.device,
"--model",
self.model,
"--tokenizer_bin",
f"{self.artifact_dir}/tokenizer.bin",
"--context_binaries",
f"{self.artifact_dir}",
"--ip",
self.ip,
"--port",
str(self.port),
"--prompt",
f"{prompt}",
]
if self.host:
cmds.extend(["--host", self.host])

p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
with Listener((self.ip, self.port)) as listener:
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
if "Error" in msg:
self.fail(msg["Error"])
else:
model_out = msg["result"]
self.assertTrue(model_out.startswith(prompt))


class TestExampleScript(TestQNN):
def required_envs(self, conditions=None) -> bool:
conditions = [] if conditions is None else conditions
Expand Down Expand Up @@ -2085,7 +2139,7 @@ def test_stories_single_llama(self):

cmds = [
"python",
f"{self.executorch_root}/examples/qualcomm/llama2/llama.py",
f"{self.executorch_root}/examples/qualcomm/oss_scripts/llama2/llama.py",
"--artifact",
self.artifact_dir,
"--build_folder",
4 changes: 2 additions & 2 deletions backends/qualcomm/tests/utils.py
@@ -27,7 +27,7 @@
QcomChipset,
)
from executorch.backends.qualcomm.utils.utils import capture_program
from executorch.examples.qualcomm.scripts.utils import (
from executorch.examples.qualcomm.utils import (
generate_inputs,
make_output_dir,
SimpleADB,
@@ -241,7 +241,7 @@ def validate_profile():

cmd = [
# qnn_executor_runner
f"{build_folder}/examples/qualcomm/qnn_executor_runner",
f"{build_folder}/examples/qualcomm/executor_runner/qnn_executor_runner",
"--model_path",
f"{pte_fname}",
"--input_list_path",
8 changes: 4 additions & 4 deletions docs/source/build-run-qualcomm-ai-engine-direct-backend.md
@@ -209,7 +209,7 @@ cd $EXECUTORCH_ROOT
cp schema/program.fbs exir/_serialize/program.fbs
cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs

python -m examples.qualcomm.scripts.deeplab_v3 -b build_android -m SM8550 --compile_only --download
python -m examples.qualcomm.scripts.deeplab_v3 -b cmake-out-android -m SM8550 --compile_only --download
```

You might see something like below:
@@ -308,8 +308,8 @@ So, we can run `qnn_executor_runner` like

```bash
adb push ./deeplab_v3/dlv3_qnn.pte ${DEVICE_DIR}
adb push ${EXECUTORCH_ROOT}/build_android/examples/qualcomm/qnn_executor_runner ${DEVICE_DIR}
adb push ${EXECUTORCH_ROOT}/build_android/lib/libqnn_executorch_backend.so ${DEVICE_DIR}
adb push ${EXECUTORCH_ROOT}/cmake-out-android/examples/qualcomm/executor_runner/qnn_executor_runner ${DEVICE_DIR}
adb push ${EXECUTORCH_ROOT}/cmake-out-android/lib/libqnn_executorch_backend.so ${DEVICE_DIR}
adb shell "cd ${DEVICE_DIR} \
&& export LD_LIBRARY_PATH=${DEVICE_DIR} \
&& export ADSP_LIBRARY_PATH=${DEVICE_DIR} \
@@ -333,7 +333,7 @@ I 00:00:00.364875 executorch:qnn_executor_runner.cpp:425] Write etdump to etdump
The model is merely executed. If we want to feed real inputs and get model outputs, we can use
```bash
cd $EXECUTORCH_ROOT
python -m examples.qualcomm.scripts.deeplab_v3 -b build_android -m SM8550 --download -s <device_serial>
python -m examples.qualcomm.scripts.deeplab_v3 -b cmake-out-android -m SM8550 --download -s <device_serial>
```
The `<device_serial>` can be found with the `adb devices` command.

2 changes: 1 addition & 1 deletion examples/demo-apps/android/ExecuTorchDemo/README.md
@@ -53,7 +53,7 @@ For delegating to Qualcomm Hexagon NPU, please follow the tutorial [here](build-
After generating the model, copy the model to `assets` directory.

```bash
python -m examples.qualcomm.scripts.deeplab_v3 -b build_android -m SM8450 -s <adb_connected_device_serial>
python -m examples.qualcomm.scripts.deeplab_v3 -b cmake-out-android -m SM8450 -s <adb_connected_device_serial>
cp deeplab_v3/dlv3_qnn.pte examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/
```

74 changes: 10 additions & 64 deletions examples/qualcomm/CMakeLists.txt
@@ -49,9 +49,7 @@ include(${EXECUTORCH_SRCS_FILE})
get_filename_component(
EXECUTORCH_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../.." ABSOLUTE
)
set(_qnn_executor_runner__srcs ${_executor_runner__srcs})
set(_qnn_llama_runner__srcs ${_llama_runner__srcs})
set(_qnn_qaihub_llama_runner__srcs ${_llama_runner__srcs})


# portable_ops_lib
gen_selected_ops(LIB_NAME "full_portable_ops_lib" INCLUDE_ALL_OPS "ON")
@@ -69,69 +67,17 @@ target_include_directories(
full_portable_ops_lib PUBLIC ${_common_include_directories}
)

# preprocess executor runner src files
list(TRANSFORM _qnn_executor_runner__srcs PREPEND "${EXECUTORCH_SOURCE_DIR}/")
list(FILTER _qnn_executor_runner__srcs EXCLUDE REGEX ".*executor_runner.cpp$")
list(PREPEND _qnn_executor_runner__srcs
${CMAKE_CURRENT_LIST_DIR}/executor_runner/qnn_executor_runner.cpp
)
# preprocess llama runner src files
list(TRANSFORM _qnn_llama_runner__srcs PREPEND "${EXECUTORCH_SOURCE_DIR}/")
list(FILTER _qnn_llama_runner__srcs EXCLUDE REGEX ".*(/runner/).*")
message(ERROR ${_qnn_llama_runner__srcs})
list(PREPEND _qnn_llama_runner__srcs
${CMAKE_CURRENT_LIST_DIR}/executor_runner/qnn_llama_runner.cpp
${CMAKE_CURRENT_LIST_DIR}/llama2/runner/runner.cpp
${CMAKE_CURRENT_LIST_DIR}/llama2/runner/runner.h
)
# preprocess qaihub llama runner src files
list(TRANSFORM _qnn_qaihub_llama_runner__srcs PREPEND "${EXECUTORCH_SOURCE_DIR}/")
list(FILTER _qnn_qaihub_llama_runner__srcs EXCLUDE REGEX ".*(/runner/).*")
list(PREPEND _qnn_qaihub_llama_runner__srcs
${CMAKE_CURRENT_LIST_DIR}/executor_runner/qnn_qaihub_llama_runner.cpp
${CMAKE_CURRENT_LIST_DIR}/llama2/qaihub_runner/runner.cpp
${CMAKE_CURRENT_LIST_DIR}/llama2/qaihub_runner/runner.h
${CMAKE_CURRENT_LIST_DIR}/llama2/qaihub_runner/io_memory.cpp
${CMAKE_CURRENT_LIST_DIR}/llama2/qaihub_runner/io_memory.h
# build qnn_executor_runner
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/executor_runner
)

# build executor runner
add_executable(qnn_executor_runner ${_qnn_executor_runner__srcs})
target_include_directories(
qnn_executor_runner PUBLIC ${_common_include_directories}
)
target_link_libraries(
qnn_executor_runner qnn_executorch_backend full_portable_ops_lib etdump
${FLATCCRT_LIB} gflags
# build qnn_llama_runner
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/llama2
)

# build llama runner
add_executable(qnn_llama_runner ${_qnn_llama_runner__srcs})
target_include_directories(
qnn_llama_runner PUBLIC ${_common_include_directories}
)
target_link_libraries(qnn_llama_runner
qnn_executorch_backend
full_portable_ops_lib
extension_data_loader
extension_module
gflags
)
target_compile_options(qnn_llama_runner
PUBLIC ${_common_compile_options}
)
# build qaihub llama runner
add_executable(qnn_qaihub_llama_runner ${_qnn_qaihub_llama_runner__srcs})
target_include_directories(qnn_qaihub_llama_runner
PUBLIC ${_common_include_directories}
)
target_link_libraries(qnn_qaihub_llama_runner
qnn_executorch_backend
executorch_no_prim_ops
extension_data_loader
extension_module
gflags
)
target_compile_options(qnn_qaihub_llama_runner
PUBLIC ${_common_compile_options}
# build qaihub_llama2_7b_runner
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/qaihub_scripts/llama2
)
25 changes: 25 additions & 0 deletions examples/qualcomm/README.md
@@ -2,6 +2,20 @@

This directory contains examples for some AI models.

We have separated the example scripts into the following subfolders; please refer to [README.md](../../backends/qualcomm/README.md) for the directory structure of the example scripts:

1. executor_runner: This folder contains a general executor runner capable of running most basic models. As a rule of thumb, if a model does not have its own customized runner, execute it with [executor_runner](./executor_runner/qnn_executor_runner.cpp); if it does, such as [llama2](./oss_scripts/llama2/qnn_llama_runner.cpp), use the customized runner instead (see the sketch after this list). A customized runner is located in the same folder as the model's Python script.

2. oss_scripts: OSS stands for Open Source Software. This folder contains Python scripts for open source models. Some models under this folder also have their own customized runner. For example, [llama2](./oss_scripts/llama2/qnn_llama_runner.cpp) contains not only the Python script to prepare the model but also a customized runner to execute it.

3. qaihub_scripts: QAIHub stands for [Qualcomm AI Hub](https://aihub.qualcomm.com/). On QAIHub, users can find pre-compiled context binaries, a format used by QNN to save its models, which gives users another option for model deployment. Unlike oss_scripts and scripts, whose example scripts convert a model from nn.Module to an ExecuTorch .pte file, qaihub_scripts provides example scripts for converting pre-compiled context binaries to ExecuTorch .pte files. Additionally, users can find customized example runners specific to the QAIHub models. For example, [qaihub_llama2_7b](./qaihub_scripts/llama2/qaihub_llama2_7b.py) converts context binaries to ExecuTorch .pte files, and [qaihub_llama2_7b_runner](./qaihub_scripts/llama2/qaihub_llama2_7b_runner.cpp) is a customized example runner that executes the resulting llama2 .pte files. Please be aware that context binaries downloaded from QAIHub are tied to a specific QNN SDK version; before executing the scripts and runner, ensure that your QNN SDK version matches the context binary. The tutorial below covers how to check the QNN SDK version of a context binary.

4. scripts: This folder contains scripts for models provided by ExecuTorch.
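
For instance, the two cases described in item 1 look like this (a minimal sketch; the SoC model, device serial, and build folder are placeholders, and commands are assumed to run from the repository root):

```bash
# Model without a customized runner: the example script deploys the model with the
# generic runner built at <build_folder>/examples/qualcomm/executor_runner/qnn_executor_runner.
python -m examples.qualcomm.scripts.deeplab_v3 -b cmake-out-android -m SM8550 -s <device_serial> --download

# Model with a customized runner: examples/qualcomm/oss_scripts/llama2/llama.py
# deploys the customized qnn_llama_runner that lives in the same folder instead.
```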



Please check the `--help` message of each example script for detailed arguments, as shown below.
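
For example (mobilenet_v2 is an arbitrary choice here; the example scripts parse arguments with argparse, so each should print its options via `--help`):

```bash
# Print the detailed arguments accepted by an example script.
python -m examples.qualcomm.scripts.mobilenet_v2 --help
```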

Here are some general notes and limitations.
Expand Down Expand Up @@ -47,6 +61,17 @@ python mobilenet_v2.py -s <device_serial> -m "SM8550" -b path/to/cmake-out-andro
python deeplab_v3.py -s <device_serial> -m "SM8550" -b path/to/cmake-out-android/ --download
```

#### Check context binary version
```bash
cd ${QNN_SDK_ROOT}/bin/x86_64-linux-clang
./qnn-context-binary-utility --context_binary ${PATH_TO_CONTEXT_BINARY} --json_file ${OUTPUT_JSON_NAME}
```
After retrieving the JSON file, search it for the field "buildId" and ensure that it matches the version of the QNN SDK under the ${QNN_SDK_ROOT} you are using.
If you run into the following error, the ${QNN_SDK_ROOT} you are using is older than the QNN SDK version with which the context binary was built. In this case, please download a newer QNN SDK.
```
Error: Failed to get context binary info.
```
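
As a quick check, you can grep the field directly (a minimal sketch; `ctx_info.json` stands for whatever name you passed to `--json_file` above):

```bash
# Print the QNN SDK version recorded in the context binary's metadata.
grep '"buildId"' ctx_info.json
```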

## Additional Dependency

The mobilebert multi-class text classification example requires `pandas` and `sklearn`.
22 changes: 22 additions & 0 deletions examples/qualcomm/executor_runner/CMakeLists.txt
@@ -0,0 +1,22 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set(_qnn_executor_runner__srcs ${_executor_runner__srcs})

# preprocess executor runner src files
list(TRANSFORM _qnn_executor_runner__srcs PREPEND "${EXECUTORCH_SOURCE_DIR}/")
list(FILTER _qnn_executor_runner__srcs EXCLUDE REGEX ".*executor_runner.cpp$")
list(PREPEND _qnn_executor_runner__srcs ${CMAKE_CURRENT_LIST_DIR}/qnn_executor_runner.cpp)

# build executor runner
add_executable(qnn_executor_runner ${_qnn_executor_runner__srcs})
target_include_directories(
qnn_executor_runner PUBLIC ${_common_include_directories}
)
target_link_libraries(
qnn_executor_runner qnn_executorch_backend full_portable_ops_lib etdump
${FLATCCRT_LIB} gflags
)
10 changes: 9 additions & 1 deletion examples/qualcomm/executor_runner/qnn_executor_runner.cpp
@@ -404,7 +404,15 @@ int main(int argc, char** argv) {
elapsed_time,
elapsed_time / inference_index);
} else {
// if no input is provided, run with default input as executor_runner.
// if no input is provided, fill the inputs with default values
auto inputs = util::prepare_input_tensors(*method);
ET_CHECK_MSG(
inputs.ok(),
"Could not prepare inputs: 0x%" PRIx32,
(uint32_t)inputs.error());
ET_LOG(
Info,
"Input list not provided. Inputs prepared with default values set.");
Error status = method->execute();
ET_CHECK_MSG(
status == Error::Ok,
67 changes: 0 additions & 67 deletions examples/qualcomm/llama2/README.md

This file was deleted.

2 changes: 1 addition & 1 deletion examples/qualcomm/oss_scripts/dino_v2.py
@@ -13,7 +13,7 @@
import torch
from executorch.backends.qualcomm.quantizer.quantizer import QuantDtype

from executorch.examples.qualcomm.scripts.utils import (
from executorch.examples.qualcomm.utils import (
build_executorch_binary,
make_output_dir,
parse_skip_delegation_node,