7 changes: 5 additions & 2 deletions backends/qualcomm/README.md
@@ -61,8 +61,11 @@ backends/qualcomm
├── tests # Unit tests and model tests go here.
└── utils # Miscellaneous utilities.

examples
└── qualcomm # Examples to run QNN backends.
examples/qualcomm
├── executor_runner # A general runner capable of running most basic models.
├── oss_scripts # Scripts for OSS (Open Source Software) models and customized runners for some specific models.
├── qaihub_scripts # Scripts for QAIHub models and corresponding customized runners for these models.
└── scripts # Scripts for models provided by ExecuTorch.
```

## Examples
58 changes: 56 additions & 2 deletions backends/qualcomm/tests/test_qnn_delegate.py
@@ -34,7 +34,7 @@
generate_qnn_executorch_compiler_spec,
)

from executorch.examples.qualcomm.scripts.utils import setup_common_args_and_variables
from executorch.examples.qualcomm.utils import setup_common_args_and_variables

from executorch.backends.qualcomm.tests.models import * # noqa: F403

@@ -1803,6 +1803,60 @@ def test_squeezenet(self):
self.assertGreaterEqual(msg["top_5"], 70)


class TestExampleQaihubScript(TestQNN):

def required_envs(self, conditions=None) -> bool:
conditions = [] if conditions is None else conditions
return all(
[
self.executorch_root,
self.artifact_dir,
*conditions,
]
)

def test_llama2_7b(self):
if not self.required_envs():
self.skipTest("missing required envs")

prompt = "Explain the rules of baseball"
cmds = [
"python",
f"{self.executorch_root}/examples/qualcomm/qaihub_scripts/llama2/qaihub_llama2_7b.py",
"--artifact",
self.artifact_dir,
"--build_folder",
self.build_folder,
"--device",
self.device,
"--model",
self.model,
"--tokenizer_bin",
f"{self.artifact_dir}/tokenizer.bin",
"--context_binaries",
f"{self.artifact_dir}",
"--ip",
self.ip,
"--port",
str(self.port),
"--prompt",
f"{prompt}",
]
if self.host:
cmds.extend(["--host", self.host])

p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
with Listener((self.ip, self.port)) as listener:
conn = listener.accept()
p.communicate()
msg = json.loads(conn.recv())
if "Error" in msg:
self.fail(msg["Error"])
else:
model_out = msg["result"]
self.assertTrue(model_out.startswith(prompt))


class TestExampleScript(TestQNN):
def required_envs(self, conditions=None) -> bool:
conditions = [] if conditions is None else conditions
Expand Down Expand Up @@ -2085,7 +2139,7 @@ def test_stories_single_llama(self):

cmds = [
"python",
f"{self.executorch_root}/examples/qualcomm/llama2/llama.py",
f"{self.executorch_root}/examples/qualcomm/oss_scripts/llama2/llama.py",
"--artifact",
self.artifact_dir,
"--build_folder",
4 changes: 2 additions & 2 deletions backends/qualcomm/tests/utils.py
@@ -27,7 +27,7 @@
QcomChipset,
)
from executorch.backends.qualcomm.utils.utils import capture_program
from executorch.examples.qualcomm.scripts.utils import (
from executorch.examples.qualcomm.utils import (
generate_inputs,
make_output_dir,
SimpleADB,
@@ -241,7 +241,7 @@ def validate_profile():

cmd = [
# qnn_executor_runner
f"{build_folder}/examples/qualcomm/qnn_executor_runner",
f"{build_folder}/examples/qualcomm/executor_runner/qnn_executor_runner",
"--model_path",
f"{pte_fname}",
"--input_list_path",
8 changes: 4 additions & 4 deletions docs/source/build-run-qualcomm-ai-engine-direct-backend.md
@@ -209,7 +209,7 @@ cd $EXECUTORCH_ROOT
cp schema/program.fbs exir/_serialize/program.fbs
cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs

python -m examples.qualcomm.scripts.deeplab_v3 -b build_android -m SM8550 --compile_only --download
python -m examples.qualcomm.scripts.deeplab_v3 -b cmake-out-android -m SM8550 --compile_only --download
```

You might see something like below:
@@ -308,8 +308,8 @@ So, we can run `qnn_executor_runner` like

```bash
adb push ./deeplab_v3/dlv3_qnn.pte ${DEVICE_DIR}
adb push ${EXECUTORCH_ROOT}/build_android/examples/qualcomm/qnn_executor_runner ${DEVICE_DIR}
adb push ${EXECUTORCH_ROOT}/build_android/lib/libqnn_executorch_backend.so ${DEVICE_DIR}
adb push ${EXECUTORCH_ROOT}/cmake-out-android/examples/qualcomm/executor_runner/qnn_executor_runner ${DEVICE_DIR}
adb push ${EXECUTORCH_ROOT}/cmake-out-android/lib/libqnn_executorch_backend.so ${DEVICE_DIR}
adb shell "cd ${DEVICE_DIR} \
&& export LD_LIBRARY_PATH=${DEVICE_DIR} \
&& export ADSP_LIBRARY_PATH=${DEVICE_DIR} \
@@ -333,7 +333,7 @@ I 00:00:00.364875 executorch:qnn_executor_runner.cpp:425] Write etdump to etdump
The model is merely executed. If we want to feed real inputs and get model outputs, we can use
```bash
cd $EXECUTORCH_ROOT
python -m examples.qualcomm.scripts.deeplab_v3 -b build_android -m SM8550 --download -s <device_serial>
python -m examples.qualcomm.scripts.deeplab_v3 -b cmake-out-android -m SM8550 --download -s <device_serial>
```
The `<device_serial>` can be found with the `adb devices` command.

2 changes: 1 addition & 1 deletion examples/demo-apps/android/ExecuTorchDemo/README.md
@@ -53,7 +53,7 @@ For delegating to Qualcomm Hexagon NPU, please follow the tutorial [here](build-
After generating the model, copy the model to `assets` directory.

```bash
python -m examples.qualcomm.scripts.deeplab_v3 -b build_android -m SM8450 -s <adb_connected_device_serial>
python -m examples.qualcomm.scripts.deeplab_v3 -b cmake-out-android -m SM8450 -s <adb_connected_device_serial>
cp deeplab_v3/dlv3_qnn.pte examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/
```

74 changes: 10 additions & 64 deletions examples/qualcomm/CMakeLists.txt
@@ -49,9 +49,7 @@ include(${EXECUTORCH_SRCS_FILE})
get_filename_component(
EXECUTORCH_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../.." ABSOLUTE
)
set(_qnn_executor_runner__srcs ${_executor_runner__srcs})
set(_qnn_llama_runner__srcs ${_llama_runner__srcs})
set(_qnn_qaihub_llama_runner__srcs ${_llama_runner__srcs})


# portable_ops_lib
gen_selected_ops(LIB_NAME "full_portable_ops_lib" INCLUDE_ALL_OPS "ON")
@@ -69,69 +67,17 @@ target_include_directories(
full_portable_ops_lib PUBLIC ${_common_include_directories}
)

# preprocess executor runner src files
list(TRANSFORM _qnn_executor_runner__srcs PREPEND "${EXECUTORCH_SOURCE_DIR}/")
list(FILTER _qnn_executor_runner__srcs EXCLUDE REGEX ".*executor_runner.cpp$")
list(PREPEND _qnn_executor_runner__srcs
${CMAKE_CURRENT_LIST_DIR}/executor_runner/qnn_executor_runner.cpp
)
# preprocess llama runner src files
list(TRANSFORM _qnn_llama_runner__srcs PREPEND "${EXECUTORCH_SOURCE_DIR}/")
list(FILTER _qnn_llama_runner__srcs EXCLUDE REGEX ".*(/runner/).*")
message(ERROR ${_qnn_llama_runner__srcs})
list(PREPEND _qnn_llama_runner__srcs
${CMAKE_CURRENT_LIST_DIR}/executor_runner/qnn_llama_runner.cpp
${CMAKE_CURRENT_LIST_DIR}/llama2/runner/runner.cpp
${CMAKE_CURRENT_LIST_DIR}/llama2/runner/runner.h
)
# preprocess qaihub llama runner src files
list(TRANSFORM _qnn_qaihub_llama_runner__srcs PREPEND "${EXECUTORCH_SOURCE_DIR}/")
list(FILTER _qnn_qaihub_llama_runner__srcs EXCLUDE REGEX ".*(/runner/).*")
list(PREPEND _qnn_qaihub_llama_runner__srcs
${CMAKE_CURRENT_LIST_DIR}/executor_runner/qnn_qaihub_llama_runner.cpp
${CMAKE_CURRENT_LIST_DIR}/llama2/qaihub_runner/runner.cpp
${CMAKE_CURRENT_LIST_DIR}/llama2/qaihub_runner/runner.h
${CMAKE_CURRENT_LIST_DIR}/llama2/qaihub_runner/io_memory.cpp
${CMAKE_CURRENT_LIST_DIR}/llama2/qaihub_runner/io_memory.h
# build qnn_executor_runner
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/executor_runner
)

# build executor runner
add_executable(qnn_executor_runner ${_qnn_executor_runner__srcs})
target_include_directories(
qnn_executor_runner PUBLIC ${_common_include_directories}
)
target_link_libraries(
qnn_executor_runner qnn_executorch_backend full_portable_ops_lib etdump
${FLATCCRT_LIB} gflags
# build qnn_llama_runner
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/oss_scripts/llama2
)

# build llama runner
add_executable(qnn_llama_runner ${_qnn_llama_runner__srcs})
target_include_directories(
qnn_llama_runner PUBLIC ${_common_include_directories}
)
target_link_libraries(qnn_llama_runner
qnn_executorch_backend
full_portable_ops_lib
extension_data_loader
extension_module
gflags
)
target_compile_options(qnn_llama_runner
PUBLIC ${_common_compile_options}
)
# build qaihub llama runner
add_executable(qnn_qaihub_llama_runner ${_qnn_qaihub_llama_runner__srcs})
target_include_directories(qnn_qaihub_llama_runner
PUBLIC ${_common_include_directories}
)
target_link_libraries(qnn_qaihub_llama_runner
qnn_executorch_backend
executorch_no_prim_ops
extension_data_loader
extension_module
gflags
)
target_compile_options(qnn_qaihub_llama_runner
PUBLIC ${_common_compile_options}
# build qaihub_llama2_7b_runner
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/qaihub_scripts/llama2
)
25 changes: 25 additions & 0 deletions examples/qualcomm/README.md
@@ -2,6 +2,20 @@

This directory contains examples for some AI models.

We have separated the example scripts into the following subfolders; please refer to [README.md](../../backends/qualcomm/README.md) for the directory structure of the example scripts:

1. executor_runner: This folder contains a general executor runner capable of running most basic models. As a rule of thumb, if a model does not have its own customized runner, execute it with [executor_runner](./executor_runner/qnn_executor_runner.cpp); if it does, such as [llama2](./oss_scripts/llama2/qnn_llama_runner.cpp), use the customized runner instead (see the sketch after this list). A customized runner is located in the same folder as the model's Python script.

2. oss_scripts: OSS stands for Open Source Software. This folder contains Python scripts for open source models. Some models under this folder also have their own customized runner. For example, [llama2](./oss_scripts/llama2/qnn_llama_runner.cpp) contains not only the Python script to prepare the model but also a customized runner to execute it.

3. qaihub_scripts: QAIHub stands for [Qualcomm AI Hub](https://aihub.qualcomm.com/). On QAIHub, users can find pre-compiled context binaries, a format used by QNN to save its models, which gives users another option for model deployment. Unlike oss_scripts and scripts, whose example scripts convert a model from nn.Module to an ExecuTorch .pte file, qaihub_scripts provides example scripts for converting pre-compiled context binaries to ExecuTorch .pte files. Additionally, users can find customized example runners specific to the QAIHub models. For example, [qaihub_llama2_7b](./qaihub_scripts/llama2/qaihub_llama2_7b.py) converts context binaries to ExecuTorch .pte files, and [qaihub_llama2_7b_runner](./qaihub_scripts/llama2/qaihub_llama2_7b_runner.cpp) is a customized example runner that executes the resulting llama2 .pte files. Please be aware that context binaries downloaded from QAIHub are tied to a specific QNN SDK version; before executing the scripts and runner, ensure that your QNN SDK version matches the context binary. The tutorial below covers how to check the QNN SDK version of a context binary.

4. scripts: This folder contains scripts for models provided by ExecuTorch.
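
For instance, the two cases described in item 1 look like this (a minimal sketch; the SoC model, device serial, and build folder are placeholders, and commands are assumed to run from the repository root):

```bash
# Model without a customized runner: the example script deploys the model with the
# generic runner built at <build_folder>/examples/qualcomm/executor_runner/qnn_executor_runner.
python -m examples.qualcomm.scripts.deeplab_v3 -b cmake-out-android -m SM8550 -s <device_serial> --download

# Model with a customized runner: examples/qualcomm/oss_scripts/llama2/llama.py
# deploys the customized qnn_llama_runner that lives in the same folder instead.
```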



Please check the `--help` message of each example script for detailed arguments, as shown below.
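
For example (mobilenet_v2 is an arbitrary choice here; the example scripts parse arguments with argparse, so each should print its options via `--help`):

```bash
# Print the detailed arguments accepted by an example script.
python -m examples.qualcomm.scripts.mobilenet_v2 --help
```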

Here are some general notes and limitations.
Expand Down Expand Up @@ -47,6 +61,17 @@ python mobilenet_v2.py -s <device_serial> -m "SM8550" -b path/to/cmake-out-andro
python deeplab_v3.py -s <device_serial> -m "SM8550" -b path/to/cmake-out-android/ --download
```

#### Check context binary version
```bash
cd ${QNN_SDK_ROOT}/bin/x86_64-linux-clang
./qnn-context-binary-utility --context_binary ${PATH_TO_CONTEXT_BINARY} --json_file ${OUTPUT_JSON_NAME}
```
After retrieving the JSON file, search it for the field "buildId" and ensure that it matches the version of the QNN SDK under the ${QNN_SDK_ROOT} you are using.
If you run into the following error, the ${QNN_SDK_ROOT} you are using is older than the QNN SDK version with which the context binary was built. In this case, please download a newer QNN SDK.
```
Error: Failed to get context binary info.
```
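
As a quick check, you can grep the field directly (a minimal sketch; `ctx_info.json` stands for whatever name you passed to `--json_file` above):

```bash
# Print the QNN SDK version recorded in the context binary's metadata.
grep '"buildId"' ctx_info.json
```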

## Additional Dependency

The mobilebert multi-class text classification example requires `pandas` and `sklearn`.
22 changes: 22 additions & 0 deletions examples/qualcomm/executor_runner/CMakeLists.txt
@@ -0,0 +1,22 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set(_qnn_executor_runner__srcs ${_executor_runner__srcs})

# preprocess executor runner src files
list(TRANSFORM _qnn_executor_runner__srcs PREPEND "${EXECUTORCH_SOURCE_DIR}/")
list(FILTER _qnn_executor_runner__srcs EXCLUDE REGEX ".*executor_runner.cpp$")
list(PREPEND _qnn_executor_runner__srcs ${CMAKE_CURRENT_LIST_DIR}/qnn_executor_runner.cpp)

# build executor runner
add_executable(qnn_executor_runner ${_qnn_executor_runner__srcs})
target_include_directories(
qnn_executor_runner PUBLIC ${_common_include_directories}
)
target_link_libraries(
qnn_executor_runner qnn_executorch_backend full_portable_ops_lib etdump
${FLATCCRT_LIB} gflags
)
10 changes: 9 additions & 1 deletion examples/qualcomm/executor_runner/qnn_executor_runner.cpp
@@ -404,7 +404,15 @@ int main(int argc, char** argv) {
elapsed_time,
elapsed_time / inference_index);
} else {
// if no input is provided, run with default input as executor_runner.
// if no input is provided, fill the inputs with default values
auto inputs = util::prepare_input_tensors(*method);
ET_CHECK_MSG(
inputs.ok(),
"Could not prepare inputs: 0x%" PRIx32,
(uint32_t)inputs.error());
ET_LOG(
Info,
"Input list not provided. Inputs prepared with default values set.");
Error status = method->execute();
ET_CHECK_MSG(
status == Error::Ok,
67 changes: 0 additions & 67 deletions examples/qualcomm/llama2/README.md

This file was deleted.

2 changes: 1 addition & 1 deletion examples/qualcomm/oss_scripts/dino_v2.py
@@ -13,7 +13,7 @@
import torch
from executorch.backends.qualcomm.quantizer.quantizer import QuantDtype

from executorch.examples.qualcomm.scripts.utils import (
from executorch.examples.qualcomm.utils import (
build_executorch_binary,
make_output_dir,
parse_skip_delegation_node,