From 4f9171cba29eb16bdcb4426d16705ed0c2106e61 Mon Sep 17 00:00:00 2001 From: winskuo-quic Date: Tue, 15 Oct 2024 10:40:50 +0800 Subject: [PATCH] Update arch and soc chipset terms --- backends/qualcomm/README.md | 13 +----- backends/qualcomm/tests/test_qnn_delegate.py | 46 +++++++++---------- backends/qualcomm/tests/utils.py | 2 +- examples/qualcomm/oss_scripts/llama2/llama.py | 2 +- examples/qualcomm/utils.py | 8 ++-- 5 files changed, 31 insertions(+), 40 deletions(-) diff --git a/backends/qualcomm/README.md b/backends/qualcomm/README.md index d426aff16b1..a0cb5a5a502 100644 --- a/backends/qualcomm/README.md +++ b/backends/qualcomm/README.md @@ -21,17 +21,8 @@ Please check `generate_qnn_executorch_compiler_spec()` in - Snapdragon 8 Gen 2 - Snapdragon 8 Gen 3 -### How to add more supported Chipset - -#### Step 1: Check SoC model of snapdragon device -Get SoC model which would like to be supported from the document of Qualcomm AI Engine Direct SDK. - -#### Step 2: Update schema of compiler option and SoC information in serialization -Add SoC model into QcomChipset enum in [schema](./serialization/schema.fbs) and [qnn_compile_spec_schema](./serialization/qnn_compile_spec_schema.py). -Insert new SoC information into _soc_info_table in [qnn_compile_spec_schema](./serialization/qnn_compile_spec_schema.py). - -#### Step 3: Recompile the .pte file -Follow [setup](../../docs/source/build-run-qualcomm-ai-engine-direct-backend.md) to setup environment and build runtime with new schema header. +### Adding more supported chipsets +Currently, users cannot add additional chipset models because the chipset ID is not accessible to community users. If you have specific chipset models you wish to add, please contact one of the authors in the `Code Reviews` section at the bottom of this page. 
### Supported Inference Type - Quantized diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index 2e1bd0eff38..01b1014e4cf 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -68,7 +68,7 @@ def setUp(self): TestQNN.rtol = 1e-1 backend_options = generate_htp_compiler_spec(use_fp16=True) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, debug=False, saver=False, @@ -522,7 +522,7 @@ def setUp(self): TestQNN.rtol = 1e-1 backend_options = generate_htp_compiler_spec(use_fp16=True) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, debug=False, saver=False, @@ -674,7 +674,7 @@ def setUp(self): TestQNN.rtol = 1 backend_options = generate_htp_compiler_spec(use_fp16=False) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, debug=False, saver=False, @@ -1236,7 +1236,7 @@ def setUp(self): TestQNN.rtol = 1 backend_options = generate_htp_compiler_spec(use_fp16=False) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, debug=False, saver=False, @@ -1444,7 +1444,7 @@ def setUp(self): TestQNN.rtol = 1e-1 backend_options = generate_htp_compiler_spec(use_fp16=True) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, debug=False, saver=False, @@ -1453,7 +1453,7 @@ def setUp(self): def 
test_qnn_backend_dump_intermediate_outputs(self): backend_options = generate_htp_compiler_spec(use_fp16=True) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, dump_intermediate_outputs=True, ) @@ -1498,7 +1498,7 @@ def test_qnn_backend_multi_contexts(self): use_multi_contexts=True, ) compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, ) partitioner = QnnPartitioner(compiler_specs) @@ -1514,7 +1514,7 @@ def test_qnn_backend_multi_contexts_composite(self): use_multi_contexts=True, ) compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, ) module = CompositeDelegateModule( # noqa: F405 @@ -1535,7 +1535,7 @@ def test_qnn_backend_profile_op(self): TestQNN.enable_profile = True backend_options = generate_htp_compiler_spec(use_fp16=True) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, profile=True, ) @@ -1554,7 +1554,7 @@ def test_qnn_backend_shared_buffer(self): use_fp16=True, ) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, shared_buffer=True, ) @@ -1569,7 +1569,7 @@ def test_qnn_backend_shared_buffer(self): def test_qnn_backend_online_prepare(self): backend_options = generate_htp_compiler_spec(use_fp16=True) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, 
online_prepare=True, ) @@ -1590,7 +1590,7 @@ def test_qnn_backend_context_direct(self): bundle_program = from_context_binary(ctx_path, "ctx_loader") backend_options = generate_htp_compiler_spec(use_fp16=True) compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, is_from_context_binary=True, ) @@ -1614,7 +1614,7 @@ def setUp(self): TestQNN.rtol = 1 backend_options = generate_htp_compiler_spec(use_fp16=False) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, debug=False, saver=False, @@ -1623,7 +1623,7 @@ def setUp(self): def test_qnn_backend_dump_intermediate_outputs(self): backend_options = generate_htp_compiler_spec(use_fp16=False) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, dump_intermediate_outputs=True, ) @@ -1657,7 +1657,7 @@ def test_qnn_backend_skip_node_id_quantizer(self): use_fp16=False, ) compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, ) partitioner = QnnPartitioner(compiler_specs) @@ -1704,7 +1704,7 @@ def test_qnn_backend_skip_node_op_quantizer(self): use_fp16=False, ) compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, ) partitioner = QnnPartitioner(compiler_specs) @@ -1740,7 +1740,7 @@ def test_qnn_backend_graph_level_mixed_precision(self): use_fp16=False, ) compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + 
soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, ) partitioner = QnnPartitioner(compiler_specs) @@ -1781,7 +1781,7 @@ def test_qnn_backend_multi_contexts(self): use_multi_contexts=True, ) compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, ) partitioner = QnnPartitioner(compiler_specs) @@ -1797,7 +1797,7 @@ def test_qnn_backend_multi_contexts_composite(self): use_multi_contexts=True, ) compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, ) module = CompositeDelegateModule( # noqa: F405 @@ -1819,7 +1819,7 @@ def test_qnn_backend_profile_op(self): TestQNN.enable_profile = True backend_options = generate_htp_compiler_spec(use_fp16=False) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, profile=True, ) @@ -1839,7 +1839,7 @@ def test_qnn_backend_shared_buffer(self): use_fp16=False, ) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, shared_buffer=True, ) @@ -1855,7 +1855,7 @@ def test_qnn_backend_shared_buffer(self): def test_qnn_backend_online_prepare(self): backend_options = generate_htp_compiler_spec(use_fp16=False) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, online_prepare=True, ) @@ -1877,7 +1877,7 @@ def test_qnn_backend_context_direct(self): bundle_program = from_context_binary(ctx_path, "ctx_loader") backend_options = generate_htp_compiler_spec(use_fp16=False) 
compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=self.arch_table[TestQNN.model], + soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, is_from_context_binary=True, ) diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py index c58da42e847..4e5ed04bc59 100644 --- a/backends/qualcomm/tests/utils.py +++ b/backends/qualcomm/tests/utils.py @@ -120,7 +120,7 @@ class TestQNN(unittest.TestCase): build_folder: Literal = "" model: QcomChipset = None compiler_specs: List[CompileSpec] = None - arch_table = get_soc_to_chipset_map() + chipset_table = get_soc_to_chipset_map() error_only = False ip = "localhost" port = 8080 diff --git a/examples/qualcomm/oss_scripts/llama2/llama.py b/examples/qualcomm/oss_scripts/llama2/llama.py index fb21da4e9c4..5434efea065 100644 --- a/examples/qualcomm/oss_scripts/llama2/llama.py +++ b/examples/qualcomm/oss_scripts/llama2/llama.py @@ -394,7 +394,7 @@ def compile(args): end_quantize_ts = time.time() print("single_llama.quantize(quant_dtype)", end_quantize_ts - start_quantize_ts) single_llama.lowering_modules( - args.artifact, kv_type=kv_type, soc_model=get_soc_to_chipset_map[args.model] + args.artifact, kv_type=kv_type, soc_model=get_soc_to_chipset_map()[args.model] ) end_lowering_ts = time.time() print("Complete Compile", end_lowering_ts - end_quantize_ts) diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py index f10babc5bbe..27c9db2ffcc 100755 --- a/examples/qualcomm/utils.py +++ b/examples/qualcomm/utils.py @@ -83,7 +83,7 @@ def __init__( self.dump_intermediate_outputs = dump_intermediate_outputs self.debug_output_path = f"{self.workspace}/debug_output.bin" self.output_folder = f"{self.workspace}/outputs" - self.soc_model = get_soc_to_arch_map()[soc_model] + self.htp_arch = get_soc_to_arch_map()[soc_model] self.error_only = error_only self.shared_buffer = shared_buffer self.runner = runner @@ -108,12 +108,12 @@ def push(self, inputs=None, input_list=None, 
files=None): *self.pte_path, f"{self.qnn_sdk}/lib/aarch64-android/libQnnHtp.so", ( - f"{self.qnn_sdk}/lib/hexagon-v{self.soc_model}/" - f"unsigned/libQnnHtpV{self.soc_model}Skel.so" + f"{self.qnn_sdk}/lib/hexagon-v{self.htp_arch}/" + f"unsigned/libQnnHtpV{self.htp_arch}Skel.so" ), ( f"{self.qnn_sdk}/lib/aarch64-android/" - f"libQnnHtpV{self.soc_model}Stub.so" + f"libQnnHtpV{self.htp_arch}Stub.so" ), f"{self.qnn_sdk}/lib/aarch64-android/libQnnHtpPrepare.so", f"{self.qnn_sdk}/lib/aarch64-android/libQnnSystem.so",