Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 2 additions & 11 deletions backends/qualcomm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,8 @@ Please check `generate_qnn_executorch_compiler_spec()` in
- Snapdragon 8 Gen 2
- Snapdragon 8 Gen 3

### How to add more supported Chipset

#### Step 1: Check SoC model of snapdragon device
Find the SoC model you would like to support in the documentation of the Qualcomm AI Engine Direct SDK.

#### Step 2: Update schema of compiler option and SoC information in serialization
Add SoC model into QcomChipset enum in [schema](./serialization/schema.fbs) and [qnn_compile_spec_schema](./serialization/qnn_compile_spec_schema.py).
Insert new SoC information into _soc_info_table in [qnn_compile_spec_schema](./serialization/qnn_compile_spec_schema.py).

#### Step 3: Recompile the .pte file
Follow [setup](../../docs/source/build-run-qualcomm-ai-engine-direct-backend.md) to set up the environment and build the runtime with the new schema header.
### Adding more supported Chipsets
Currently, users cannot add new chipset models themselves because the chipset IDs are not accessible to community users. If there are specific chipset models you wish to see added, please contact one of the authors listed in the `Code Reviews` section at the bottom of this page.

### Supported Inference Type
- Quantized
Expand Down
46 changes: 23 additions & 23 deletions backends/qualcomm/tests/test_qnn_delegate.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def setUp(self):
TestQNN.rtol = 1e-1
backend_options = generate_htp_compiler_spec(use_fp16=True)
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
debug=False,
saver=False,
Expand Down Expand Up @@ -522,7 +522,7 @@ def setUp(self):
TestQNN.rtol = 1e-1
backend_options = generate_htp_compiler_spec(use_fp16=True)
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
debug=False,
saver=False,
Expand Down Expand Up @@ -674,7 +674,7 @@ def setUp(self):
TestQNN.rtol = 1
backend_options = generate_htp_compiler_spec(use_fp16=False)
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
debug=False,
saver=False,
Expand Down Expand Up @@ -1236,7 +1236,7 @@ def setUp(self):
TestQNN.rtol = 1
backend_options = generate_htp_compiler_spec(use_fp16=False)
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
debug=False,
saver=False,
Expand Down Expand Up @@ -1444,7 +1444,7 @@ def setUp(self):
TestQNN.rtol = 1e-1
backend_options = generate_htp_compiler_spec(use_fp16=True)
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
debug=False,
saver=False,
Expand All @@ -1453,7 +1453,7 @@ def setUp(self):
def test_qnn_backend_dump_intermediate_outputs(self):
backend_options = generate_htp_compiler_spec(use_fp16=True)
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
dump_intermediate_outputs=True,
)
Expand Down Expand Up @@ -1498,7 +1498,7 @@ def test_qnn_backend_multi_contexts(self):
use_multi_contexts=True,
)
compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
)
partitioner = QnnPartitioner(compiler_specs)
Expand All @@ -1514,7 +1514,7 @@ def test_qnn_backend_multi_contexts_composite(self):
use_multi_contexts=True,
)
compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
)
module = CompositeDelegateModule( # noqa: F405
Expand All @@ -1535,7 +1535,7 @@ def test_qnn_backend_profile_op(self):
TestQNN.enable_profile = True
backend_options = generate_htp_compiler_spec(use_fp16=True)
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
profile=True,
)
Expand All @@ -1554,7 +1554,7 @@ def test_qnn_backend_shared_buffer(self):
use_fp16=True,
)
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
shared_buffer=True,
)
Expand All @@ -1569,7 +1569,7 @@ def test_qnn_backend_shared_buffer(self):
def test_qnn_backend_online_prepare(self):
backend_options = generate_htp_compiler_spec(use_fp16=True)
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
online_prepare=True,
)
Expand All @@ -1590,7 +1590,7 @@ def test_qnn_backend_context_direct(self):
bundle_program = from_context_binary(ctx_path, "ctx_loader")
backend_options = generate_htp_compiler_spec(use_fp16=True)
compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
is_from_context_binary=True,
)
Expand All @@ -1614,7 +1614,7 @@ def setUp(self):
TestQNN.rtol = 1
backend_options = generate_htp_compiler_spec(use_fp16=False)
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
debug=False,
saver=False,
Expand All @@ -1623,7 +1623,7 @@ def setUp(self):
def test_qnn_backend_dump_intermediate_outputs(self):
backend_options = generate_htp_compiler_spec(use_fp16=False)
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
dump_intermediate_outputs=True,
)
Expand Down Expand Up @@ -1657,7 +1657,7 @@ def test_qnn_backend_skip_node_id_quantizer(self):
use_fp16=False,
)
compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
)
partitioner = QnnPartitioner(compiler_specs)
Expand Down Expand Up @@ -1704,7 +1704,7 @@ def test_qnn_backend_skip_node_op_quantizer(self):
use_fp16=False,
)
compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
)
partitioner = QnnPartitioner(compiler_specs)
Expand Down Expand Up @@ -1740,7 +1740,7 @@ def test_qnn_backend_graph_level_mixed_precision(self):
use_fp16=False,
)
compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
)
partitioner = QnnPartitioner(compiler_specs)
Expand Down Expand Up @@ -1781,7 +1781,7 @@ def test_qnn_backend_multi_contexts(self):
use_multi_contexts=True,
)
compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
)
partitioner = QnnPartitioner(compiler_specs)
Expand All @@ -1797,7 +1797,7 @@ def test_qnn_backend_multi_contexts_composite(self):
use_multi_contexts=True,
)
compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
)
module = CompositeDelegateModule( # noqa: F405
Expand All @@ -1819,7 +1819,7 @@ def test_qnn_backend_profile_op(self):
TestQNN.enable_profile = True
backend_options = generate_htp_compiler_spec(use_fp16=False)
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
profile=True,
)
Expand All @@ -1839,7 +1839,7 @@ def test_qnn_backend_shared_buffer(self):
use_fp16=False,
)
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
shared_buffer=True,
)
Expand All @@ -1855,7 +1855,7 @@ def test_qnn_backend_shared_buffer(self):
def test_qnn_backend_online_prepare(self):
backend_options = generate_htp_compiler_spec(use_fp16=False)
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
online_prepare=True,
)
Expand All @@ -1877,7 +1877,7 @@ def test_qnn_backend_context_direct(self):
bundle_program = from_context_binary(ctx_path, "ctx_loader")
backend_options = generate_htp_compiler_spec(use_fp16=False)
compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.arch_table[TestQNN.model],
soc_model=self.chipset_table[TestQNN.model],
backend_options=backend_options,
is_from_context_binary=True,
)
Expand Down
2 changes: 1 addition & 1 deletion backends/qualcomm/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ class TestQNN(unittest.TestCase):
build_folder: Literal = ""
model: QcomChipset = None
compiler_specs: List[CompileSpec] = None
arch_table = get_soc_to_chipset_map()
chipset_table = get_soc_to_chipset_map()
error_only = False
ip = "localhost"
port = 8080
Expand Down
2 changes: 1 addition & 1 deletion examples/qualcomm/oss_scripts/llama2/llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ def compile(args):
end_quantize_ts = time.time()
print("single_llama.quantize(quant_dtype)", end_quantize_ts - start_quantize_ts)
single_llama.lowering_modules(
args.artifact, kv_type=kv_type, soc_model=get_soc_to_chipset_map[args.model]
args.artifact, kv_type=kv_type, soc_model=get_soc_to_chipset_map()[args.model]
)
end_lowering_ts = time.time()
print("Complete Compile", end_lowering_ts - end_quantize_ts)
Expand Down
8 changes: 4 additions & 4 deletions examples/qualcomm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def __init__(
self.dump_intermediate_outputs = dump_intermediate_outputs
self.debug_output_path = f"{self.workspace}/debug_output.bin"
self.output_folder = f"{self.workspace}/outputs"
self.soc_model = get_soc_to_arch_map()[soc_model]
self.htp_arch = get_soc_to_arch_map()[soc_model]
self.error_only = error_only
self.shared_buffer = shared_buffer
self.runner = runner
Expand All @@ -108,12 +108,12 @@ def push(self, inputs=None, input_list=None, files=None):
*self.pte_path,
f"{self.qnn_sdk}/lib/aarch64-android/libQnnHtp.so",
(
f"{self.qnn_sdk}/lib/hexagon-v{self.soc_model}/"
f"unsigned/libQnnHtpV{self.soc_model}Skel.so"
f"{self.qnn_sdk}/lib/hexagon-v{self.htp_arch}/"
f"unsigned/libQnnHtpV{self.htp_arch}Skel.so"
),
(
f"{self.qnn_sdk}/lib/aarch64-android/"
f"libQnnHtpV{self.soc_model}Stub.so"
f"libQnnHtpV{self.htp_arch}Stub.so"
),
f"{self.qnn_sdk}/lib/aarch64-android/libQnnHtpPrepare.so",
f"{self.qnn_sdk}/lib/aarch64-android/libQnnSystem.so",
Expand Down
Loading