From 9dd2292acf5e65d750a117189fc6114a6102812b Mon Sep 17 00:00:00 2001 From: Stephen Jia Date: Mon, 13 Oct 2025 21:23:05 -0400 Subject: [PATCH] [Samsung] Docs template Summary: Title says it all! Add docs for the Samsung backend based on the template introduced in https://github.com/pytorch/executorch/pull/14873. --- .../samsung/samsung-op-support-table.csv | 45 +++++++ .../backends/samsung/samsung-op-support.rst | 11 ++ .../backends/samsung/samsung-overview.md | 117 ++++++++++++++++++ .../backends/samsung/samsung-partitioner.md | 29 +++++ .../backends/samsung/samsung-quantization.md | 60 +++++++++ 5 files changed, 262 insertions(+) create mode 100644 docs/source/backends/samsung/samsung-op-support-table.csv create mode 100644 docs/source/backends/samsung/samsung-op-support.rst create mode 100644 docs/source/backends/samsung/samsung-overview.md create mode 100644 docs/source/backends/samsung/samsung-partitioner.md create mode 100644 docs/source/backends/samsung/samsung-quantization.md diff --git a/docs/source/backends/samsung/samsung-op-support-table.csv b/docs/source/backends/samsung/samsung-op-support-table.csv new file mode 100644 index 00000000000..7d925c43400 --- /dev/null +++ b/docs/source/backends/samsung/samsung-op-support-table.csv @@ -0,0 +1,45 @@ +Operator,Quantization,Constraints +add,static int8, +avg_pool2d,static int8,"ceil_mode=False, divisor_override=pooling_region" +batch_norm,static int8, +bmm,static int8, +cat,static int8,at most 1 constant tensor +clamp,static int8, +constant_pad_nd,static int8,padding_value=0.0 only +conv2d,static int8,constant weights +dequantize_per_channel,, +dequantize_per_tensor,, +div,static int8, +embedding,static int8, +expand_copy,,"expanding at most one axis, new dimensions must be size 1" +gelu,static int8, +getitem,, +hardsigmoid,static int8, +hardswish,static int8, +hardtanh,static int8, +layer_norm,static int8,norm at last axis only +leaky_relu,static int8, +linear,static int8,constant weights +log_softmax,static int8, +max_pool2d,static int8,"ceil_mode=False, indices not supported" +maximum,, +mean_dim,static int8, +minimum,, +mul,static int8, +permute,static int8, +pixel_shuffle,, +quantize_per_channel,, +quantize_per_tensor,, +relu,static int8, +reshape,static int8, +rsqrt,static int8, +select,static int8, +slice_copy,static int8, +softmax,static int8, +sqrt,static int8, +squeeze,static int8, +sub,static int8, +to_copy,,memory_format=contiguous only +unsqueeze,static int8, +upsample_bilinear2d,static int8, +upsample_nearest2d,static int8, diff --git a/docs/source/backends/samsung/samsung-op-support.rst b/docs/source/backends/samsung/samsung-op-support.rst new file mode 100644 index 00000000000..ecccd565021 --- /dev/null +++ b/docs/source/backends/samsung/samsung-op-support.rst @@ -0,0 +1,11 @@ +================ +Operator Support +================ + +This page lists the PyTorch operators currently supported by the Samsung Exynos backend. + +.. csv-table:: Operator Support + :file: samsung-op-support-table.csv + :header-rows: 1 + :widths: 25 15 55 + :align: center diff --git a/docs/source/backends/samsung/samsung-overview.md b/docs/source/backends/samsung/samsung-overview.md new file mode 100644 index 00000000000..9bdc4eb4289 --- /dev/null +++ b/docs/source/backends/samsung/samsung-overview.md @@ -0,0 +1,117 @@ +# Samsung Exynos Backend + +ExecuTorch's Samsung Exynos backend enables the execution of ExecuTorch models on +Samsung SoCs via the NPU/DSP. 
+The delegate is built on top of the
+[Samsung Exynos AI Litecore SDK](https://soc-developer.semiconductor.samsung.com/global/development/ai-litecore).
+
+## Features
+
+- Wide range of operator support
+- Supported inference precisions:
+  - FP16
+  - 8-bit statically quantized (int8/uint8)
+  - 16-bit statically quantized (int16/uint16)
+
+## Target Requirements
+
+Currently, the Samsung Exynos backend supports only devices with the following
+chipsets:
+
+- Exynos 2500 (E9955)
+
+## Development Requirements
+
+The [Samsung Exynos AI Litecore SDK](https://soc-developer.semiconductor.samsung.com/global/development/ai-litecore)
+is required both to build the Exynos backend from source and to export models to the
+Exynos delegate.
+
+----
+
+## Using the Samsung Exynos Backend
+
+To target the Exynos backend during the export and lowering process, pass an instance of
+`EnnPartitioner` to `to_edge_transform_and_lower`. The example below demonstrates this
+process using the MobileNet V2 model from torchvision.
+
+```python
+import torch
+import torchvision.models as models
+from torchvision.models.mobilenetv2 import MobileNet_V2_Weights
+from executorch.backends.samsung.partition.enn_partitioner import EnnPartitioner
+from executorch.backends.samsung.serialization.compile_options import (
+    gen_samsung_backend_compile_spec,
+)
+from executorch.exir import to_edge_transform_and_lower
+
+mobilenet_v2 = models.mobilenetv2.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).eval()
+sample_inputs = (torch.randn(1, 3, 224, 224),)
+
+chipset = "E9955"
+compile_specs = [gen_samsung_backend_compile_spec(chipset)]
+
+et_program = to_edge_transform_and_lower(
+    torch.export.export(mobilenet_v2, sample_inputs),
+    partitioner=[EnnPartitioner(compile_specs)],
+).to_executorch()
+
+with open("mv2_exynos.pte", "wb") as file:
+    et_program.write_to_file(file)
+```
+
+See [Partitioner API](/backends/samsung/samsung-partitioner) for a reference on available
+partitioner options.
+
+----
+
+## Quantization
+
+The Samsung Exynos backend supports statically quantized models with 8-bit and 16-bit
+integer types.
+
+See [Samsung Exynos Quantization](/backends/samsung/samsung-quantization) for more
+information on available quantization schemes and APIs.
+
+----
+
+## Runtime Integration
+
+To run the model on-device, use the standard ExecuTorch runtime APIs.
+
+The Exynos backend is currently not available in any of ExecuTorch's published packages.
+To use it, build ExecuTorch from source and pass `-DEXECUTORCH_BUILD_EXYNOS=ON` when
+configuring the CMake build. See [Running on Device](/getting-started.md#running-on-device)
+for more information.
+
+Then, to link against the backend, add the `executorch_backends` CMake target as a build
+dependency.
+
+```
+# CMakeLists.txt
+add_subdirectory("executorch")
+...
+target_link_libraries(
+    my_target
+    PRIVATE executorch
+            executorch_backends
+            ...
+)
+```
+
+No additional steps are necessary to use the backend beyond linking the target. Any
+Exynos-delegated .pte file will automatically run on the registered backend.
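+
+Since operators that are not delegated fall back to ExecuTorch's portable CPU kernels at
+runtime, it can be useful to verify how much of the model was actually lowered to the
+Exynos backend before deploying. Below is a minimal sketch using ExecuTorch's
+general-purpose delegation debugging utility (not specific to the Exynos backend),
+assuming the `mobilenet_v2`, `sample_inputs`, and `compile_specs` values from the export
+example above.
+
+```python
+from executorch.devtools.backend_debug import get_delegation_info
+
+# Lower as before, but keep the intermediate edge program so it can be inspected
+# before serializing.
+edge_program = to_edge_transform_and_lower(
+    torch.export.export(mobilenet_v2, sample_inputs),
+    partitioner=[EnnPartitioner(compile_specs)],
+)
+delegation_info = get_delegation_info(edge_program.exported_program().graph_module)
+print(delegation_info.get_summary())  # delegated vs. non-delegated node counts
+
+et_program = edge_program.to_executorch()
+```
+
+If the summary reports a large number of non-delegated operators, check them against the
+[Operator Support](/backends/samsung/samsung-op-support) table.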
+
+## Reference
+
+**→{doc}`samsung-partitioner` — Partitioner options.**
+
+**→{doc}`samsung-quantization` — Supported quantization schemes.**
+
+**→{doc}`samsung-op-support` — Supported operators.**
+
+```{toctree}
+:maxdepth: 2
+:hidden:
+:caption: Exynos Backend
+
+samsung-partitioner
+samsung-quantization
+samsung-op-support
+```
diff --git a/docs/source/backends/samsung/samsung-partitioner.md b/docs/source/backends/samsung/samsung-partitioner.md
new file mode 100644
index 00000000000..eb84a795551
--- /dev/null
+++ b/docs/source/backends/samsung/samsung-partitioner.md
@@ -0,0 +1,29 @@
+# Partitioner API
+
+The `EnnPartitioner` API is the primary entry point when exporting a model to the Samsung
+Exynos backend. The partitioner determines which parts of the model are lowered to the
+backend and provides an interface for configuring the backend's behavior.
+
+Currently, the configuration options for `EnnPartitioner` can be generated automatically
+using the `gen_samsung_backend_compile_spec` API. For instance,
+
+```python
+from executorch.backends.samsung.partition.enn_partitioner import EnnPartitioner
+from executorch.backends.samsung.serialization.compile_options import (
+    gen_samsung_backend_compile_spec,
+)
+
+from executorch.exir import to_edge_transform_and_lower
+
+chipset = "E9955"
+compile_specs = [gen_samsung_backend_compile_spec(chipset)]
+
+# exported_program is the output of torch.export.export on the source model.
+et_program = to_edge_transform_and_lower(
+    exported_program,
+    partitioner=[EnnPartitioner(compile_specs)],
+).to_executorch()
+```
+
+At the moment, only `"E9955"` is supported as a valid chipset name, which corresponds to
+the Exynos 2500 SoC. Support for additional chipsets will be added in the future.
diff --git a/docs/source/backends/samsung/samsung-quantization.md b/docs/source/backends/samsung/samsung-quantization.md
new file mode 100644
index 00000000000..ad4b50cb93d
--- /dev/null
+++ b/docs/source/backends/samsung/samsung-quantization.md
@@ -0,0 +1,60 @@
+# Quantization
+
+The Exynos backend currently supports executing statically quantized 8-bit models.
+
+## 8-bit quantization with the PT2E quantization flow
+
+To perform 8-bit quantization with the PT2E flow, perform the following steps prior to exporting the model:
+
+1) Create an instance of the `EnnQuantizer` class and set the desired quantization behavior.
+2) Use `torch.export.export` to obtain a graph module representation of the source model.
+3) Use `prepare_pt2e` to prepare the model for quantization.
+4) Execute the prepared model with representative samples to calibrate the activation tensor quantization ranges.
+5) Use `convert_pt2e` to quantize the model.
+6) Export and lower the model using the standard export flow.
+
+The output of `convert_pt2e` is a PyTorch model which can be exported and lowered using
+the same export flow as non-quantized models. As it is a regular PyTorch model, it can
+also be used to evaluate the accuracy of the quantized model using standard PyTorch
+techniques.
+
+The example below shows how to quantize a MobileNetV2 model using the PT2E quantization flow.
+
+```python
+import torch
+import torchvision.models as models
+from torchvision.models.mobilenetv2 import MobileNet_V2_Weights
+
+from executorch.backends.samsung.partition.enn_partitioner import EnnPartitioner
+from executorch.backends.samsung.quantizer.quantizer import EnnQuantizer
+from executorch.backends.samsung.serialization.compile_options import (
+    gen_samsung_backend_compile_spec,
+)
+
+from executorch.exir import to_edge_transform_and_lower
+from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
+
+model = models.mobilenetv2.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).eval()
+sample_inputs = (torch.randn(1, 3, 224, 224),)
+
+# Currently, "A8W8" (8-bit activations, 8-bit weights) is the only supported
+# precision mode.
+precision = "A8W8"
+is_per_channel = True
+is_qat = False
+
+quantizer = EnnQuantizer()
+quantizer.set_quant_params(precision, is_per_channel, is_qat)  # (1)
+
+graph_module = torch.export.export(model, sample_inputs).module()  # (2)
+prepared_model = prepare_pt2e(graph_module, quantizer)  # (3)
+
+for cal_sample in [torch.randn(1, 3, 224, 224)]:  # Replace with representative model inputs
+    prepared_model(cal_sample)  # (4) Calibrate
+
+quantized_model = convert_pt2e(prepared_model)  # (5)
+
+compile_specs = [gen_samsung_backend_compile_spec("E9955")]
+et_program = to_edge_transform_and_lower(  # (6)
+    torch.export.export(quantized_model, sample_inputs),
+    partitioner=[EnnPartitioner(compile_specs)],
+).to_executorch()
+```
+
+See [PyTorch 2 Export Post Training Quantization](https://docs.pytorch.org/ao/main/tutorials_source/pt2e_quant_ptq.html)
+for more information.
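+
+Because the output of `convert_pt2e` is a regular PyTorch module, a quick accuracy
+sanity check can be run before lowering. Below is a minimal sketch, assuming the
+`model`, `sample_inputs`, and `quantized_model` values from the example above; the
+SQNR metric used here is illustrative, not a backend requirement.
+
+```python
+import torch
+
+# Compare float and quantized outputs on a representative input.
+with torch.no_grad():
+    float_out = model(*sample_inputs)
+    quant_out = quantized_model(*sample_inputs)
+
+# Signal-to-quantization-noise ratio (in dB); higher means the quantized
+# model tracks the float reference more closely.
+sqnr = 20 * torch.log10(float_out.norm() / (float_out - quant_out).norm())
+print(f"SQNR: {sqnr.item():.1f} dB")
+```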