From a45778085148dfc82aae66de0f940c21beda9ad6 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Wed, 9 Apr 2025 14:58:11 -0700
Subject: [PATCH] Update backends-xnnpack.md

---
 docs/source/backends-xnnpack.md | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/docs/source/backends-xnnpack.md b/docs/source/backends-xnnpack.md
index 45055ea4e13..db1c055dc9c 100644
--- a/docs/source/backends-xnnpack.md
+++ b/docs/source/backends-xnnpack.md
@@ -28,6 +28,7 @@ the core ExecuTorch runtime.
 To target the XNNPACK backend during the export and lowering process, pass an instance of the `XnnpackPartitioner` to `to_edge_transform_and_lower`. The example below demonstrates this process using the MobileNet V2 model from torchvision.
 
 ```python
+import torch
 import torchvision.models as models
 from torchvision.models.mobilenetv2 import MobileNet_V2_Weights
 from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
@@ -47,10 +48,10 @@ with open("mv2_xnnpack.pte", "wb") as file:
 
 ### Partitioner API
 
-The XNNPACK partitioner API allows for configuration of the model delegation to XNNPACK. Passing an `XnnpackPartitioner` instance with no additional parameters will run as much of the model as possible on the XNNPACK backend. This is the most common use-case. For advanced use cases, the partitioner exposes the following options via the [constructor](https://github.com/pytorch/executorch/blob/14ff52ff89a89c074fc6c14d3f01683677783dcd/backends/xnnpack/partition/xnnpack_partitioner.py#L31):
+The XNNPACK partitioner API allows for configuration of the model delegation to XNNPACK. Passing an `XnnpackPartitioner` instance with no additional parameters will run as much of the model as possible on the XNNPACK backend. This is the most common use case. For advanced use cases, the partitioner exposes the following options via the [constructor](https://github.com/pytorch/executorch/blob/release/0.6/backends/xnnpack/partition/xnnpack_partitioner.py#L31):
 
- - `configs`: Control which operators are delegated to XNNPACK. By default, all available operators all delegated. See [../config/\_\_init\_\_.py](https://github.com/pytorch/executorch/blob/14ff52ff89a89c074fc6c14d3f01683677783dcd/backends/xnnpack/partition/config/__init__.py#L66) for an exhaustive list of available operator configs.
- - `config_precisions`: Filter operators by data type. By default, delegate all precisions. One or more of `ConfigPrecisionType.FP32`, `ConfigPrecisionType.STATIC_QUANT`, or `ConfigPrecisionType.DYNAMIC_QUANT`. See [ConfigPrecisionType](https://github.com/pytorch/executorch/blob/14ff52ff89a89c074fc6c14d3f01683677783dcd/backends/xnnpack/partition/config/xnnpack_config.py#L24).
+ - `configs`: Control which operators are delegated to XNNPACK. By default, all available operators are delegated. See [../config/\_\_init\_\_.py](https://github.com/pytorch/executorch/blob/release/0.6/backends/xnnpack/partition/config/__init__.py#L66) for an exhaustive list of available operator configs.
+ - `config_precisions`: Filter operators by data type. By default, delegate all precisions. One or more of `ConfigPrecisionType.FP32`, `ConfigPrecisionType.STATIC_QUANT`, or `ConfigPrecisionType.DYNAMIC_QUANT`. See [ConfigPrecisionType](https://github.com/pytorch/executorch/blob/release/0.6/backends/xnnpack/partition/config/xnnpack_config.py#L24).
  - `per_op_mode`: If true, emit individual delegate calls for every operator. This is an advanced option intended to reduce memory overhead in some contexts at the cost of a small amount of runtime overhead. Defaults to false.
  - `verbose`: If true, print additional information during lowering.
 
@@ -87,15 +88,23 @@ To perform 8-bit quantization with the PT2E flow, perform the following steps pr
 The output of `convert_pt2e` is a PyTorch model which can be exported and lowered using the normal flow. As it is a regular PyTorch model, it can also be used to evaluate the accuracy of the quantized model using standard PyTorch techniques.
 
 ```python
+import torch
+import torchvision.models as models
+from torchvision.models.mobilenetv2 import MobileNet_V2_Weights
 from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import XNNPACKQuantizer
+from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
+from executorch.exir import to_edge_transform_and_lower
 from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
 from torch.ao.quantization.quantizer.xnnpack_quantizer import get_symmetric_quantization_config
 
+model = models.mobilenetv2.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).eval()
+sample_inputs = (torch.randn(1, 3, 224, 224),)
+
 qparams = get_symmetric_quantization_config(is_per_channel=True) # (1)
 quantizer = XNNPACKQuantizer()
 quantizer.set_global(qparams)
 
-training_ep = torch.export.export_for_training(model, sample_inputs).module(), # (2)
+training_ep = torch.export.export_for_training(model, sample_inputs).module() # (2)
 prepared_model = prepare_pt2e(training_ep, quantizer) # (3)
 
 for cal_sample in [torch.randn(1, 3, 224, 224)]: # Replace with representative model inputs
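
The final hunk ends at the calibration loop. Below is a minimal sketch, not quoted from the patched file, of how the flow continues per the surrounding prose: calibrate, convert with `convert_pt2e`, then export and lower through the normal flow. Names continue from the snippet in the hunk above.

```python
# Sketch only: continues from `prepared_model` and `sample_inputs` in the
# hunk above; the patched file's own continuation lies outside this diff.
for cal_sample in [torch.randn(1, 3, 224, 224)]:  # Replace with representative model inputs
    prepared_model(cal_sample)  # Run calibration data through the prepared model

quantized_model = convert_pt2e(prepared_model)  # Produce the quantized PyTorch model

# The result is a regular PyTorch model; export and lower as usual.
et_program = to_edge_transform_and_lower(
    torch.export.export(quantized_model, sample_inputs),
    partitioner=[XnnpackPartitioner()],
).to_executorch()
```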
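Similarly, a minimal usage sketch for the partitioner options documented in the second hunk. It assumes `ConfigPrecisionType` is importable from the `xnnpack_config` module linked there; the model setup mirrors the MobileNet V2 example, and the output filename is illustrative.

```python
import torch
import torchvision.models as models
from torchvision.models.mobilenetv2 import MobileNet_V2_Weights
from executorch.backends.xnnpack.partition.config.xnnpack_config import ConfigPrecisionType
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.exir import to_edge_transform_and_lower

model = models.mobilenetv2.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).eval()
sample_inputs = (torch.randn(1, 3, 224, 224),)

# Delegate only FP32 operators, emitting one delegate call per operator.
partitioner = XnnpackPartitioner(
    config_precisions=ConfigPrecisionType.FP32,
    per_op_mode=True,
)

et_program = to_edge_transform_and_lower(
    torch.export.export(model, sample_inputs),
    partitioner=[partitioner],
).to_executorch()

with open("mv2_xnnpack_fp32.pte", "wb") as file:
    et_program.write_to_file(file)
```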