pytorch · zingo · Jan 31, 2025 · Jan 8, 2025 · Jan 8, 2025
@@ -122,6 +122,18 @@ The you can run the tests with
 pytest -c /dev/null -v -n auto backends/arm/test --arm_run_corstoneFVP
 ```
 
+## Passes
+
+With the default passes in the Arm Ethos-U backend, assuming the model lowers fully to the
+Ethos-U, the exported program is composed of a Quantize node, Ethos-U custom delegate
+and a Dequantize node. In some circumstances, you may want to feed quantized input to the Neural
+Network straight away, e.g. if you have a camera sensor outputting (u)int8 data and keep all the
+arithmetic of the application in the int8 domain. For these cases, you can apply the
+`exir/passes/quantize_io_pass.py`. See the unit test in `executorch/backends/arm/
+test/passes/test_ioquantization_pass.py`for an example how to feed quantized inputs and
+obtain quantized outputs.
+
+
 ### Code coverage
 
 To get code coverage:

@@ -0,0 +1,70 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+
+from executorch.backends.arm.test import common
+
+from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.exir.passes.quantize_io_pass import QuantizeInputs, QuantizeOutputs
+
+
+class SimpleModel(torch.nn.Module):
+    def forward(self, x, y):
+        return x + y
+
+    def get_inputs(self):
+        a = torch.rand(1, 2, 2, 1)
+        b = torch.rand(1, 2, 2, 1)
+        return (a, b)
+
+
+class TestIOQuantizationPass(unittest.TestCase):
+    """
+    Test the executorch/exir/passes/quanize_io_pass pass works(meaning we don't get Q/DQ nodes) on a simple model
+    """
+
+    def test_ioquantisation_pass(self):
+        model = SimpleModel()
+        tester = (
+            ArmTester(
+                model,
+                example_inputs=model.get_inputs(),
+                compile_spec=common.get_u55_compile_spec(),
+            )
+            .quantize()
+            .export()
+            .to_edge()
+            .check_count(
+                {
+                    "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 3
+                }
+            )
+            .check_count(
+                {
+                    "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 3
+                }
+            )
+            .partition()
+            .check_count(
+                {
+                    "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 2
+                }
+            )
+            .check_count(
+                {
+                    "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 1
+                }
+            )
+        )
+        edge = tester.get_artifact()
+        edge.transform(
+            passes=[QuantizeInputs(edge, [0, 1]), QuantizeOutputs(edge, [0])]
+        )
+        tester.check_not(["edge__ops_quantized_decomposed_quantize_per_tensor"])
+        tester.check_not(["edge__ops_quantized_decomposed_dequantize_per_tensor"])