From 98cda94263645a9103df5e7cb09068e5abb24b25 Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Tue, 5 Aug 2025 12:11:40 -0700 Subject: [PATCH 1/8] Test xnnpack delegated model on pybindings in test_model.sh by default --- .ci/scripts/test_model.sh | 6 +++--- examples/xnnpack/aot_compiler.py | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh index 74eb75c6ddd..52e75223d3b 100755 --- a/.ci/scripts/test_model.sh +++ b/.ci/scripts/test_model.sh @@ -131,13 +131,13 @@ test_model_with_xnnpack() { return 0 fi - # Delegation + # Delegation and test with pybindings if [[ ${WITH_QUANTIZATION} == true ]]; then SUFFIX="q8" - "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --quantize + "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --quantize --test_with_pybindings else SUFFIX="fp32" - "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate + "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --test_with_pybindings fi OUTPUT_MODEL_PATH="${MODEL_NAME}_xnnpack_${SUFFIX}.pte" diff --git a/examples/xnnpack/aot_compiler.py b/examples/xnnpack/aot_compiler.py index 81eeb75c72c..9fed6fe1154 100644 --- a/examples/xnnpack/aot_compiler.py +++ b/examples/xnnpack/aot_compiler.py @@ -61,6 +61,14 @@ default="", help="Generate and save an ETRecord to the given file location", ) + parser.add_argument( + "-t", + "--test_with_pybindings", + action="store_true", + required=False, + default=False, + help="Test the pte with pybindings", + ) parser.add_argument("-o", "--output_dir", default=".", help="output directory") args = parser.parse_args() @@ -117,3 +125,10 @@ quant_tag = "q8" if args.quantize else "fp32" model_name = f"{args.model_name}_xnnpack_{quant_tag}" save_pte_program(exec_prog, model_name, args.output_dir) + + if args.test_pybind: + logging.info("Testing the pte with pybind") + from executorch.extension.pybindings.portable_lib import _load_for_executorch_from_buffer + + m = _load_for_executorch_from_buffer(exec_prog.buffer) + m.run_method("forward", example_inputs) From 9a4773794abe47933602261fd97d082a745672c9 Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Tue, 5 Aug 2025 12:19:23 -0700 Subject: [PATCH 2/8] Fix --- examples/xnnpack/aot_compiler.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/examples/xnnpack/aot_compiler.py b/examples/xnnpack/aot_compiler.py index 9fed6fe1154..f1b9c87322c 100644 --- a/examples/xnnpack/aot_compiler.py +++ b/examples/xnnpack/aot_compiler.py @@ -129,6 +129,14 @@ if args.test_pybind: logging.info("Testing the pte with pybind") from executorch.extension.pybindings.portable_lib import _load_for_executorch_from_buffer + # Import custom ops. This requires portable_lib to be loaded first. + from executorch.extension.llm.custom_ops import ( # noqa: F401, F403 + custom_ops, + ) # usort: skip + # Import quantized ops. This requires portable_lib to be loaded first. + from executorch.kernels import quantized # usort: skip # noqa: F401, F403 m = _load_for_executorch_from_buffer(exec_prog.buffer) - m.run_method("forward", example_inputs) + logging.info("Successfully loaded the model") + res = m.run_method("forward", example_inputs) + logging.info("Successfully ran the model") From 9c385a59a9f24d68e7f997a4ec35069f0d6ab079 Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Tue, 5 Aug 2025 12:20:15 -0700 Subject: [PATCH 3/8] Lint --- examples/xnnpack/aot_compiler.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/xnnpack/aot_compiler.py b/examples/xnnpack/aot_compiler.py index f1b9c87322c..306645acdc8 100644 --- a/examples/xnnpack/aot_compiler.py +++ b/examples/xnnpack/aot_compiler.py @@ -128,7 +128,10 @@ if args.test_pybind: logging.info("Testing the pte with pybind") - from executorch.extension.pybindings.portable_lib import _load_for_executorch_from_buffer + from executorch.extension.pybindings.portable_lib import ( + _load_for_executorch_from_buffer, + ) + # Import custom ops. This requires portable_lib to be loaded first. from executorch.extension.llm.custom_ops import ( # noqa: F401, F403 custom_ops, @@ -136,6 +139,7 @@ # Import quantized ops. This requires portable_lib to be loaded first. from executorch.kernels import quantized # usort: skip # noqa: F401, F403 + m = _load_for_executorch_from_buffer(exec_prog.buffer) logging.info("Successfully loaded the model") res = m.run_method("forward", example_inputs) From ff3034e56b56475f90d14640ea0b367d5b96628c Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Tue, 5 Aug 2025 13:27:07 -0700 Subject: [PATCH 4/8] Fix argument --- examples/xnnpack/aot_compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/xnnpack/aot_compiler.py b/examples/xnnpack/aot_compiler.py index 306645acdc8..021417b2df7 100644 --- a/examples/xnnpack/aot_compiler.py +++ b/examples/xnnpack/aot_compiler.py @@ -126,7 +126,7 @@ model_name = f"{args.model_name}_xnnpack_{quant_tag}" save_pte_program(exec_prog, model_name, args.output_dir) - if args.test_pybind: + if args.test_with_pybindings: logging.info("Testing the pte with pybind") from executorch.extension.pybindings.portable_lib import ( _load_for_executorch_from_buffer, From 874ce1e88f068042b0ab1cde79608028f3e534bd Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Tue, 5 Aug 2025 15:13:50 -0700 Subject: [PATCH 5/8] Fix example_inputs --- examples/xnnpack/aot_compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/xnnpack/aot_compiler.py b/examples/xnnpack/aot_compiler.py index 021417b2df7..11f976c99f8 100644 --- a/examples/xnnpack/aot_compiler.py +++ b/examples/xnnpack/aot_compiler.py @@ -142,5 +142,5 @@ m = _load_for_executorch_from_buffer(exec_prog.buffer) logging.info("Successfully loaded the model") - res = m.run_method("forward", example_inputs) + res = m.run_method("forward", *example_inputs) logging.info("Successfully ran the model") From b3fe1feb4537083d9d39ff32dc2398a2875a3e4f Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Tue, 5 Aug 2025 16:21:19 -0700 Subject: [PATCH 6/8] Flatten example inputs --- examples/xnnpack/aot_compiler.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/xnnpack/aot_compiler.py b/examples/xnnpack/aot_compiler.py index 11f976c99f8..5c916354ed8 100644 --- a/examples/xnnpack/aot_compiler.py +++ b/examples/xnnpack/aot_compiler.py @@ -139,8 +139,10 @@ # Import quantized ops. This requires portable_lib to be loaded first. from executorch.kernels import quantized # usort: skip # noqa: F401, F403 + from torch.utils._pytree import tree_flatten m = _load_for_executorch_from_buffer(exec_prog.buffer) logging.info("Successfully loaded the model") - res = m.run_method("forward", *example_inputs) + flattened = tree_flatten(example_inputs)[0] + res = m.run_method("forward", flattened) logging.info("Successfully ran the model") From 94461ecb42bf86e6f85b98599404a6e14e0130d5 Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Thu, 18 Sep 2025 19:51:28 -0700 Subject: [PATCH 7/8] Address comments --- examples/xnnpack/aot_compiler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/xnnpack/aot_compiler.py b/examples/xnnpack/aot_compiler.py index 5c916354ed8..9a78138adf3 100644 --- a/examples/xnnpack/aot_compiler.py +++ b/examples/xnnpack/aot_compiler.py @@ -63,7 +63,7 @@ ) parser.add_argument( "-t", - "--test_with_pybindings", + "--test_after_export", action="store_true", required=False, default=False, @@ -126,7 +126,7 @@ model_name = f"{args.model_name}_xnnpack_{quant_tag}" save_pte_program(exec_prog, model_name, args.output_dir) - if args.test_with_pybindings: + if args.test_after_export: logging.info("Testing the pte with pybind") from executorch.extension.pybindings.portable_lib import ( _load_for_executorch_from_buffer, From eaf6b5e5a8fe7fe31d6f54e174b73d76da2c70f5 Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Thu, 18 Sep 2025 20:24:37 -0700 Subject: [PATCH 8/8] Fix test_model.sh --- .ci/scripts/test_model.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh index 52e75223d3b..de28597b1d5 100755 --- a/.ci/scripts/test_model.sh +++ b/.ci/scripts/test_model.sh @@ -134,10 +134,10 @@ test_model_with_xnnpack() { # Delegation and test with pybindings if [[ ${WITH_QUANTIZATION} == true ]]; then SUFFIX="q8" - "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --quantize --test_with_pybindings + "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --quantize --test_after_export else SUFFIX="fp32" - "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --test_with_pybindings + "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --test_after_export fi OUTPUT_MODEL_PATH="${MODEL_NAME}_xnnpack_${SUFFIX}.pte"