From 98cda94263645a9103df5e7cb09068e5abb24b25 Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Tue, 5 Aug 2025 12:11:40 -0700
Subject: [PATCH 1/8] Test xnnpack delegated model on pybindings in
 test_model.sh by default

---
 .ci/scripts/test_model.sh        |  6 +++---
 examples/xnnpack/aot_compiler.py | 15 +++++++++++++++
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh
index 74eb75c6ddd..52e75223d3b 100755
--- a/.ci/scripts/test_model.sh
+++ b/.ci/scripts/test_model.sh
@@ -131,13 +131,13 @@ test_model_with_xnnpack() {
     return 0
   fi
 
-  # Delegation
+  # Delegation and test with pybindings
   if [[ ${WITH_QUANTIZATION} == true ]]; then
     SUFFIX="q8"
-    "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --quantize
+    "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --quantize --test_with_pybindings
   else
     SUFFIX="fp32"
-    "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate
+    "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --test_with_pybindings
   fi
 
   OUTPUT_MODEL_PATH="${MODEL_NAME}_xnnpack_${SUFFIX}.pte"
diff --git a/examples/xnnpack/aot_compiler.py b/examples/xnnpack/aot_compiler.py
index 81eeb75c72c..9fed6fe1154 100644
--- a/examples/xnnpack/aot_compiler.py
+++ b/examples/xnnpack/aot_compiler.py
@@ -61,6 +61,14 @@
         default="",
         help="Generate and save an ETRecord to the given file location",
     )
+    parser.add_argument(
+        "-t",
+        "--test_with_pybindings",
+        action="store_true",
+        required=False,
+        default=False,
+        help="Test the pte with pybindings",
+    )
     parser.add_argument("-o", "--output_dir", default=".", help="output directory")
 
     args = parser.parse_args()
@@ -117,3 +125,10 @@
     quant_tag = "q8" if args.quantize else "fp32"
     model_name = f"{args.model_name}_xnnpack_{quant_tag}"
     save_pte_program(exec_prog, model_name, args.output_dir)
+
+    if args.test_pybind:
+        logging.info("Testing the pte with pybind")
+        from executorch.extension.pybindings.portable_lib import _load_for_executorch_from_buffer
+
+        m = _load_for_executorch_from_buffer(exec_prog.buffer)
+        m.run_method("forward", example_inputs)

From 9a4773794abe47933602261fd97d082a745672c9 Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Tue, 5 Aug 2025 12:19:23 -0700
Subject: [PATCH 2/8] Fix: import custom and quantized ops before loading pte

---
 examples/xnnpack/aot_compiler.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/examples/xnnpack/aot_compiler.py b/examples/xnnpack/aot_compiler.py
index 9fed6fe1154..f1b9c87322c 100644
--- a/examples/xnnpack/aot_compiler.py
+++ b/examples/xnnpack/aot_compiler.py
@@ -129,6 +129,14 @@
     if args.test_pybind:
         logging.info("Testing the pte with pybind")
         from executorch.extension.pybindings.portable_lib import _load_for_executorch_from_buffer
+        # Import custom ops. This requires portable_lib to be loaded first.
+        from executorch.extension.llm.custom_ops import (  # noqa: F401, F403
+            custom_ops,
+        )  # usort: skip
 
+        # Import quantized ops. This requires portable_lib to be loaded first.
+        from executorch.kernels import quantized  # usort: skip # noqa: F401, F403
         m = _load_for_executorch_from_buffer(exec_prog.buffer)
-        m.run_method("forward", example_inputs)
+        logging.info("Successfully loaded the model")
+        res = m.run_method("forward", example_inputs)
+        logging.info("Successfully ran the model")

From 9c385a59a9f24d68e7f997a4ec35069f0d6ab079 Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Tue, 5 Aug 2025 12:20:15 -0700
Subject: [PATCH 3/8] Lint

---
 examples/xnnpack/aot_compiler.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/examples/xnnpack/aot_compiler.py b/examples/xnnpack/aot_compiler.py
index f1b9c87322c..306645acdc8 100644
--- a/examples/xnnpack/aot_compiler.py
+++ b/examples/xnnpack/aot_compiler.py
@@ -128,7 +128,10 @@
 
     if args.test_pybind:
         logging.info("Testing the pte with pybind")
-        from executorch.extension.pybindings.portable_lib import _load_for_executorch_from_buffer
+        from executorch.extension.pybindings.portable_lib import (
+            _load_for_executorch_from_buffer,
+        )
+
         # Import custom ops. This requires portable_lib to be loaded first.
         from executorch.extension.llm.custom_ops import (  # noqa: F401, F403
             custom_ops,
@@ -136,6 +139,7 @@
 
         # Import quantized ops. This requires portable_lib to be loaded first.
         from executorch.kernels import quantized  # usort: skip # noqa: F401, F403
+
         m = _load_for_executorch_from_buffer(exec_prog.buffer)
         logging.info("Successfully loaded the model")
         res = m.run_method("forward", example_inputs)

From ff3034e56b56475f90d14640ea0b367d5b96628c Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Tue, 5 Aug 2025 13:27:07 -0700
Subject: [PATCH 4/8] Fix argument: read args.test_with_pybindings

---
 examples/xnnpack/aot_compiler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/xnnpack/aot_compiler.py b/examples/xnnpack/aot_compiler.py
index 306645acdc8..021417b2df7 100644
--- a/examples/xnnpack/aot_compiler.py
+++ b/examples/xnnpack/aot_compiler.py
@@ -126,7 +126,7 @@
     model_name = f"{args.model_name}_xnnpack_{quant_tag}"
     save_pte_program(exec_prog, model_name, args.output_dir)
 
-    if args.test_pybind:
+    if args.test_with_pybindings:
         logging.info("Testing the pte with pybind")
         from executorch.extension.pybindings.portable_lib import (
             _load_for_executorch_from_buffer,

From 874ce1e88f068042b0ab1cde79608028f3e534bd Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Tue, 5 Aug 2025 15:13:50 -0700
Subject: [PATCH 5/8] Fix example_inputs: unpack them for run_method

---
 examples/xnnpack/aot_compiler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/xnnpack/aot_compiler.py b/examples/xnnpack/aot_compiler.py
index 021417b2df7..11f976c99f8 100644
--- a/examples/xnnpack/aot_compiler.py
+++ b/examples/xnnpack/aot_compiler.py
@@ -142,5 +142,5 @@
 
         m = _load_for_executorch_from_buffer(exec_prog.buffer)
         logging.info("Successfully loaded the model")
-        res = m.run_method("forward", example_inputs)
+        res = m.run_method("forward", *example_inputs)
         logging.info("Successfully ran the model")

From b3fe1feb4537083d9d39ff32dc2398a2875a3e4f Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Tue, 5 Aug 2025 16:21:19 -0700
Subject: [PATCH 6/8] Flatten example inputs

---
 examples/xnnpack/aot_compiler.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/xnnpack/aot_compiler.py b/examples/xnnpack/aot_compiler.py
index 11f976c99f8..5c916354ed8 100644
--- a/examples/xnnpack/aot_compiler.py
+++ b/examples/xnnpack/aot_compiler.py
@@ -139,8 +139,10 @@
 
         # Import quantized ops. This requires portable_lib to be loaded first.
         from executorch.kernels import quantized  # usort: skip # noqa: F401, F403
+        from torch.utils._pytree import tree_flatten
 
         m = _load_for_executorch_from_buffer(exec_prog.buffer)
         logging.info("Successfully loaded the model")
-        res = m.run_method("forward", *example_inputs)
+        flattened = tree_flatten(example_inputs)[0]
+        res = m.run_method("forward", flattened)
         logging.info("Successfully ran the model")

From 94461ecb42bf86e6f85b98599404a6e14e0130d5 Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Thu, 18 Sep 2025 19:51:28 -0700
Subject: [PATCH 7/8] Address comments: rename flag to --test_after_export

---
 examples/xnnpack/aot_compiler.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/xnnpack/aot_compiler.py b/examples/xnnpack/aot_compiler.py
index 5c916354ed8..9a78138adf3 100644
--- a/examples/xnnpack/aot_compiler.py
+++ b/examples/xnnpack/aot_compiler.py
@@ -63,7 +63,7 @@
     )
     parser.add_argument(
         "-t",
-        "--test_with_pybindings",
+        "--test_after_export",
         action="store_true",
         required=False,
         default=False,
@@ -126,7 +126,7 @@
     model_name = f"{args.model_name}_xnnpack_{quant_tag}"
     save_pte_program(exec_prog, model_name, args.output_dir)
 
-    if args.test_with_pybindings:
+    if args.test_after_export:
         logging.info("Testing the pte with pybind")
         from executorch.extension.pybindings.portable_lib import (
             _load_for_executorch_from_buffer,

From eaf6b5e5a8fe7fe31d6f54e174b73d76da2c70f5 Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Thu, 18 Sep 2025 20:24:37 -0700
Subject: [PATCH 8/8] Fix test_model.sh: pass --test_after_export

---
 .ci/scripts/test_model.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh
index 52e75223d3b..de28597b1d5 100755
--- a/.ci/scripts/test_model.sh
+++ b/.ci/scripts/test_model.sh
@@ -134,10 +134,10 @@ test_model_with_xnnpack() {
   # Delegation and test with pybindings
   if [[ ${WITH_QUANTIZATION} == true ]]; then
     SUFFIX="q8"
-    "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --quantize --test_with_pybindings
+    "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --quantize --test_after_export
   else
     SUFFIX="fp32"
-    "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --test_with_pybindings
+    "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --test_after_export
   fi
 
   OUTPUT_MODEL_PATH="${MODEL_NAME}_xnnpack_${SUFFIX}.pte"