From 60ddcd563ab11e9640b74ab822b70122d8208f31 Mon Sep 17 00:00:00 2001
From: Erik Lundell
Date: Wed, 24 Sep 2025 11:46:39 +0200
Subject: [PATCH 1/2] Arm backend: Remove example models

It no longer makes sense to keep example models in the aot_arm_compiler
script: they are too simple, and there are other ways to provide examples.
Add and softmax can be replaced with examples/models tests; add2 and add3
are deprecated. The q-models can't be removed yet since the new testing is
not in place, but they should be removed as soon as it is.

Signed-off-by: Erik Lundell
Change-Id: I43ad1acaf46554245f6ed6928a79de653bc9b319
---
 examples/arm/aot_arm_compiler.py | 77 --------------------------------
 examples/arm/run.sh              |  2 -
 2 files changed, 79 deletions(-)

diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py
index 8b6e1d4b85e..37d42dbe30d 100644
--- a/examples/arm/aot_arm_compiler.py
+++ b/examples/arm/aot_arm_compiler.py
@@ -188,46 +188,6 @@ def quantize(
     return m
 
 
-# Simple example models
-class AddModule(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
-    def forward(self, x):
-        return x + x
-
-    example_input = (torch.ones(5, dtype=torch.int32),)
-    can_delegate = True
-
-
-class AddModule2(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
-    def forward(self, x, y):
-        return x + y
-
-    example_input = (
-        torch.ones(5, dtype=torch.int32),
-        torch.ones(5, dtype=torch.int32),
-    )
-    can_delegate = True
-
-
-class AddModule3(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
-    def forward(self, x, y):
-        return (x + y, x + x)
-
-    example_input = (
-        torch.ones(5, dtype=torch.int32),
-        torch.ones(5, dtype=torch.int32),
-    )
-    can_delegate = True
-
-
 class QuantAddTest(torch.nn.Module):
     def __init__(self):
         super().__init__()
@@ -276,27 +236,6 @@ def forward(self, w, x, y, z):
     can_delegate = True  # when quantized
 
 
-class SoftmaxModule(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.softmax = torch.nn.Softmax(dim=0)
-
-    def forward(self, x):
-        z = self.softmax(x)
-        return z
-
-    example_input = (torch.ones(2, 2),)
-    can_delegate = True
-
-
-class MultipleOutputsModule(torch.nn.Module):
-    def forward(self, x: torch.Tensor, y: torch.Tensor):
-        return (x * y, x.sum(dim=-1, keepdim=True))
-
-    example_input = (torch.randn(10, 4, 5), torch.randn(10, 4, 5))
-    can_delegate = True
-
-
 class QuantLinearTest(torch.nn.Module):
     def __init__(self):
         super().__init__()
@@ -311,29 +250,15 @@ def forward(self, x):
 
 
 models = {
-    "add": AddModule,
-    "add2": AddModule2,
-    "add3": AddModule3,
     "qadd": QuantAddTest,
     "qadd2": QuantAddTest2,
     "qops": QuantOpTest,
-    "softmax": SoftmaxModule,
-    "MultipleOutputsModule": MultipleOutputsModule,
     # TODO: Remove this from here, once we have dedicated MCU test pipeline ready. This is an interim solution.
     # See https://github.com/pytorch/executorch/discussions/13944
     "qlinear": QuantLinearTest,
 }
 
 calibration_data = {
-    "add": (torch.randn(1, 5),),
-    "add2": (
-        torch.randn(1, 5),
-        torch.randn(1, 5),
-    ),
-    "add3": (
-        torch.randn(32, 5),
-        torch.randn(32, 5),
-    ),
     "qadd": (torch.randn(32, 2, 1),),
     "qadd2": (
         torch.randn(32, 2, 1),
@@ -345,8 +270,6 @@ def forward(self, x):
         torch.randn(32, 2, 1) * -0.000001,
         torch.randn(32, 2, 1) * 1000,
     ),
-    "softmax": (torch.randn(32, 2, 2),),
-    "qlinear": (torch.randn(37, 61),),
 }
 
 evaluators = {
diff --git a/examples/arm/run.sh b/examples/arm/run.sh
index 8f5dec85ad4..aeb3c542bd5 100755
--- a/examples/arm/run.sh
+++ b/examples/arm/run.sh
@@ -225,7 +225,6 @@ if [[ -z "$model_name" ]]; then
     test_model=(
         "softmax" # 0
         "add" # 1
-        "add3" # 2
         "qadd" # 3
         "qadd2" # 4
         "qops" # 5
@@ -234,7 +233,6 @@ if [[ -z "$model_name" ]]; then
     model_compiler_flags=(
        "" # 0 softmax
        "--delegate" # 1 add
-       "--delegate" # 2 add3
        "--delegate --quantize" # 3 qadd
        "--delegate --quantize" # 4 qadd2
        "--delegate --quantize" # 5 qops

From d81228d41cef403fb4fd77ce382ce3a414d31b76 Mon Sep 17 00:00:00 2001
From: Erik Lundell
Date: Wed, 24 Sep 2025 11:57:01 +0200
Subject: [PATCH 2/2] Arm backend: move evaluation logic out of aot_arm_compiler

A first step towards simplifying the aot_arm_compiler script.

Signed-off-by: Erik Lundell
Change-Id: If6b956afbd2409ee244586aa60a77177915676cf
---
 backends/arm/util/arm_model_evaluator.py | 77 +++++++++++++++++++++++-
 examples/arm/aot_arm_compiler.py         | 72 +----------------------
 2 files changed, 79 insertions(+), 70 deletions(-)

diff --git a/backends/arm/util/arm_model_evaluator.py b/backends/arm/util/arm_model_evaluator.py
index a3dcbdc5c6f..cbfa337ab09 100644
--- a/backends/arm/util/arm_model_evaluator.py
+++ b/backends/arm/util/arm_model_evaluator.py
@@ -6,6 +6,7 @@
 
 # pyre-unsafe
 
+import json
 import logging
 import os
 import random
@@ -14,7 +15,7 @@
 from collections import defaultdict
 from pathlib import Path
-from typing import Any, Optional, Tuple
+from typing import Any, cast, Optional, Tuple
 
 import torch
 from torch.nn.modules import Module
@@ -197,3 +198,77 @@ def evaluate(self) -> dict[str, Any]:
         output["metrics"]["accuracy"] = {"top-1": top1_correct, "top-5": top5_correct}
 
     return output
+
+
+evaluators: dict[str, type[GenericModelEvaluator]] = {
+    "generic": GenericModelEvaluator,
+    "mv2": MobileNetV2Evaluator,
+}
+
+
+def evaluator_calibration_data(
+    evaluator_name: str,
+    evaluator_config: str | None,
+):
+    evaluator = evaluators[evaluator_name]
+
+    if hasattr(evaluator, "get_calibrator"):
+        assert evaluator_config is not None
+
+        config_path = Path(evaluator_config)
+        with config_path.open() as f:
+            config = json.load(f)
+
+        if evaluator is MobileNetV2Evaluator:
+            return evaluator.get_calibrator(
+                training_dataset_path=config["training_dataset_path"]
+            )
+        else:
+            raise RuntimeError(f"Unknown evaluator: {evaluator_name}")
+
+
+def evaluate_model(
+    model_name: str,
+    intermediates: str,
+    model_fp32: torch.nn.Module,
+    model_int8: torch.nn.Module,
+    example_inputs: Tuple[torch.Tensor],
+    evaluator_name: str,
+    evaluator_config: str | None,
+) -> None:
+    evaluator = evaluators[evaluator_name]
+
+    # Get the path of the TOSA flatbuffer that is dumped
+    intermediates_path = Path(intermediates)
+    tosa_paths = list(intermediates_path.glob("*.tosa"))
+
+    if evaluator.REQUIRES_CONFIG:
+        assert evaluator_config is not None
+
+        config_path = Path(evaluator_config)
+        with config_path.open() as f:
+            config = json.load(f)
+
+        if evaluator == MobileNetV2Evaluator:
+            mv2_evaluator = cast(type[MobileNetV2Evaluator], evaluator)
+            init_evaluator: GenericModelEvaluator = mv2_evaluator(
+                model_name,
+                model_fp32,
+                model_int8,
+                example_inputs,
+                str(tosa_paths[0]),
+                batch_size=config["batch_size"],
+                validation_dataset_path=config["validation_dataset_path"],
+            )
+        else:
+            raise RuntimeError(f"Unknown evaluator {evaluator_name}")
+    else:
+        init_evaluator = evaluator(
+            model_name, model_fp32, model_int8, example_inputs, str(tosa_paths[0])
+        )
+
+    quant_metrics = init_evaluator.evaluate()
+    output_json_path = intermediates_path / "quant_metrics.json"
+
+    with output_json_path.open("w") as json_file:
+        json.dump(quant_metrics, json_file)
diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py
index 37d42dbe30d..c1a99d092bc 100644
--- a/examples/arm/aot_arm_compiler.py
+++ b/examples/arm/aot_arm_compiler.py
@@ -9,7 +9,6 @@
 
 import argparse
 import copy
-import json
 import logging
 import os
 
@@ -31,8 +30,8 @@
 from executorch.backends.arm.tosa.partitioner import TOSAPartitioner
 from executorch.backends.arm.util.arm_model_evaluator import (
-    GenericModelEvaluator,
-    MobileNetV2Evaluator,
+    evaluate_model,
+    evaluator_calibration_data,
 )
 from executorch.backends.arm.vgf import VgfCompileSpec, VgfPartitioner
 
@@ -272,11 +271,6 @@ def forward(self, x):
     ),
 }
 
-evaluators = {
-    "generic": GenericModelEvaluator,
-    "mv2": MobileNetV2Evaluator,
-}
-
 targets = [
     "ethos-u55-32",
     "ethos-u55-64",
@@ -301,21 +295,7 @@ def get_calibration_data(
 ):
     # Firstly, if the model is being evaluated, take the evaluators calibration function if it has one
     if evaluator_name is not None:
-        evaluator = evaluators[evaluator_name]
-
-        if hasattr(evaluator, "get_calibrator"):
-            assert evaluator_config is not None
-
-            config_path = Path(evaluator_config)
-            with config_path.open() as f:
-                config = json.load(f)
-
-            if evaluator_name == "mv2":
-                return evaluator.get_calibrator(
-                    training_dataset_path=config["training_dataset_path"]
-                )
-            else:
-                raise RuntimeError(f"Unknown evaluator: {evaluator_name}")
+        return evaluator_calibration_data(evaluator_name, evaluator_config)
 
     # If the model is in the calibration_data dictionary, get the data from there
     # This is used for the simple model examples provided
@@ -369,52 +349,6 @@ def get_compile_spec(
     return compile_spec
 
 
-def evaluate_model(
-    model_name: str,
-    intermediates: str,
-    model_fp32: torch.nn.Module,
-    model_int8: torch.nn.Module,
-    example_inputs: Tuple[torch.Tensor],
-    evaluator_name: str,
-    evaluator_config: str | None,
-) -> None:
-    evaluator = evaluators[evaluator_name]
-
-    # Get the path of the TOSA flatbuffer that is dumped
-    intermediates_path = Path(intermediates)
-    tosa_paths = list(intermediates_path.glob("*.tosa"))
-
-    if evaluator.REQUIRES_CONFIG:
-        assert evaluator_config is not None
-
-        config_path = Path(evaluator_config)
-        with config_path.open() as f:
-            config = json.load(f)
-
-        if evaluator_name == "mv2":
-            init_evaluator = evaluator(
-                model_name,
-                model_fp32,
-                model_int8,
-                example_inputs,
-                str(tosa_paths[0]),
-                config["batch_size"],
-                config["validation_dataset_path"],
-            )
-        else:
-            raise RuntimeError(f"Unknown evaluator {evaluator_name}")
-    else:
-        init_evaluator = evaluator(
-            model_name, model_fp32, model_int8, example_inputs, str(tosa_paths[0])
-        )
-
-    quant_metrics = init_evaluator.evaluate()
-    output_json_path = intermediates_path / "quant_metrics.json"
-
-    with output_json_path.open("w") as json_file:
-        json.dump(quant_metrics, json_file)
-
-
 def dump_delegation_info(edge, intermediate_files_folder: Optional[str] = None):
     graph_module = edge.exported_program().graph_module
     delegation_info = get_delegation_info(graph_module)
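
Reviewer note, not part of either patch: a minimal sketch of how the two helpers relocated by PATCH 2/2 are intended to be driven. The import path and function signatures come from the diff above; TinyModel, the model name "tiny", and the "intermediates" directory are hypothetical stand-ins, and the sketch assumes the directory already contains a dumped *.tosa file and that GenericModelEvaluator defines neither get_calibrator nor a truthy REQUIRES_CONFIG.

import torch

from executorch.backends.arm.util.arm_model_evaluator import (
    evaluate_model,
    evaluator_calibration_data,
)


# Hypothetical stand-in for a real example model and its quantized twin.
class TinyModel(torch.nn.Module):
    def forward(self, x):
        return x + x


model_fp32 = TinyModel()
model_int8 = TinyModel()  # in practice, the int8-quantized counterpart
example_inputs = (torch.ones(2, 2),)

# Assuming "generic" defines no get_calibrator, this falls through and
# returns None; "mv2" would need a JSON config with training_dataset_path.
calibration = evaluator_calibration_data("generic", None)

# Globs intermediates/*.tosa, runs the chosen evaluator on the fp32/int8
# pair, and writes intermediates/quant_metrics.json.
evaluate_model(
    model_name="tiny",
    intermediates="intermediates",
    model_fp32=model_fp32,
    model_int8=model_int8,
    example_inputs=example_inputs,
    evaluator_name="generic",
    evaluator_config=None,
)

A side effect worth noting: the relocated evaluate_model dispatches on the evaluator class (evaluator == MobileNetV2Evaluator) rather than on the evaluator_name string, so the example script no longer needs to know which evaluators exist.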