From 9c55fef57aa8308d5db6028c88ccfdec3113b415 Mon Sep 17 00:00:00 2001 From: Mergen Nachin Date: Fri, 12 Sep 2025 11:47:43 -0700 Subject: [PATCH] Arm backend: Add --enable_debug_mode to AOT compiler (Try 2) Summary: Relanding this PR https://github.com/pytorch/executorch/pull/14145 it was reverted in https://github.com/pytorch/executorch/pull/14174 Reviewed By: kimishpatel Differential Revision: D82174198 --- backends/arm/operators/op_abs.py | 4 +++- backends/arm/operators/op_sum.py | 8 ++++++-- examples/arm/aot_arm_compiler.py | 24 +++++++++++++++++++++++- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/backends/arm/operators/op_abs.py b/backends/arm/operators/op_abs.py index 625293d66e0..ec76eb5517f 100644 --- a/backends/arm/operators/op_abs.py +++ b/backends/arm/operators/op_abs.py @@ -73,7 +73,9 @@ def define_node( abs_output = output # Do the INT32 Abs - tosa_graph.addOperator( + self._serialize_operator( + node, + tosa_graph, ts.TosaOp.Op().ABS, [ rescaled_inputs[0].name, diff --git a/backends/arm/operators/op_sum.py b/backends/arm/operators/op_sum.py index 0bd152a8b8c..00676d9f9b3 100644 --- a/backends/arm/operators/op_sum.py +++ b/backends/arm/operators/op_sum.py @@ -67,7 +67,9 @@ def define_node( dtype=ts.DType.INT32, ) - tosa_graph.addOperator( + self._serialize_operator( + node, + tosa_graph, ts.TosaOp.Op().REDUCE_SUM, [rescaled_inputs[0].name], [intermediate.name], @@ -111,7 +113,9 @@ def define_node( attr = ts.TosaSerializerAttribute() attr.ReduceSumAttribute(tensor.dim_order.index(dim)) - tosa_graph.addOperator( + self._serialize_operator( + node, + tosa_graph, ts.TosaOp.Op().REDUCE_SUM, [tensor.name], [output.name], diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py index 8132751f6f0..106ab35363c 100644 --- a/examples/arm/aot_arm_compiler.py +++ b/examples/arm/aot_arm_compiler.py @@ -18,6 +18,7 @@ import torch from examples.devtools.scripts.export_bundled_program import save_bundled_program +from 
executorch.backends.arm.common.arm_compile_spec import ArmCompileSpec from executorch.backends.arm.ethosu import EthosUCompileSpec, EthosUPartitioner from executorch.backends.arm.quantizer import ( EthosUQuantizer, @@ -386,6 +387,7 @@ def get_compile_spec( memory_mode: Optional[str] = None, quantize: bool = False, config: Optional[str] = None, + debug_mode: Optional[str] = None, ) -> TosaCompileSpec | EthosUCompileSpec | VgfCompileSpec: compile_spec = None if target.startswith("TOSA"): @@ -414,6 +416,10 @@ def get_compile_spec( if intermediates is not None: compile_spec.dump_intermediate_artifacts_to(intermediates) + if debug_mode is not None: + mode = ArmCompileSpec.DebugMode[debug_mode.upper()] + compile_spec.dump_debug_info(mode) + return compile_spec @@ -601,6 +607,12 @@ def get_args(): action="store_true", help="Enable the QuantizedOpFusionPass fusion step", ) + parser.add_argument( + "--enable_debug_mode", + required=False, + choices=["json", "tosa"], + help="Flag to enable ATen-to-TOSA debug mode.", + ) args = parser.parse_args() if args.evaluate and ( @@ -735,6 +747,7 @@ def to_edge_TOSA_delegate( args.memory_mode, args.quantize, args.config, + args.enable_debug_mode, ) model_int8 = None @@ -776,6 +789,7 @@ def to_edge_no_delegate(exported_program, args, model: torch.nn.Module, example_ args.memory_mode, args.quantize, args.config, + args.enable_debug_mode, ) model, exported_program = quantize_model( args, model, example_inputs, compile_spec @@ -824,12 +838,21 @@ def transform_for_cortex_m_backend(edge, args): exported_program = torch.export.export( model, example_inputs, strict=args.strict_export ) + model = exported_program.module() model_fp32 = model + model_name = os.path.basename(os.path.splitext(args.model_name)[0]) if args.intermediates: os.makedirs(args.intermediates, exist_ok=True) + # We only support Python3.10 and above, so use a later pickle protocol + torch.export.save( + exported_program, + 
f"{args.intermediates}/{model_name}_exported_program.pt2", + pickle_protocol=5, + ) + # Quantize if required model_int8 = None if args.delegate: @@ -862,7 +885,6 @@ def transform_for_cortex_m_backend(edge, args): else: raise e - model_name = os.path.basename(os.path.splitext(args.model_name)[0]) output_name = f"{model_name}" + ( f"_arm_delegate_{args.target}" if args.delegate is True