From e49880005e7826c134af05dfa0e4027881052cfb Mon Sep 17 00:00:00 2001
From: "Yanan Cao (PyTorch)"
Date: Mon, 31 Mar 2025 22:25:13 -0700
Subject: [PATCH] executorch (#9661)

Summary:
Pass strict=True explicitly at every export_for_training call site, so these
exports keep strict-mode semantics regardless of the upstream default.

Pull Request resolved: https://github.com/pytorch/executorch/pull/9661

Reviewed By: digantdesai

Differential Revision: D71520535
---
 .../coreml/test/test_coreml_quantizer.py      |  4 ++-
 backends/apple/mps/test/test_mps_utils.py     |  2 +-
 backends/cadence/aot/compiler.py              |  2 +-
 .../aot/tests/test_remove_ops_passes.py       |  2 +-
 backends/example/test_example_delegate.py     |  8 +++--
 backends/mediatek/quantizer/annotator.py      |  6 ++--
 backends/qualcomm/tests/utils.py              |  2 +-
 .../test_duplicate_dynamic_quant_chain.py     |  5 +--
 .../test/quantizer/test_pt2e_quantization.py  | 35 +++++++++----------
 .../test/quantizer/test_representation.py     |  5 +--
 .../test/quantizer/test_xnnpack_quantizer.py  | 18 +++-------
 backends/xnnpack/test/test_xnnpack_utils.py   |  5 +--
 backends/xnnpack/test/tester/tester.py        |  2 +-
 .../export-to-executorch-tutorial.py          |  6 ++--
 examples/apple/mps/scripts/mps_example.py     |  4 ++-
 examples/arm/aot_arm_compiler.py              |  9 +++--
 16 files changed, 55 insertions(+), 60 deletions(-)

diff --git a/backends/apple/coreml/test/test_coreml_quantizer.py b/backends/apple/coreml/test/test_coreml_quantizer.py
index 8dfb46cbbdc..db75631dbc8 100644
--- a/backends/apple/coreml/test/test_coreml_quantizer.py
+++ b/backends/apple/coreml/test/test_coreml_quantizer.py
@@ -32,7 +32,9 @@ def quantize_and_compare(
     ) -> None:
         assert quantization_type in {"PTQ", "QAT"}
 
-        pre_autograd_aten_dialect = export_for_training(model, example_inputs).module()
+        pre_autograd_aten_dialect = export_for_training(
+            model, example_inputs, strict=True
+        ).module()
 
         quantization_config = LinearQuantizerConfig.from_dict(
             {
diff --git a/backends/apple/mps/test/test_mps_utils.py b/backends/apple/mps/test/test_mps_utils.py
index a39583c66c6..674a4b0ba62 100644
--- a/backends/apple/mps/test/test_mps_utils.py
+++ b/backends/apple/mps/test/test_mps_utils.py
@@ -207,7 +207,7 @@ def lower_module_and_test_output(
         expected_output = model(*sample_inputs)
 
         model = torch.export.export_for_training(
-            model, sample_inputs, dynamic_shapes=dynamic_shapes
+            model, sample_inputs, dynamic_shapes=dynamic_shapes, strict=True
         ).module()
 
         edge_program = export_to_edge(
diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py
index 93b3f85c529..3c7d94f8fe1 100644
--- a/backends/cadence/aot/compiler.py
+++ b/backends/cadence/aot/compiler.py
@@ -82,7 +82,7 @@ def convert_pt2(
     remove_decompositions(decomp_table, ops_to_keep)
     # Export with dynamo
     model_gm = (
-        torch.export.export_for_training(model, inputs)
+        torch.export.export_for_training(model, inputs, strict=True)
         .run_decompositions(decomp_table)
         .module()
     )
diff --git a/backends/cadence/aot/tests/test_remove_ops_passes.py b/backends/cadence/aot/tests/test_remove_ops_passes.py
index 42f4b87bdcb..dba4f711864 100644
--- a/backends/cadence/aot/tests/test_remove_ops_passes.py
+++ b/backends/cadence/aot/tests/test_remove_ops_passes.py
@@ -474,7 +474,7 @@ def forward(self, x):
         # Run the standard quant/convert steps, but without fusing
         # this leaves two redundant quant/dequant pairs to test with
         quantizer = CadenceDefaultQuantizer()
-        model_exp = export_for_training(M(), (inp,)).module()
+        model_exp = export_for_training(M(), (inp,), strict=True).module()
         prepared_model = prepare_pt2e(model_exp, quantizer)
         prepared_model(inp)
         converted_model = convert_pt2e(prepared_model)
diff --git a/backends/example/test_example_delegate.py b/backends/example/test_example_delegate.py
index 9e2b4e458cf..a382273af07 100644
--- a/backends/example/test_example_delegate.py
+++ b/backends/example/test_example_delegate.py
@@ -46,7 +46,9 @@ def get_example_inputs():
         )
         m = model.eval()
 
-        m = torch.export.export_for_training(m, copy.deepcopy(example_inputs)).module()
+        m = torch.export.export_for_training(
+            m, copy.deepcopy(example_inputs), strict=True
+        ).module()
         # print("original model:", m)
         quantizer = ExampleQuantizer()
         # quantizer = XNNPACKQuantizer()
@@ -82,7 +84,9 @@ def test_delegate_mobilenet_v2(self):
         )
         m = model.eval()
 
-        m = torch.export.export_for_training(m, copy.deepcopy(example_inputs)).module()
+        m = torch.export.export_for_training(
+            m, copy.deepcopy(example_inputs), strict=True
+        ).module()
 
         quantizer = ExampleQuantizer()
         m = prepare_pt2e(m, quantizer)
diff --git a/backends/mediatek/quantizer/annotator.py b/backends/mediatek/quantizer/annotator.py
index e315599cf7f..d250b774af8 100644
--- a/backends/mediatek/quantizer/annotator.py
+++ b/backends/mediatek/quantizer/annotator.py
@@ -44,7 +44,6 @@ def annotate(graph: Graph, quant_config: QuantizationConfig) -> None:
 
 
 def register_annotator(ops: List[OpOverload]):
-
     def decorator(annotator_fn: Callable):
         for op in ops:
             OP_TO_ANNOTATOR[op] = annotator_fn
@@ -147,7 +146,6 @@ def _annotate_fused_activation_pattern(
 
 
 def _annotate_rmsnorm_pattern(graph: Graph, quant_config: QuantizationConfig) -> None:
-
     class ExecuTorchPattern(torch.nn.Module):
         def forward(self, x):
             norm = x * torch.rsqrt((x * x).mean(-1, keepdim=True) + 1e-6)
@@ -159,7 +157,9 @@ def forward(self, x):
             return norm, {}
 
     for pattern_cls in (ExecuTorchPattern, MTKPattern):
-        pattern_gm = export_for_training(pattern_cls(), (torch.randn(3, 3),)).module()
+        pattern_gm = export_for_training(
+            pattern_cls(), (torch.randn(3, 3),), strict=True
+        ).module()
         matcher = SubgraphMatcherWithNameNodeMap(
             pattern_gm, ignore_literals=True, remove_overlapping_matches=False
         )
diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
index 769f24ba0d8..cf3dc1d528b 100644
--- a/backends/qualcomm/tests/utils.py
+++ b/backends/qualcomm/tests/utils.py
@@ -567,7 +567,7 @@ def get_prepared_qat_module(
         custom_quant_annotations: Tuple[Callable] = (),
         quant_dtype: QuantDtype = QuantDtype.use_8a8w,
     ) -> torch.fx.GraphModule:
-        m = torch.export.export_for_training(module, inputs).module()
+        m = torch.export.export_for_training(module, inputs, strict=True).module()
 
         quantizer = make_quantizer(
             quant_dtype=quant_dtype,
diff --git a/backends/transforms/test/test_duplicate_dynamic_quant_chain.py b/backends/transforms/test/test_duplicate_dynamic_quant_chain.py
index 91d2ddc916a..ab965dd347d 100644
--- a/backends/transforms/test/test_duplicate_dynamic_quant_chain.py
+++ b/backends/transforms/test/test_duplicate_dynamic_quant_chain.py
@@ -58,10 +58,7 @@ def _test_duplicate_chain(
 
         # program capture
         m = copy.deepcopy(m_eager)
-        m = torch.export.export_for_training(
-            m,
-            example_inputs,
-        ).module()
+        m = torch.export.export_for_training(m, example_inputs, strict=True).module()
         m = prepare_pt2e(m, quantizer)
 
         # Calibrate
diff --git a/backends/xnnpack/test/quantizer/test_pt2e_quantization.py b/backends/xnnpack/test/quantizer/test_pt2e_quantization.py
index ea6116a6f0a..34b6f745044 100644
--- a/backends/xnnpack/test/quantizer/test_pt2e_quantization.py
+++ b/backends/xnnpack/test/quantizer/test_pt2e_quantization.py
@@ -326,7 +326,7 @@ def test_disallow_eval_train(self) -> None:
         m.train()
 
         # After export: this is not OK
-        m = export_for_training(m, example_inputs).module()
+        m = export_for_training(m, example_inputs, strict=True).module()
         with self.assertRaises(NotImplementedError):
             m.eval()
         with self.assertRaises(NotImplementedError):
@@ -380,7 +380,7 @@ def forward(self, x):
         m = M().train()
         example_inputs = (torch.randn(1, 3, 3, 3),)
         bn_train_op, bn_eval_op = self._get_bn_train_eval_ops()  # pyre-ignore[23]
-        m = export_for_training(m, example_inputs).module()
+        m = export_for_training(m, example_inputs, strict=True).module()
 
         def _assert_ops_are_correct(m: torch.fx.GraphModule, train: bool) -> None:
             bn_op = bn_train_op if train else bn_eval_op
@@ -449,10 +449,7 @@ def forward(self, x):
         quantizer.set_global(operator_config)
         example_inputs = (torch.randn(2, 2),)
         m = M().eval()
-        m = export_for_training(
-            m,
-            example_inputs,
-        ).module()
+        m = export_for_training(m, example_inputs, strict=True).module()
         weight_meta = None
         for n in m.graph.nodes:  # pyre-ignore[16]
             if (
@@ -481,7 +478,7 @@ def test_reentrant(self) -> None:
             get_symmetric_quantization_config(is_per_channel=True, is_qat=True)
         )
         m.conv_bn_relu = export_for_training(  # pyre-ignore[8]
-            m.conv_bn_relu, example_inputs
+            m.conv_bn_relu, example_inputs, strict=True
         ).module()
         m.conv_bn_relu = prepare_qat_pt2e(m.conv_bn_relu, quantizer)  # pyre-ignore[6,8]
         m(*example_inputs)
@@ -490,7 +487,7 @@ def test_reentrant(self) -> None:
         quantizer = XNNPACKQuantizer().set_module_type(
             torch.nn.Linear, get_symmetric_quantization_config(is_per_channel=False)
         )
-        m = export_for_training(m, example_inputs).module()
+        m = export_for_training(m, example_inputs, strict=True).module()
         m = prepare_pt2e(m, quantizer)  # pyre-ignore[6]
         m = convert_pt2e(m)
 
@@ -553,7 +550,7 @@ def check_nn_module(node: torch.fx.Node) -> None:
         )
 
         m.conv_bn_relu = export_for_training(  # pyre-ignore[8]
-            m.conv_bn_relu, example_inputs
+            m.conv_bn_relu, example_inputs, strict=True
         ).module()
         for node in m.conv_bn_relu.graph.nodes:  # pyre-ignore[16]
             if node.op not in ["placeholder", "output", "get_attr"]:
@@ -568,7 +565,7 @@ def test_speed(self) -> None:
 
         def dynamic_quantize_pt2e(model, example_inputs) -> torch.fx.GraphModule:
             torch._dynamo.reset()
-            model = export_for_training(model, example_inputs).module()
+            model = export_for_training(model, example_inputs, strict=True).module()
             # Per channel quantization for weight
             # Dynamic quantization for activation
             # Please read a detail: https://fburl.com/code/30zds51q
@@ -625,7 +622,7 @@ def forward(self, x):
 
         example_inputs = (torch.randn(1, 3, 5, 5),)
         m = M()
-        m = export_for_training(m, example_inputs).module()
+        m = export_for_training(m, example_inputs, strict=True).module()
         quantizer = XNNPACKQuantizer().set_global(
             get_symmetric_quantization_config(),
         )
@@ -701,7 +698,6 @@ def test_save_load(self) -> None:
 
 
 class TestNumericDebugger(TestCase):
-
     def _extract_debug_handles(self, model) -> Dict[str, int]:
         debug_handle_map: Dict[str, int] = {}
 
@@ -731,7 +727,7 @@ def _assert_node_has_debug_handle(node: torch.fx.Node) -> None:
     def test_quantize_pt2e_preserve_handle(self) -> None:
         m = TestHelperModules.Conv2dThenConv1d()
         example_inputs = m.example_inputs()
-        ep = export_for_training(m, example_inputs)
+        ep = export_for_training(m, example_inputs, strict=True)
         generate_numeric_debug_handle(ep)
         m = ep.module()
 
@@ -761,7 +757,7 @@ def test_quantize_pt2e_preserve_handle(self) -> None:
     def test_extract_results_from_loggers(self) -> None:
         m = TestHelperModules.Conv2dThenConv1d()
         example_inputs = m.example_inputs()
-        ep = export_for_training(m, example_inputs)
+        ep = export_for_training(m, example_inputs, strict=True)
         generate_numeric_debug_handle(ep)
         m = ep.module()
         m_ref_logger = prepare_for_propagation_comparison(m)  # pyre-ignore[6]
@@ -779,18 +775,20 @@ def test_extract_results_from_loggers(self) -> None:
         ref_results = extract_results_from_loggers(m_ref_logger)
         quant_results = extract_results_from_loggers(m_quant_logger)
         comparison_results = compare_results(
-            ref_results, quant_results  # pyre-ignore[6]
+            ref_results,
+            quant_results,  # pyre-ignore[6]
         )
         for node_summary in comparison_results.values():
             if len(node_summary.results) > 0:
                 self.assertGreaterEqual(
-                    node_summary.results[0].sqnr, 35  # pyre-ignore[6]
+                    node_summary.results[0].sqnr,
+                    35,  # pyre-ignore[6]
                 )
 
     def test_extract_results_from_loggers_list_output(self) -> None:
         m = TestHelperModules.Conv2dWithSplit()
         example_inputs = m.example_inputs()
-        ep = export_for_training(m, example_inputs)
+        ep = export_for_training(m, example_inputs, strict=True)
         generate_numeric_debug_handle(ep)
         m = ep.module()
         m_ref_logger = prepare_for_propagation_comparison(m)  # pyre-ignore[6]
@@ -808,7 +806,8 @@ def test_extract_results_from_loggers_list_output(self) -> None:
         ref_results = extract_results_from_loggers(m_ref_logger)
         quant_results = extract_results_from_loggers(m_quant_logger)
         comparison_results = compare_results(
-            ref_results, quant_results  # pyre-ignore[6]
+            ref_results,
+            quant_results,  # pyre-ignore[6]
         )
         for node_summary in comparison_results.values():
             if len(node_summary.results) > 0:
diff --git a/backends/xnnpack/test/quantizer/test_representation.py b/backends/xnnpack/test/quantizer/test_representation.py
index 83cecaec5ad..e52bbbd7ae7 100644
--- a/backends/xnnpack/test/quantizer/test_representation.py
+++ b/backends/xnnpack/test/quantizer/test_representation.py
@@ -33,10 +33,7 @@ def _test_representation(
     ) -> None:
         # resetting dynamo cache
         torch._dynamo.reset()
-        model = export_for_training(
-            model,
-            example_inputs,
-        ).module()
+        model = export_for_training(model, example_inputs, strict=True).module()
         model_copy = copy.deepcopy(model)
 
         model = prepare_pt2e(model, quantizer)  # pyre-ignore[6]
diff --git a/backends/xnnpack/test/quantizer/test_xnnpack_quantizer.py b/backends/xnnpack/test/quantizer/test_xnnpack_quantizer.py
index 57aacf55263..856030755af 100644
--- a/backends/xnnpack/test/quantizer/test_xnnpack_quantizer.py
+++ b/backends/xnnpack/test/quantizer/test_xnnpack_quantizer.py
@@ -361,7 +361,7 @@ def forward(self, x):
         )
         example_inputs = (torch.randn(2, 2),)
         m = M().eval()
-        m = export_for_training(m, example_inputs).module()
+        m = export_for_training(m, example_inputs, strict=True).module()
         m = prepare_pt2e(m, quantizer)  # pyre-ignore[6]
         # Use a linear count instead of names because the names might change, but
         # the order should be the same.
@@ -497,10 +497,7 @@ def test_propagate_annotation(self):
         example_inputs = (torch.randn(1, 3, 5, 5),)
 
         # program capture
-        m = export_for_training(
-            m,
-            example_inputs,
-        ).module()
+        m = export_for_training(m, example_inputs, strict=True).module()
         m = prepare_pt2e(m, quantizer)
         m(*example_inputs)
 
@@ -766,8 +763,7 @@ def forward(self, input_tensor, hidden_tensor):
 
         with torchdynamo.config.patch(allow_rnn=True):
             model_graph = export_for_training(
-                model_graph,
-                example_inputs,
+                model_graph, example_inputs, strict=True
             ).module()
             quantizer = XNNPACKQuantizer()
             quantization_config = get_symmetric_quantization_config(
@@ -829,8 +825,7 @@ def forward(self, input_tensor, hidden_tensor):
 
         with torchdynamo.config.patch(allow_rnn=True):
             model_graph = export_for_training(
-                model_graph,
-                example_inputs,
+                model_graph, example_inputs, strict=True
             ).module()
             quantizer = XNNPACKQuantizer()
             quantization_config = get_symmetric_quantization_config(
@@ -1039,10 +1034,7 @@ def test_resnet18(self):
         m = torchvision.models.resnet18().eval()
         m_copy = copy.deepcopy(m)
         # program capture
-        m = export_for_training(
-            m,
-            example_inputs,
-        ).module()
+        m = export_for_training(m, example_inputs, strict=True).module()
 
         quantizer = XNNPACKQuantizer()
         quantization_config = get_symmetric_quantization_config(is_per_channel=True)
diff --git a/backends/xnnpack/test/test_xnnpack_utils.py b/backends/xnnpack/test/test_xnnpack_utils.py
index f11075cf261..3ff2f0e4c1e 100644
--- a/backends/xnnpack/test/test_xnnpack_utils.py
+++ b/backends/xnnpack/test/test_xnnpack_utils.py
@@ -317,10 +317,7 @@ def quantize_and_test_model_with_quantizer(
         module.eval()
 
         # program capture
-        m = export_for_training(
-            module,
-            example_inputs,
-        ).module()
+        m = export_for_training(module, example_inputs, strict=True).module()
 
         quantizer = XNNPACKQuantizer()
         quantization_config = get_symmetric_quantization_config()
diff --git a/backends/xnnpack/test/tester/tester.py b/backends/xnnpack/test/tester/tester.py
index a82688cd52c..cbce817cf4b 100644
--- a/backends/xnnpack/test/tester/tester.py
+++ b/backends/xnnpack/test/tester/tester.py
@@ -166,7 +166,7 @@ def run(
         self, artifact: torch.nn.Module, inputs: Optional[Tuple[torch.Tensor]]
     ) -> None:
         assert inputs is not None
-        captured_graph = export_for_training(artifact, inputs).module()
+        captured_graph = export_for_training(artifact, inputs, strict=True).module()
         assert isinstance(captured_graph, torch.fx.GraphModule)
 
         prepared = prepare_pt2e(captured_graph, self.quantizer)
diff --git a/docs/source/tutorials_source/export-to-executorch-tutorial.py b/docs/source/tutorials_source/export-to-executorch-tutorial.py
index 86a816f1435..de42cb51bce 100644
--- a/docs/source/tutorials_source/export-to-executorch-tutorial.py
+++ b/docs/source/tutorials_source/export-to-executorch-tutorial.py
@@ -190,7 +190,9 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
 from torch.export import export_for_training
 
 example_args = (torch.randn(1, 3, 256, 256),)
-pre_autograd_aten_dialect = export_for_training(SimpleConv(), example_args).module()
+pre_autograd_aten_dialect = export_for_training(
+    SimpleConv(), example_args, strict=True
+).module()
 print("Pre-Autograd ATen Dialect Graph")
 print(pre_autograd_aten_dialect)
 
@@ -555,7 +557,7 @@ def forward(self, x):
 
 
 example_args = (torch.randn(3, 4),)
-pre_autograd_aten_dialect = export_for_training(M(), example_args).module()
+pre_autograd_aten_dialect = export_for_training(M(), example_args, strict=True).module()
 # Optionally do quantization:
 # pre_autograd_aten_dialect = convert_pt2e(prepare_pt2e(pre_autograd_aten_dialect, CustomBackendQuantizer))
 aten_dialect = export(pre_autograd_aten_dialect, example_args, strict=True)
diff --git a/examples/apple/mps/scripts/mps_example.py b/examples/apple/mps/scripts/mps_example.py
index fb1c4e971f9..c1a2e150286 100644
--- a/examples/apple/mps/scripts/mps_example.py
+++ b/examples/apple/mps/scripts/mps_example.py
@@ -166,7 +166,9 @@ def get_model_config(args):
 
     # pre-autograd export. eventually this will become torch.export
     with torch.no_grad():
-        model = torch.export.export_for_training(model, example_inputs).module()
+        model = torch.export.export_for_training(
+            model, example_inputs, strict=True
+        ).module()
         edge: EdgeProgramManager = export_to_edge(
             model,
             example_inputs,
diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py
index 5fb12342a2d..446d1a4eca4 100644
--- a/examples/arm/aot_arm_compiler.py
+++ b/examples/arm/aot_arm_compiler.py
@@ -224,7 +224,6 @@ def forward(self, x):
 
 
 class MultipleOutputsModule(torch.nn.Module):
-
     def forward(self, x: torch.Tensor, y: torch.Tensor):
         return (x * y, x.sum(dim=-1, keepdim=True))
 
@@ -648,7 +647,9 @@ def to_edge_TOSA_delegate(
         )
         model_int8 = model
         # Wrap quantized model back into an exported_program
-        exported_program = torch.export.export_for_training(model, example_inputs)
+        exported_program = torch.export.export_for_training(
+            model, example_inputs, strict=True
+        )
 
     if args.intermediates:
         os.makedirs(args.intermediates, exist_ok=True)
@@ -681,7 +682,9 @@ def to_edge_TOSA_delegate(
 
     # export_for_training under the assumption we quantize, the exported form also works
     # in to_edge if we don't quantize
-    exported_program = torch.export.export_for_training(model, example_inputs)
+    exported_program = torch.export.export_for_training(
+        model, example_inputs, strict=True
+    )
     model = exported_program.module()
     model_fp32 = model
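---

Every hunk above applies the same mechanical change. As a minimal sketch of
the resulting call-site pattern (DemoModule and its example inputs are
hypothetical stand-ins, not code from this patch):

    import torch
    from torch.export import export_for_training


    class DemoModule(torch.nn.Module):
        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return torch.nn.functional.relu(x)


    example_inputs = (torch.randn(1, 3, 8, 8),)

    # Previously these call sites left `strict` to the framework default.
    # Pinning strict=True keeps TorchDynamo-based (strict) export semantics
    # even if that default changes upstream.
    pre_autograd_module = export_for_training(
        DemoModule(), example_inputs, strict=True
    ).module()
    assert isinstance(pre_autograd_module, torch.fx.GraphModule)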