From 3cd94580af0be64b2fd382739218dfef22c8c711 Mon Sep 17 00:00:00 2001
From: Jake Stevens
Date: Tue, 9 Sep 2025 07:18:54 -0700
Subject: [PATCH] Remove SoftmaxQuantizer (#14089)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/14089

Softmax is not supported on the current platforms, so quantizing it only
introduces unneeded quant/dequant ops into the graph. This diff removes the
softmax quantizer until the op is supported. (A usage sketch illustrating the
intended effect is appended after the diff.)

Differential Revision: D81964057
---
 backends/nxp/quantizer/neutron_quantizer.py |  2 -
 .../test_remove_io_quant_ops_pass.py        |  9 ++---
 backends/nxp/tests/test_edge_passes.py      |  2 +-
 backends/nxp/tests/test_integration.py      |  4 +-
 backends/nxp/tests/test_quantizer.py        | 37 -------------------
 5 files changed, 6 insertions(+), 48 deletions(-)

diff --git a/backends/nxp/quantizer/neutron_quantizer.py b/backends/nxp/quantizer/neutron_quantizer.py
index d3f84144aa3..c713c2b1014 100644
--- a/backends/nxp/quantizer/neutron_quantizer.py
+++ b/backends/nxp/quantizer/neutron_quantizer.py
@@ -35,7 +35,6 @@
     ReshapePattern,
     SharedSpecPattern,
     SigmoidPattern,
-    SoftMaxPattern,
     TanhInPlacePattern,
     TanhPattern,
     ViewPattern,
@@ -225,7 +224,6 @@ def __init__(self):
                 NeutronAtenQuantizer(ReluInPlacePattern(), static_qconfig),
                 NeutronAtenQuantizer(ReshapePattern(), static_qconfig),
                 NeutronAtenQuantizer(SigmoidPattern(), static_qconfig),
-                NeutronAtenQuantizer(SoftMaxPattern(), static_qconfig),
                 NeutronAtenQuantizer(TanhPattern(), static_qconfig),
                 NeutronAtenQuantizer(TanhInPlacePattern(), static_qconfig),
                 NeutronAtenQuantizer(ViewPattern(), static_qconfig),
diff --git a/backends/nxp/tests/ir/edge_passes/test_remove_io_quant_ops_pass.py b/backends/nxp/tests/ir/edge_passes/test_remove_io_quant_ops_pass.py
index 17b040fbc3d..6ca0e48eefb 100644
--- a/backends/nxp/tests/ir/edge_passes/test_remove_io_quant_ops_pass.py
+++ b/backends/nxp/tests/ir/edge_passes/test_remove_io_quant_ops_pass.py
@@ -59,22 +59,19 @@ def test_remove_io_quant_ops_pass__cifarnet():
     )
 
     nodes = list(exec_prog.exported_program().graph.nodes)
-    assert len(nodes) == 11
+    assert len(nodes) == 9
     assert (
         nodes[0].meta["val"].dtype == torch.int8
     ), "Input tensor doesn't have type INT8."
+    # Currently, softmax is not quantized
     assert (
-        nodes[10].meta["val"][0].dtype == torch.int8
-    ), "Output tensor doesn't have type INT8."
+        nodes[8].meta["val"][0].dtype == torch.float32
+    ), "Output tensor doesn't have type FLOAT32."
 
     assert (
         get_config_method_name(None, "input", 0, "scale") in exec_prog._config_methods
     )
     assert get_config_method_name(None, "input", 0, "zp") in exec_prog._config_methods
-    assert (
-        get_config_method_name(None, "output", 0, "scale") in exec_prog._config_methods
-    )
-    assert get_config_method_name(None, "output", 0, "zp") in exec_prog._config_methods
 
 
 class MultiInputOutputModule(torch.nn.Module):
diff --git a/backends/nxp/tests/test_edge_passes.py b/backends/nxp/tests/test_edge_passes.py
index a189299be52..282a7ab379b 100644
--- a/backends/nxp/tests/test_edge_passes.py
+++ b/backends/nxp/tests/test_edge_passes.py
@@ -72,7 +72,7 @@ def unsupported_target(*_):
     exported_program = epm.exported_program()
 
     nodes = list(exported_program.graph_module.graph.nodes)
-    assert len(nodes) == 28
+    assert len(nodes) == 26
 
     view_copy_indices = _find_view_copy_node_indices(nodes)
 
diff --git a/backends/nxp/tests/test_integration.py b/backends/nxp/tests/test_integration.py
index d31b22c9ce9..40d99e0afe2 100644
--- a/backends/nxp/tests/test_integration.py
+++ b/backends/nxp/tests/test_integration.py
@@ -27,7 +27,7 @@ def test_conv_fc_softmax__to_executorch_program():
 
     delegation_info = get_delegation_info(program.graph_module)
     assert delegation_info.num_delegated_subgraphs == 1
-    assert delegation_info.num_non_delegated_nodes == 11
+    assert delegation_info.num_non_delegated_nodes == 7
     assert delegation_info.num_delegated_nodes == 13
 
     for node in program.graph.nodes:
@@ -43,7 +43,7 @@ def test_cifarnet():
 
     delegation_info = get_delegation_info(exec_prog.exported_program().graph_module)
    assert delegation_info.num_delegated_subgraphs == 1
-    assert delegation_info.num_non_delegated_nodes == 11
+    assert delegation_info.num_non_delegated_nodes == 7
     assert delegation_info.num_delegated_nodes == 45
 
     nodes = list(exec_prog.exported_program().graph.nodes)
diff --git a/backends/nxp/tests/test_quantizer.py b/backends/nxp/tests/test_quantizer.py
index ef5fbb0cbca..fcad48d98d3 100644
--- a/backends/nxp/tests/test_quantizer.py
+++ b/backends/nxp/tests/test_quantizer.py
@@ -131,43 +131,6 @@ def test_quantizer_maxpool2d():
     assert input_quant == output_quant
 
 
-def test_quantizer_softmax():
-    model = models.SoftmaxModule(dim=0)
-    model.eval()
-
-    example_input = (torch.ones(1, 10),)
-    quantizer = NeutronQuantizer()
-    graph_module = torch.export.export(model, example_input, strict=True).module()
-
-    # noinspection PyTypeChecker
-    m = prepare_pt2e(graph_module, quantizer)
-    m(*example_input)
-    m = convert_pt2e(m)
-
-    # Dry run
-    m(*example_input)
-
-    nodes = list(m.graph.nodes)
-    assert len(nodes) == 7
-    # Check if QDQ pattern:
-    assert nodes[3].name == "softmax"
-    assert (
-        _get_target_name(nodes[3].args[0])
-        == "torch.ops.quantized_decomposed.dequantize_per_tensor.default"
-    )
-    assert (
-        _get_target_name(nodes[4])
-        == "torch.ops.quantized_decomposed.quantize_per_tensor.default"
-    )
-    assert nodes[4].args[0].name == "softmax"
-
-    # Check output quantization
-    scale, zp, _, _, dtype = nodes[4].args[1:]
-    assert scale == 1.0 / 256.0
-    assert zp == -128
-    assert dtype == torch.int8
-
-
 def test_quantizer_single_maxpool2d():
     model = models.MaxPool2dModule()
     model.eval()
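
Usage sketch (not part of the patch): a minimal illustration of the intended effect of this change. With SoftMaxPattern no longer registered in NeutronQuantizer, a model consisting of a single softmax is not annotated, so prepare_pt2e/convert_pt2e leave it in float and insert no quantize/dequantize nodes. The helper names (models.SoftmaxModule, NeutronQuantizer) come from the removed test; the import paths for models and for prepare_pt2e/convert_pt2e are assumptions and may differ across versions.

import torch

# Assumed import path; prepare_pt2e/convert_pt2e may instead live in
# torchao.quantization.pt2e.quantize_pt2e depending on the PyTorch version.
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
from executorch.backends.nxp.tests import models  # assumed location of SoftmaxModule

model = models.SoftmaxModule(dim=0)
model.eval()
example_input = (torch.ones(1, 10),)

# Export, annotate with NeutronQuantizer, calibrate, and convert to a quantized graph.
graph_module = torch.export.export(model, example_input, strict=True).module()
m = prepare_pt2e(graph_module, NeutronQuantizer())
m(*example_input)  # calibration run
m = convert_pt2e(m)

# With SoftMaxPattern removed, nothing in this softmax-only model is annotated,
# so no quantize/dequantize nodes are expected in the converted graph.
assert not any("quantized_decomposed" in str(node.target) for node in m.graph.nodes)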