From 3cd94580af0be64b2fd382739218dfef22c8c711 Mon Sep 17 00:00:00 2001
From: Jake Stevens
Date: Tue, 9 Sep 2025 07:18:54 -0700
Subject: [PATCH] Remove SoftmaxQuantizer (#14089)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/14089

Softmax is not supported on the current platforms, so quantizing it only
introduces unneeded quant/dequant ops into the graph. This diff removes the
softmax quantizer until the op is supported. (A usage sketch illustrating the
intended effect is appended after the diff.)

Differential Revision: D81964057
---
 backends/nxp/quantizer/neutron_quantizer.py |  2 -
 .../test_remove_io_quant_ops_pass.py        |  9 ++---
 backends/nxp/tests/test_edge_passes.py      |  2 +-
 backends/nxp/tests/test_integration.py      |  4 +-
 backends/nxp/tests/test_quantizer.py        | 37 -------------------
 5 files changed, 6 insertions(+), 48 deletions(-)

diff --git a/backends/nxp/quantizer/neutron_quantizer.py b/backends/nxp/quantizer/neutron_quantizer.py
index d3f84144aa3..c713c2b1014 100644
--- a/backends/nxp/quantizer/neutron_quantizer.py
+++ b/backends/nxp/quantizer/neutron_quantizer.py
@@ -35,7 +35,6 @@
     ReshapePattern,
     SharedSpecPattern,
     SigmoidPattern,
-    SoftMaxPattern,
     TanhInPlacePattern,
     TanhPattern,
     ViewPattern,
@@ -225,7 +224,6 @@ def __init__(self):
                 NeutronAtenQuantizer(ReluInPlacePattern(), static_qconfig),
                 NeutronAtenQuantizer(ReshapePattern(), static_qconfig),
                 NeutronAtenQuantizer(SigmoidPattern(), static_qconfig),
-                NeutronAtenQuantizer(SoftMaxPattern(), static_qconfig),
                 NeutronAtenQuantizer(TanhPattern(), static_qconfig),
                 NeutronAtenQuantizer(TanhInPlacePattern(), static_qconfig),
                 NeutronAtenQuantizer(ViewPattern(), static_qconfig),
diff --git a/backends/nxp/tests/ir/edge_passes/test_remove_io_quant_ops_pass.py b/backends/nxp/tests/ir/edge_passes/test_remove_io_quant_ops_pass.py
index 17b040fbc3d..6ca0e48eefb 100644
--- a/backends/nxp/tests/ir/edge_passes/test_remove_io_quant_ops_pass.py
+++ b/backends/nxp/tests/ir/edge_passes/test_remove_io_quant_ops_pass.py
@@ -59,22 +59,19 @@ def test_remove_io_quant_ops_pass__cifarnet():
     )
 
     nodes = list(exec_prog.exported_program().graph.nodes)
-    assert len(nodes) == 11
+    assert len(nodes) == 9
     assert (
         nodes[0].meta["val"].dtype == torch.int8
     ), "Input tensor doesn't have type INT8."
+    # Currently, softmax is not quantized
     assert (
-        nodes[10].meta["val"][0].dtype == torch.int8
-    ), "Output tensor doesn't have type INT8."
+        nodes[8].meta["val"][0].dtype == torch.float32
+    ), "Output tensor doesn't have type FLOAT32."
 
     assert (
         get_config_method_name(None, "input", 0, "scale") in exec_prog._config_methods
     )
     assert get_config_method_name(None, "input", 0, "zp") in exec_prog._config_methods
-    assert (
-        get_config_method_name(None, "output", 0, "scale") in exec_prog._config_methods
-    )
-    assert get_config_method_name(None, "output", 0, "zp") in exec_prog._config_methods
 
 
 class MultiInputOutputModule(torch.nn.Module):
diff --git a/backends/nxp/tests/test_edge_passes.py b/backends/nxp/tests/test_edge_passes.py
index a189299be52..282a7ab379b 100644
--- a/backends/nxp/tests/test_edge_passes.py
+++ b/backends/nxp/tests/test_edge_passes.py
@@ -72,7 +72,7 @@ def unsupported_target(*_):
     exported_program = epm.exported_program()
 
     nodes = list(exported_program.graph_module.graph.nodes)
-    assert len(nodes) == 28
+    assert len(nodes) == 26
 
     view_copy_indices = _find_view_copy_node_indices(nodes)
 
diff --git a/backends/nxp/tests/test_integration.py b/backends/nxp/tests/test_integration.py
index d31b22c9ce9..40d99e0afe2 100644
--- a/backends/nxp/tests/test_integration.py
+++ b/backends/nxp/tests/test_integration.py
@@ -27,7 +27,7 @@ def test_conv_fc_softmax__to_executorch_program():
 
     delegation_info = get_delegation_info(program.graph_module)
     assert delegation_info.num_delegated_subgraphs == 1
-    assert delegation_info.num_non_delegated_nodes == 11
+    assert delegation_info.num_non_delegated_nodes == 7
     assert delegation_info.num_delegated_nodes == 13
 
     for node in program.graph.nodes:
@@ -43,7 +43,7 @@ def test_cifarnet():
 
     delegation_info = get_delegation_info(exec_prog.exported_program().graph_module)
    assert delegation_info.num_delegated_subgraphs == 1
-    assert delegation_info.num_non_delegated_nodes == 11
+    assert delegation_info.num_non_delegated_nodes == 7
     assert delegation_info.num_delegated_nodes == 45
 
     nodes = list(exec_prog.exported_program().graph.nodes)
diff --git a/backends/nxp/tests/test_quantizer.py b/backends/nxp/tests/test_quantizer.py
index ef5fbb0cbca..fcad48d98d3 100644
--- a/backends/nxp/tests/test_quantizer.py
+++ b/backends/nxp/tests/test_quantizer.py
@@ -131,43 +131,6 @@ def test_quantizer_maxpool2d():
     assert input_quant == output_quant
 
 
-def test_quantizer_softmax():
-    model = models.SoftmaxModule(dim=0)
-    model.eval()
-
-    example_input = (torch.ones(1, 10),)
-    quantizer = NeutronQuantizer()
-    graph_module = torch.export.export(model, example_input, strict=True).module()
-
-    # noinspection PyTypeChecker
-    m = prepare_pt2e(graph_module, quantizer)
-    m(*example_input)
-    m = convert_pt2e(m)
-
-    # Dry run
-    m(*example_input)
-
-    nodes = list(m.graph.nodes)
-    assert len(nodes) == 7
-    # Check if QDQ pattern:
-    assert nodes[3].name == "softmax"
-    assert (
-        _get_target_name(nodes[3].args[0])
-        == "torch.ops.quantized_decomposed.dequantize_per_tensor.default"
-    )
-    assert (
-        _get_target_name(nodes[4])
-        == "torch.ops.quantized_decomposed.quantize_per_tensor.default"
-    )
-    assert nodes[4].args[0].name == "softmax"
-
-    # Check output quantization
-    scale, zp, _, _, dtype = nodes[4].args[1:]
-    assert scale == 1.0 / 256.0
-    assert zp == -128
-    assert dtype == torch.int8
-
-
 def test_quantizer_single_maxpool2d():
     model = models.MaxPool2dModule()
     model.eval()
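
Usage sketch (not part of the patch): a minimal illustration of the intended effect of this change. With SoftMaxPattern no longer registered in NeutronQuantizer, a model consisting of a single softmax is not annotated, so prepare_pt2e/convert_pt2e leave it in float and insert no quantize/dequantize nodes. The helper names (models.SoftmaxModule, NeutronQuantizer) come from the removed test; the import paths for models and for prepare_pt2e/convert_pt2e are assumptions and may differ across versions.

import torch

# Assumed import path; prepare_pt2e/convert_pt2e may instead live in
# torchao.quantization.pt2e.quantize_pt2e depending on the PyTorch version.
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
from executorch.backends.nxp.tests import models  # assumed location of SoftmaxModule

model = models.SoftmaxModule(dim=0)
model.eval()
example_input = (torch.ones(1, 10),)

# Export, annotate with NeutronQuantizer, calibrate, and convert to a quantized graph.
graph_module = torch.export.export(model, example_input, strict=True).module()
m = prepare_pt2e(graph_module, NeutronQuantizer())
m(*example_input)  # calibration run
m = convert_pt2e(m)

# With SoftMaxPattern removed, nothing in this softmax-only model is annotated,
# so no quantize/dequantize nodes are expected in the converted graph.
assert not any("quantized_decomposed" in str(node.target) for node in m.graph.nodes)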