12 changes: 7 additions & 5 deletions backends/cadence/aot/replace_ops.py
@@ -2065,11 +2065,10 @@ def call_operator(
return super().call_operator(op, args, kwargs, meta)


@register_cadence_pass(CadencePassAttribute(opt_level=2))
class ReplaceGeluWithApproximateGeluPass(ExportPass):
@register_cadence_pass(CadencePassAttribute(opt_level=0))
class ReplaceAtenApproxGeluWithApproxGeluPass(ExportPass):
"""
Replace the gelu op with an approximate gelu op. The approximate gelu op
is more efficient on DSP backends.
Replace the aten gelu op, when it carries an approximate arg, with an approximate gelu op.
"""

def call_operator(
@@ -2079,6 +2078,9 @@ def call_operator(
kwargs: Dict[str, Argument],
meta: NodeMetadata,
) -> ProxyValue:
if "approximate" not in kwargs:
return super().call_operator(op, args, kwargs, meta)

if op not in {
exir_ops.edge.aten.gelu.default,
}:
@@ -2414,7 +2416,7 @@ class CadenceReplaceOpsInGraph:
ReplaceSingleElementTensorArgumentsFromFullOpWithScalarPass,
ReplaceAtenAvgPoolWithJarvisAvgPoolPass,
ReplaceWhereWithFullArgsWithWhereScalar,
ReplaceGeluWithApproximateGeluPass,
ReplaceAtenApproxGeluWithApproxGeluPass,
ReplaceSplitWithSlicePass,
ReplacePowWithMulPass,
]
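
For reference, the approximation the renamed pass targets is the standard tanh-based GELU that torch exposes through the approximate="tanh" argument (approximate="none" is the exact, erf-based form). The sketch below is a plain-PyTorch illustration of those two formulas only, not the Cadence/EXIR replacement op itself; the helper names are made up for the example.

import math

import torch

def gelu_exact(x: torch.Tensor) -> torch.Tensor:
    # Exact GELU: x * Phi(x), where Phi is the standard normal CDF (erf-based).
    return 0.5 * x * (1.0 + torch.erf(x / math.sqrt(2.0)))

def gelu_tanh_approx(x: torch.Tensor) -> torch.Tensor:
    # Tanh approximation, i.e. what approximate="tanh" computes:
    # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x**3)))
    return 0.5 * x * (
        1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x.pow(3)))
    )

x = torch.randn(2, 1, 64)
assert torch.allclose(gelu_exact(x), torch.nn.functional.gelu(x), atol=1e-6)
assert torch.allclose(
    gelu_tanh_approx(x), torch.nn.functional.gelu(x, approximate="tanh"), atol=1e-6
)
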
40 changes: 32 additions & 8 deletions backends/cadence/aot/tests/test_replace_ops_passes.py
@@ -26,13 +26,13 @@
ForceChannelLastForConvPass,
MakeSliceAndCatDimOutermostPass,
ReplaceAddMMWithLinearPass,
ReplaceAtenApproxGeluWithApproxGeluPass,
ReplaceAtenConvolutionWithJarvisConvolutionPass,
ReplaceConstantPadNdWithSlicePass,
ReplaceConvolutionOptionalArgsWithConcreteArgsPass,
ReplaceConvWithIm2RowAndLinear,
ReplaceEmptyTensorsWithFullPass,
ReplaceFunctionallyEquivalentOpTargets,
ReplaceGeluWithApproximateGeluPass,
ReplaceIm2RowWithViewPass,
ReplaceLinearWithFullyConnectedOpPass,
ReplaceMatmulWithTransposedMatmulPass,
@@ -1287,17 +1287,41 @@ def forward(self, cond: torch.Tensor):
1,
)

def test_replace_aten_gelu_with_approximate_gelu(self):
class Gelu(torch.nn.Module):
def forward(self, input):
return torch.nn.functional.gelu(input)
def test_no_replace_aten_gelu_with_approximate_gelu(self):
inputs = torch.randn(2, 1, 64)

gm = single_op_builder(
placeholders=(inputs,),
op=exir_ops.edge.aten.gelu.default,
args=(inputs,),
)
gm = ExportPass().call(gm).graph_module

p = ReplaceAtenApproxGeluWithApproxGeluPass()
graph_after_passes = p.call(gm).graph_module

# Assert that aten.gelu op was not decomposed, since it didn't have an approximate argument
self.assertEqual(
count_node(
graph_after_passes,
exir_ops.edge.aten.gelu.default,
),
1,
)

def test_replace_aten_approximate_gelu_with_approximate_gelu(self):
inputs = torch.randn(2, 1, 64)

graph_module = export_to_edge(Gelu(), (inputs,)).exported_program().graph_module
gm = single_op_builder(
placeholders=(inputs,),
op=exir_ops.edge.aten.gelu.default,
args=(inputs,),
kwargs={"approximate": "tanh"},
)
gm = ExportPass().call(gm).graph_module

p = ReplaceGeluWithApproximateGeluPass()
graph_after_passes = cast(PassResult, p(graph_module)).graph_module
p = ReplaceAtenApproxGeluWithApproxGeluPass()
graph_after_passes = p.call(gm).graph_module

# Assert that aten.gelu op was decomposed
self.assertEqual(
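
As a side note, the new tests build a single-op graph and count surviving aten.gelu nodes before and after the pass. A rough, self-contained analogue of that node-counting idea using plain torch.fx (single_op_builder and count_node are helpers from the Cadence test utilities and are not reproduced here) might look like:

import torch
import torch.fx

class GeluModule(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.nn.functional.gelu(x, approximate="tanh")

def count_gelu_nodes(gm: torch.fx.GraphModule) -> int:
    # Count call_function nodes whose target is a gelu variant.
    return sum(
        1
        for n in gm.graph.nodes
        if n.op == "call_function" and getattr(n.target, "__name__", "") == "gelu"
    )

gm = torch.fx.symbolic_trace(GeluModule())
# Before any replacement pass runs, exactly one gelu call is present.
assert count_gelu_nodes(gm) == 1
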