Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 123 additions & 0 deletions backends/transforms/fuse_clamp_with_binary_op.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import sys

import executorch.backends.vulkan.custom_ops_lib # noqa

import torch

from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult


class FuseClampBinaryOpPass(ExportPass):
    """Fuse a clamp-style activation (relu / hardtanh / clamp) into the
    elementwise binary op that immediately precedes it, replacing the pair
    with the corresponding Vulkan custom op ``binary_<op>_with_clamp``.

    ``call`` repeats the fusion sweep until a fixed point is reached.
    """

    FUSEABLE_CLAMP_OPS = [
        exir_ops.edge.aten.relu.default,
        exir_ops.edge.aten.hardtanh.default,
        exir_ops.edge.aten.clamp.default,
    ]
    FUSEABLE_BINARY_OPS = [
        exir_ops.edge.aten.add.Tensor,
        exir_ops.edge.aten.sub.Tensor,
        exir_ops.edge.aten.mul.Tensor,
        exir_ops.edge.aten.div.Tensor,
    ]

    # Maps each fuseable binary op to its fused Vulkan custom op.
    FUSED_OP_MAP = {
        exir_ops.edge.aten.add.Tensor: exir_ops.edge.et_vk.binary_add_with_clamp.default,
        exir_ops.edge.aten.sub.Tensor: exir_ops.edge.et_vk.binary_sub_with_clamp.default,
        exir_ops.edge.aten.mul.Tensor: exir_ops.edge.et_vk.binary_mul_with_clamp.default,
        exir_ops.edge.aten.div.Tensor: exir_ops.edge.et_vk.binary_div_with_clamp.default,
    }

    def exists_before(self, graph_module, node_a, node_b):
        """Return True iff ``node_a`` occurs strictly before ``node_b`` in the
        graph's node order; False when ``node_b`` is not found."""
        seen_a = False
        for node in graph_module.graph.nodes:
            if node is node_a:
                seen_a = True
            if node is node_b:
                return seen_a
        return False

    def get_output_min_max_from_activation(self, activation_node):
        """Return the ``(output_min, output_max)`` range implied by a
        clamp-style node.

        ``None`` means unbounded on that side (only possible for
        ``aten.clamp``).  Raises ``RuntimeError`` for any other target.
        """
        if activation_node.target == exir_ops.edge.aten.relu.default:
            return 0.0, sys.float_info.max
        if activation_node.target == exir_ops.edge.aten.hardtanh.default:
            # Schema defaults; explicit positional args override them.
            output_min, output_max = -1.0, 1.0
            if len(activation_node.args) > 1:
                output_min = activation_node.args[1]
            if len(activation_node.args) > 2:
                output_max = activation_node.args[2]
            return output_min, output_max
        if activation_node.target == exir_ops.edge.aten.clamp.default:
            output_min = (
                activation_node.args[1] if len(activation_node.args) >= 2 else None
            )
            output_max = (
                activation_node.args[2] if len(activation_node.args) >= 3 else None
            )
            return output_min, output_max
        raise RuntimeError(
            f"Unexpected activation target: {activation_node.target}"
        )

    def fuse_binary_op_with_clamp(self, graph_module: torch.fx.GraphModule):
        """Perform one fusion sweep over the graph.

        Returns ``(fuse_added, graph_module)`` where ``fuse_added`` reports
        whether at least one clamp/binary pair was fused this sweep.
        """
        fuse_added = False
        for clamp_node in graph_module.graph.nodes:
            if clamp_node.op != "call_function":
                continue
            if clamp_node.target not in self.FUSEABLE_CLAMP_OPS:
                continue

            preceding_op = clamp_node.args[0]
            if not isinstance(preceding_op, torch.fx.Node):
                continue
            if (
                preceding_op.op != "call_function"
                or preceding_op.target not in self.FUSEABLE_BINARY_OPS
            ):
                continue
            # Only fuse when the clamp is the binary op's sole consumer;
            # otherwise the other consumers would incorrectly observe the
            # clamped values after fusion.
            if len(preceding_op.users) != 1:
                continue

            output_min, output_max = self.get_output_min_max_from_activation(
                clamp_node
            )
            new_args = (*preceding_op.args, output_min, output_max)

            # Remove the clamp node, rerouting its users to the binary op...
            clamp_node.replace_all_uses_with(preceding_op)
            graph_module.graph.erase_node(clamp_node)

            # ...then swap the binary op for the fused custom op
            # `binary_<op>_with_clamp`.
            fused_target = self.FUSED_OP_MAP[preceding_op.target]
            with graph_module.graph.inserting_before(preceding_op):
                fused_node = graph_module.graph.create_node(
                    "call_function",
                    fused_target,
                    new_args,
                )
            preceding_op.replace_all_uses_with(fused_node)
            graph_module.graph.erase_node(preceding_op)

            fuse_added = True

        graph_module.recompile()
        # Re-trace so node metadata is regenerated for the mutated graph.
        graph_module = super().call(graph_module).graph_module
        return fuse_added, graph_module

    def call(self, graph_module: torch.fx.GraphModule):
        """Run fusion sweeps until no further fusions are possible."""
        fuse_added = True
        while fuse_added:
            fuse_added, graph_module = self.fuse_binary_op_with_clamp(graph_module)

        return PassResult(graph_module, True)
105 changes: 105 additions & 0 deletions backends/transforms/fuse_clamps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import sys

import executorch.backends.vulkan.custom_ops_lib # noqa

import torch

from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult


class FuseClampsPass(ExportPass):
    """Fuse two consecutive clamp-style ops (relu / hardtanh / clamp) into a
    single ``aten.clamp`` carrying the composed bounds."""

    FUSEABLE_CLAMPS = [
        exir_ops.edge.aten.relu.default,
        exir_ops.edge.aten.hardtanh.default,
        exir_ops.edge.aten.clamp.default,
    ]

    def get_output_min_max_from_activation(self, activation_node):
        """Return the ``(output_min, output_max)`` range implied by a
        clamp-style node.

        ``None`` means unbounded on that side (only possible for
        ``aten.clamp``).  Raises ``RuntimeError`` for any other target.
        """
        if activation_node.target == exir_ops.edge.aten.relu.default:
            return 0.0, sys.float_info.max
        if activation_node.target == exir_ops.edge.aten.hardtanh.default:
            # Schema defaults; explicit positional args override them.
            output_min, output_max = -1.0, 1.0
            if len(activation_node.args) > 1:
                output_min = activation_node.args[1]
            if len(activation_node.args) > 2:
                output_max = activation_node.args[2]
            return output_min, output_max
        if activation_node.target == exir_ops.edge.aten.clamp.default:
            output_min = (
                activation_node.args[1] if len(activation_node.args) >= 2 else None
            )
            output_max = (
                activation_node.args[2] if len(activation_node.args) >= 3 else None
            )
            return output_min, output_max
        raise RuntimeError(
            f"Unexpected activation target: {activation_node.target}"
        )

    @staticmethod
    def _compose_bounds(first_min_max, second_min_max):
        """Compose the bounds of two sequentially applied clamps.

        ``clamp(clamp(x, a1, b1), a2, b2) == clamp(x, lo, hi)`` where
        ``lo = clamp(a1, a2, b2)`` and ``hi = clamp(b1, a2, b2)`` — i.e. the
        second clamp's bounds are applied to the first clamp's saturation
        values.  ``None`` means unbounded and is mapped to +/-inf for the
        arithmetic, then mapped back on return.
        """
        neg_inf, pos_inf = float("-inf"), float("inf")
        a1 = neg_inf if first_min_max[0] is None else first_min_max[0]
        b1 = pos_inf if first_min_max[1] is None else first_min_max[1]
        a2 = neg_inf if second_min_max[0] is None else second_min_max[0]
        b2 = pos_inf if second_min_max[1] is None else second_min_max[1]
        lo = min(max(a1, a2), b2)
        hi = min(max(b1, a2), b2)
        return (
            None if lo == neg_inf else lo,
            None if hi == pos_inf else hi,
        )

    def call(self, graph_module: torch.fx.GraphModule):
        """Repeatedly fuse adjacent clamp pairs until a fixed point."""
        fuse_added = True
        while fuse_added:
            fuse_added = False
            for clamp_2_node in graph_module.graph.nodes:
                if clamp_2_node.op != "call_function":
                    continue
                if clamp_2_node.target not in self.FUSEABLE_CLAMPS:
                    continue

                preceding_op = clamp_2_node.args[0]
                if not isinstance(preceding_op, torch.fx.Node):
                    continue
                if (
                    preceding_op.op != "call_function"
                    or preceding_op.target not in self.FUSEABLE_CLAMPS
                ):
                    continue
                # Fusion rewrites preceding_op in place; any other consumer
                # of its output would incorrectly observe the composed clamp.
                if len(preceding_op.users) != 1:
                    continue
                # Ensure shape metadata is present and the ranks match.
                if (
                    "val" not in clamp_2_node.args[0].meta
                    or "val" not in preceding_op.args[0].meta
                ):
                    continue
                if len(clamp_2_node.args[0].meta["val"].shape) != len(
                    preceding_op.args[0].meta["val"].shape
                ):
                    continue

                fused_min, fused_max = self._compose_bounds(
                    self.get_output_min_max_from_activation(preceding_op),
                    self.get_output_min_max_from_activation(clamp_2_node),
                )

                # Rewrite the first node as a plain aten.clamp with the
                # composed bounds (relu/hardtanh schemas cannot carry
                # arbitrary bounds), then delete the second clamp.
                preceding_op.target = exir_ops.edge.aten.clamp.default
                preceding_op.args = (preceding_op.args[0], fused_min, fused_max)
                clamp_2_node.replace_all_uses_with(preceding_op)
                graph_module.graph.erase_node(clamp_2_node)
                fuse_added = True

            graph_module.recompile()
            # Re-trace so node metadata is regenerated for the mutated graph.
            graph_module = super().call(graph_module).graph_module

        return PassResult(graph_module, True)
10 changes: 9 additions & 1 deletion backends/transforms/fuse_conv_with_clamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from executorch.exir.pass_base import ExportPass, PassResult


class FuseClampPass(ExportPass):
class FuseConvClampPass(ExportPass):
"""
Some activations like ReLU and hardtanh can be fused with certain operators (e.g. convolution) preceding it.
"""
Expand All @@ -25,6 +25,7 @@ class FuseClampPass(ExportPass):
FUSEABLE_ACTIVATIONS = [
exir_ops.edge.aten.relu.default,
exir_ops.edge.aten.hardtanh.default,
exir_ops.edge.aten.clamp.default,
]

def get_output_min_max_from_activation(self, activation_node):
Expand All @@ -37,6 +38,13 @@ def get_output_min_max_from_activation(self, activation_node):
if len(activation_node.args) > 1:
output_min = activation_node.args[1]
output_max = activation_node.args[2]
elif activation_node.target == exir_ops.edge.aten.clamp.default:
output_min = None
output_max = None
if len(activation_node.args) >= 2:
output_min = activation_node.args[1]
if len(activation_node.args) >= 3:
output_max = activation_node.args[2]

return output_min, output_max

Expand Down
32 changes: 32 additions & 0 deletions backends/transforms/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,38 @@ def define_common_targets():
],
)

# Build target for the FuseClampsPass graph transform (fuses consecutive
# clamp-style ops into a single aten.clamp).
# NOTE(review): ":utils" and "//executorch/exir:sym_util" do not appear to be
# imported by fuse_clamps.py — confirm these deps are needed.
runtime.python_library(
    name = "fuse_clamps",
    srcs = ["fuse_clamps.py"],
    visibility = [
        "//executorch/backends/...",
    ],
    deps = [
        ":utils",
        "//caffe2:torch",
        "//executorch/backends/vulkan:custom_ops_lib",
        "//executorch/exir:pass_base",
        "//executorch/exir:sym_util",
        "//executorch/exir/dialects:lib",
    ],
)

# Build target for the FuseClampBinaryOpPass graph transform (fuses a
# clamp-style op into a preceding binary op as binary_<op>_with_clamp).
# NOTE(review): ":utils" and "//executorch/exir:sym_util" do not appear to be
# imported by fuse_clamp_with_binary_op.py — confirm these deps are needed.
runtime.python_library(
    name = "fuse_clamp_with_binary_op",
    srcs = ["fuse_clamp_with_binary_op.py"],
    visibility = [
        "//executorch/backends/...",
    ],
    deps = [
        ":utils",
        "//caffe2:torch",
        "//executorch/backends/vulkan:custom_ops_lib",
        "//executorch/exir:pass_base",
        "//executorch/exir:sym_util",
        "//executorch/exir/dialects:lib",
    ],
)

runtime.python_library(
name = "view_copy_to_squeeze_unsqueeze",
srcs = ["view_copy_to_squeeze_unsqueeze.py"],
Expand Down
Loading
Loading