From a526b61924031caa680aab44ac697771aed04415 Mon Sep 17 00:00:00 2001
From: Andrew Grebenisan <agrebenisan@meta.com>
Date: Sat, 13 Sep 2025 16:47:21 -0700
Subject: [PATCH] Remove non-per-tensor quantized add and replace with
 per-tensor variant (#14093)

Summary:

As discussed offline, we don't need a non-per-tensor variant of quantized_add, so removing from ref implementations.

Reviewed By: hsharma35

Differential Revision: D81950579
---
 backends/cadence/aot/ref_implementations.py   | 28 +++++++++----------
 .../aot/tests/test_ref_implementations.py     |  8 +++---
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/backends/cadence/aot/ref_implementations.py b/backends/cadence/aot/ref_implementations.py
index 6d5b2a89c05..f3856d2fd1c 100644
--- a/backends/cadence/aot/ref_implementations.py
+++ b/backends/cadence/aot/ref_implementations.py
@@ -127,14 +127,14 @@ def dequantize_per_tensor(
     return (input_tensor - zero_point).to(dtype) * scale
 
 
-@impl(m, "quantized_add")
-def quantized_add(
+@impl(m, "quantized_add.per_tensor")
+def quantized_add_per_tensor(
     X: torch.Tensor,
-    X_scale: torch.Tensor,
-    X_zero_point: torch.Tensor,
+    X_scale: float,
+    X_zero_point: int,
     Y: torch.Tensor,
-    Y_scale: torch.Tensor,
-    Y_zero_point: torch.Tensor,
+    Y_scale: float,
+    Y_zero_point: int,
     out_scale: float,
     out_zero_point: int,
 ) -> torch.Tensor:
@@ -149,17 +149,17 @@ def quantized_add(
     out = (X_scale(X - X_zero_point) + Y_scale(Y - Y_zero_point)) / out_scale + out_zero_point
 
     Args:
-        - X (Tensor): The first operand
-        - X_scale (Tensor): The ratio between the sizes of X's floating point and quantized
+        - X: The first operand
+        - X_scale: The ratio between the sizes of X's floating point and quantized
             ranges
-        - X_zero_point (Tensor): The quantized mapping of zero for X
-        - Y (Tensor): The second operand
-        - Y_scale (Tensor): The ratio between the sizes of Y's floating point and quantized
+        - X_zero_point: The quantized mapping of zero for X
+        - Y: The second operand
+        - Y_scale: The ratio between the sizes of Y's floating point and quantized
             ranges
-        - Y_zero_point (Tensor): The quantized mapping of zero for Y
-        - out_scale (float): The ratio between the sizes of the output's floating point and
+        - Y_zero_point: The quantized mapping of zero for Y
+        - out_scale: The ratio between the sizes of the output's floating point and
             quantized ranges
-        - out_zero_point (int): The quantized mapping of zero for the output
+        - out_zero_point: The quantized mapping of zero for the output
     """
     supported_dtypes = [torch.int8, torch.uint8]
     if X.dtype != Y.dtype:
diff --git a/backends/cadence/aot/tests/test_ref_implementations.py b/backends/cadence/aot/tests/test_ref_implementations.py
index 04b3e8e75ba..53ed526f759 100644
--- a/backends/cadence/aot/tests/test_ref_implementations.py
+++ b/backends/cadence/aot/tests/test_ref_implementations.py
@@ -124,11 +124,11 @@ def test_quantized_add(
 
         output = torch.ops.cadence.quantized_add(
             X_tensor,
-            torch.tensor(X_scale),
-            torch.tensor(X_zero_point, dtype=dtype),
+            X_scale,
+            X_zero_point,
             Y_tensor,
-            torch.tensor(Y_scale),
-            torch.tensor(Y_zero_point, dtype=dtype),
+            Y_scale,
+            Y_zero_point,
             out_scale,
             out_zero_point,
         )