From a526b61924031caa680aab44ac697771aed04415 Mon Sep 17 00:00:00 2001 From: Andrew Grebenisan Date: Sat, 13 Sep 2025 16:47:21 -0700 Subject: [PATCH] Remove non-per-tensor quantized add and replace with per-tensor variant (#14093) Summary: As discussed offline, we don't need a non-per-tensor variant of quantized_add, so removing from ref implementations. Reviewed By: hsharma35 Differential Revision: D81950579 --- backends/cadence/aot/ref_implementations.py | 28 +++++++++---------- .../aot/tests/test_ref_implementations.py | 8 +++--- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/backends/cadence/aot/ref_implementations.py b/backends/cadence/aot/ref_implementations.py index 6d5b2a89c05..f3856d2fd1c 100644 --- a/backends/cadence/aot/ref_implementations.py +++ b/backends/cadence/aot/ref_implementations.py @@ -127,14 +127,14 @@ def dequantize_per_tensor( return (input_tensor - zero_point).to(dtype) * scale -@impl(m, "quantized_add") -def quantized_add( +@impl(m, "quantized_add.per_tensor") +def quantized_add_per_tensor( X: torch.Tensor, - X_scale: torch.Tensor, - X_zero_point: torch.Tensor, + X_scale: float, + X_zero_point: int, Y: torch.Tensor, - Y_scale: torch.Tensor, - Y_zero_point: torch.Tensor, + Y_scale: float, + Y_zero_point: int, out_scale: float, out_zero_point: int, ) -> torch.Tensor: @@ -149,17 +149,17 @@ def quantized_add( out = (X_scale(X - X_zero_point) + Y_scale(Y - Y_zero_point)) / out_scale + out_zero_point Args: - - X (Tensor): The first operand - - X_scale (Tensor): The ratio between the sizes of X's floating point and quantized + - X: The first operand + - X_scale: The ratio between the sizes of X's floating point and quantized ranges - - X_zero_point (Tensor): The quantized mapping of zero for X - - Y (Tensor): The second operand - - Y_scale (Tensor): The ratio between the sizes of Y's floating point and quantized + - X_zero_point: The quantized mapping of zero for X + - Y: The second operand + - Y_scale: The ratio between the sizes of Y's floating point and quantized ranges - - Y_zero_point (Tensor): The quantized mapping of zero for Y - - out_scale (float): The ratio between the sizes of the output's floating point and + - Y_zero_point: The quantized mapping of zero for Y + - out_scale: The ratio between the sizes of the output's floating point and quantized ranges - - out_zero_point (int): The quantized mapping of zero for the output + - out_zero_point: The quantized mapping of zero for the output """ supported_dtypes = [torch.int8, torch.uint8] if X.dtype != Y.dtype: diff --git a/backends/cadence/aot/tests/test_ref_implementations.py b/backends/cadence/aot/tests/test_ref_implementations.py index 04b3e8e75ba..53ed526f759 100644 --- a/backends/cadence/aot/tests/test_ref_implementations.py +++ b/backends/cadence/aot/tests/test_ref_implementations.py @@ -124,11 +124,11 @@ def test_quantized_add( output = torch.ops.cadence.quantized_add( X_tensor, - torch.tensor(X_scale), - torch.tensor(X_zero_point, dtype=dtype), + X_scale, + X_zero_point, Y_tensor, - torch.tensor(Y_scale), - torch.tensor(Y_zero_point, dtype=dtype), + Y_scale, + Y_zero_point, out_scale, out_zero_point, )