From f7ebb5822a593d1126c0dd973b63a086c45eb557 Mon Sep 17 00:00:00 2001
From: Rahul Chandra
Date: Wed, 3 Dec 2025 15:12:26 -0800
Subject: [PATCH] Using generic implementation for 16-bit activations and
 8-bit weights for Conv2D in backends (#16007)

Summary:
# Context

We continue from D84284794 to add support for 16-bit activations. Note that although the operators already support 16-bit activations, they only do so when the weights are also 16-bit. To support 16-bit activations with 8-bit weights (W8A16), we need to change the way some of these functions are templated.

# Current Behavior

Right now, we compose two macros: the `ET_FORALL_JARVIS_QUANTIZED_TYPES_WITH_INT16` macro:

https://www.internalfb.com/code/fbsource/[9e8c6d8466107f58aa3de1b9e4ec71c49d670a8f]/fbcode/on_device_ai/Assistant/Jarvis/min_runtime/operators/generic/operators.h?lines=22-25

and the per-operator function macro (`quantized_linear` chosen as an example):

https://www.internalfb.com/code/fbsource/[9e8c6d8466107f58aa3de1b9e4ec71c49d670a8f]/fbcode/on_device_ai/Assistant/Jarvis/min_runtime/operators/generic/quantized_linear_out.cpp?lines=30-41

Together, they expand into a switch statement that calls `quantized_linear` with the matching template parameter. Note, however, that this assumes the input activations and the weights share the same dtype, which does not hold for W8A16; see the sketch below.
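To illustrate, here is a minimal, self-contained sketch of that composition. All names, types, and signatures below are simplified stand-ins for the internal definitions linked above, not the real code:

```
// Simplified stand-ins for Tensor/ScalarType; illustrative only.
#include <cstdint>
#include <cstdio>

enum class ScalarType { Byte, Char, Short };

struct Tensor {
  ScalarType dtype;
  ScalarType scalar_type() const { return dtype; }
};

// The kernel is templated on a single ctype, so activations and weights
// are forced to share it.
template <typename T>
void quantized_linear_(const Tensor& in, const Tensor& weight, Tensor& out) {
  std::printf("kernel with %zu-byte ctype\n", sizeof(T));
}

// One (ctype, dtype) pair per supported quantized type.
#define ET_FORALL_JARVIS_QUANTIZED_TYPES_WITH_INT16(_) \
  _(uint8_t, Byte)                                     \
  _(int8_t, Char)                                      \
  _(int16_t, Short)

// The per-operator macro expands to one switch case per pair, so the
// composition is just a switch over a single scalar type.
void quantized_linear(const Tensor& in, const Tensor& weight, Tensor& out) {
#define typed_quantized_linear(ctype, dtype)   \
  case ScalarType::dtype:                      \
    quantized_linear_<ctype>(in, weight, out); \
    break;

  switch (out.scalar_type()) {
    ET_FORALL_JARVIS_QUANTIZED_TYPES_WITH_INT16(typed_quantized_linear)
  }
#undef typed_quantized_linear
}

int main() {
  Tensor in{ScalarType::Short}, weight{ScalarType::Short}, out{ScalarType::Short};
  quantized_linear(in, weight, out); // selects the int16_t kernel for all three
}
```

Because the switch keys on a single dtype, there is no case that pairs int16 activations with int8 weights.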
# This Diff

We finish by using the generic implementation in all the backends, and we add e2e tests as well as unit tests. Concretely, each conv2d out-variant now routes the heterogeneous W8A16 case to the generic kernel before the existing homogeneous dispatch runs, as sketched below.
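Continuing the sketch above (reusing its illustrative `Tensor`/`ScalarType` stubs; this shows the shape of the change, not the exact code), each out-variant gains an early-return guard:

```
// Reuses the Tensor/ScalarType stubs from the previous sketch.
void quantized_conv2d_out(const Tensor& input, const Tensor& weight, Tensor& out) {
  // New: W8A16 (int16_t activations, int8_t weights) goes to the generic
  // implementation and returns early.
  if (out.scalar_type() == ScalarType::Short &&
      input.scalar_type() == ScalarType::Short &&
      weight.scalar_type() == ScalarType::Char) {
    // ::impl::generic::native::quantized_conv2d_..._out(...);
    return;
  }
  // The existing homogeneous dispatch runs unchanged for all other cases.
}
```

The actual hunks below apply this guard to the NCHW and NHWC variants, in both the per-channel and per-tensor overloads.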
Reviewed By: hsharma35

Differential Revision: D87993325
---
 backends/cadence/aot/quantizer/quantizer.py   |  14 +
 .../op_quantized_conv2d_nchw_out.cpp          |  49 +++
 .../op_quantized_conv2d_nhwc_out.cpp          |  53 ++-
 backends/cadence/hifi/operators/targets.bzl   |   6 +-
 .../tests/test_op_quantized_conv2d_out.cpp    | 304 ++++++++++++++++++
 5 files changed, 421 insertions(+), 5 deletions(-)
 create mode 100644 backends/cadence/hifi/operators/tests/test_op_quantized_conv2d_out.cpp

diff --git a/backends/cadence/aot/quantizer/quantizer.py b/backends/cadence/aot/quantizer/quantizer.py
index 70b16b86fda..7dac4049feb 100644
--- a/backends/cadence/aot/quantizer/quantizer.py
+++ b/backends/cadence/aot/quantizer/quantizer.py
@@ -372,3 +372,17 @@ def __init__(self, quantizers: Optional[list[Quantizer]] = None) -> None:
         # Add 16-bit quantizers for LinearPattern
         quantizers.append(CadenceAtenQuantizer(LinearPattern(), qconfig_A16))
         super().__init__(quantizers)
+
+
+class CadenceWith16BitConvActivationsQuantizer(CadenceQuantizer):
+    """
+    Quantizer including A16 conv
+    """
+
+    def __init__(self, quantizers: Optional[list[Quantizer]] = None) -> None:
+        if quantizers is None:
+            quantizers = []
+        # Add 16-bit quantizers for Conv patterns
+        quantizers.append(CadenceAtenQuantizer(Conv1dPattern(), qconfig_A16))
+        quantizers.append(CadenceAtenQuantizer(Conv2dPattern(), qconfig_A16))
+        super().__init__(quantizers)

diff --git a/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_out.cpp
index 984747d9316..fdc2c9ad5dc 100644
--- a/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_out.cpp
+++ b/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_out.cpp
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include

 #define ALIGN_PTR(x, bytes) ((((unsigned)(x)) + (bytes - 1)) & (~(bytes - 1)))

@@ -532,6 +533,30 @@ void quantized_conv2d_nchw_out(
     __ET_UNUSED const Tensor& out_multiplier,
     __ET_UNUSED const Tensor& out_shift,
     Tensor& out) {
+  // Handle W8A16 heterogeneous type (int16_t activations, int8_t weights)
+  if (out.scalar_type() == ::executorch::aten::ScalarType::Short &&
+      input.scalar_type() == ::executorch::aten::ScalarType::Short &&
+      weight.scalar_type() == ::executorch::aten::ScalarType::Char) {
+    ::impl::generic::native::quantized_conv2d_nchw_out(
+        ctx,
+        input,
+        weight,
+        bias,
+        stride,
+        padding,
+        dilation,
+        groups,
+        in_zero_point,
+        weight_zero_point,
+        bias_scale,
+        output_scale,
+        output_zero_point,
+        out_multiplier,
+        out_shift,
+        out);
+    return;
+  }
+
   const float bias_scale_float = bias_scale.const_data_ptr<float>()[0];
   const int32_t weight_zero_point_int =
       weight_zero_point.const_data_ptr<int32_t>()[0];

@@ -596,6 +621,30 @@ void quantized_conv2d_nchw_per_tensor_out(
     __ET_UNUSED int64_t out_multiplier,
     __ET_UNUSED int64_t out_shift,
     Tensor& out) {
+  // Handle W8A16 heterogeneous type (int16_t activations, int8_t weights)
+  if (out.scalar_type() == ::executorch::aten::ScalarType::Short &&
+      input.scalar_type() == ::executorch::aten::ScalarType::Short &&
+      weight.scalar_type() == ::executorch::aten::ScalarType::Char) {
+    ::impl::generic::native::quantized_conv2d_nchw_per_tensor_out(
+        ctx,
+        input,
+        weight,
+        bias,
+        stride,
+        padding,
+        dilation,
+        groups,
+        in_zero_point,
+        weight_zero_point,
+        bias_scale,
+        output_scale,
+        output_zero_point,
+        out_multiplier,
+        out_shift,
+        out);
+    return;
+  }
+
   bool optimized = 0;

   if ((input.scalar_type() == ScalarType::Char) ||
diff --git a/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_out.cpp
index a5d503853c4..1b4870d5f1b 100644
--- a/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_out.cpp
+++ b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_out.cpp
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include

 #define ALIGN_PTR(x, bytes) ((((unsigned)(x)) + (bytes - 1)) & (~(bytes - 1)))

@@ -435,9 +436,32 @@ void quantized_conv2d_nhwc_out(
     const Tensor& bias_scale,
     double output_scale,
     int64_t output_zero_point,
-    __ET_UNUSED const Tensor& out_multiplier,
-    __ET_UNUSED const Tensor& out_shift,
+    const Tensor& out_multiplier,
+    const Tensor& out_shift,
     Tensor& out) {
+  // Handle W8A16 heterogeneous type (int16_t activations, int8_t weights)
+  if (out.scalar_type() == ::executorch::aten::ScalarType::Short &&
+      input.scalar_type() == ::executorch::aten::ScalarType::Short &&
+      weight.scalar_type() == ::executorch::aten::ScalarType::Char) {
+    ::impl::generic::native::quantized_conv2d_nhwc_out(
+        ctx,
+        input,
+        weight,
+        bias,
+        stride,
+        padding,
+        dilation,
+        groups,
+        in_zero_point,
+        weight_zero_point,
+        bias_scale,
+        output_scale,
+        output_zero_point,
+        out_multiplier,
+        out_shift,
+        out);
+    return;
+  }

   const float bias_scale_float = bias_scale.const_data_ptr<float>()[0];
   const int32_t weight_zero_point_int =
       weight_zero_point.const_data_ptr<int32_t>()[0];

@@ -502,8 +526,31 @@ void quantized_conv2d_nhwc_per_tensor_out(
     __ET_UNUSED int64_t out_multiplier,
     __ET_UNUSED int64_t out_shift,
     Tensor& out) {
-  bool optimized = 0;
+  // Handle W8A16 heterogeneous type (int16_t activations, int8_t weights)
+  if (out.scalar_type() == ::executorch::aten::ScalarType::Short &&
+      input.scalar_type() == ::executorch::aten::ScalarType::Short &&
+      weight.scalar_type() == ::executorch::aten::ScalarType::Char) {
+    ::impl::generic::native::quantized_conv2d_nhwc_per_tensor_out(
+        ctx,
+        input,
+        weight,
+        bias,
+        stride,
+        padding,
+        dilation,
+        groups,
+        in_zero_point,
+        weight_zero_point,
+        bias_scale,
+        output_scale,
+        output_zero_point,
+        out_multiplier,
+        out_shift,
+        out);
+    return;
+  }
+
+  bool optimized = 0;

   if ((input.scalar_type() == ScalarType::Char) ||
       (input.scalar_type() == ScalarType::Byte))
     optimized = 1;

diff --git a/backends/cadence/hifi/operators/targets.bzl b/backends/cadence/hifi/operators/targets.bzl
index ca36347da30..c993745c4c0 100644
--- a/backends/cadence/hifi/operators/targets.bzl
+++ b/backends/cadence/hifi/operators/targets.bzl
@@ -65,7 +65,6 @@ OPERATORS = [
     "ne",
     "permute_copy",
     "pow",
-    "quantized_conv2d_nchw_out",
     "quantized_conv2d_nchw_asym8sxsym8s_asym8s_per_tensor_out",
     "quantized_conv2d_nchw_asym8uxsym8u_asym8u_per_tensor_out",
     "quantized_conv1d_ncl_asym8sxsym8s_asym8s_per_tensor_out",
@@ -74,7 +73,6 @@ OPERATORS = [
     "quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out",
     "quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out",
     "quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out",
-    "quantized_conv2d_nhwc_out",
     "quantized_conv2d_nhwc_asym8sxsym8s_asym8s_per_tensor_out",
     "quantized_conv2d_nhwc_asym8uxsym8u_asym8u_per_tensor_out",
     "quantized_conv1d_nlc_asym8sxsym8s_asym8s_per_tensor_out",
@@ -125,3 +123,7 @@ def define_common_targets():
     # quantized_linear_out and quantized_linear_per_tensor_out needs additional dependency for int16 support
     define_operator("quantized_linear_out", deps=["fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators/generic:op_quantized_linear"])
     define_operator("quantized_linear_per_tensor_out", deps=["fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators/generic:op_quantized_linear"])
+
+    # quantized_conv2d_nchw_out and quantized_conv2d_nhwc_out need additional dependency for int16 support
+    define_operator("quantized_conv2d_nchw_out", deps=["fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators/generic:op_quantized_conv2d"])
+    define_operator("quantized_conv2d_nhwc_out", deps=["fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators/generic:op_quantized_conv2d"])

diff --git a/backends/cadence/hifi/operators/tests/test_op_quantized_conv2d_out.cpp b/backends/cadence/hifi/operators/tests/test_op_quantized_conv2d_out.cpp
new file mode 100644
index 00000000000..70afc030b4c
--- /dev/null
+++ b/backends/cadence/hifi/operators/tests/test_op_quantized_conv2d_out.cpp
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace impl {
+namespace HiFi {
+namespace native {
+namespace {
+
+using ::executorch::aten::Scalar;
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::executorch::aten::TensorImpl;
+using ::executorch::runtime::Error;
+using ::executorch::runtime::KernelRuntimeContext;
+using ::executorch::runtime::runtime_init;
+using ::executorch::runtime::testing::TensorFactory;
+
+class HiFiQuantizedConv2dTest : public OperatorTest {
+ protected:
+  void quantized_conv2d_nchw_out(
+      const Tensor& input,
+      const Tensor& weight,
+      const Tensor& bias,
+      ::executorch::aten::IntArrayRef stride,
+      ::executorch::aten::IntArrayRef padding,
+      ::executorch::aten::IntArrayRef dilation,
+      int64_t groups,
+      int64_t in_zero_point,
+      const Tensor& weight_zero_point,
+      const Tensor& bias_scale,
+      double output_scale,
+      int64_t output_zero_point,
+      const Tensor& out_multiplier,
+      const Tensor& out_shift,
+      Tensor& output) {
+    return ::impl::HiFi::native::quantized_conv2d_nchw_out(
+        context_,
+        input,
+        weight,
+        bias,
+        stride,
+        padding,
+        dilation,
+        groups,
+        in_zero_point,
+        weight_zero_point,
+        bias_scale,
+        output_scale,
+        output_zero_point,
+        out_multiplier,
+        out_shift,
+        output);
+  }
+
+  void quantized_conv2d_nhwc_out(
+      const Tensor& input,
+      const Tensor& weight,
+      const Tensor& bias,
+      ::executorch::aten::IntArrayRef stride,
+      ::executorch::aten::IntArrayRef padding,
+      ::executorch::aten::IntArrayRef dilation,
+      int64_t groups,
+      int64_t in_zero_point,
+      const Tensor& weight_zero_point,
+      const Tensor& bias_scale,
+      double output_scale,
+      int64_t output_zero_point,
+      const Tensor& out_multiplier,
+      const Tensor& out_shift,
+      Tensor& output) {
+    return ::impl::HiFi::native::quantized_conv2d_nhwc_out(
+        context_,
+        input,
+        weight,
+        bias,
+        stride,
+        padding,
+        dilation,
+        groups,
+        in_zero_point,
+        weight_zero_point,
+        bias_scale,
+        output_scale,
+        output_zero_point,
+        out_multiplier,
+        out_shift,
+        output);
+  }
+};
+// Test quantized_conv2d_nchw_out with int16 activations and int8 weights
+TEST_F(HiFiQuantizedConv2dTest, QuantizedConv2dNchwInt16Test) {
+  TensorFactory<ScalarType::Short> tf_int16;
+  TensorFactory<ScalarType::Int> tf_int32;
+  TensorFactory<ScalarType::Char> tf_int8;
+  TensorFactory<ScalarType::Float> tf_float;
+
+  // Minimal test case: input [1, 2, 3, 3], kernel [1, 2, 2, 2] ->
+  // output [1, 1, 2, 2]. Small enough to verify by hand calculation.
+  //
+  // Input Channel 0 (3x3):    Input Channel 1 (3x3):
+  //   1 2 3                     1 1 1
+  //   4 5 6                     1 1 1
+  //   7 8 9                     1 1 1
+  //
+  // Weight Out Ch 0, In Ch 0:  Weight Out Ch 0, In Ch 1:
+  //   1 0                        1 1
+  //   0 1                        1 1
+  //
+  // Hand calculation for each output position:
+  //   (0,0): Ch0: 1*1+2*0+4*0+5*1=6,  Ch1: 1*1+1*1+1*1+1*1=4 -> 10
+  //   (0,1): Ch0: 2*1+3*0+5*0+6*1=8,  Ch1: 1*1+1*1+1*1+1*1=4 -> 12
+  //   (1,0): Ch0: 4*1+5*0+7*0+8*1=12, Ch1: 1*1+1*1+1*1+1*1=4 -> 16
+  //   (1,1): Ch0: 5*1+6*0+8*0+9*1=14, Ch1: 1*1+1*1+1*1+1*1=4 -> 18
+  Tensor input = tf_int16.make(
+      {1, 2, 3, 3},
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, // Channel 0
+       1, 1, 1, 1, 1, 1, 1, 1, 1}); // Channel 1
+  Tensor weight = tf_int8.make(
+      {1, 2, 2, 2},
+      {1, 0, 0, 1, // Out Ch 0, In Ch 0: diagonal pattern
+       1, 1, 1, 1}); // Out Ch 0, In Ch 1: all ones
+  Tensor bias = tf_int32.zeros({1});
+
+  // Output dimensions: (3-2)/1+1=2 for each spatial dimension
+  Tensor output = tf_int16.zeros({1, 1, 2, 2});
+
+  int64_t in_zero_point = 0;
+  Tensor weight_zero_point = tf_int32.make({1}, {0});
+  Tensor bias_scale = tf_float.make({1}, {1.0f});
+  double output_scale = 1.0;
+  int64_t output_zero_point = 0;
+  Tensor out_multiplier = tf_int32.make({1}, {1073741824}); // 0.5 * 2^31
+  Tensor out_shift = tf_int32.make({1}, {0});
+
+  std::array<int64_t, 2> stride_arr = {1, 1};
+  std::array<int64_t, 2> padding_arr = {0, 0};
+  std::array<int64_t, 2> dilation_arr = {1, 1};
+
+  ::executorch::aten::ArrayRef<int64_t> stride(stride_arr.data(), 2);
+  ::executorch::aten::ArrayRef<int64_t> padding(padding_arr.data(), 2);
+  ::executorch::aten::ArrayRef<int64_t> dilation(dilation_arr.data(), 2);
+
+  quantized_conv2d_nchw_out(
+      input,
+      weight,
+      bias,
+      stride,
+      padding,
+      dilation,
+      1, // groups
+      in_zero_point,
+      weight_zero_point,
+      bias_scale,
+      output_scale,
+      output_zero_point,
+      out_multiplier,
+      out_shift,
+      output);
+
+  Tensor expected = tf_int16.make({1, 1, 2, 2}, {10, 12, 16, 18});
+  EXPECT_TENSOR_EQ(output, expected);
+}
+
+// Test quantized_conv2d_nhwc_out with int16 activations and int8 weights
+TEST_F(HiFiQuantizedConv2dTest, QuantizedConv2dNhwcInt16Test) {
+  TensorFactory<ScalarType::Short> tf_int16;
+  TensorFactory<ScalarType::Int> tf_int32;
+  TensorFactory<ScalarType::Char> tf_int8;
+  TensorFactory<ScalarType::Float> tf_float;
+
+  // Minimal test case in NHWC format: input [1, 3, 3, 2], kernel
+  // [1, 2, 2, 2] -> output [1, 2, 2, 1]. Same values as the NCHW test,
+  // just a different layout.
+  //
+  // Input (H=3, W=3, C=2):
+  //   Position (h,w): [Ch0, Ch1]
+  //   (0,0): [1, 1]  (0,1): [2, 1]  (0,2): [3, 1]
+  //   (1,0): [4, 1]  (1,1): [5, 1]  (1,2): [6, 1]
+  //   (2,0): [7, 1]  (2,1): [8, 1]  (2,2): [9, 1]
+  //
+  // Weight (Out=1, H=2, W=2, In=2):
+  //   For output channel 0:
+  //   Position (h,w): [In0, In1]
+  //   (0,0): [1, 1]  (0,1): [0, 1]
+  //   (1,0): [0, 1]  (1,1): [1, 1]
+  //
+  // Hand calculation matches the NCHW test:
+  //   Output (0,0): 10, (0,1): 12, (1,0): 16, (1,1): 18
+  Tensor input = tf_int16.make(
+      {1, 3, 3, 2},
+      {1, 1, 2, 1, 3, 1, // Row 0: (Ch0, Ch1) pairs
+       4, 1, 5, 1, 6, 1, // Row 1
+       7, 1, 8, 1, 9, 1}); // Row 2
+  Tensor weight = tf_int8.make(
+      {1, 2, 2, 2},
+      {1, 1, 0, 1, // Row 0: (In0, In1) pairs
+       0, 1, 1, 1}); // Row 1
+  Tensor bias = tf_int32.zeros({1});
+
+  // Output dimensions: (3-2)/1+1=2 for each spatial dimension
+  Tensor output = tf_int16.zeros({1, 2, 2, 1});
+
+  int64_t in_zero_point = 0;
+  Tensor weight_zero_point = tf_int32.make({1}, {0});
+  Tensor bias_scale = tf_float.make({1}, {1.0f});
+  double output_scale = 1.0;
+  int64_t output_zero_point = 0;
+  Tensor out_multiplier = tf_int32.make({1}, {1073741824}); // 0.5 * 2^31
+  Tensor out_shift = tf_int32.make({1}, {0});
+
+  std::array<int64_t, 2> stride_arr = {1, 1};
+  std::array<int64_t, 2> padding_arr = {0, 0};
+  std::array<int64_t, 2> dilation_arr = {1, 1};
+
+  ::executorch::aten::ArrayRef<int64_t> stride(stride_arr.data(), 2);
+  ::executorch::aten::ArrayRef<int64_t> padding(padding_arr.data(), 2);
+  ::executorch::aten::ArrayRef<int64_t> dilation(dilation_arr.data(), 2);
+
+  quantized_conv2d_nhwc_out(
+      input,
+      weight,
+      bias,
+      stride,
+      padding,
+      dilation,
+      1, // groups
+      in_zero_point,
+      weight_zero_point,
+      bias_scale,
+      output_scale,
+      output_zero_point,
+      out_multiplier,
+      out_shift,
+      output);
+
+  Tensor expected = tf_int16.make({1, 2, 2, 1}, {10, 12, 16, 18});
+  EXPECT_TENSOR_EQ(output, expected);
+}
+
+} // namespace
+} // namespace native
+} // namespace HiFi
+} // namespace impl