From bff35a25d00581c72aa55a397544261bc030ba00 Mon Sep 17 00:00:00 2001
From: Hardik Sharma <hardiksharma@meta.com>
Date: Mon, 21 Apr 2025 23:11:42 -0700
Subject: [PATCH] Add tests for op_quantize_per_tensor + add checks for
 quant_min/max (#10300)

Summary:

HiFi4's quantize_per_tensor ignores quant_min/quant_max in its computation. Add a check that fails the kernel when quant_min/quant_max do not match the numeric limits of the output dtype.

Reviewed By: mcremon-meta

Differential Revision: D73268792
---
 .../hifi/operators/op_quantize_per_tensor.cpp |  37 +++--
 backends/cadence/hifi/operators/operators.h   |  22 +++
 .../tests/test_op_quantize_per_tensor.cpp     | 139 ++++++++++++++++++
 3 files changed, 189 insertions(+), 9 deletions(-)
 create mode 100644 backends/cadence/hifi/operators/tests/test_op_quantize_per_tensor.cpp
diff --git a/backends/cadence/hifi/operators/op_quantize_per_tensor.cpp b/backends/cadence/hifi/operators/op_quantize_per_tensor.cpp
index 4f59ef0ea8a..cd596417916 100644
--- a/backends/cadence/hifi/operators/op_quantize_per_tensor.cpp
+++ b/backends/cadence/hifi/operators/op_quantize_per_tensor.cpp
@@ -6,18 +6,24 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <xa_type_def.h>
+
+#include <xa_nnlib_kernels_api.h>
+
 #include <executorch/backends/cadence/hifi/kernels/kernels.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
-#include <xa_nnlib_kernels_api.h>
+#include <executorch/runtime/kernel/kernel_runtime_context.h>
 
 namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace native {
 
-using executorch::aten::ScalarType;
-using executorch::aten::Tensor;
-using executorch::runtime::KernelRuntimeContext;
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::executorch::runtime::KernelRuntimeContext;
 
 // Quantize the input tensor (PT2 version). Note that quant_<min,max> are not
 // used in any computation.
@@ -25,11 +31,21 @@ void quantize_per_tensor_out(
     KernelRuntimeContext& ctx,
     const Tensor& input,
     double scale,
-    int64_t zero_point,
+    const int64_t zero_point,
     __ET_UNUSED int64_t quant_min,
     __ET_UNUSED int64_t quant_max,
-    ScalarType dtype,
+    const ScalarType dtype,
     Tensor& out) {
+  // quant_min/quant_max must equal the numeric limits of the output dtype.
+  ET_SWITCH_REALB_TYPES(
+      out.scalar_type(), ctx, "quantize_per_tensor", OUT_DTYPE, [&]() {
+        ET_KERNEL_CHECK(
+            ctx,
+            std::numeric_limits<OUT_DTYPE>::min() == quant_min &&
+                std::numeric_limits<OUT_DTYPE>::max() == quant_max,
+            InvalidArgument, );
+      });
+
   const float* input_data = input.const_data_ptr<float>();
   const size_t numel = out.numel();
   if (out.scalar_type() == ScalarType::Byte) {
@@ -55,10 +71,13 @@ void quantize_per_tensor_out(
     cadence::impl::HiFi::kernels::quantize<int32_t>(
         out_data, input_data, 1. / scale, zero_point, numel);
   } else {
-    ET_CHECK_MSG(
+    ET_KERNEL_CHECK_MSG(
+        ctx,
         false,
-        "Unhandled output dtype %hhd",
-        static_cast<int8_t>(out.scalar_type()));
+        InvalidType,
+        ,
+        "Unhandled output dtype %s",
+        ::torch::executor::toString(out.scalar_type()));
   }
 }
 
diff --git a/backends/cadence/hifi/operators/operators.h b/backends/cadence/hifi/operators/operators.h
index 3ccab6a48d6..fdd87f833f9 100644
--- a/backends/cadence/hifi/operators/operators.h
+++ b/backends/cadence/hifi/operators/operators.h
@@ -11,3 +11,25 @@
 #define ET_FORALL_CADENCE_QUANTIZED_TYPES(_) \
   _(uint8_t, Byte)                           \
   _(int8_t, Char)
+
+namespace cadence {
+namespace impl {
+namespace HiFi {
+namespace native {
+
+// Quantize the input tensor (PT2 version). quant_<min,max> are not used in the
+// computation, but must equal the numeric limits of the output dtype.
+void quantize_per_tensor_out(
+    ::executorch::runtime::KernelRuntimeContext& ctx,
+    const ::executorch::aten::Tensor& input,
+    double scale,
+    int64_t zero_point,
+    int64_t quant_min,
+    int64_t quant_max,
+    ::executorch::aten::ScalarType dtype,
+    ::executorch::aten::Tensor& out);
+
+} // namespace native
+} // namespace HiFi
+} // namespace impl
+} // namespace cadence
diff --git a/backends/cadence/hifi/operators/tests/test_op_quantize_per_tensor.cpp b/backends/cadence/hifi/operators/tests/test_op_quantize_per_tensor.cpp
new file mode 100644
index 00000000000..b84c81d1d2d
--- /dev/null
+++ b/backends/cadence/hifi/operators/tests/test_op_quantize_per_tensor.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <gtest/gtest.h>
+#include <sys/times.h>
+#include <xtensa/sim.h>
+
+#include <executorch/kernels/test/TestUtil.h>
+#include <executorch/runtime/core/error.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
+#include <executorch/runtime/platform/runtime.h>
+
+#include <executorch/backends/cadence/hifi/operators/operators.h>
+
+namespace cadence {
+namespace impl {
+namespace HiFi {
+namespace native {
+namespace {
+
+using ::executorch::aten::Scalar;
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::executorch::aten::TensorImpl;
+using ::executorch::runtime::Error;
+using ::executorch::runtime::KernelRuntimeContext;
+using ::executorch::runtime::runtime_init;
+using ::executorch::runtime::testing::TensorFactory;
+
+// Fixture whose helper forwards to the HiFi kernel, supplying context_.
+class HiFiQuantizePerTensorTest : public OperatorTest {
+ protected:
+  void quantize_per_tensor_out(
+      const Tensor& input,
+      double scale,
+      int64_t zero_point,
+      int64_t quant_min,
+      int64_t quant_max,
+      ScalarType dtype,
+      Tensor& out) {
+    ::cadence::impl::HiFi::native::quantize_per_tensor_out(
+        context_, input, scale, zero_point, quant_min, quant_max, dtype, out);
+  }
+};
+
+TEST_F(HiFiQuantizePerTensorTest, ThrowKernelFailureForQuantMinMoreThanLimit) {
+  TensorFactory<ScalarType::Float> tf;
+  const std::vector<int> sizes{4};
+  constexpr ScalarType kOutDtype = ScalarType::Int;
+  TensorFactory<kOutDtype> tf_out;
+  Tensor out = tf_out.zeros(sizes);
+  // Some arbitrary values for scalar args.
+  constexpr double kScale = 0.01;
+  constexpr int64_t kZeroPoint = 32768;
+  // quant_min and quant_max are not used in the computation. However, the
+  // kernel must still report a kernel failure when either differs from the
+  // numeric limits of the output type (int32_t for kOutDtype). Here only
+  // quant_min is off: it is larger than std::numeric_limits<int32_t>::min().
+  constexpr int64_t kQuantMin = 10;
+  constexpr int64_t kQuantMax = std::numeric_limits<int32_t>::max();
+
+  ET_EXPECT_KERNEL_FAILURE(
+      context_,
+      quantize_per_tensor_out(
+          tf.make(sizes, {1, 2, 3, 4}),
+          kScale,
+          kZeroPoint,
+          kQuantMin,
+          kQuantMax,
+          kOutDtype,
+          out));
+}
+
+TEST_F(HiFiQuantizePerTensorTest, ThrowKernelFailureForQuantMaxLessThanLimit) {
+  TensorFactory<ScalarType::Float> tf;
+  const std::vector<int> sizes{4};
+  constexpr ScalarType kOutDtype = ScalarType::Int;
+  TensorFactory<kOutDtype> tf_out;
+  Tensor out = tf_out.zeros(sizes);
+  // Some arbitrary values for scalar args.
+  constexpr double kScale = 0.01;
+  constexpr int64_t kZeroPoint = 32768;
+  // quant_min and quant_max are not used in the computation. However, the
+  // kernel must still report a kernel failure when either differs from the
+  // numeric limits of the output type (int32_t for kOutDtype). Here only
+  // quant_max is off: it is smaller than std::numeric_limits<int32_t>::max().
+  constexpr int64_t kQuantMin = std::numeric_limits<int32_t>::min();
+  constexpr int64_t kQuantMax = 20;
+
+  ET_EXPECT_KERNEL_FAILURE(
+      context_,
+      quantize_per_tensor_out(
+          tf.make(sizes, {1, 2, 3, 4}),
+          kScale,
+          kZeroPoint,
+          kQuantMin,
+          kQuantMax,
+          kOutDtype,
+          out));
+}
+
+TEST_F(HiFiQuantizePerTensorTest, CheckSingleElementQuantize) {
+  TensorFactory<ScalarType::Float> tf;
+  const std::vector<int> sizes{1};
+  constexpr ScalarType kOutDtype = ScalarType::Int;
+  TensorFactory<kOutDtype> tf_out;
+  Tensor out = tf_out.zeros(sizes);
+  // Arbitrary scalar args; quant_<min,max> match the int32_t limits.
+  constexpr double kScale = 0.01;
+  constexpr int64_t kZeroPoint = 32768;
+  constexpr int64_t kQuantMin = std::numeric_limits<int32_t>::min();
+  constexpr int64_t kQuantMax = std::numeric_limits<int32_t>::max();
+  constexpr float kInputValue = 100.0f;
+  constexpr int32_t kExpectedOutputValue =
+      static_cast<int32_t>(kInputValue / kScale + kZeroPoint);
+
+  quantize_per_tensor_out(
+      tf.make(sizes, {kInputValue}),
+      kScale,
+      kZeroPoint,
+      kQuantMin,
+      kQuantMax,
+      kOutDtype,
+      out);
+  EXPECT_TENSOR_EQ(out, tf_out.make(sizes, {kExpectedOutputValue}));
+}
+
+} // namespace
+} // namespace native
+} // namespace HiFi
+} // namespace impl
+} // namespace cadence