From bff35a25d00581c72aa55a397544261bc030ba00 Mon Sep 17 00:00:00 2001 From: Hardik Sharma Date: Mon, 21 Apr 2025 23:11:42 -0700 Subject: [PATCH] Add tests for op_quantize_per_tensor + add checks for quant_min/max (#10300) Summary: HiFi4's quantize_per_tensor does not consider quant_min/max. Add checks that fail the kernel when quant_min/max do not equal the numeric limits of the output dtype. Reviewed By: mcremon-meta Differential Revision: D73268792 --- .../hifi/operators/op_quantize_per_tensor.cpp | 37 +++-- backends/cadence/hifi/operators/operators.h | 22 +++ .../tests/test_op_quantize_per_tensor.cpp | 139 ++++++++++++++++++ 3 files changed, 189 insertions(+), 9 deletions(-) create mode 100644 backends/cadence/hifi/operators/tests/test_op_quantize_per_tensor.cpp diff --git a/backends/cadence/hifi/operators/op_quantize_per_tensor.cpp b/backends/cadence/hifi/operators/op_quantize_per_tensor.cpp index 4f59ef0ea8a..cd596417916 100644 --- a/backends/cadence/hifi/operators/op_quantize_per_tensor.cpp +++ b/backends/cadence/hifi/operators/op_quantize_per_tensor.cpp @@ -6,18 +6,24 @@ * LICENSE file in the root directory of this source tree. */ +#include + +#include + #include +#include +#include #include -#include +#include namespace cadence { namespace impl { namespace HiFi { namespace native { -using executorch::aten::ScalarType; -using executorch::aten::Tensor; -using executorch::runtime::KernelRuntimeContext; +using ::executorch::aten::ScalarType; +using ::executorch::aten::Tensor; +using ::executorch::runtime::KernelRuntimeContext; // Quantize the input tensor (PT2 version). Note that quant_<min,max> are not // used in any computation. @@ -25,11 +31,21 @@ void quantize_per_tensor_out( KernelRuntimeContext& ctx, const Tensor& input, double scale, - int64_t zero_point, + const int64_t zero_point, __ET_UNUSED int64_t quant_min, __ET_UNUSED int64_t quant_max, - ScalarType dtype, + const ScalarType dtype, Tensor& out) { + // Add checks for dtype quant min/max bounds. 
+ ET_SWITCH_REALB_TYPES( + out.scalar_type(), ctx, "quantize_per_tensor", OUT_DTYPE, [&]() { + ET_KERNEL_CHECK( + ctx, + std::numeric_limits::min() == quant_min && + std::numeric_limits::max() == quant_max, + InvalidArgument, ); + }); + const float* input_data = input.const_data_ptr(); const size_t numel = out.numel(); if (out.scalar_type() == ScalarType::Byte) { @@ -55,10 +71,13 @@ void quantize_per_tensor_out( cadence::impl::HiFi::kernels::quantize( out_data, input_data, 1. / scale, zero_point, numel); } else { - ET_CHECK_MSG( + ET_KERNEL_CHECK_MSG( + ctx, false, - "Unhandled output dtype %hhd", - static_cast(out.scalar_type())); + InvalidType, + , + "Unhandled output dtype %s", + ::torch::executor::toString(out.scalar_type())); } } diff --git a/backends/cadence/hifi/operators/operators.h b/backends/cadence/hifi/operators/operators.h index 3ccab6a48d6..fdd87f833f9 100644 --- a/backends/cadence/hifi/operators/operators.h +++ b/backends/cadence/hifi/operators/operators.h @@ -11,3 +11,25 @@ #define ET_FORALL_CADENCE_QUANTIZED_TYPES(_) \ _(uint8_t, Byte) \ _(int8_t, Char) + +namespace cadence { +namespace impl { +namespace HiFi { +namespace native { + +// Quantize the input tensor (PT2 version). Note that quant_<min,max> are not +// used in any computation. +void quantize_per_tensor_out( + ::executorch::runtime::KernelRuntimeContext& ctx, + const ::executorch::aten::Tensor& input, + double scale, + int64_t zero_point, + int64_t quant_min, + int64_t quant_max, + ::executorch::aten::ScalarType dtype, + ::executorch::aten::Tensor& out); + +} // namespace native +} // namespace HiFi +} // namespace impl +} // namespace cadence diff --git a/backends/cadence/hifi/operators/tests/test_op_quantize_per_tensor.cpp b/backends/cadence/hifi/operators/tests/test_op_quantize_per_tensor.cpp new file mode 100644 index 00000000000..b84c81d1d2d --- /dev/null +++ b/backends/cadence/hifi/operators/tests/test_op_quantize_per_tensor.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (c) Meta Platforms, Inc. 
and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace cadence { +namespace impl { +namespace HiFi { +namespace native { +namespace { + +using ::executorch::aten::Scalar; +using ::executorch::aten::ScalarType; +using ::executorch::aten::Tensor; +using ::executorch::aten::TensorImpl; +using ::executorch::runtime::Error; +using ::executorch::runtime::KernelRuntimeContext; +using ::executorch::runtime::runtime_init; +using ::executorch::runtime::testing::TensorFactory; + +class HiFiQuantizePerTensorTest : public OperatorTest { + public: + protected: + void quantize_per_tensor_out( + const Tensor& input, + double scale, + int64_t zero_point, + __ET_UNUSED int64_t quant_min, + __ET_UNUSED int64_t quant_max, + ScalarType dtype, + Tensor& out) { + ::cadence::impl::HiFi::native::quantize_per_tensor_out( + context_, input, scale, zero_point, quant_min, quant_max, dtype, out); + } +}; + +TEST_F(HiFiQuantizePerTensorTest, ThrowKernelFailureForQuantMinMoreThanLimit) { + TensorFactory tf; + const std::vector sizes{4}; + constexpr ScalarType kOutDtype = ScalarType::Int; + TensorFactory tf_out; + Tensor out = tf_out.zeros(sizes); + // Some arbitrary values for scalar args. + constexpr double kScale = 0.01; + constexpr int64_t kZeroPoint = 32768; + // quant_min and quant_max are not used in the computation. + // However, the kernel should still throw a kernel failure error when + // quant_min > std::numeric_limits<int32_t>::min() or quant_max < + // std::numeric_limits<int32_t>::max(). 
+ constexpr int64_t kQuantMin = 10; + constexpr int64_t kQuantMax = std::numeric_limits::max(); + + ET_EXPECT_KERNEL_FAILURE( + context_, + quantize_per_tensor_out( + tf.make(sizes, {1, 2, 3, 4}), + kScale, + kZeroPoint, + kQuantMin, + kQuantMax, + kOutDtype, + out)); +} + +TEST_F(HiFiQuantizePerTensorTest, ThrowKernelFailureForQuantMaxLessThanLimit) { + TensorFactory tf; + const std::vector sizes{4}; + constexpr ScalarType kOutDtype = ScalarType::Int; + TensorFactory tf_out; + Tensor out = tf_out.zeros(sizes); + // Some arbitrary values for scalar args. + constexpr double kScale = 0.01; + constexpr int64_t kZeroPoint = 32768; + // quant_min and quant_max are not used in the computation. + // However, the kernel should still throw a kernel failure error when + // quant_min > std::numeric_limits<int32_t>::min() or quant_max < + // std::numeric_limits<int32_t>::max(). + constexpr int64_t kQuantMin = std::numeric_limits::min(); + constexpr int64_t kQuantMax = 20; + + ET_EXPECT_KERNEL_FAILURE( + context_, + quantize_per_tensor_out( + tf.make(sizes, {1, 2, 3, 4}), + kScale, + kZeroPoint, + kQuantMin, + kQuantMax, + kOutDtype, + out)); +} + +TEST_F(HiFiQuantizePerTensorTest, CheckSingleElementQuantize) { + TensorFactory tf; + const std::vector sizes{1}; + constexpr ScalarType kOutDtype = ScalarType::Int; + TensorFactory tf_out; + Tensor out = tf_out.zeros(sizes); + // Some arbitrary values for scalar args. 
+ constexpr double kScale = 0.01; + constexpr int64_t kZeroPoint = 32768; + constexpr int64_t kQuantMin = std::numeric_limits::min(); + constexpr int64_t kQuantMax = std::numeric_limits::max(); + constexpr float kInputValue = 100.0f; + constexpr int32_t kExpectedOutputValue = + static_cast(kInputValue / kScale + kZeroPoint); + + quantize_per_tensor_out( + tf.make(sizes, {kInputValue}), + kScale, + kZeroPoint, + kQuantMin, + kQuantMax, + kOutDtype, + out); + EXPECT_TENSOR_EQ(out, tf_out.make(sizes, {kExpectedOutputValue})); +} + +} // namespace +} // namespace native +} // namespace HiFi +} // namespace impl +} // namespace cadence