From a5d755e381f0101024981a4da50ebcd121fd0bfd Mon Sep 17 00:00:00 2001
From: Matthias Cremon
Date: Fri, 5 Sep 2025 17:24:33 -0700
Subject: [PATCH] Use OSS kernels everywhere (#13884)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/13884

As titled

Reviewed By: zonglinpeng

Differential Revision: D81203389
---
 backends/cadence/hifi/kernels/targets.bzl      |  1 +
 .../operators/op_quantized_conv_nchw_out.cpp   | 13 +++++++------
 .../cadence/reference/kernels/kernels.cpp      | 19 ++++++++++++++-----
 .../cadence/reference/kernels/targets.bzl      |  1 +
 4 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/backends/cadence/hifi/kernels/targets.bzl b/backends/cadence/hifi/kernels/targets.bzl
index 596acc90791..fe5feed69b5 100644
--- a/backends/cadence/hifi/kernels/targets.bzl
+++ b/backends/cadence/hifi/kernels/targets.bzl
@@ -16,6 +16,7 @@ def define_common_targets():
         compatible_with = ["ovr_config//cpu:xtensa"],
         visibility = [
             "//executorch/backends/cadence/...",
+            "@EXECUTORCH_CLIENTS",
         ],
         exported_deps = [
             "fbsource//third-party/nnlib-hifi4/xa_nnlib:libxa_nnlib_common",
diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nchw_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv_nchw_out.cpp
index 297fd30e446..fbc97a4c37b 100644
--- a/backends/cadence/hifi/operators/op_quantized_conv_nchw_out.cpp
+++ b/backends/cadence/hifi/operators/op_quantized_conv_nchw_out.cpp
@@ -12,16 +12,17 @@
 
 #define ALIGN_PTR(x, bytes) ((((unsigned)(x)) + (bytes - 1)) & (~(bytes - 1)))
 
-using Tensor = executorch::aten::Tensor;
-using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
-using ScalarType = executorch::aten::ScalarType;
-using ::executorch::aten::IntArrayRef;
-
 namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace native {
 
+using ::cadence::impl::HiFi::kernels::quantize;
+using ::executorch::aten::IntArrayRef;
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::torch::executor::KernelRuntimeContext;
+
 // This implements a generic 2d conv kernel that operates on raw pointers.
 // The version handles both quantized and fp32 convolutions.
 // The input is of shape [n x c x h x w]
@@ -145,7 +146,7 @@ __attribute__((noinline)) void conv2d_nchw_core_generic(
           if (quantized) {
             float val = bias_scale * acc;
             out_plane[_oh * ow + _ow] =
-                kernels::quantize(val, inv_out_scale, out_zero_point);
+                quantize(val, inv_out_scale, out_zero_point);
           } else {
             out_plane[_oh * ow + _ow] = acc;
           }
diff --git a/backends/cadence/reference/kernels/kernels.cpp b/backends/cadence/reference/kernels/kernels.cpp
index 9583ffc4a20..ad8746f51eb 100644
--- a/backends/cadence/reference/kernels/kernels.cpp
+++ b/backends/cadence/reference/kernels/kernels.cpp
@@ -7,10 +7,11 @@
  */
 
 #include <executorch/backends/cadence/reference/kernels/kernels.h>
-#include <math.h>
 #include <algorithm>
+#include <cmath>
 #include <cstring>
 #include <limits>
+
 namespace impl {
 namespace reference {
 namespace kernels {
@@ -18,10 +19,18 @@ namespace kernels {
 // Quantize a fp32 value to an int8_t/uint8_t value
 template <typename T>
 T quantize(const float x, float scale, int32_t zero_point) {
-  constexpr float min_val = std::numeric_limits<T>::min();
-  constexpr float max_val = std::numeric_limits<T>::max();
-  float tmp = roundf(x * scale + zero_point);
-  return std::max(std::min(tmp, max_val), min_val);
+  // constexpr float min_val = std::numeric_limits<T>::min();
+  // constexpr float max_val = std::numeric_limits<T>::max();
+  // float tmp = roundf(x * scale + zero_point);
+  // return std::max(std::min(tmp, max_val), min_val);
+  // Match Executorch CPU kernel implementation at
+  // https://fburl.com/code/fxizw6u6
+  int64_t qvalue;
+  qvalue = static_cast<int64_t>(zero_point + std::nearbyint(scale * x));
+
+  qvalue = std::max<int64_t>(qvalue, std::numeric_limits<T>::min());
+  qvalue = std::min<int64_t>(qvalue, std::numeric_limits<T>::max());
+  return static_cast<T>(qvalue);
 }
 
 // Quantize an fp32 array to an int8_t/uint8_t array
diff --git a/backends/cadence/reference/kernels/targets.bzl b/backends/cadence/reference/kernels/targets.bzl
index d3fe3fa39db..d50cfe8f130 100644
--- a/backends/cadence/reference/kernels/targets.bzl
+++ b/backends/cadence/reference/kernels/targets.bzl
@@ -10,6 +10,7 @@ def define_common_targets():
         ],
         visibility = [
             "//executorch/backends/cadence/...",
+            "@EXECUTORCH_CLIENTS",
        ],
         platforms = CXX,
     )