diff --git a/backends/cadence/common/xt_macros.h b/backends/cadence/common/xt_macros.h
new file mode 100644
index 00000000000..0d1ee414082
--- /dev/null
+++ b/backends/cadence/common/xt_macros.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include
+
+#define XT_KERNEL_CHECK(ctx, out, kernel, ...)                    \
+  {                                                               \
+    const auto ret = kernel(__VA_ARGS__);                         \
+    ET_KERNEL_CHECK_MSG(                                          \
+        ctx,                                                      \
+        ret == 0,                                                 \
+        InvalidArgument,                                          \
+        out,                                                      \
+        "Failed to run kernel: " #kernel "(" #__VA_ARGS__ ")");   \
+  }
diff --git a/backends/cadence/fusion_g3/operators/op_add.cpp b/backends/cadence/fusion_g3/operators/op_add.cpp
index f6de58e9ac7..b78cc33890b 100644
--- a/backends/cadence/fusion_g3/operators/op_add.cpp
+++ b/backends/cadence/fusion_g3/operators/op_add.cpp
@@ -10,7 +10,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_cat.cpp b/backends/cadence/fusion_g3/operators/op_cat.cpp
index 3ad958b04a6..0c83ebaf0ad 100644
--- a/backends/cadence/fusion_g3/operators/op_cat.cpp
+++ b/backends/cadence/fusion_g3/operators/op_cat.cpp
@@ -13,7 +13,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_clamp.cpp b/backends/cadence/fusion_g3/operators/op_clamp.cpp
index ffae66af0ef..8eed6b681c2 100644
--- a/backends/cadence/fusion_g3/operators/op_clamp.cpp
+++ b/backends/cadence/fusion_g3/operators/op_clamp.cpp
@@ -15,7 +15,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_dequantize.cpp b/backends/cadence/fusion_g3/operators/op_dequantize.cpp
index 27c374e1a1c..537e3f04ae0 100644
--- a/backends/cadence/fusion_g3/operators/op_dequantize.cpp
+++ b/backends/cadence/fusion_g3/operators/op_dequantize.cpp
@@ -14,7 +14,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_div.cpp b/backends/cadence/fusion_g3/operators/op_div.cpp
index fb5410a184e..62ebf303ebd 100644
--- a/backends/cadence/fusion_g3/operators/op_div.cpp
+++ b/backends/cadence/fusion_g3/operators/op_div.cpp
@@ -12,7 +12,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_exp.cpp b/backends/cadence/fusion_g3/operators/op_exp.cpp
index 84f5670b320..51d53067668 100644
--- a/backends/cadence/fusion_g3/operators/op_exp.cpp
+++ b/backends/cadence/fusion_g3/operators/op_exp.cpp
@@ -12,7 +12,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_hardtanh.cpp b/backends/cadence/fusion_g3/operators/op_hardtanh.cpp
index 21570d9cb54..b930098fb24 100644
--- a/backends/cadence/fusion_g3/operators/op_hardtanh.cpp
+++ b/backends/cadence/fusion_g3/operators/op_hardtanh.cpp
@@ -11,7 +11,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_lt.cpp b/backends/cadence/fusion_g3/operators/op_lt.cpp
index 2aafeb09ddd..850552f1d3b 100644
--- a/backends/cadence/fusion_g3/operators/op_lt.cpp
+++ b/backends/cadence/fusion_g3/operators/op_lt.cpp
@@ -10,7 +10,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 
 using ::executorch::aten::Scalar;
diff --git a/backends/cadence/fusion_g3/operators/op_mean.cpp b/backends/cadence/fusion_g3/operators/op_mean.cpp
index e18a21b4e0c..cefd45f6ef8 100644
--- a/backends/cadence/fusion_g3/operators/op_mean.cpp
+++ b/backends/cadence/fusion_g3/operators/op_mean.cpp
@@ -10,7 +10,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_mul.cpp b/backends/cadence/fusion_g3/operators/op_mul.cpp
index ea78ea11512..a4a230a374f 100644
--- a/backends/cadence/fusion_g3/operators/op_mul.cpp
+++ b/backends/cadence/fusion_g3/operators/op_mul.cpp
@@ -10,7 +10,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_native_layer_norm.cpp b/backends/cadence/fusion_g3/operators/op_native_layer_norm.cpp
index 9ae0a974df9..aa25cec9230 100644
--- a/backends/cadence/fusion_g3/operators/op_native_layer_norm.cpp
+++ b/backends/cadence/fusion_g3/operators/op_native_layer_norm.cpp
@@ -13,7 +13,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_permute_copy.cpp b/backends/cadence/fusion_g3/operators/op_permute_copy.cpp
index a8195b8bacd..5b1d079f92e 100644
--- a/backends/cadence/fusion_g3/operators/op_permute_copy.cpp
+++ b/backends/cadence/fusion_g3/operators/op_permute_copy.cpp
@@ -11,7 +11,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_quantize.cpp b/backends/cadence/fusion_g3/operators/op_quantize.cpp
index 670e6dcb358..26f90ddf5d1 100644
--- a/backends/cadence/fusion_g3/operators/op_quantize.cpp
+++ b/backends/cadence/fusion_g3/operators/op_quantize.cpp
@@ -14,7 +14,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_rsqrt.cpp b/backends/cadence/fusion_g3/operators/op_rsqrt.cpp
index 0988fe946e3..a9017397687 100644
--- a/backends/cadence/fusion_g3/operators/op_rsqrt.cpp
+++ b/backends/cadence/fusion_g3/operators/op_rsqrt.cpp
@@ -10,7 +10,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_sigmoid.cpp b/backends/cadence/fusion_g3/operators/op_sigmoid.cpp
index 08dc735a8af..0ded70926eb 100644
--- a/backends/cadence/fusion_g3/operators/op_sigmoid.cpp
+++ b/backends/cadence/fusion_g3/operators/op_sigmoid.cpp
@@ -12,7 +12,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_slice_copy.cpp b/backends/cadence/fusion_g3/operators/op_slice_copy.cpp
index 2bb42f49169..a97f9beb0c7 100644
--- a/backends/cadence/fusion_g3/operators/op_slice_copy.cpp
+++ b/backends/cadence/fusion_g3/operators/op_slice_copy.cpp
@@ -13,7 +13,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_softmax.cpp b/backends/cadence/fusion_g3/operators/op_softmax.cpp
index 37a0f227193..1faf41c94a8 100644
--- a/backends/cadence/fusion_g3/operators/op_softmax.cpp
+++ b/backends/cadence/fusion_g3/operators/op_softmax.cpp
@@ -12,7 +12,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_sqrt.cpp b/backends/cadence/fusion_g3/operators/op_sqrt.cpp
index cf68b95e7cf..584d94d78a1 100644
--- a/backends/cadence/fusion_g3/operators/op_sqrt.cpp
+++ b/backends/cadence/fusion_g3/operators/op_sqrt.cpp
@@ -12,7 +12,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_sub.cpp b/backends/cadence/fusion_g3/operators/op_sub.cpp
index b90b2fa2ed5..0b5bee9a651 100644
--- a/backends/cadence/fusion_g3/operators/op_sub.cpp
+++ b/backends/cadence/fusion_g3/operators/op_sub.cpp
@@ -10,7 +10,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_tanh.cpp b/backends/cadence/fusion_g3/operators/op_tanh.cpp
index 5015995e925..9686dc7caa9 100644
--- a/backends/cadence/fusion_g3/operators/op_tanh.cpp
+++ b/backends/cadence/fusion_g3/operators/op_tanh.cpp
@@ -12,7 +12,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_transpose_copy.cpp b/backends/cadence/fusion_g3/operators/op_transpose_copy.cpp
index d27c82b4ff3..4bff24cbfe5 100644
--- a/backends/cadence/fusion_g3/operators/op_transpose_copy.cpp
+++ b/backends/cadence/fusion_g3/operators/op_transpose_copy.cpp
@@ -11,7 +11,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/op_where.cpp b/backends/cadence/fusion_g3/operators/op_where.cpp
index 8d92be32419..4351e8bd684 100644
--- a/backends/cadence/fusion_g3/operators/op_where.cpp
+++ b/backends/cadence/fusion_g3/operators/op_where.cpp
@@ -10,7 +10,7 @@
 
 #include
 
-#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
+#include <executorch/backends/cadence/common/xt_macros.h>
 #include
 #include
diff --git a/backends/cadence/fusion_g3/operators/targets.bzl b/backends/cadence/fusion_g3/operators/targets.bzl
index bc0a01b4fe8..dd04bd1223b 100644
--- a/backends/cadence/fusion_g3/operators/targets.bzl
+++ b/backends/cadence/fusion_g3/operators/targets.bzl
@@ -10,8 +10,11 @@ def define_operator(name: str, deps: list[str] | None = None) -> None:
         "//executorch/kernels/portable/cpu/pattern:all_deps",
         "//executorch/runtime/kernel:kernel_includes",
         "//executorch/kernels/portable/cpu:scalar_utils",
+        "//executorch/backends/cadence/common:xt_macros",
         "fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib_common",
         "fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib",
+        ":operators_header",
+        ":xt_utils",
     ]
     if deps == None:
         deps = []
@@ -26,11 +29,6 @@ def define_operator(name: str, deps: list[str] | None = None) -> None:
         ],
         compatible_with = ["ovr_config//cpu:xtensa"],
         deps = deps + common_deps,
-        exported_deps = [
-            ":operators_header",
-            ":xt_macros",
-            ":xt_utils",
-        ],
     )
 
 OPERATORS = [
@@ -79,18 +77,6 @@ def define_common_targets():
         ],
     )
 
-    runtime.cxx_library(
-        name = "xt_macros",
-        exported_headers = ["xt_macros.h"],
-        visibility = [
-            "//executorch/backends/cadence/...",
-        ],
-        exported_deps = [
-            "//executorch/runtime/core/exec_aten:lib",
-            "//executorch/runtime/kernel:kernel_runtime_context",
-        ],
-    )
-
     runtime.cxx_library(
         name = "xt_utils",
         exported_headers = ["xt_utils.h"],
diff --git a/backends/cadence/fusion_g3/operators/xt_macros.h b/backends/cadence/fusion_g3/operators/xt_macros.h
deleted file mode 100644
index 4ab99380a2d..00000000000
--- a/backends/cadence/fusion_g3/operators/xt_macros.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#pragma once
-
-#include
-
-#define XT_KERNEL_CHECK(ctx, out, kernel, ...)                  \
-  const auto ret = kernel(__VA_ARGS__);                         \
-  ET_KERNEL_CHECK_MSG(                                          \
-      ctx,                                                      \
-      ret == 0,                                                 \
-      InvalidArgument,                                          \
-      out,                                                      \
-      "Failed to run kernel: " #kernel "(" #__VA_ARGS__ ")");
diff --git a/backends/cadence/hifi/operators/op_add.cpp b/backends/cadence/hifi/operators/op_add.cpp
index f51fddf31db..445cf3d9f2b 100644
--- a/backends/cadence/hifi/operators/op_add.cpp
+++ b/backends/cadence/hifi/operators/op_add.cpp
@@ -16,6 +16,8 @@
 #include
 #include
 
+#include <executorch/backends/cadence/common/xt_macros.h>
+
 using executorch::aten::Scalar;
 using executorch::aten::ScalarType;
 using executorch::aten::Tensor;
@@ -184,10 +186,25 @@ Tensor& add_out(
     for (int i = 0; i < b.dim(); i++)
       inp2_shape[i + off_b] = b.size(i);
 
-    xa_nn_elm_add_broadcast_4D_f32xf32_f32(
-        out_data, out_shape, a_data, inp1_shape, b_data, inp2_shape);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_elm_add_broadcast_4D_f32xf32_f32,
+        out_data,
+        out_shape,
+        a_data,
+        inp1_shape,
+        b_data,
+        inp2_shape);
   } else {
-    xa_nn_elm_add_f32xf32_f32(out_data, a_data, b_data, out.numel());
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_elm_add_f32xf32_f32,
+        out_data,
+        a_data,
+        b_data,
+        out.numel());
   }
 
   return out;
diff --git a/backends/cadence/hifi/operators/op_atan2.cpp b/backends/cadence/hifi/operators/op_atan2.cpp
index cd412bc5c39..1546c1e3a7f 100644
--- a/backends/cadence/hifi/operators/op_atan2.cpp
+++ b/backends/cadence/hifi/operators/op_atan2.cpp
@@ -12,6 +12,8 @@
 #include
 #include
 
+#include <executorch/backends/cadence/common/xt_macros.h>
+
 using executorch::aten::ScalarType;
 using executorch::aten::Tensor;
 using executorch::runtime::isFloatingType;
@@ -181,7 +183,15 @@ Tensor& atan2_out(
     for (int i = 0; i < b_dim; i++)
       p_inp1_shape[i] = b.size(i);
 
-    xa_nn_broadcast_32_32(ptr1, p_out_shape, pin1, p_inp1_shape, out_dim);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_32_32,
+        ptr1,
+        p_out_shape,
+        pin1,
+        p_inp1_shape,
+        out_dim);
 
     FLOAT32* __restrict__ p_out =
         (FLOAT32* __restrict__)out.mutable_data_ptr<float>();
diff --git a/backends/cadence/hifi/operators/op_bitwise_and.cpp b/backends/cadence/hifi/operators/op_bitwise_and.cpp
index 85db3b164d7..82b29b8bcd1 100644
--- a/backends/cadence/hifi/operators/op_bitwise_and.cpp
+++ b/backends/cadence/hifi/operators/op_bitwise_and.cpp
@@ -14,6 +14,8 @@
 #include
 #include
 
+#include <executorch/backends/cadence/common/xt_macros.h>
+
 using exec_aten::Scalar;
 using exec_aten::ScalarType;
 using exec_aten::Tensor;
@@ -96,14 +98,37 @@ Tensor& bitwise_and_Tensor_out(
     for (int i = 0; i < b_dim; i++)
       p_inp2_shape[i] = b.size(i);
 
-    xa_nn_broadcast_8_8(ptr1, p_out_shape, pin1, p_inp1_shape, out_dim);
-
-    xa_nn_broadcast_8_8(ptr2, p_out_shape, pin2, p_inp2_shape, out_dim);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_8_8,
+        ptr1,
+        p_out_shape,
+        pin1,
+        p_inp1_shape,
+        out_dim);
+
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_8_8,
+        ptr2,
+        p_out_shape,
+        pin2,
+        p_inp2_shape,
+        out_dim);
 
     const WORD8* __restrict__ p_inp1 = (const WORD8* __restrict__)ptr1;
     const WORD8* __restrict__ p_inp2 = (const WORD8* __restrict__)ptr2;
 
-    xa_nn_elm_logicaland_boolxbool_bool(p_out, p_inp1, p_inp2, num_elm);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_elm_logicaland_boolxbool_bool,
+        p_out,
+        p_inp1,
+        p_inp2,
+        num_elm);
   } else if (a_is_broadcasted && !b_is_broadcasted) {
     WORD8* __restrict__ ptr1 =
         (WORD8* __restrict__)kernels::allocate_temp_memory(ctx, num_elm);
@@ -124,11 +149,26 @@ Tensor& bitwise_and_Tensor_out(
     for (int i = 0; i < a_dim; i++)
       p_inp1_shape[i] = a.size(i);
 
-    xa_nn_broadcast_8_8(ptr1, p_out_shape, pin1, p_inp1_shape, out_dim);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_8_8,
+        ptr1,
+        p_out_shape,
+        pin1,
+        p_inp1_shape,
+        out_dim);
 
     const WORD8* __restrict__ p_inp1 = (const WORD8* __restrict__)ptr1;
 
-    xa_nn_elm_logicaland_boolxbool_bool(p_out, p_inp1, p_inp2, num_elm);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_elm_logicaland_boolxbool_bool,
+        p_out,
+        p_inp1,
+        p_inp2,
+        num_elm);
   } else if (!a_is_broadcasted && b_is_broadcasted) {
     WORD8* __restrict__ ptr1 =
         (WORD8* __restrict__)kernels::allocate_temp_memory(ctx, num_elm);
@@ -149,11 +189,26 @@ Tensor& bitwise_and_Tensor_out(
     for (int i = 0; i < b_dim; i++)
       p_inp2_shape[i] = b.size(i);
 
-    xa_nn_broadcast_8_8(ptr1, p_out_shape, pinp2, p_inp2_shape, out_dim);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_8_8,
+        ptr1,
+        p_out_shape,
+        pinp2,
+        p_inp2_shape,
+        out_dim);
 
     const WORD8* __restrict__ p_inp2 = (const WORD8* __restrict__)ptr1;
 
-    xa_nn_elm_logicaland_boolxbool_bool(p_out, p_inp1, p_inp2, num_elm);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_elm_logicaland_boolxbool_bool,
+        p_out,
+        p_inp1,
+        p_inp2,
+        num_elm);
   } else {
     const WORD8* __restrict__ p_inp1 =
         (const WORD8* __restrict__)a.const_data_ptr();
diff --git a/backends/cadence/hifi/operators/op_bitwise_or.cpp b/backends/cadence/hifi/operators/op_bitwise_or.cpp
index 3b717620202..9a9722aa6a0 100644
--- a/backends/cadence/hifi/operators/op_bitwise_or.cpp
+++ b/backends/cadence/hifi/operators/op_bitwise_or.cpp
@@ -14,6 +14,8 @@
 #include
 #include
 
+#include <executorch/backends/cadence/common/xt_macros.h>
+
 using exec_aten::Scalar;
 using exec_aten::ScalarType;
 using exec_aten::Tensor;
@@ -96,14 +98,37 @@ Tensor& bitwise_or_Tensor_out(
     for (int i = 0; i < b_dim; i++)
       p_inp2_shape[i] = b.size(i);
 
-    xa_nn_broadcast_8_8(ptr1, p_out_shape, pin1, p_inp1_shape, out_dim);
-
-    xa_nn_broadcast_8_8(ptr2, p_out_shape, pin2, p_inp2_shape, out_dim);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_8_8,
+        ptr1,
+        p_out_shape,
+        pin1,
+        p_inp1_shape,
+        out_dim);
+
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_8_8,
+        ptr2,
+        p_out_shape,
+        pin2,
+        p_inp2_shape,
+        out_dim);
 
     const WORD8* __restrict__ p_inp1 = (const WORD8* __restrict__)ptr1;
     const WORD8* __restrict__ p_inp2 = (const WORD8* __restrict__)ptr2;
 
-    xa_nn_elm_logicalor_boolxbool_bool(p_out, p_inp1, p_inp2, num_elm);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_elm_logicalor_boolxbool_bool,
+        p_out,
+        p_inp1,
+        p_inp2,
+        num_elm);
   } else if (a_is_broadcasted && !b_is_broadcasted) {
     WORD8* __restrict__ ptr1 =
         (WORD8* __restrict__)kernels::allocate_temp_memory(ctx, num_elm);
@@ -124,11 +149,26 @@ Tensor& bitwise_or_Tensor_out(
     for (int i = 0; i < a_dim; i++)
       p_inp1_shape[i] = a.size(i);
 
-    xa_nn_broadcast_8_8(ptr1, p_out_shape, pin1, p_inp1_shape, out_dim);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_8_8,
+        ptr1,
+        p_out_shape,
+        pin1,
+        p_inp1_shape,
+        out_dim);
 
     const WORD8* __restrict__ p_inp1 = (const WORD8* __restrict__)ptr1;
 
-    xa_nn_elm_logicalor_boolxbool_bool(p_out, p_inp1, p_inp2, num_elm);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_elm_logicalor_boolxbool_bool,
+        p_out,
+        p_inp1,
+        p_inp2,
+        num_elm);
   } else if (!a_is_broadcasted && b_is_broadcasted) {
     WORD8* __restrict__ ptr1 =
         (WORD8* __restrict__)kernels::allocate_temp_memory(ctx, num_elm);
@@ -149,11 +189,26 @@ Tensor& bitwise_or_Tensor_out(
     for (int i = 0; i < b_dim; i++)
       p_inp2_shape[i] = b.size(i);
 
-    xa_nn_broadcast_8_8(ptr1, p_out_shape, pinp2, p_inp2_shape, out_dim);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_8_8,
+        ptr1,
+        p_out_shape,
+        pinp2,
+        p_inp2_shape,
+        out_dim);
 
     const WORD8* __restrict__ p_inp2 = (const WORD8* __restrict__)ptr1;
 
-    xa_nn_elm_logicalor_boolxbool_bool(p_out, p_inp1, p_inp2, num_elm);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_elm_logicalor_boolxbool_bool,
+        p_out,
+        p_inp1,
+        p_inp2,
+        num_elm);
   } else {
     const WORD8* __restrict__ p_inp1 =
         (const WORD8* __restrict__)a.const_data_ptr();
diff --git a/backends/cadence/hifi/operators/op_bitwise_xor.cpp b/backends/cadence/hifi/operators/op_bitwise_xor.cpp
index d71045038e7..66b9e8cc7fe 100644
--- a/backends/cadence/hifi/operators/op_bitwise_xor.cpp
+++ b/backends/cadence/hifi/operators/op_bitwise_xor.cpp
@@ -14,6 +14,8 @@
 #include
 #include
 
+#include <executorch/backends/cadence/common/xt_macros.h>
+
 using exec_aten::Scalar;
 using exec_aten::ScalarType;
 using exec_aten::Tensor;
@@ -96,14 +98,37 @@ Tensor& bitwise_xor_Tensor_out(
     for (int i = 0; i < b_dim; i++)
       p_inp2_shape[i] = b.size(i);
 
-    xa_nn_broadcast_8_8(ptr1, p_out_shape, pin1, p_inp1_shape, out_dim);
-
-    xa_nn_broadcast_8_8(ptr2, p_out_shape, pin2, p_inp2_shape, out_dim);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_8_8,
+        ptr1,
+        p_out_shape,
+        pin1,
+        p_inp1_shape,
+        out_dim);
+
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_8_8,
+        ptr2,
+        p_out_shape,
+        pin2,
+        p_inp2_shape,
+        out_dim);
 
     const WORD8* __restrict__ p_inp1 = (const WORD8* __restrict__)ptr1;
     const WORD8* __restrict__ p_inp2 = (const WORD8* __restrict__)ptr2;
 
-    xa_nn_elm_logicalxor_boolxbool_bool(p_out, p_inp1, p_inp2, num_elm);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_elm_logicalxor_boolxbool_bool,
+        p_out,
+        p_inp1,
+        p_inp2,
+        num_elm);
   } else if (a_is_broadcasted && !b_is_broadcasted) {
     WORD8* __restrict__ ptr1 =
         (WORD8* __restrict__)kernels::allocate_temp_memory(ctx, num_elm);
@@ -124,11 +149,26 @@ Tensor& bitwise_xor_Tensor_out(
     for (int i = 0; i < a_dim; i++)
       p_inp1_shape[i] = a.size(i);
 
-    xa_nn_broadcast_8_8(ptr1, p_out_shape, pin1, p_inp1_shape, out_dim);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_8_8,
+        ptr1,
+        p_out_shape,
+        pin1,
+        p_inp1_shape,
+        out_dim);
 
     const WORD8* __restrict__ p_inp1 = (const WORD8* __restrict__)ptr1;
 
-    xa_nn_elm_logicalxor_boolxbool_bool(p_out, p_inp1, p_inp2, num_elm);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_elm_logicalxor_boolxbool_bool,
+        p_out,
+        p_inp1,
+        p_inp2,
+        num_elm);
   } else if (!a_is_broadcasted && b_is_broadcasted) {
     WORD8* __restrict__ ptr1 =
         (WORD8* __restrict__)kernels::allocate_temp_memory(ctx, num_elm);
@@ -149,11 +189,26 @@ Tensor& bitwise_xor_Tensor_out(
     for (int i = 0; i < b_dim; i++)
       p_inp2_shape[i] = b.size(i);
 
-    xa_nn_broadcast_8_8(ptr1, p_out_shape, pinp2, p_inp2_shape, out_dim);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_8_8,
+        ptr1,
+        p_out_shape,
+        pinp2,
+        p_inp2_shape,
+        out_dim);
 
     const WORD8* __restrict__ p_inp2 = (const WORD8* __restrict__)ptr1;
 
-    xa_nn_elm_logicalxor_boolxbool_bool(p_out, p_inp1, p_inp2, num_elm);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_elm_logicalxor_boolxbool_bool,
+        p_out,
+        p_inp1,
+        p_inp2,
+        num_elm);
   } else {
     const WORD8* __restrict__ p_inp1 =
         (const WORD8* __restrict__)a.const_data_ptr();
diff --git a/backends/cadence/hifi/operators/op_clamp.cpp b/backends/cadence/hifi/operators/op_clamp.cpp
index dd9bcff8a0c..e3d5c8914a4 100644
--- a/backends/cadence/hifi/operators/op_clamp.cpp
+++ b/backends/cadence/hifi/operators/op_clamp.cpp
@@ -20,6 +20,8 @@
 #include
 #include
 
+#include <executorch/backends/cadence/common/xt_macros.h>
+
 using executorch::aten::RuntimeContext;
 using executorch::aten::Scalar;
 using executorch::aten::ScalarType;
@@ -247,8 +249,15 @@ Tensor& clamp_Tensor_out(
         ctx, p_scratch != nullptr, MemoryAllocationFailed, out);
 
     const FLOAT32* p_brd_cond = (const FLOAT32*)p_scratch;
-    xa_nn_broadcast_32_32(
-        (WORD32*)p_brd_cond, out_shape, (WORD32*)inp_data, inp_shape, 4);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_32_32,
+        (WORD32*)p_brd_cond,
+        out_shape,
+        (WORD32*)inp_data,
+        inp_shape,
+        4);
 
     for (int i = 0; i < 4; i++) {
       inp_shape[i] = out_shape[i];
diff --git a/backends/cadence/hifi/operators/op_dequantize_per_tensor.cpp b/backends/cadence/hifi/operators/op_dequantize_per_tensor.cpp
index 30ce938e24d..c091d216556 100644
--- a/backends/cadence/hifi/operators/op_dequantize_per_tensor.cpp
+++ b/backends/cadence/hifi/operators/op_dequantize_per_tensor.cpp
@@ -10,6 +10,8 @@
 #include
 #include
 
+#include <executorch/backends/cadence/common/xt_macros.h>
+
 namespace impl {
 namespace HiFi {
 namespace native {
@@ -24,8 +26,8 @@ void dequantize_per_tensor_out(
     const Tensor& input,
     double scale,
     int64_t zero_point,
-    __ET_UNUSED int64_t quant_min,
-    __ET_UNUSED int64_t quant_max,
+    ET_UNUSED int64_t quant_min,
+    ET_UNUSED int64_t quant_max,
     ScalarType dtype,
     Tensor& out) {
   float* out_data = out.mutable_data_ptr<float>();
@@ -35,8 +37,15 @@ void dequantize_per_tensor_out(
     dequantize(out_data, input_data, scale, zero_point, numel);
   } else if (input.scalar_type() == ScalarType::Char) {
     const int8_t* input_data = input.const_data_ptr<int8_t>();
-    xa_nn_elm_dequantize_asym8s_f32(
-        out_data, input_data, zero_point, scale, numel);
+    XT_KERNEL_CHECK(
+        ctx,
+        ,
+        xa_nn_elm_dequantize_asym8s_f32,
+        out_data,
+        input_data,
+        zero_point,
+        scale,
+        numel);
   } else if (input.scalar_type() == ScalarType::Short) {
     const int16_t* input_data = input.const_data_ptr<int16_t>();
     dequantize(out_data, input_data, scale, zero_point, numel);
diff --git a/backends/cadence/hifi/operators/op_pow.cpp b/backends/cadence/hifi/operators/op_pow.cpp
index 65bb0fba56f..e5b31cc7731 100644
--- a/backends/cadence/hifi/operators/op_pow.cpp
+++ b/backends/cadence/hifi/operators/op_pow.cpp
@@ -9,13 +9,14 @@
 
 #include
 #include
-#include
 #include
 #include
 #include
 #include
 #include
 
+#include <executorch/backends/cadence/common/xt_macros.h>
+
 using executorch::aten::Scalar;
 using executorch::aten::ScalarType;
 using executorch::aten::Tensor;
@@ -121,9 +122,25 @@ Tensor& pow_Tensor_Tensor_out(
     for (int i = 0; i < b_dim; i++)
       p_inp2_shape[i] = b.size(i);
 
-    xa_nn_broadcast_32_32(ptr1, p_out_shape, pin1, p_inp1_shape, out_dim);
-
-    xa_nn_broadcast_32_32(ptr2, p_out_shape, pin2, p_inp2_shape, out_dim);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_32_32,
+        ptr1,
+        p_out_shape,
+        pin1,
+        p_inp1_shape,
+        out_dim);
+
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_32_32,
+        ptr2,
+        p_out_shape,
+        pin2,
+        p_inp2_shape,
+        out_dim);
 
     FLOAT32* __restrict__ p_out =
         (FLOAT32* __restrict__)out.mutable_data_ptr<float>();
@@ -150,8 +167,15 @@ Tensor& pow_Tensor_Tensor_out(
     for (int i = 0; i < a_dim; i++)
       p_inp1_shape[i] = a.size(i);
 
-    xa_nn_broadcast_32_32(
-        (WORD32*)ptr1, p_out_shape, (WORD32*)pin1, p_inp1_shape, out_dim);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_32_32,
+        (WORD32*)ptr1,
+        p_out_shape,
+        (WORD32*)pin1,
+        p_inp1_shape,
+        out_dim);
 
     FLOAT32* __restrict__ p_out =
         (FLOAT32* __restrict__)out.mutable_data_ptr<float>();
@@ -179,7 +203,15 @@ Tensor& pow_Tensor_Tensor_out(
     for (int i = 0; i < b_dim; i++)
       p_inp1_shape[i] = b.size(i);
 
-    xa_nn_broadcast_32_32(ptr1, p_out_shape, pin1, p_inp1_shape, out_dim);
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_broadcast_32_32,
+        ptr1,
+        p_out_shape,
+        pin1,
+        p_inp1_shape,
+        out_dim);
 
     FLOAT32* __restrict__ p_out =
         (FLOAT32* __restrict__)out.mutable_data_ptr<float>();
diff --git a/backends/cadence/hifi/operators/targets.bzl b/backends/cadence/hifi/operators/targets.bzl
index 1f9814c4a4e..a25dfd1bcbc 100644
--- a/backends/cadence/hifi/operators/targets.bzl
+++ b/backends/cadence/hifi/operators/targets.bzl
@@ -16,7 +16,8 @@ def define_operator(name: str, deps: list[str] | None = None) -> None:
         "//executorch/kernels/portable/cpu/util:elementwise_util",
         "//executorch/kernels/portable/cpu/pattern:bitwise_op",
        "//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions",
-        "//executorch/kernels/portable/cpu/pattern:comparison_op"
+        "//executorch/kernels/portable/cpu/pattern:comparison_op",
+        "//executorch/backends/cadence/common:xt_macros"
     ]
     if deps == None:
         deps = []
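
Note for reviewers: below is a minimal sketch of how the relocated XT_KERNEL_CHECK macro is meant to be used by an operator. The surrounding function, tensor names, and the kernel_includes header are illustrative assumptions and are not part of this patch; the nnlib kernel xa_nn_elm_add_f32xf32_f32 and its argument order are taken from op_add.cpp above.

    #include <executorch/backends/cadence/common/xt_macros.h>
    #include <executorch/runtime/kernel/kernel_includes.h>

    using executorch::aten::Tensor;
    using executorch::runtime::KernelRuntimeContext;

    // Hypothetical float add operator; only the XT_KERNEL_CHECK usage is the point.
    Tensor& example_add_out(
        KernelRuntimeContext& ctx,
        const Tensor& a,
        const Tensor& b,
        Tensor& out) {
      const float* a_data = a.const_data_ptr<float>();
      const float* b_data = b.const_data_ptr<float>();
      float* out_data = out.mutable_data_ptr<float>();

      // Expands to: call the kernel, then ET_KERNEL_CHECK_MSG(ctx, ret == 0,
      // InvalidArgument, out, ...), i.e. on a non-zero nnlib return code the
      // macro logs the failed kernel name and arguments and returns `out` early.
      XT_KERNEL_CHECK(
          ctx, out, xa_nn_elm_add_f32xf32_f32, out_data, a_data, b_data, out.numel());

      return out;
    }

The braces added around the macro body in the new backends/cadence/common/xt_macros.h keep `ret` scoped to one expansion, so (unlike the deleted fusion_g3 version) the macro can be used more than once in the same block, as the HiFi bitwise and pow operators above now do.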