From 33e7495b16cf376c059e47005ae6cc2873ed3eae Mon Sep 17 00:00:00 2001
From: "cmadhira@cadence.com" <cmadhira@cadence.com>
Date: Mon, 2 Dec 2024 18:27:13 +0530
Subject: [PATCH 1/2] Removed unnecessary conditions in op_add and op_mul
 Added scalar function call in op_add and op_mul
 Updated undefining of macros in op_quantize
 Updated elseif() instead of if() in op_cat

Signed-off-by: cmadhira@cadence.com
---
 .../cadence/fusion_g3/operators/op_add.cpp    | 51 ++++++++++++-------
 .../cadence/fusion_g3/operators/op_cat.cpp    | 10 ++--
 .../cadence/fusion_g3/operators/op_mul.cpp    | 47 ++++++++++-------
 .../fusion_g3/operators/op_quantize.cpp       | 14 ++---
 4 files changed, 70 insertions(+), 52 deletions(-)

diff --git a/backends/cadence/fusion_g3/operators/op_add.cpp b/backends/cadence/fusion_g3/operators/op_add.cpp
index 9537cbacb70..b5b5baf9a8c 100644
--- a/backends/cadence/fusion_g3/operators/op_add.cpp
+++ b/backends/cadence/fusion_g3/operators/op_add.cpp
@@ -66,34 +66,20 @@ Tensor& add_out(
   // @lint-ignore CLANGTIDY facebook-hte-CArray
   static constexpr const char op_name[] = "add.out";
 
-  const exec_aten::ArrayRef<exec_aten::SizesType> a_size = a.sizes();
-  const exec_aten::ArrayRef<exec_aten::SizesType> b_size = b.sizes();
-  const exec_aten::ArrayRef<exec_aten::SizesType> out_size = out.sizes();
-
   int kTensorDimensionLimit = 5;
 
   int inp1_shape[kTensorDimensionLimit];
   int inp2_shape[kTensorDimensionLimit];
   int out_shape[kTensorDimensionLimit];
 
-  /*find broadcast*/
-  const bool a_is_broadcasted = !out.sizes().equals(a.sizes());
-  const bool b_is_broadcasted = !out.sizes().equals(b.sizes());
-  const bool broadcast = (a_is_broadcasted || b_is_broadcasted);
+  bool broadcast = 0;
 
   int max_dim = a.dim() > b.dim() ? a.dim() : b.dim();
   max_dim = out.dim() > max_dim ? out.dim() : max_dim;
 
   bool optimized = 1;
 
-  if ((a.dim() == 0) || (b.dim() == 0)) {
-    optimized = 0;
-  }
-
-  if ((broadcast == 1) && (max_dim > kTensorDimensionLimit)) {
-    optimized = 0;
-  }
-
+  /* Added change to work with input dimensions more than 5 */
   for (int i = 0; i < max_dim; i++) {
     out_shape[i] = 1;
     inp1_shape[i] = 1;
@@ -114,6 +100,18 @@ Tensor& add_out(
     inp2_shape[i + offset_inp2] = b.size(i);
   }
 
+  /*find broadcast*/
+  for (int i = 0; i < out.dim(); i++) {
+    if (((inp1_shape[i]) != (out_shape[i])) ||
+        ((inp2_shape[i]) != (out_shape[i]))) {
+      broadcast = 1;
+    }
+  }
+
+  if ((broadcast == 1) && (max_dim > kTensorDimensionLimit)) {
+    optimized = 0;
+  }
+
   if ((compute_type == ScalarType::Int) && (optimized)) {
     const int* const inp1_data = a.const_data_ptr<int>();
     const int* const inp2_data = b.const_data_ptr<int>();
@@ -121,7 +119,14 @@ Tensor& add_out(
     int alpha_val;
     torch::executor::native::utils::extract_scalar(alpha, &alpha_val);
 
-    if (broadcast) {
+
+    if ((a.numel() == 1) && (alpha_val == 1)) {
+      xa_nn_elm_add_scalar_32x32_32(
+          out_data, inp2_data, inp1_data[0], alpha_val, out.numel());
+    } else if (b.numel() == 1) {
+      xa_nn_elm_add_scalar_32x32_32(
+          out_data, inp1_data, inp2_data[0], alpha_val, out.numel());
+    } else if (broadcast) {
       xa_nn_elm_add_broadcast_5D_32x32_32(
           out_data,
           out_shape,
@@ -143,7 +148,13 @@ Tensor& add_out(
     float alpha_val;
     torch::executor::native::utils::extract_scalar(alpha, &alpha_val);
 
-    if (broadcast) {
+    if ((a.numel() == 1) && (alpha_val == 1.0)) {
+      xa_nn_elm_add_scalar_f32xf32_f32(
+          out_data, inp2_data, inp1_data[0], alpha_val, out.numel());
+    } else if (b.numel() == 1) {
+      xa_nn_elm_add_scalar_f32xf32_f32(
+          out_data, inp1_data, inp2_data[0], alpha_val, out.numel());
+    } else if (broadcast) {
       xa_nn_elm_add_broadcast_5D_f32xf32_f32(
           out_data,
           out_shape,
@@ -176,7 +187,6 @@ Tensor& add_out(
         torch::executor::native::utils::SupportedTensorDtypes::REALHBBF16);
     });
   }
-
   return out;
 }
 
@@ -234,6 +244,7 @@ Tensor& add_scalar_out(
 
     xa_nn_elm_add_scalar_32x32_32(
         out_data, inp1_data, inp2_val, alpha_val, out.numel());
+
   } else if (compute_type == ScalarType::Float) {
     const float* const inp1_data = a.const_data_ptr<float>();
     float inp2_val;
@@ -246,6 +257,7 @@ Tensor& add_scalar_out(
 
     xa_nn_elm_add_scalar_f32xf32_f32(
         out_data, inp1_data, inp2_val, alpha_val, out.numel());
+
   } else {
     ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
       torch::executor::native::utils::
@@ -266,6 +278,7 @@ Tensor& add_scalar_out(
             SAME_AS_COMMON);
     });
   }
+
   return out;
 }
 
diff --git a/backends/cadence/fusion_g3/operators/op_cat.cpp b/backends/cadence/fusion_g3/operators/op_cat.cpp
index 62bbb0c9d49..11bbcfece22 100644
--- a/backends/cadence/fusion_g3/operators/op_cat.cpp
+++ b/backends/cadence/fusion_g3/operators/op_cat.cpp
@@ -22,10 +22,7 @@ using torch::executor::KernelRuntimeContext;
  * updated to have support for below data types, these can be removed and
  * operator need to be updated accordingly */
 
-enum datatype {
-  Ushort = 20,
-  Uint = 23,
-};
+enum datatype { Ushort = 20, Uint = 23 };
 
 namespace cadence {
 namespace impl {
@@ -37,6 +34,7 @@ Tensor& cat_out(
     exec_aten::ArrayRef<Tensor> tensors,
     int64_t dim,
     Tensor& out) {
+
   if (dim < 0) {
     dim += out.dim();
   }
@@ -118,8 +116,7 @@ Tensor& cat_out(
         tensors.size(),
         (int)dim,
         sizeof(char));
-  }
-  if (out.scalar_type() == (ScalarType)Uint) {
+  } else if (out.scalar_type() == (ScalarType)Uint) {
     xa_nn_cat(
         out_data,
         out_shapes,
@@ -164,7 +161,6 @@ Tensor& cat_out(
   if (all_1d_empty) {
     return out;
   }
-
   const size_t outer = executorch::runtime::getLeadingDims(out, dim);
   const size_t dim_stride = executorch::runtime::getTrailingDims(out, dim);
   const size_t ninputs = tensors.size();
diff --git a/backends/cadence/fusion_g3/operators/op_mul.cpp b/backends/cadence/fusion_g3/operators/op_mul.cpp
index 31cd50314e1..914ecf9d7e4 100644
--- a/backends/cadence/fusion_g3/operators/op_mul.cpp
+++ b/backends/cadence/fusion_g3/operators/op_mul.cpp
@@ -58,34 +58,20 @@ Tensor& mul_out(
   // @lint-ignore CLANGTIDY facebook-hte-CArray
   static constexpr const char op_name[] = "mul.out";
 
-  const exec_aten::ArrayRef<exec_aten::SizesType> a_size = a.sizes();
-  const exec_aten::ArrayRef<exec_aten::SizesType> b_size = b.sizes();
-  const exec_aten::ArrayRef<exec_aten::SizesType> out_size = out.sizes();
-
   int kTensorDimensionLimit = 5;
 
   int inp1_shape[kTensorDimensionLimit];
   int inp2_shape[kTensorDimensionLimit];
   int out_shape[kTensorDimensionLimit];
 
-  /*find broadcast*/
-  const bool a_is_broadcasted = !out.sizes().equals(a.sizes());
-  const bool b_is_broadcasted = !out.sizes().equals(b.sizes());
-  const bool broadcast = (a_is_broadcasted || b_is_broadcasted);
+  bool broadcast = 0;
 
   int max_dim = a.dim() > b.dim() ? a.dim() : b.dim();
   max_dim = out.dim() > max_dim ? out.dim() : max_dim;
 
   bool optimized = 1;
 
-  if ((a.dim() == 0) || (b.dim() == 0)) {
-    optimized = 0;
-  }
-
-  if ((broadcast == 1) && (max_dim > kTensorDimensionLimit)) {
-    optimized = 0;
-  }
-
+  /* Added change to work with input dimensions more than 5 */
   for (int i = 0; i < max_dim; i++) {
     out_shape[i] = 1;
     inp1_shape[i] = 1;
@@ -106,12 +92,30 @@ Tensor& mul_out(
     inp2_shape[i + offset_inp2] = b.size(i);
   }
 
+  /*find broadcast*/
+  for (int i = 0; i < out.dim(); i++) {
+    if (((inp1_shape[i]) != (out_shape[i])) ||
+        ((inp2_shape[i]) != (out_shape[i]))) {
+      broadcast = 1;
+    }
+  }
+
+  if ((broadcast == 1) && (max_dim > kTensorDimensionLimit)) {
+    optimized = 0;
+  }
+
   if ((compute_type == ScalarType::Int) && (optimized)) {
     const int* const inp1_data = a.const_data_ptr<int>();
     const int* const inp2_data = b.const_data_ptr<int>();
     int* const out_data = out.mutable_data_ptr<int>();
 
-    if (broadcast) {
+    if (a.numel() == 1) {
+      xa_nn_elm_mul_scalar_32x32_32(
+          out_data, inp2_data, inp1_data[0], out.numel());
+    } else if (b.numel() == 1) {
+      xa_nn_elm_mul_scalar_32x32_32(
+          out_data, inp1_data, inp2_data[0], out.numel());
+    } else if (broadcast) {
       xa_nn_elm_mul_broadcast_5D_32x32_32(
           out_data,
           out_shape,
@@ -128,7 +132,13 @@ Tensor& mul_out(
     const float* const inp2_data = b.const_data_ptr<float>();
     float* const out_data = out.mutable_data_ptr<float>();
 
-    if (broadcast) {
+    if (a.numel() == 1) {
+      xa_nn_elm_mul_scalar_f32xf32_f32(
+          out_data, inp2_data, inp1_data[0], out.numel());
+    } else if (b.numel() == 1) {
+      xa_nn_elm_mul_scalar_f32xf32_f32(
+          out_data, inp1_data, inp2_data[0], out.numel());
+    } else if (broadcast) {
       xa_nn_elm_mul_broadcast_5D_f32xf32_f32(
           out_data,
           out_shape,
@@ -157,7 +167,6 @@ Tensor& mul_out(
         torch::executor::native::utils::SupportedTensorDtypes::REALHBBF16);
     });
   }
-
   return out;
 }
 
diff --git a/backends/cadence/fusion_g3/operators/op_quantize.cpp b/backends/cadence/fusion_g3/operators/op_quantize.cpp
index 2b8376dc8db..31b00c185ed 100644
--- a/backends/cadence/fusion_g3/operators/op_quantize.cpp
+++ b/backends/cadence/fusion_g3/operators/op_quantize.cpp
@@ -31,7 +31,7 @@ enum datatype { Ushort = 20, Bits4u = 21, Bits4 = 22 };
  */
 namespace cadence {
 namespace impl {
-namespace FusionG3 {
+namespace G3 {
 namespace native {
 namespace {
 
@@ -364,8 +364,8 @@ void quantize_impl(
 
 #undef ASYM_CALCULATE_FLOAT_TYPE_TENSOR
 #undef ASYM_CALCULATE_FLOAT_TYPE_CHANNEL
-#undef ASYM_ASYM_QUANTIZE_IMPL_CHANNEL_TENSOR
-#undef ASYM_ASYM_QUANTIZE_IMPL_CHANNEL_CHANNEL
+#undef ASYM_QUANTIZE_IMPL_TENSOR
+#undef ASYM_QUANTIZE_IMPL_CHANNEL
     }
   } else {
     if (out.scalar_type() == ScalarType::Byte) {
@@ -549,8 +549,8 @@ void quantize_impl(
   }
 #undef SYM_CALCULATE_FLOAT_TYPE_TENSOR
 #undef SYM_CALCULATE_FLOAT_TYPE_CHANNEL
-#undef SYM_ASYM_QUANTIZE_IMPL_CHANNEL_TENSOR
-#undef SYM_ASYM_QUANTIZE_IMPL_CHANNEL_CHANNEL
+#undef SYM_QUANTIZE_IMPL_TENSOR
+#undef SYM_QUANTIZE_IMPL_CHANNEL
   }
 }
 }
@@ -565,6 +565,7 @@ Tensor& quantize_per_tensor_out(
     int64_t quant_max,
     ScalarType dtype,
     Tensor& out) {
+
   torch::executor::Error err = resize_tensor(out, input.sizes());
   ET_CHECK_MSG(
       err == torch::executor::Error::Ok,
@@ -719,7 +720,6 @@ Tensor& quantize_per_channel_out(
       axis_ptr,
       (int)quant_min,
      (int)quant_max);
-
   return out;
 }
 
@@ -802,6 +802,6 @@ Tensor& quantize_per_token_out(
 }
 
 } // namespace native
-} // namespace FusionG3
+} // namespace G3
 } // namespace impl
 } // namespace cadence
\ No newline at end of file

From 0ef057f988dd5cb9d69355914484e320823fda4c Mon Sep 17 00:00:00 2001
From: "cmadhira@cadence.com" <cmadhira@cadence.com>
Date: Tue, 3 Dec 2024 10:17:23 +0530
Subject: [PATCH 2/2] Removed linter errors

Signed-off-by: cmadhira@cadence.com
---
 backends/cadence/fusion_g3/operators/op_cat.cpp      | 1 -
 backends/cadence/fusion_g3/operators/op_quantize.cpp | 1 -
 2 files changed, 2 deletions(-)

diff --git a/backends/cadence/fusion_g3/operators/op_cat.cpp b/backends/cadence/fusion_g3/operators/op_cat.cpp
index 11bbcfece22..7fae3fa29c4 100644
--- a/backends/cadence/fusion_g3/operators/op_cat.cpp
+++ b/backends/cadence/fusion_g3/operators/op_cat.cpp
@@ -34,7 +34,6 @@ Tensor& cat_out(
     exec_aten::ArrayRef<Tensor> tensors,
     int64_t dim,
     Tensor& out) {
-
   if (dim < 0) {
     dim += out.dim();
   }
diff --git a/backends/cadence/fusion_g3/operators/op_quantize.cpp b/backends/cadence/fusion_g3/operators/op_quantize.cpp
index 31b00c185ed..4b69c5d0f49 100644
--- a/backends/cadence/fusion_g3/operators/op_quantize.cpp
+++ b/backends/cadence/fusion_g3/operators/op_quantize.cpp
@@ -565,7 +565,6 @@ Tensor& quantize_per_tensor_out(
     int64_t quant_max,
     ScalarType dtype,
     Tensor& out) {
-
   torch::executor::Error err = resize_tensor(out, input.sizes());
   ET_CHECK_MSG(
       err == torch::executor::Error::Ok,