Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 32 additions & 19 deletions backends/cadence/fusion_g3/operators/op_add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,34 +66,20 @@ Tensor& add_out(
// @lint-ignore CLANGTIDY facebook-hte-CArray
static constexpr const char op_name[] = "add.out";

const exec_aten::ArrayRef<Tensor::SizesType> a_size = a.sizes();
const exec_aten::ArrayRef<Tensor::SizesType> b_size = b.sizes();
const exec_aten::ArrayRef<Tensor::SizesType> out_size = out.sizes();

int kTensorDimensionLimit = 5;

int inp1_shape[kTensorDimensionLimit];
int inp2_shape[kTensorDimensionLimit];
int out_shape[kTensorDimensionLimit];

/*find broadcast*/
const bool a_is_broadcasted = !out.sizes().equals(a.sizes());
const bool b_is_broadcasted = !out.sizes().equals(b.sizes());
const bool broadcast = (a_is_broadcasted || b_is_broadcasted);
bool broadcast = 0;

int max_dim = a.dim() > b.dim() ? a.dim() : b.dim();
max_dim = out.dim() > max_dim ? out.dim() : max_dim;

bool optimized = 1;

if ((a.dim() == 0) || (b.dim() == 0)) {
optimized = 0;
}

if ((broadcast == 1) && (max_dim > kTensorDimensionLimit)) {
optimized = 0;
}

/* Added change to work with input dimensions more than 5 */
for (int i = 0; i < max_dim; i++) {
out_shape[i] = 1;
inp1_shape[i] = 1;
Expand All @@ -114,14 +100,33 @@ Tensor& add_out(
inp2_shape[i + offset_inp2] = b.size(i);
}

/*find broadcast*/
for (int i = 0; i < out.dim(); i++) {
if (((inp1_shape[i]) != (out_shape[i])) ||
((inp2_shape[i]) != (out_shape[i]))) {
broadcast = 1;
}
}

if ((broadcast == 1) && (max_dim > kTensorDimensionLimit)) {
optimized = 0;
}

if ((compute_type == ScalarType::Int) && (optimized)) {
const int* const inp1_data = a.const_data_ptr<int>();
const int* const inp2_data = b.const_data_ptr<int>();
int* const out_data = out.mutable_data_ptr<int>();

int alpha_val;
torch::executor::native::utils::extract_scalar(alpha, &alpha_val);
if (broadcast) {

if ((a.numel() == 1) && (alpha_val == 1)) {
xa_nn_elm_add_scalar_32x32_32(
out_data, inp2_data, inp1_data[0], alpha_val, out.numel());
} else if (b.numel() == 1) {
xa_nn_elm_add_scalar_32x32_32(
out_data, inp1_data, inp2_data[0], alpha_val, out.numel());
} else if (broadcast) {
xa_nn_elm_add_broadcast_5D_32x32_32(
out_data,
out_shape,
Expand All @@ -143,7 +148,13 @@ Tensor& add_out(
float alpha_val;
torch::executor::native::utils::extract_scalar(alpha, &alpha_val);

if (broadcast) {
if ((a.numel() == 1) && (alpha_val == 1.0)) {
xa_nn_elm_add_scalar_f32xf32_f32(
out_data, inp2_data, inp1_data[0], alpha_val, out.numel());
} else if (b.numel() == 1) {
xa_nn_elm_add_scalar_f32xf32_f32(
out_data, inp1_data, inp2_data[0], alpha_val, out.numel());
} else if (broadcast) {
xa_nn_elm_add_broadcast_5D_f32xf32_f32(
out_data,
out_shape,
Expand Down Expand Up @@ -176,7 +187,6 @@ Tensor& add_out(
torch::executor::native::utils::SupportedTensorDtypes::REALHBBF16);
});
}

return out;
}

Expand Down Expand Up @@ -234,6 +244,7 @@ Tensor& add_scalar_out(

xa_nn_elm_add_scalar_32x32_32(
out_data, inp1_data, inp2_val, alpha_val, out.numel());

} else if (compute_type == ScalarType::Float) {
const float* const inp1_data = a.const_data_ptr<float>();
float inp2_val;
Expand All @@ -246,6 +257,7 @@ Tensor& add_scalar_out(

xa_nn_elm_add_scalar_f32xf32_f32(
out_data, inp1_data, inp2_val, alpha_val, out.numel());

} else {
ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
torch::executor::native::utils::
Expand All @@ -266,6 +278,7 @@ Tensor& add_scalar_out(
SAME_AS_COMMON);
});
}

return out;
}

Expand Down
9 changes: 2 additions & 7 deletions backends/cadence/fusion_g3/operators/op_cat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,7 @@ using torch::executor::KernelRuntimeContext;
* updated to have support for below data types, these can be removed and
* operator need to be updated accordingly
*/
enum datatype {
Ushort = 20,
Uint = 23,
};
enum datatype { Ushort = 20, Uint = 23 };

namespace cadence {
namespace impl {
Expand Down Expand Up @@ -118,8 +115,7 @@ Tensor& cat_out(
tensors.size(),
(int)dim,
sizeof(char));
}
if (out.scalar_type() == (ScalarType)Uint) {
} else if (out.scalar_type() == (ScalarType)Uint) {
xa_nn_cat(
out_data,
out_shapes,
Expand Down Expand Up @@ -164,7 +160,6 @@ Tensor& cat_out(
if (all_1d_empty) {
return out;
}

const size_t outer = executorch::runtime::getLeadingDims(out, dim);
const size_t dim_stride = executorch::runtime::getTrailingDims(out, dim);
const size_t ninputs = tensors.size();
Expand Down
47 changes: 28 additions & 19 deletions backends/cadence/fusion_g3/operators/op_mul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,34 +58,20 @@ Tensor& mul_out(
// @lint-ignore CLANGTIDY facebook-hte-CArray
static constexpr const char op_name[] = "mul.out";

const exec_aten::ArrayRef<Tensor::SizesType> a_size = a.sizes();
const exec_aten::ArrayRef<Tensor::SizesType> b_size = b.sizes();
const exec_aten::ArrayRef<Tensor::SizesType> out_size = out.sizes();

int kTensorDimensionLimit = 5;

int inp1_shape[kTensorDimensionLimit];
int inp2_shape[kTensorDimensionLimit];
int out_shape[kTensorDimensionLimit];

/*find broadcast*/
const bool a_is_broadcasted = !out.sizes().equals(a.sizes());
const bool b_is_broadcasted = !out.sizes().equals(b.sizes());
const bool broadcast = (a_is_broadcasted || b_is_broadcasted);
bool broadcast = 0;

int max_dim = a.dim() > b.dim() ? a.dim() : b.dim();
max_dim = out.dim() > max_dim ? out.dim() : max_dim;

bool optimized = 1;

if ((a.dim() == 0) || (b.dim() == 0)) {
optimized = 0;
}

if ((broadcast == 1) && (max_dim > kTensorDimensionLimit)) {
optimized = 0;
}

/* Added change to work with input dimensions more than 5 */
for (int i = 0; i < max_dim; i++) {
out_shape[i] = 1;
inp1_shape[i] = 1;
Expand All @@ -106,12 +92,30 @@ Tensor& mul_out(
inp2_shape[i + offset_inp2] = b.size(i);
}

/*find broadcast*/
for (int i = 0; i < out.dim(); i++) {
if (((inp1_shape[i]) != (out_shape[i])) ||
((inp2_shape[i]) != (out_shape[i]))) {
broadcast = 1;
}
}

if ((broadcast == 1) && (max_dim > kTensorDimensionLimit)) {
optimized = 0;
}

if ((compute_type == ScalarType::Int) && (optimized)) {
const int* const inp1_data = a.const_data_ptr<int>();
const int* const inp2_data = b.const_data_ptr<int>();
int* const out_data = out.mutable_data_ptr<int>();

if (broadcast) {
if (a.numel() == 1) {
xa_nn_elm_mul_scalar_32x32_32(
out_data, inp2_data, inp1_data[0], out.numel());
} else if (b.numel() == 1) {
xa_nn_elm_mul_scalar_32x32_32(
out_data, inp1_data, inp2_data[0], out.numel());
} else if (broadcast) {
xa_nn_elm_mul_broadcast_5D_32x32_32(
out_data,
out_shape,
Expand All @@ -128,7 +132,13 @@ Tensor& mul_out(
const float* const inp2_data = b.const_data_ptr<float>();
float* const out_data = out.mutable_data_ptr<float>();

if (broadcast) {
if (a.numel() == 1) {
xa_nn_elm_mul_scalar_f32xf32_f32(
out_data, inp2_data, inp1_data[0], out.numel());
} else if (b.numel() == 1) {
xa_nn_elm_mul_scalar_f32xf32_f32(
out_data, inp1_data, inp2_data[0], out.numel());
} else if (broadcast) {
xa_nn_elm_mul_broadcast_5D_f32xf32_f32(
out_data,
out_shape,
Expand Down Expand Up @@ -157,7 +167,6 @@ Tensor& mul_out(
torch::executor::native::utils::SupportedTensorDtypes::REALHBBF16);
});
}

return out;
}

Expand Down
13 changes: 6 additions & 7 deletions backends/cadence/fusion_g3/operators/op_quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ enum datatype { Ushort = 20, Bits4u = 21, Bits4 = 22 };
*/
namespace cadence {
namespace impl {
namespace FusionG3 {
namespace G3 {
namespace native {

namespace {
Expand Down Expand Up @@ -364,8 +364,8 @@ void quantize_impl(

#undef ASYM_CALCULATE_FLOAT_TYPE_TENSOR
#undef ASYM_CALCULATE_FLOAT_TYPE_CHANNEL
#undef ASYM_ASYM_QUANTIZE_IMPL_CHANNEL_TENSOR
#undef ASYM_ASYM_QUANTIZE_IMPL_CHANNEL_CHANNEL
#undef ASYM_QUANTIZE_IMPL_TENSOR
#undef ASYM_QUANTIZE_IMPL_CHANNEL
}
} else {
if (out.scalar_type() == ScalarType::Byte) {
Expand Down Expand Up @@ -549,8 +549,8 @@ void quantize_impl(
}
#undef SYM_CALCULATE_FLOAT_TYPE_TENSOR
#undef SYM_CALCULATE_FLOAT_TYPE_CHANNEL
#undef SYM_ASYM_QUANTIZE_IMPL_CHANNEL_TENSOR
#undef SYM_ASYM_QUANTIZE_IMPL_CHANNEL_CHANNEL
#undef SYM_QUANTIZE_IMPL_TENSOR
#undef SYM_QUANTIZE_IMPL_CHANNEL
}
}
}
Expand Down Expand Up @@ -719,7 +719,6 @@ Tensor& quantize_per_channel_out(
axis_ptr,
(int)quant_min,
(int)quant_max);

return out;
}

Expand Down Expand Up @@ -802,6 +801,6 @@ Tensor& quantize_per_token_out(
}

} // namespace native
} // namespace FusionG3
} // namespace G3
} // namespace impl
} // namespace cadence
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: missing new line at end of file

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: missing new line at end of file

None of the operator files have a newline at the end of the file. A missing trailing newline is not reported as a linter error.