Commit a0bdd97

Author cmadhira@cadence.com committed:

Resolved all namespace issues. Changed Bits16 to UInt16 in the quantize and dequantize operators. Reduced the size of scratch memory in the mean operator.
1 parent 6ace5a3 commit a0bdd97

13 files changed: +118, -276 lines

backends/cadence/fusion_g3/operators/op_cat.cpp

Lines changed: 12 additions & 73 deletions

@@ -7,6 +7,7 @@
  */
 
 #include <executorch/backends/cadence/fusion_g3/operators/operators.h>
+#include <executorch/backends/cadence/fusion_g3/operators/xt_utils.h>
 
 #include <cstring>
 
@@ -16,7 +17,7 @@
 #include <executorch/kernels/portable/cpu/util/copy_ops_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 
-using ::executorch::aten::Scalar;
+using ::executorch::aten::ArrayRef;
 using ::executorch::aten::ScalarType;
 using ::executorch::aten::Tensor;
 using ::executorch::runtime::Error;
@@ -27,7 +28,6 @@ using ::executorch::runtime::KernelRuntimeContext;
  * updated to have support for below data types, these can be removed and
  * operator need to be updated accordingly
  */
-enum datatype { Ushort = 20, Uint = 23 };
 
 namespace cadence {
 namespace impl {
@@ -36,7 +36,7 @@ namespace native {
 
 Tensor& cat_out(
     KernelRuntimeContext& ctx,
-    ::executorch::aten::ArrayRef<Tensor> tensors,
+    ArrayRef<Tensor> tensors,
     int64_t dim,
     Tensor& out) {
   if (dim < 0) {
@@ -84,7 +84,7 @@ Tensor& cat_out(
   int inp_shapes_size[tensors.size()];
 
   int temp_sizes[tensors.size()][kTensorDimensionLimit];
-  ::executorch::aten::ArrayRef<Tensor::SizesType> temp_size;
+  ArrayRef<Tensor::SizesType> temp_size;
 
   for (int i = 0; i < tensors.size(); i++) {
     inp_tensors[i] = tensors[i].const_data_ptr<signed char>();
@@ -99,14 +99,19 @@ Tensor& cat_out(
 
   signed char* out_data = out.mutable_data_ptr<signed char>();
 
-  const ::executorch::aten::ArrayRef<Tensor::SizesType> out_size = out.sizes();
+  const ArrayRef<Tensor::SizesType> out_size = out.sizes();
   int out_shapes[kTensorDimensionLimit];
   for (int i = 0; i < out_size.size(); i++) // output shapes
   {
     out_shapes[i] = out_size[i];
   }
 
-  if (out.scalar_type() == ScalarType::Int) {
+  if ((out.scalar_type() == ScalarType::Int) ||
+      (out.scalar_type() == ScalarType::Short) ||
+      (out.scalar_type() == ScalarType::Char) ||
+      (out.scalar_type() == ScalarType::UInt32) ||
+      (out.scalar_type() == ScalarType::UInt16) ||
+      (out.scalar_type() == ScalarType::Byte)) {
    XT_KERNEL_CHECK(
        ctx,
        out,
@@ -118,73 +123,7 @@ Tensor& cat_out(
        inp_shapes_size[0],
        tensors.size(),
        (int)dim,
-       sizeof(int));
-  } else if (out.scalar_type() == ScalarType::Short) {
-    XT_KERNEL_CHECK(
-        ctx,
-        out,
-        xa_nn_cat,
-        out_data,
-        out_shapes,
-        inp_tensors,
-        inp_tensors_shapes,
-        inp_shapes_size[0],
-        tensors.size(),
-        (int)dim,
-        sizeof(short));
-  } else if (out.scalar_type() == ScalarType::Char) {
-    XT_KERNEL_CHECK(
-        ctx,
-        out,
-        xa_nn_cat,
-        out_data,
-        out_shapes,
-        inp_tensors,
-        inp_tensors_shapes,
-        inp_shapes_size[0],
-        tensors.size(),
-        (int)dim,
-        sizeof(char));
-  } else if (out.scalar_type() == (ScalarType)Uint) {
-    XT_KERNEL_CHECK(
-        ctx,
-        out,
-        xa_nn_cat,
-        out_data,
-        out_shapes,
-        inp_tensors,
-        inp_tensors_shapes,
-        inp_shapes_size[0],
-        tensors.size(),
-        (int)dim,
-        sizeof(int));
-  } else if (out.scalar_type() == (ScalarType)Ushort) {
-    XT_KERNEL_CHECK(
-        ctx,
-        out,
-        xa_nn_cat,
-        out_data,
-        out_shapes,
-        inp_tensors,
-        inp_tensors_shapes,
-        inp_shapes_size[0],
-        tensors.size(),
-        (int)dim,
-        sizeof(short));
-  } else if (out.scalar_type() == ScalarType::Byte) {
-    XT_KERNEL_CHECK(
-        ctx,
-        out,
-        xa_nn_cat,
-        out_data,
-        out_shapes,
-        inp_tensors,
-        inp_tensors_shapes,
-        inp_shapes_size[0],
-        tensors.size(),
-        (int)dim,
-        sizeof(char));
-
+       get_element_size(out.scalar_type()));
  } else {
    const size_t outer = executorch::runtime::getLeadingDims(out, dim);
    const size_t dim_stride = executorch::runtime::getTrailingDims(out, dim);
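
Note: the five dtype-specific xa_nn_cat branches collapse into a single call because the only thing that varied between them was the element width passed as the last argument. A minimal sketch of what a width helper along these lines must do — the real get_element_size lives in xt_utils.h, so the name and body here are illustrative, not the shipped code:

#include <cstdint>
#include <executorch/runtime/core/exec_aten/exec_aten.h>

using ::executorch::aten::ScalarType;

// Illustrative only: map each dtype handled by the consolidated branch
// above to its element width in bytes, mirroring the sizeof() values
// that the removed else-if chain passed explicitly.
inline int get_element_size_sketch(ScalarType t) {
  switch (t) {
    case ScalarType::Int:
    case ScalarType::UInt32:
      return sizeof(int32_t); // was sizeof(int)
    case ScalarType::Short:
    case ScalarType::UInt16:
      return sizeof(int16_t); // was sizeof(short)
    case ScalarType::Char:
    case ScalarType::Byte:
      return sizeof(int8_t); // was sizeof(char)
    default:
      return 0; // unsupported here; cat_out falls through to the portable path
  }
}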

backends/cadence/fusion_g3/operators/op_dequantize.cpp

Lines changed: 8 additions & 10 deletions

@@ -18,7 +18,6 @@
 #include <executorch/kernels/portable/cpu/util/reduce_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 
-using ::executorch::aten::Scalar;
 using ::executorch::aten::ScalarType;
 using ::executorch::aten::Tensor;
 using ::executorch::runtime::Error;
@@ -32,7 +31,7 @@ using optional = ::executorch::aten::optional<T>;
  * operator need to be updated accordingly
  */
 
-enum datatype { Ushort = 20, Bits4u = 21, Bits4 = 22 };
+enum datatype { Bits4u = 21, Bits4 = 22 };
 
 /**
  * For an input tensor, use the scale and zero_point arguments to quantize it.
@@ -57,9 +56,8 @@ void check_dequantize_per_tensor_args(
   ET_CHECK_MSG(
       input.scalar_type() == ScalarType::Byte ||
           input.scalar_type() == ScalarType::Char ||
-          input.scalar_type() == ScalarType::Bits16 ||
+          input.scalar_type() == ScalarType::UInt16 ||
           input.scalar_type() == ScalarType::Short ||
-          input.scalar_type() == (ScalarType)Ushort ||
          input.scalar_type() == (ScalarType)Bits4 ||
          input.scalar_type() == (ScalarType)Bits4u ||
          input.scalar_type() == ScalarType::Int,
@@ -154,7 +152,7 @@ Tensor& dequantize_impl(
          axis,
          zero_point_data,
          scale_data);
-    } else if (input.scalar_type() == (ScalarType)Ushort) {
+    } else if (input.scalar_type() == ScalarType::UInt16) {
      const uint16_t* input_data = input.const_data_ptr<uint16_t>();
      XT_KERNEL_CHECK(
          ctx,
@@ -236,7 +234,7 @@ Tensor& dequantize_impl(
        break;
      switch (input.scalar_type()) {
        ET_FORALL_INT_TYPES(ASYM_CALCULATE_INT_TYPE_TENSOR);
-        ASYM_CALCULATE_INT_TYPE_TENSOR(uint16_t, Bits16);
+        ASYM_CALCULATE_INT_TYPE_TENSOR(uint16_t, UInt16);
        default:
          ET_CHECK_MSG(
              false,
@@ -328,7 +326,7 @@ Tensor& dequantize_impl(
        break;
      switch (input.scalar_type()) {
        ET_FORALL_INT_TYPES(ASYM_CALCULATE_INT_TYPE_CHANNEL);
-        ASYM_CALCULATE_INT_TYPE_CHANNEL(uint16_t, Bits16);
+        ASYM_CALCULATE_INT_TYPE_CHANNEL(uint16_t, UInt16);
        default:
          ET_CHECK_MSG(
              false,
@@ -364,7 +362,7 @@ Tensor& dequantize_impl(
          input.dim(),
          axis,
          scale_data);
-    } else if (input.scalar_type() == (ScalarType)Ushort) {
+    } else if (input.scalar_type() == ScalarType::UInt16) {
      const uint16_t* input_data = input.const_data_ptr<uint16_t>();
      XT_KERNEL_CHECK(
          ctx,
@@ -442,7 +440,7 @@ Tensor& dequantize_impl(
        break;
      switch (input.scalar_type()) {
        ET_FORALL_INT_TYPES(SYM_CALCULATE_INT_TYPE_TENSOR);
-        SYM_CALCULATE_INT_TYPE_TENSOR(uint16_t, Bits16);
+        SYM_CALCULATE_INT_TYPE_TENSOR(uint16_t, UInt16);
        default:
          ET_CHECK_MSG(
              false,
@@ -534,7 +532,7 @@ Tensor& dequantize_impl(
        break;
      switch (input.scalar_type()) {
        ET_FORALL_INT_TYPES(SYM_CALCULATE_INT_TYPE_CHANNEL);
-        SYM_CALCULATE_INT_TYPE_CHANNEL(uint16_t, Bits16);
+        SYM_CALCULATE_INT_TYPE_CHANNEL(uint16_t, UInt16);
        default:
          ET_CHECK_MSG(
              false,
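
For context, these paths all implement standard affine dequantization, value = (q - zero_point) * scale, and the repeated rename (Bits16 to UInt16, plus dropping the custom Ushort tag) keys uint16_t data on ExecuTorch's native unsigned-16-bit dtype rather than ad-hoc enum values cast into ScalarType. A self-contained sketch of that arithmetic with made-up numbers, not taken from the kernels above:

#include <cstdint>
#include <cstdio>

// Standard affine dequantization: value = (q - zero_point) * scale.
// The element and parameters below are illustrative example values.
int main() {
  const uint16_t q = 40000;       // quantized element, now typed as UInt16
  const int64_t zero_point = 32768;
  const double scale = 0.01;
  const double value = (static_cast<int64_t>(q) - zero_point) * scale;
  std::printf("dequantized: %f\n", value); // prints 72.320000
  return 0;
}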

backends/cadence/fusion_g3/operators/op_div.cpp

Lines changed: 4 additions & 2 deletions

@@ -19,8 +19,10 @@
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <executorch/runtime/platform/assert.h>
 
+using ::executorch::aten::optional;
 using ::executorch::aten::Scalar;
 using ::executorch::aten::ScalarType;
+using ::executorch::aten::string_view;
 using ::executorch::aten::Tensor;
 using ::executorch::runtime::canCast;
 using ::executorch::runtime::Error;
@@ -230,7 +232,7 @@ Tensor& div_out_mode(
     KernelRuntimeContext& ctx,
     const Tensor& a,
     const Tensor& b,
-    ::executorch::aten::optional<::executorch::aten::string_view> mode,
+    optional<string_view> mode,
     Tensor& out) {
   if (!mode.has_value()) {
     return div_out(ctx, a, b, out);
@@ -546,7 +548,7 @@ Tensor& div_scalar_mode_out(
     KernelRuntimeContext& ctx,
     const Tensor& a,
     const Scalar& b,
-    ::executorch::aten::optional<::executorch::aten::string_view> mode,
+    optional<string_view> mode,
     Tensor& out) {
   if (!mode.has_value()) {
     return div_scalar_out(ctx, a, b, out);

backends/cadence/fusion_g3/operators/op_exp.cpp

Lines changed: 2 additions & 3 deletions

@@ -16,18 +16,17 @@
 #include <executorch/kernels/portable/cpu/pattern/pattern.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 
-using ::executorch::aten::Scalar;
 using ::executorch::aten::ScalarType;
 using ::executorch::aten::Tensor;
 using ::executorch::runtime::Error;
-using torch::executor::RuntimeContext;
+using ::executorch::runtime::KernelRuntimeContext;
 
 namespace cadence {
 namespace impl {
 namespace G3 {
 namespace native {
 
-Tensor& exp_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) {
+Tensor& exp_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
 #ifdef OP_ARG_CHECK
   ET_KERNEL_CHECK(
       ctx,

backends/cadence/fusion_g3/operators/op_mean.cpp

Lines changed: 12 additions & 9 deletions

@@ -16,7 +16,8 @@
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <executorch/runtime/platform/assert.h>
 
-using ::executorch::aten::Scalar;
+using ::executorch::aten::ArrayRef;
+using ::executorch::aten::optional;
 using ::executorch::aten::ScalarType;
 using ::executorch::aten::Tensor;
 using ::executorch::runtime::Error;
@@ -30,8 +31,7 @@ namespace native {
 int prepare_data(
     const Tensor& in,
     Tensor& out,
-    ::executorch::aten::optional<::executorch::aten::ArrayRef<int64_t>>
-        dim_list,
+    optional<ArrayRef<int64_t>> dim_list,
     int* inp_shape,
     int* out_shape,
     int* p_axis,
@@ -62,10 +62,9 @@ int prepare_data(
 Tensor& mean_dim_out(
     KernelRuntimeContext& ctx,
     const Tensor& in,
-    ::executorch::aten::optional<::executorch::aten::ArrayRef<int64_t>>
-        dim_list,
+    optional<ArrayRef<int64_t>> dim_list,
     bool keepdim,
-    ::executorch::aten::optional<ScalarType> dtype,
+    optional<ScalarType> dtype,
     Tensor& out) {
   (void)ctx;
 
@@ -141,11 +140,15 @@ Tensor& mean_dim_out(
     out_shape[0] = 1;
   }
 
-  int scratch_size = 1;
-  for (int i = 0; i < num_inp_dims; i++) {
-    scratch_size *= inp_shape[i];
+  int inp_shape_max = inp_shape[p_axis[0]];
+  for (int i = 1; i < num_axis_dims; i++) {
+    if (inp_shape[p_axis[i]] > inp_shape_max) {
+      inp_shape_max = inp_shape[p_axis[i]];
+    }
  }
 
+  int scratch_size = in.numel() / inp_shape_max;
+
  executorch::runtime::Result<void*> temp_mem =
      ctx.allocate_temp(scratch_size * sizeof(float));
 
backends/cadence/fusion_g3/operators/op_native_layer_norm.cpp

Lines changed: 8 additions & 7 deletions

@@ -18,9 +18,10 @@
 #include <executorch/kernels/portable/cpu/vec_ops.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 
-using Tensor = ::executorch::aten::Tensor;
-using ScalarType = ::executorch::aten::ScalarType;
-using IntArrayRef = ::executorch::aten::ArrayRef<int64_t>;
+using ::executorch::aten::IntArrayRef;
+using ::executorch::aten::optional;
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
 using ::executorch::runtime::Error;
 using ::executorch::runtime::KernelRuntimeContext;
 
@@ -35,8 +36,8 @@ template <typename CTYPE>
 void layer_norm(
     const Tensor& input,
     IntArrayRef normalized_shape,
-    const ::executorch::aten::optional<Tensor>& weight,
-    const ::executorch::aten::optional<Tensor>& bias,
+    const optional<Tensor>& weight,
+    const optional<Tensor>& bias,
     CTYPE eps,
     Tensor& out,
     Tensor& mean,
@@ -112,8 +113,8 @@ std::tuple<Tensor&, Tensor&, Tensor&> native_layer_norm_out(
     KernelRuntimeContext& ctx,
     const Tensor& input,
     IntArrayRef normalized_shape,
-    const ::executorch::aten::optional<Tensor>& weight,
-    const ::executorch::aten::optional<Tensor>& bias,
+    const optional<Tensor>& weight,
+    const optional<Tensor>& bias,
     double eps,
     Tensor& out,
     Tensor& mean_out,
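
The signature cleanup above leaves the math untouched; for reference, a minimal sketch of the standard per-element layer-norm formula the template computes over each normalized slice (the function and parameter names here are illustrative, not the kernel's):

#include <cmath>

// Illustrative per-element layer norm: normalize with the slice's mean
// and variance, stabilized by eps, then apply the optional affine weight
// and bias (taken as 1 and 0 when absent).
float layer_norm_elem(
    float x, float mean, float var, float eps, float weight, float bias) {
  return (x - mean) / std::sqrt(var + eps) * weight + bias;
}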
