Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions backends/cadence/vision/operators/op_dequantize_per_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,25 +31,24 @@ void dequantize_per_tensor_out(

if (input.scalar_type() == ScalarType::Byte) {
const uint8_t* input_data = input.const_data_ptr<uint8_t>();
impl::vision::native::kernels::dequantize<uint8_t>(
kernels::dequantize<uint8_t>(
out_data, input_data, scale, zero_point, numel);
} else if (input.scalar_type() == ScalarType::Char) {
const int8_t* input_data = input.const_data_ptr<int8_t>();
impl::vision::native::kernels::dequantize<int8_t>(
out_data, input_data, scale, zero_point, numel);
kernels::dequantize<int8_t>(out_data, input_data, scale, zero_point, numel);
} else if (
input.scalar_type() == ScalarType::Bits16 ||
input.scalar_type() == ScalarType::UInt16) {
const uint16_t* input_data = input.const_data_ptr<uint16_t>();
impl::vision::native::kernels::dequantize<uint16_t>(
kernels::dequantize<uint16_t>(
out_data, input_data, scale, zero_point, numel);
} else if (input.scalar_type() == ScalarType::Short) {
const int16_t* input_data = input.const_data_ptr<int16_t>();
impl::vision::native::kernels::dequantize<int16_t>(
kernels::dequantize<int16_t>(
out_data, input_data, scale, zero_point, numel);
} else if (input.scalar_type() == ScalarType::Int) {
const int32_t* input_data = input.const_data_ptr<int32_t>();
impl::vision::native::kernels::dequantize<int32_t>(
kernels::dequantize<int32_t>(
out_data, input_data, scale, zero_point, numel);
} else {
ET_CHECK_MSG(
Expand Down
10 changes: 5 additions & 5 deletions backends/cadence/vision/operators/op_quantize_per_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,25 +33,25 @@ void quantize_per_tensor_out(

if (out.scalar_type() == ScalarType::Byte) {
uint8_t* out_data = out.mutable_data_ptr<uint8_t>();
impl::vision::native::kernels::quantize<uint8_t>(
kernels::quantize<uint8_t>(
out_data, input_data, 1. / scale, zero_point, numel);
} else if (out.scalar_type() == ScalarType::Char) {
int8_t* out_data = out.mutable_data_ptr<int8_t>();
impl::vision::native::kernels::quantize<int8_t>(
kernels::quantize<int8_t>(
out_data, input_data, 1. / scale, zero_point, numel);
} else if (
out.scalar_type() == ScalarType::Bits16 ||
out.scalar_type() == ScalarType::UInt16) {
uint16_t* out_data = out.mutable_data_ptr<uint16_t>();
impl::vision::native::kernels::quantize<uint16_t>(
kernels::quantize<uint16_t>(
out_data, input_data, 1. / scale, zero_point, numel);
} else if (out.scalar_type() == ScalarType::Short) {
int16_t* out_data = out.mutable_data_ptr<int16_t>();
impl::vision::native::kernels::quantize<int16_t>(
kernels::quantize<int16_t>(
out_data, input_data, 1. / scale, zero_point, numel);
} else if (out.scalar_type() == ScalarType::Int) {
int32_t* out_data = out.mutable_data_ptr<int32_t>();
impl::vision::native::kernels::quantize<int32_t>(
kernels::quantize<int32_t>(
out_data, input_data, 1. / scale, zero_point, numel);
} else {
ET_CHECK_MSG(
Expand Down
81 changes: 77 additions & 4 deletions backends/cadence/vision/operators/op_quantized_conv_out.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,7 @@ __attribute__((noinline)) void conv2d_nchw_core_generic(
if (quantized) {
float val = bias_scale * acc;
out_plane[_oh * ow + _ow] =
::impl::vision::native::kernels::quantize<OT>(
val, inv_out_scale, out_zero_point);
kernels::quantize<OT>(val, inv_out_scale, out_zero_point);
} else {
out_plane[_oh * ow + _ow] = acc;
}
Expand Down Expand Up @@ -267,8 +266,8 @@ __attribute__((noinline)) void conv2d_nhwc_core_generic(
}
if (quantized) {
float val = bias_scale * acc;
out_line[_oc] = ::impl::vision::native::kernels::quantize<OT>(
val, inv_out_scale, out_zero_point);
out_line[_oc] =
kernels::quantize<OT>(val, inv_out_scale, out_zero_point);
} else {
out_line[_oc] = acc;
}
Expand Down Expand Up @@ -530,6 +529,80 @@ void quantized_conv_per_tensor_out(
}
}

// NCHW entry point for the per-tensor quantized convolution.
//
// Performs no work of its own: every argument is handed, unchanged and in the
// same order, to quantized_conv_per_tensor_out, with that function's
// channel-last flag fixed to false. The result is written into `out`.
void quantized_conv2d_nchw_per_tensor_out(
    KernelRuntimeContext& ctx,
    const Tensor& input,
    const Tensor& weight,
    const Tensor& bias,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    int64_t groups,
    int64_t in_zero_point,
    int64_t weight_zero_point,
    double bias_scale,
    double output_scale,
    int64_t output_zero_point,
    int64_t out_multiplier,
    int64_t out_shift,
    Tensor& out) {
  // NCHW is the non-channel-last layout, so the shared implementation is
  // called with its layout flag cleared.
  constexpr bool kChannelLast = false;

  quantized_conv_per_tensor_out(
      ctx, input, weight, bias,
      stride, padding, dilation, groups,
      in_zero_point, weight_zero_point,
      bias_scale, output_scale, output_zero_point,
      out_multiplier, out_shift,
      kChannelLast,
      out);
}

// NHWC entry point for the per-tensor quantized convolution.
//
// Performs no work of its own: every argument is handed, unchanged and in the
// same order, to quantized_conv_per_tensor_out, with that function's
// channel-last flag fixed to true. The result is written into `out`.
void quantized_conv2d_nhwc_per_tensor_out(
    KernelRuntimeContext& ctx,
    const Tensor& input,
    const Tensor& weight,
    const Tensor& bias,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    int64_t groups,
    int64_t in_zero_point,
    int64_t weight_zero_point,
    double bias_scale,
    double output_scale,
    int64_t output_zero_point,
    int64_t out_multiplier,
    int64_t out_shift,
    Tensor& out) {
  // NHWC is the channel-last layout, so the shared implementation is called
  // with its layout flag set.
  constexpr bool kChannelLast = true;

  quantized_conv_per_tensor_out(
      ctx, input, weight, bias,
      stride, padding, dilation, groups,
      in_zero_point, weight_zero_point,
      bias_scale, output_scale, output_zero_point,
      out_multiplier, out_shift,
      kChannelLast,
      out);
}

} // namespace native
} // namespace vision
} // namespace impl
4 changes: 2 additions & 2 deletions backends/cadence/vision/operators/op_softmax.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
* LICENSE file in the root directory of this source tree.
*/

#include <api.h>
#include <executorch/backends/cadence/vision/kernels/kernels.h>
#include <executorch/kernels/portable/cpu/util/activation_ops_util.h>
#include <executorch/kernels/portable/cpu/util/functional_util.h>
#include <executorch/kernels/portable/cpu/util/reduce_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include <idma_init.h>
#include <include/api.h>
#include <include_private/idma_init.h>
#include <stdio.h>

using executorch::aten::ScalarType;
Expand Down
6 changes: 3 additions & 3 deletions backends/cadence/vision/operators/quantized_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ inline __attribute__((always_inline)) void quantized_linear_per_tensor_(
(int32_t)weight_data[j * in_dim + k] - (int32_t)weight_zero_point;
sum += x * w;
}
out_data[i * out_dim + j] = ::impl::vision::native::kernels::quantize<T>(
out_data[i * out_dim + j] = impl::vision::kernels::quantize<T>(
sum, requant_scale, out_zero_point);
}
}
Expand Down Expand Up @@ -121,8 +121,8 @@ inline __attribute__((always_inline)) void quantized_linear_per_channel_(
// Compute the out_scale from out_multiplier and out_shift
const float out_scale =
-out_multiplier_data[j] * 1.0 / (1 << 31) * pow(2, out_shift_data[j]);
out_data[i * out_dim + j] = ::impl::vision::native::kernels::quantize<T>(
sum, out_scale, out_zero_point);
out_data[i * out_dim + j] =
impl::vision::kernels::quantize<T>(sum, out_scale, out_zero_point);
}
}
}
Expand Down
21 changes: 20 additions & 1 deletion backends/cadence/vision/operators/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,25 @@ def define_operator(name: str, deps: list[str] | None = None) -> None:
if deps == None:
deps = []

# Determine which headers to export based on operator name
exported_headers = ["operators.h"]

# Add quantized_ops.h header for quantized operators
quantized_ops = [
"quantized_fully_connected_out",
"quantized_matmul_out",
"quantized_layer_norm",
"quantized_relu_out",
"quantized_conv_out",
"quantized_linear_out",
"quantize_per_tensor",
"dequantize_per_tensor",
"requantize_out"
]

if name in quantized_ops:
exported_headers.append("quantized_ops.h")

runtime.cxx_library(
name = op_name,
srcs = [op_name + ".cpp"],
Expand All @@ -31,7 +50,7 @@ def define_operator(name: str, deps: list[str] | None = None) -> None:
],
compatible_with = ["ovr_config//cpu:xtensa"],
deps = deps + common_deps,
exported_headers = ["operators.h"],
exported_headers = exported_headers,
)

OPERATORS = [
Expand Down
25 changes: 15 additions & 10 deletions backends/cadence/vision/third-party/include_private/idma_init.h
Original file line number Diff line number Diff line change
@@ -1,31 +1,36 @@
#ifndef __IDMA__INIT_H__
#define __IDMA__INIT_H__

#include "dtypes.h"
#include "../include/dtypes.h"
#include "common.h"

#define IDMA_BUFF_SIZE 16384 // 16 kb DRAM storage. Assume 4 buffers (2 input and 2 output)
#define IDMA_BUFF_SIZE \
16384 // 16 kb DRAM storage. Assume 4 buffers (2 input and 2 output)

#ifndef PLACE_IN_DRAM0
#define PLACE_IN_DRAM0 __attribute__ ((aligned(2*IVP_SIMD_WIDTH), section(".dram0.data")))
#define PLACE_IN_DRAM0 \
__attribute__((aligned(2 * IVP_SIMD_WIDTH), section(".dram0.data")))
#endif

#ifndef PLACE_IN_DRAM1
#define PLACE_IN_DRAM1 __attribute__ ((aligned(2*IVP_SIMD_WIDTH), section(".dram1.data")))
#define PLACE_IN_DRAM1 \
__attribute__((aligned(2 * IVP_SIMD_WIDTH), section(".dram1.data")))
#endif

float32_t data_dram0[IDMA_BUFF_SIZE / 2] PLACE_IN_DRAM0;
float32_t data_dram1[IDMA_BUFF_SIZE / 2] PLACE_IN_DRAM1;

float32_t *inpData[2] = {&data_dram0[0], &data_dram1[0]};
float32_t *outData[2] = {&data_dram0[IDMA_BUFF_SIZE / 4], &data_dram1[IDMA_BUFF_SIZE / 4]};
float32_t* inpData[2] = {&data_dram0[0], &data_dram1[0]};
float32_t* outData[2] = {
&data_dram0[IDMA_BUFF_SIZE / 4],
&data_dram1[IDMA_BUFF_SIZE / 4]};

IDMA_BUFFER_DEFINE(buffer_idma_ch0, 1, IDMA_2D_DESC);
IDMA_BUFFER_DEFINE(buffer_idma_ch1, 1, IDMA_2D_DESC);

idma_buffer_t * descbuf[] = {
buffer_idma_ch0,
buffer_idma_ch1,
idma_buffer_t* descbuf[] = {
buffer_idma_ch0,
buffer_idma_ch1,
};

#endif // __IDMA__INIT_H__
#endif // __IDMA__INIT_H__
Loading
Loading