Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backends/cadence/aot/ops_registrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2527,7 +2527,7 @@ def quantized_max_pool2d_nhwc_meta(
def fully_connected_meta(
src: torch.Tensor,
weight: torch.Tensor,
bias: torch.Tensor,
bias: Optional[torch.Tensor] = None,
) -> torch.Tensor:
# src comes in shape [leading_dims, in_dim]
# weight comes in shape [out_dim, in_dim]
Expand Down
4 changes: 1 addition & 3 deletions backends/cadence/aot/ref_implementations.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,10 +633,8 @@ def quantized_fully_connected_asym8uxasym8u_asym8u_per_tensor() -> torch.Tensor:
def fully_connected(
input_tensor: torch.Tensor,
weight: torch.Tensor,
bias: torch.Tensor,
bias: Optional[torch.Tensor] = None,
) -> torch.Tensor:
if input_tensor.shape[0] != 1:
raise ValueError("Fully connected linear only supports batch size of 1")
return F.linear(input_tensor, weight, bias)


Expand Down
5 changes: 3 additions & 2 deletions backends/cadence/generic/operators/op_fully_connected.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ void linear(
Tensor& output) {
const float* __restrict__ input_data = input.const_data_ptr<float>();
const float* __restrict__ weight_data = weight.const_data_ptr<float>();
const float* __restrict__ bias_data = bias.value().const_data_ptr<float>();
const float* __restrict__ bias_data =
bias.has_value() ? bias.value().const_data_ptr<float>() : nullptr;
float* __restrict__ output_data = output.mutable_data_ptr<float>();

// input comes in shape [batch_size, in_dim]
Expand All @@ -43,7 +44,7 @@ void linear(

for (int i = 0; i < leading_dims; ++i) {
for (int j = 0; j < M; ++j) {
float sum = bias_data[j];
float sum = bias_data != nullptr ? bias_data[j] : 0.0f;
for (int k = 0; k < N; ++k) {
sum += input_data[i * N + k] * weight_data[j * N + k];
}
Expand Down
11 changes: 7 additions & 4 deletions backends/cadence/hifi/operators/op_quantized_conv1d_ncl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,10 @@ void xa_opt_quantized_conv1d_ncl_asym8uxsym8u_asym8u(
WORD32 x_stride = stride[0];
WORD32 x_padding = padding[0];
WORD32 input_zero_bias = -in_zero_point;
WORD32 out_multiplier32 = bias_scale * (1. / output_scale) * 2147483648;
const float eff_scale = bias_scale * (1.0f / output_scale);
WORD32 out_multiplier32 = (eff_scale >= 1.0f)
? static_cast<WORD32>(2147483647)
: static_cast<WORD32>(eff_scale * 2147483648.0f);
WORD32 out_shift32 = 0;
WORD32 kernel_zero_bias = -weight_zero_point;

Expand Down Expand Up @@ -419,9 +422,9 @@ void quantized_conv1d_ncl_per_tensor_out(
out);
}
} else if (dtype == ScalarType::Byte) {
// HiFi nnlib conv1d_std kernel does not support depthwise (groups > 1).
// Fall back to generic implementation.
if (groups > 1) {
// HiFi nnlib conv1d_std kernel does not support depthwise (groups > 1)
// or stride > 1. Fall back to generic implementation.
if (groups > 1 || stride[0] > 1) {
impl::generic::native::quantized_conv1d_ncl_per_tensor_out(
ctx,
input,
Expand Down
11 changes: 7 additions & 4 deletions backends/cadence/hifi/operators/op_quantized_conv1d_nlc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,10 @@ void xa_opt_quantized_conv1d_nlc_asym8uxsym8u_asym8u(
WORD32 x_stride = stride[stride.size() - 1];
WORD32 x_padding = padding[padding.size() - 1];
WORD32 input_zero_bias = -in_zero_point;
WORD32 out_multiplier32 = bias_scale * (1. / output_scale) * 2147483648;
const float eff_scale = bias_scale * (1.0f / output_scale);
WORD32 out_multiplier32 = (eff_scale >= 1.0f)
? static_cast<WORD32>(2147483647)
: static_cast<WORD32>(eff_scale * 2147483648.0f);
WORD32 out_shift32 = 0;
WORD32 kernel_zero_bias = -weight_zero_point;

Expand Down Expand Up @@ -298,9 +301,9 @@ void quantized_conv1d_nlc_per_tensor_out(
out);
}
} else if (dtype == ScalarType::Byte) {
// HiFi nnlib conv1d_std kernel does not support depthwise (groups > 1).
// Fall back to generic implementation.
if (groups > 1) {
// HiFi nnlib conv1d_std kernel does not support depthwise (groups > 1)
// or stride > 1. Fall back to generic implementation.
if (groups > 1 || stride[0] > 1) {
impl::generic::native::quantized_conv1d_nlc_per_tensor_out(
ctx,
input,
Expand Down
Loading