151 changes: 131 additions & 20 deletions onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
@@ -7,6 +7,7 @@

#include <core/common/safeint.h>
#include <core/common/logging/logging.h>
#include <core/framework/tensorprotoutils.h>
#include <core/graph/graph.h>
#include <core/graph/graph_viewer.h>
#include <core/providers/common.h>
@@ -64,15 +65,42 @@ QLinearOpType GetQLinearOpType(const onnxruntime::Node& node) {
return QLinearOpType::Unknown;
}

ConvType GetConvType(const onnxruntime::Node& node, const InitializedTensorSet& initializers) {
const auto& op_type = node.OpType();
bool is_qlinear_conv = (op_type == "QLinearConv");
ORT_ENFORCE(op_type == "Conv" || is_qlinear_conv);

NodeAttrHelper helper(node);
const auto group = helper.Get("group", 1);

size_t w_idx = is_qlinear_conv ? 3 : 1;
const auto& weight = node.InputDefs()[w_idx]->Name();
const auto& weight_tensor = *initializers.at(weight);

// ONNX has a single Conv op
// NNAPI has 3 conv variants
// Input is (N, C, H, W)
// group == 1, --> regular conv
// group != 1 && weight is (M, 1, kH, kW), --> depthwise conv
// group != 1 && weight is (M, C/group, kH, kW), --> grouped conv
if (group == 1)
return ConvType::Regular;
else if (weight_tensor.dims()[1] == 1)
return ConvType::Depthwise;
else
return ConvType::Grouped;
}
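
For illustration, a minimal standalone sketch of the same classification rule, operating directly on the group attribute and the weight shape; the ClassifyConv free function is hypothetical and not part of this change (it assumes ConvType from helper.h and <vector>/<cstdint> are in scope):

// Classify a 2D conv from its group attribute and its (M, C/group, kH, kW) weight shape.
ConvType ClassifyConv(int64_t group, const std::vector<int64_t>& weight_dims) {
  if (group == 1)
    return ConvType::Regular;    // e.g. group=1,  weight (64, 3, 3, 3)
  if (weight_dims[1] == 1)
    return ConvType::Depthwise;  // e.g. group=32, weight (32, 1, 3, 3)
  return ConvType::Grouped;      // e.g. group=2,  weight (64, 16, 3, 3)
}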

bool IsQLinearBinaryOp(QLinearOpType qlinear_op_type) {
return qlinear_op_type == QLinearOpType::QLinearConv ||
qlinear_op_type == QLinearOpType::QLinearMatMul ||
qlinear_op_type == QLinearOpType::QLinearAdd;
}

bool HasValidBinaryOpQuantizedInputs(const Node& node) {
auto op_type = GetQLinearOpType(node);
int32_t a_input_type, b_input_type;
if (!IsQLinearBinaryOp(GetQLinearOpType(node))) {
if (!IsQLinearBinaryOp(op_type)) {
LOGS_DEFAULT(VERBOSE) << "[" << node.OpType() << "] is not a binary qlinear op";
return false;
}
@@ -83,7 +111,16 @@ bool HasValidBinaryOpQuantizedInputs(const Node& node) {
if (!GetType(*input_defs[3], b_input_type))
return false;

if (a_input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8 || a_input_type != b_input_type) {
// QLinearConv supports u8u8 or u8s8
// QLinearMatMul/QLinearAdd support u8u8 only
bool is_qlinear_conv = op_type == QLinearOpType::QLinearConv;
bool has_valid_qlinear_conv_weight =
(b_input_type == ONNX_NAMESPACE::TensorProto_DataType_UINT8 ||
b_input_type == ONNX_NAMESPACE::TensorProto_DataType_INT8);

if (a_input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8 ||
(!is_qlinear_conv && a_input_type != b_input_type) ||
(is_qlinear_conv && !has_valid_qlinear_conv_weight)) {
LOGS_DEFAULT(VERBOSE) << "[" << node.OpType()
<< "] A Input type: [" << a_input_type
<< "] B Input type: [" << b_input_type
@@ -95,8 +132,9 @@ bool HasValidBinaryOpQuantizedInputs(const Node& node) {
}
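
Condensed, the accepted type combinations can be read off as a small predicate; this helper is a hypothetical restatement of the check above, not code from the PR:

// Input A must always be uint8. QLinearConv also accepts an int8 input B (u8s8);
// QLinearMatMul and QLinearAdd require B to be uint8 as well (u8u8 only).
bool IsValidQuantizedTypePair(bool is_qlinear_conv, int32_t a_type, int32_t b_type) {
  if (a_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8)
    return false;
  if (is_qlinear_conv)
    return b_type == ONNX_NAMESPACE::TensorProto_DataType_UINT8 ||
           b_type == ONNX_NAMESPACE::TensorProto_DataType_INT8;
  return b_type == a_type;
}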

bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const Node& node,
const std::vector<size_t>& indices) {
const auto& op = node.OpType();
const std::vector<size_t>& indices, const OpSupportCheckParams& params) {
const auto& op_type = node.OpType();
bool is_qlinear_conv = (op_type == "QLinearConv");
const auto input_defs(node.InputDefs());
for (const auto idx : indices) {
if (idx >= input_defs.size()) {
@@ -106,13 +144,42 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const
}
const auto scale_name = input_defs[idx]->Name();
if (Contains(initializers, scale_name)) {
const auto& tensor = *initializers.at(scale_name);
if (!tensor.dims().empty() && tensor.dims()[0] != 1) {
LOGS_DEFAULT(VERBOSE) << op << " does not support per-channel quantization";
return false;
const auto& scale_tensor = *initializers.at(scale_name);
int64_t scales_dim = scale_tensor.dims().empty() ? 1 : scale_tensor.dims()[0];
bool is_conv_weight = is_qlinear_conv && idx == 4;
bool is_conv_u8s8_weight = false;

if (is_conv_weight) {
const auto& weight_tensor = *initializers.at(node.InputDefs()[3]->Name());
is_conv_u8s8_weight = weight_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT8;
}

// For u8s8 QLinearConv we need to check the dimensions of the per-channel quantization scales
// Per-channel quantization is only supported for u8s8
// For all other cases, the scale must be a scalar
if (is_conv_u8s8_weight) {
if (params.android_sdk_ver < 29) {
LOGS_DEFAULT(VERBOSE) << op_type << " only supports per-channel quantization on Android API 29+, "
<< "system API level: " << params.android_sdk_ver;
return false;
}

const auto& weight_tensor = *initializers.at(node.InputDefs()[3]->Name());
if (weight_tensor.dims()[0] != scales_dim) {
LOGS_DEFAULT(VERBOSE) << op_type << " mismatch int8 per-channel quantization weight,"
<< " weight dimension[0] " << weight_tensor.dims()[0]
<< " scale dimension " << scales_dim;
return false;
}
} else {
if (scales_dim != 1) {
LOGS_DEFAULT(VERBOSE) << op_type << " does not support per-channel quantization, "
<< " for now, only u8s8 QlinearConv supports per-channel quantization on API 29+";
return false;
}
}
} else {
LOGS_DEFAULT(VERBOSE) << "The scale of " << op << " must be known";
LOGS_DEFAULT(VERBOSE) << "The scale of " << op_type << " must be known";
return false;
}
}
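
The scale-shape rule being enforced, restated as a hypothetical helper (not code from the PR): a u8s8 QLinearConv weight may carry one scale per output channel (weight dimension 0) on Android API 29+, while every other scale must be a scalar:

bool IsValidScaleShape(bool is_conv_u8s8_weight, int64_t scales_dim,
                       int64_t weight_dim0, int32_t android_sdk_ver) {
  if (!is_conv_u8s8_weight)
    return scales_dim == 1;  // scalar scale only
  // per-channel scales: one per output channel, supported by NNAPI from API 29
  return android_sdk_ver >= 29 && scales_dim == weight_dim0;
}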
@@ -122,28 +189,72 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const

bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const Node& node,
const std::vector<size_t>& indices) {
const auto& op = node.OpType();
const auto& op_type = node.OpType();
bool is_qlinear_conv = (op_type == "QLinearConv");
const auto input_defs(node.InputDefs());
for (const auto idx : indices) {
if (idx >= input_defs.size()) {
LOGS_DEFAULT(VERBOSE) << "HasValidQuantizationZeroPoints, Input index, " << idx
<< " >= input number, " << input_defs.size();
return false;
}
const auto zero_point_name = node.InputDefs()[idx]->Name();

const auto zero_point_name = input_defs[idx]->Name();
if (Contains(initializers, zero_point_name)) {
const auto& tensor = *initializers.at(zero_point_name);
if (!tensor.dims().empty() && tensor.dims()[0] != 1) {
LOGS_DEFAULT(VERBOSE) << op << " does not support per-channel quantization";
return false;
bool is_conv_weight = is_qlinear_conv && idx == 5;
bool is_conv_u8s8_weight = false;
if (is_conv_weight) {
const auto& weight_tensor = *initializers.at(node.InputDefs()[3]->Name());
is_conv_u8s8_weight = weight_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT8;
}
if (tensor.data_type() != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
LOGS_DEFAULT(VERBOSE) << op << " does not support zero point data type "
<< std::to_string(tensor.data_type());
return false;

const auto& zero_tensor = *initializers.at(zero_point_name);
int64_t zero_dim = zero_tensor.dims().empty() ? 1 : zero_tensor.dims()[0];
if (is_conv_u8s8_weight) {
if (zero_tensor.data_type() != ONNX_NAMESPACE::TensorProto_DataType_INT8) {
LOGS_DEFAULT(VERBOSE) << "u8s8 QlinearConv only supports int8 zero point for weight, "
<< "actual zero point type: [" << zero_tensor.data_type() << "]";
return false;
}

// For ONNX u8s8 QLinearConv, the weight zero point can be a scalar,
// or a tensor with the same channel count as the weight. For NNAPI we
// only support it being 0 (scalar) or all 0s (tensor), since NNAPI has no
// input for the per-channel quantization zero point and assumes it is 0
const auto& weight_tensor = *initializers.at(node.InputDefs()[3]->Name());
if (weight_tensor.dims()[0] != zero_dim && zero_dim != 1) {
LOGS_DEFAULT(VERBOSE) << op_type << " mismatch int8 per-channel quantization weight,"
<< " weight dimension[0] " << weight_tensor.dims()[0]
<< " zero point dimension " << zero_dim;
return false;
}

std::unique_ptr<uint8_t[]> unpacked_tensor;
size_t tensor_byte_size;
auto status = onnxruntime::utils::UnpackInitializerData(zero_tensor, unpacked_tensor, tensor_byte_size);
if (!status.IsOK()) {
LOGS_DEFAULT(ERROR) << "QLinearConv: error when unpacking zero point tensor: " << status.ErrorMessage();
return false;
}

// Verify that all ONNX weight zero point(s) are 0
const int8_t* zero_points = reinterpret_cast<const int8_t*>(unpacked_tensor.get());
for (size_t i = 0; i < tensor_byte_size; i++) {
if (zero_points[i] != 0) {
LOGS_DEFAULT(VERBOSE) << "QLinearConv only support 0 as zero point, "
<< "zero_points[" << i << "] has value: " << zero_points[i];
return false;
}
}
} else {
if (zero_dim != 1) {
LOGS_DEFAULT(VERBOSE) << op_type << " does not support per-channel quantization, "
<< " for now, only u8s8 QlinearConv supports per-channel quantization on API 29+";
return false;
}
}
} else {
LOGS_DEFAULT(VERBOSE) << "The zero point of " << op << " must be known";
LOGS_DEFAULT(VERBOSE) << "The zero point of " << op_type << " must be known";
return false;
}
}
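
The all-zeros requirement in miniature, as a hypothetical helper over the unpacked int8 buffer (a restatement of the loop above, not code from the PR):

// NNAPI assumes a weight zero point of 0 for symmetric per-channel quantization,
// so every unpacked int8 zero point must be 0.
bool AllZeroPointsAreZero(const int8_t* zero_points, size_t count) {
  for (size_t i = 0; i < count; ++i)
    if (zero_points[i] != 0)
      return false;
  return true;
}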
12 changes: 11 additions & 1 deletion onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h
@@ -87,8 +97,18 @@ enum class QLinearOpType : uint8_t {
// QLinearReduceMean,
};

enum class ConvType : uint8_t {
Regular,
Depthwise,
Grouped,
};

QLinearOpType GetQLinearOpType(const onnxruntime::Node& node);

// Returns the type of the Conv op
// This function assumes the node is a 2D Conv
ConvType GetConvType(const onnxruntime::Node& node, const InitializedTensorSet& initializers);

// A QLinear binary op is an operator that takes 2 inputs and produces 1 output,
// such as QLinearConv, QLinearMatMul, QLinearAdd, ...
bool IsQLinearBinaryOp(QLinearOpType qlinear_op_type);
@@ -97,7 +107,7 @@ bool IsQLinearBinaryOp(QLinearOpType qlinear_op_type);
bool HasValidBinaryOpQuantizedInputs(const Node& node);
// Check if a qlinear op has valid scales for given indices
bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const Node& node,
const std::vector<size_t>& indices);
const std::vector<size_t>& indices, const OpSupportCheckParams& params);
// Check if a qlinear op has valid zero points for given indices
bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const Node& node,
const std::vector<size_t>& indices);
@@ -26,7 +26,7 @@ int32_t ModelBuilder::GetAndroidSdkVer() const {
// Scalar operand is copied into the model, no need to persist
#define DEFINE_ADD_OPERAND_FROM_SCALAR(scalar_type, op_type) \
Status ModelBuilder::AddOperandFromScalar(scalar_type value, uint32_t& index) { \
OperandType operandType(Type::op_type); \
OperandType operandType(Type::op_type, vector<uint32_t>{}); \
ORT_RETURN_IF_ERROR(AddNewNNAPIOperand(operandType, index)); \
RETURN_STATUS_ON_ERROR_WITH_NOTE( \
nnapi_->ANeuralNetworksModel_setOperandValue( \
@@ -377,6 +377,18 @@ Status ModelBuilder::AddNewNNAPIOperand(const OperandType& operand_type, uint32_
RETURN_STATUS_ON_ERROR(
nnapi_->ANeuralNetworksModel_addOperand(nnapi_model_->model_, &operand_type.operandType));
index = next_index_++;

if (operand_type.channelQuant) {
if (GetAndroidSdkVer() < 29) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Per-channel quantization is only supported on Android API level 29+,",
" system API level: ", GetAndroidSdkVer());
}

RETURN_STATUS_ON_ERROR(nnapi_->ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
nnapi_model_->model_, index, &operand_type.channelQuant->params));
}

return Status::OK();
}
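
For context, ANeuralNetworksSymmPerChannelQuantParams (NDK, API 29+) carries only a channel dimension and one scale per channel; it has no zero-point field, which is why the checks above require all weight zero points to be 0. A hedged sketch of populating it, with hypothetical scale values:

// One scale per output channel; channelDim 0 selects the output-channel
// dimension of the filter. Zero points are implicitly 0 in NNAPI symmetric
// per-channel quantization.
std::vector<float> per_channel_scales = {0.1f, 0.2f};  // hypothetical values
ANeuralNetworksSymmPerChannelQuantParams channel_quant = {
    /* .channelDim = */ 0,
    /* .scaleCount = */ static_cast<uint32_t>(per_channel_scales.size()),
    /* .scales = */ per_channel_scales.data(),
};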
