Skip to content
Permalink
Browse files

[onnxifi] add onnxifi quantization support (#2617)

  • Loading branch information...
zrphercule authored and rdzhabarov committed Mar 29, 2019
1 parent 86cc871 commit 036071a49f475633d288d7010a8c86588d265f6b
@@ -67,6 +67,14 @@ class Caffe2ModelLoader
/// Mapping between Caffe2 tensor names for inputs and actual Glow input vars.
llvm::StringMap<Placeholder *> nameToInputVars_;

/// Helper for loadInputs: called once per tensor listed in the NetDef's
/// "input_shape_info" / "input_qshape_info" arguments. Two tensor proto
/// types are currently supported:
/// caffe2::TensorProto and caffe2::QTensorProto (quantized tensors).
/// If \p loadInputsAsPlaceholders is true the input \p in is registered as a
/// placeholder, otherwise an empty backing tensor is created for it.
template <class TensorProtoType>
llvm::Error loadInputsWithTensorProtoType(const caffe2::NetDef &net,
bool loadInputsAsPlaceholders,
const TensorProtoType &in);

/// Load the inputs from the NetDef. If \p loadInputsAsPlaceholders is
/// true then this will load each graph input as a placeholder otherwise it
/// will create an empty tensor for each input.
@@ -40,54 +40,65 @@ inline llvm::Error loadWeight(const onnxTensorDescriptorV1 &in, Tensor *T) {
RETURN_ERR("Only support CPU memory tensors.");
}

// This is a caffe2 offset shift.
const int32_t OFFSETSHIFT = 128;
std::vector<size_t> dims;
for (unsigned i = 0; i < in.dimensions; ++i) {
dims.push_back(in.shape[i]);
}

if (in.dataType == ONNXIFI_DATATYPE_FLOAT32) {
T->reset(ElemKind::FloatTy, dims);

auto TH = T->getHandle<>();
float *data = (float *)in.buffer;
for (size_t i = 0; i < TH.size(); ++i) {
TH.raw(i) = data[i];
}
} else if (in.dataType == ONNXIFI_DATATYPE_UINT64 ||
in.dataType == ONNXIFI_DATATYPE_INT64) {
const bool inDataSigned = in.dataType == ONNXIFI_DATATYPE_INT64;
(void)inDataSigned;
T->reset(ElemKind::Int64ITy, dims);

auto TH = T->getHandle<int64_t>();
int64_t *data = (int64_t *)in.buffer;
for (size_t i = 0; i < TH.size(); ++i) {
RETURN_ERR_IF_NOT(
(inDataSigned || data[i] >= 0),
"Disallow overflow of loaded UINT64 data into Int64ITy.");
TH.raw(i) = data[i];
}
} else if (in.dataType == ONNXIFI_DATATYPE_INT32) {
T->reset(ElemKind::Int32ITy, dims);

auto TH = T->getHandle<int32_t>();
int32_t *data = (int32_t *)in.buffer;
for (size_t i = 0; i < TH.size(); ++i) {
TH.raw(i) = data[i];
}
} else if (in.dataType == ONNXIFI_DATATYPE_UINT8) {
T->reset(ElemKind::Int8QTy, dims, 1.0, 0);

auto TH = T->getHandle<int8_t>();
uint8_t *data = (uint8_t *)in.buffer;
for (size_t i = 0; i < TH.size(); ++i) {
constexpr uint8_t OFFSETSHIFT = 128;
TH.raw(i) = static_cast<int8_t>((((uint8_t)data[i]) - OFFSETSHIFT));
if (in.is_quantized == 1) {
if (in.dataType == ONNXIFI_DATATYPE_UINT8) {
T->reset(ElemKind::Int8QTy, dims, in.scale, in.bias - OFFSETSHIFT);

auto TH = T->getHandle<int8_t>();
uint8_t *data = (uint8_t *)in.buffer;
for (size_t i = 0; i < TH.size(); ++i) {
TH.raw(i) = (int8_t)(data[i] - OFFSETSHIFT);
}
} else if (in.dataType == ONNXIFI_DATATYPE_INT32) {
T->reset(ElemKind::Int32QTy, dims, in.scale, in.bias);
auto TH = T->getHandle<int32_t>();
int32_t *data = (int32_t *)in.buffer;
for (size_t i = 0; i < TH.size(); ++i) {
TH.raw(i) = data[i];
}
} else {
RETURN_ERR("Only uint8 and int32 quantized tensors are supported.");
}
} else {
RETURN_ERR("Only float, index, and int8 tensors are supported.");
}
if (in.dataType == ONNXIFI_DATATYPE_FLOAT32) {
T->reset(ElemKind::FloatTy, dims);

auto TH = T->getHandle<>();
float *data = (float *)in.buffer;
for (size_t i = 0; i < TH.size(); ++i) {
TH.raw(i) = data[i];
}
} else if (in.dataType == ONNXIFI_DATATYPE_UINT64 ||
in.dataType == ONNXIFI_DATATYPE_INT64) {
const bool inDataSigned = in.dataType == ONNXIFI_DATATYPE_INT64;
T->reset(ElemKind::Int64ITy, dims);

auto TH = T->getHandle<int64_t>();
int64_t *data = (int64_t *)in.buffer;
for (size_t i = 0; i < TH.size(); ++i) {
RETURN_ERR_IF_NOT(
(inDataSigned || data[i] >= 0),
"Disallow overflow of loaded UINT64 data into Int64ITy.");
TH.raw(i) = data[i];
}
} else if (in.dataType == ONNXIFI_DATATYPE_INT32) {
T->reset(ElemKind::Int32ITy, dims);

auto TH = T->getHandle<int32_t>();
int32_t *data = (int32_t *)in.buffer;
for (size_t i = 0; i < TH.size(); ++i) {
TH.raw(i) = data[i];
}
} else {
RETURN_ERR("Only float and index tensors are supported.");
}
}
return llvm::Error::success();
}

@@ -43,8 +43,8 @@ using ArgumentDictionaryTy =
/// In Glow, the activations are quantized to int_8. Therefore, for the offset
/// read from quantized caffe2 model, we need to subtract 128(i.e. INT8_MIN) to
/// make the activations becomes int8_t.
/// For Glow: -127 <= orig_fp32/scale_1 + offset_1 < 128
/// For Caffe2: 0 <= orig_fp32/scale_2 + offset_2 < 255
/// For Glow: -128 <= orig_fp32/scale_1 + offset_1 <= 127
/// For Caffe2: 0 <= orig_fp32/scale_2 + offset_2 <= 255
/// Therefore, we can make scale_1 == scale_2, and offset_1 = offset_2 - 128
const int32_t OFFSETSHIFT = 128;

@@ -60,7 +60,6 @@ llvm::Error setTensorType(const caffe2::TensorProto &in, Tensor *T) {
}
dim.push_back(d);
}

if (in.data_type() == caffe2::TensorProto::FLOAT) {
T->reset(ElemKind::FloatTy, dim);
return llvm::Error::success();
@@ -77,6 +76,26 @@ llvm::Error setTensorType(const caffe2::TensorProto &in, Tensor *T) {
RETURN_ERR("Only float and index tensors are supported");
}
}

/// Sets the type of the Glow tensor \p T from the quantized caffe2 tensor
/// proto \p in. Only uint8 (mapped to Int8QTy with a -128 offset shift, see
/// OFFSETSHIFT) and int32 (mapped to Int32QTy) qtensors are supported.
llvm::Error setTensorType(const caffe2::QTensorProto &in, Tensor *T) {
  std::vector<size_t> dim;
  for (auto d : in.dims()) {
    if (d == 0) {
      RETURN_ERR("0 dimension qtensor is not supported");
    }
    dim.push_back(d);
  }

  if (in.data_type() == caffe2::TensorProto::UINT8) {
    // Shift the caffe2 uint8 offset into Glow's int8 range.
    T->reset(ElemKind::Int8QTy, dim, in.scale(), in.bias() - OFFSETSHIFT);
    return llvm::Error::success();
  } else if (in.data_type() == caffe2::TensorProto::INT32) {
    T->reset(ElemKind::Int32QTy, dim, in.scale(), in.bias());
    return llvm::Error::success();
  } else {
    RETURN_ERR("Only uint8 and int32 qtensors are supported");
  }
}
} // namespace

/// Translates the protocol buffer node \p op into a random access map.
@@ -1043,37 +1062,59 @@ llvm::Error Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) {
RETURN_ERR(unexpectedNodeErrorMessage(op, "Unsupported operator."));
}

/// Registers one graph input \p in (a TensorProto or QTensorProto) from
/// \p net. If \p loadInputsAsPlaceholders is true the input becomes a
/// placeholder variable; otherwise an empty backing tensor of the proper
/// type is created for it. Inputs already present as static weights are
/// ignored.
template <class TensorProtoType>
llvm::Error
Caffe2ModelLoader::loadInputsWithTensorProtoType(const caffe2::NetDef &net,
                                                 bool loadInputsAsPlaceholders,
                                                 const TensorProtoType &in) {
  // Static weights already have a tensor; nothing to do for them.
  if (tensors_.count(in.name())) {
    return llvm::Error::success();
  }

  if (!loadInputsAsPlaceholders) {
    // Create an empty tensor of the right type to back this input.
    std::unique_ptr<Tensor> backing(new Tensor());
    RETURN_IF_ERR(setTensorType(in, backing.get()));
    tensors_[in.name()] = std::move(backing);
    return llvm::Error::success();
  }

  // Expose the input as a placeholder; the temporary tensor exists only to
  // carry the type derived from the proto.
  Tensor typed;
  RETURN_IF_ERR(setTensorType(in, &typed));

  Placeholder *ph;
  ASSIGN_VALUE_OR_RETURN_ERR(
      ph, createAndRegisterPlaceholder(in.name(), &typed.getType()));
  nameToInputVars_.try_emplace(in.name(), ph);
  return llvm::Error::success();
}

/// Loads the graph inputs of \p net: regular tensors from the
/// "input_shape_info" argument and quantized tensors from the
/// "input_qshape_info" argument. If \p loadInputsAsPlaceholders is true each
/// input is registered as a placeholder, otherwise as an empty tensor.
llvm::Error Caffe2ModelLoader::loadInputs(const caffe2::NetDef &net,
                                          bool loadInputsAsPlaceholders) {
  const caffe2::Argument *arg = nullptr, *qarg = nullptr;
  // Find the shape-info arguments; stop early once both are found.
  for (auto i = 0, e = net.arg_size(); i < e && (!arg || !qarg); ++i) {
    if (net.arg(i).name() == "input_shape_info") {
      arg = &net.arg(i);
    } else if (net.arg(i).name() == "input_qshape_info") {
      qarg = &net.arg(i);
    }
  }

  // Load all regular tensor inputs.
  if (arg) {
    for (const auto &in : arg->tensors()) {
      RETURN_IF_ERR(loadInputsWithTensorProtoType<caffe2::TensorProto>(
          net, loadInputsAsPlaceholders, in));
    }
  }

  // Load all quantized tensor inputs.
  if (qarg) {
    for (const auto &in : qarg->qtensors()) {
      RETURN_IF_ERR(loadInputsWithTensorProtoType<caffe2::QTensorProto>(
          net, loadInputsAsPlaceholders, in));
    }
  }

  return llvm::Error::success();
}

@@ -163,6 +163,7 @@ message Argument {
repeated bytes strings = 7;
repeated TensorProto tensors = 11;
repeated NetDef nets = 9;
repeated QTensorProto qtensors = 12;
}

// DeviceType that Caffe2 currently supports.
Submodule foxi updated 1 files
+16 −0 foxi/onnxifi.h

0 comments on commit 036071a

Please sign in to comment.
You can’t perform that action at this time.