Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Quantization] Add symmetric with power2 scale quantization schema #3437

20 changes: 18 additions & 2 deletions include/glow/Quantization/Base/Base.h
Expand Up @@ -53,7 +53,7 @@ struct QuantizationTransform32To8 {
int32_t transform(int32_t input) {
// The operation x >> y is rounded down to negative infinity. To get to
// round-nearest we add (1 << (shift - 1)) to the value prior to shifting.
int rtn = (1 << (post - 1));
int rtn = (post > 0) ? (1 << (post - 1)) : 0;
mciprian13 marked this conversation as resolved.
Show resolved Hide resolved
return ((((input >> pre) * scale) + rtn) >> post) + offset;
}
};
Expand Down Expand Up @@ -118,6 +118,14 @@ enum Schema {
/// version of the quantized type with an offset of zero:
/// For example, int8 is [-128; 127] - (-128) == uint8 [0; 255] - 0
SymmetricWithUnsigned,
/// Quantization schema with:
/// - range centered on 0 (symmetric): offset == 0.
/// - scale parameter is a power of 2: scale = 2^E where E is a signed
/// exponent. Since the scale parameter is mostly subunitary, the
/// exponent is mostly negative.
/// Since the scale parameter is stored as floating point, the values
/// of E which are exactly representable range from -126 to 127.
SymmetricWithPower2Scale,
};

/// Configuration for Quantization, passed into \ref quantizeFunction().
Expand Down Expand Up @@ -163,7 +171,9 @@ template <class SrcTy, class DestTy> DestTy clip(SrcTy in) {
/// Quantize \p input according to the scale/offset in \p TQP and clip the
/// result to the representable range of \p DestTy.
template <class DestTy = int8_t>
inline DestTy quantize(float input, const TensorQuantizationParams &TQP) {
  float result = input / TQP.scale + TQP.offset;
  // Note: use int64_t since casts of large values might be wrapped around
  // before clipping, for example for result = 2147483648.00 (float).
  return quantization::clip<int64_t, DestTy>((int64_t)nearbyintf(result));
}

/// Converts a quantized value (type eTy) to floating point based on the
Expand Down Expand Up @@ -347,6 +357,12 @@ void tensorFusedRowwiseQuantization(const Tensor &input, Tensor &output) {
}
}

/// Verify if float is an exact power of 2 (mantissa is exactly 1.0).
bool isFloatPowerOf2(float val);

/// Get float 2's exponent.
int getFloat2Exp(float val);

} // namespace quantization
} // namespace glow

Expand Down
49 changes: 48 additions & 1 deletion lib/Quantization/Base/Base.cpp
@@ -1,5 +1,6 @@
/**
* Copyright (c) 2017-present, Facebook, Inc.
* Copyright (c) 2019-present, NXP Semiconductor, Inc.
mciprian13 marked this conversation as resolved.
Show resolved Hide resolved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -201,6 +202,27 @@ QuantizationTransform32To8 quantizeScaleOffset32To8(float scale,
int preShift = 0;
int postShift = 0;

// We treat first the particular case when scale is a power of 2 (2 ^ exp,
// where exp is a signed integer exponent). The operation is specialized as:
// - for positive 2's exponent:
// x * scale + offset (pre = 0, post = 0, scale = (int)scale).
// - for negative 2's exponent:
// x >> post + offset (pre = 0, post = -exp, scale = 1).
if (isFloatPowerOf2(scale)) {
int exp = getFloat2Exp(scale);
mciprian13 marked this conversation as resolved.
Show resolved Hide resolved
if (exp > 0) {
return QuantizationTransform32To8(0, // pre
0, // post
static_cast<int>(scale), // scale
offset); // offset
} else {
return QuantizationTransform32To8(0, // pre
-exp, // post
1, // scale
offset); // offset
}
}

// Calculate the post-shift value. It's always safe to increase scale as long
// as it's below one, and it's always legal to shift at least 15 bits for
// small scale values.
Expand Down Expand Up @@ -277,7 +299,8 @@ TensorQuantizationParams chooseQuantizationParams(float min, float max,
schema = quantization::Schema::Symmetric;
}
}
if (schema == quantization::Schema::Symmetric) {
if (schema == quantization::Schema::Symmetric ||
schema == quantization::Schema::SymmetricWithPower2Scale) {
// Check which end saturates the output dynamic range earlier
// and extend the other end to map the zero-point to quantized 0.
double rmin = min / (double)qmin;
Expand Down Expand Up @@ -338,6 +361,11 @@ TensorQuantizationParams chooseQuantizationParams(float min, float max,
nudgedZeroPoint = static_cast<int32_t>(round(initialZeroPoint));
}

// For SymmetricWithPower2Scale, round scale to nearest higher power of 2.
if (schema == quantization::Schema::SymmetricWithPower2Scale) {
scale = std::exp2(std::ceil(std::log2(scale)));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we have a test which verifies that for SymmetricWithPower2Scale scale is indeed power of 2?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is an assert in the same function which already verifies this every time.
I did, however, also add a unit test to perform the verification: chooseQuantizationSymmetricWithPower2Scale

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While adding the last unit test I found a bug (or corner case) in the function "chooseQuantizationParams":

  • for the previous "Symmetric" schema (and also for the newly added "SymmetricWithPower2Scale"), when qTy=UInt8QTy (qmin = 0) we get division by zero in the following line of code:
    double rmin = min / (double)qmin;

What should we do about this? This problem was already there and was not exposed by unit tests. Do you think we should exclude this case by putting an assert in the function?

Thanks!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting. Thanks for flagging this!

Do you think we should exclude this case by putting an assert in the function?

I think that would be reasonable thing to add.

Symmetric with Uint basically means that it's only non negative (non positive, depending on a scale sign) numbers that could be represented, which for majority, if not all, networks would be a weird range.

}

TensorQuantizationParams result{static_cast<float>(scale), nudgedZeroPoint};
// The only valid offset for symmetric quantization is 0.
assert((result.offset == 0 || schema != quantization::Schema::Symmetric) &&
Expand All @@ -349,6 +377,17 @@ TensorQuantizationParams chooseQuantizationParams(float min, float max,
schema != quantization::Schema::SymmetricWithUnsigned) &&
"Symmetric quantization with unsigned should be centered on 0 or on "
"-qmin");

// For SymmetricWithPower2Scale schema the offset should be 0.
assert((result.offset == 0 ||
schema != quantization::Schema::SymmetricWithPower2Scale) &&
"Symmetric quantization should be centered on 0");

// For SymmetricWithPower2Scale schema the scale should be a power of 2.
assert((isFloatPowerOf2(result.scale) ||
schema != quantization::Schema::SymmetricWithPower2Scale) &&
"Scale quantization parameter should be a power of 2");

return result;
}

Expand Down Expand Up @@ -377,5 +416,13 @@ std::vector<int8_t> createMapping(TypeRef inTy, TypeRef outTy,
return mapping;
}

bool isFloatPowerOf2(float val) {
  // frexp normalizes the mantissa into [0.5, 1) (negated for negative
  // inputs), so |val| is an exact power of 2 iff the mantissa is +/-0.5.
  // Zero, NaN and infinity all fail the comparison and return false.
  int unusedExp;
  const float mantissa = std::frexp(val, &unusedExp);
  return (mantissa == 0.5f) || (mantissa == -0.5f);
}

// Extract the unbiased binary exponent E of val, i.e. floor(log2(|val|)).
int getFloat2Exp(float val) {
  const int exponent = std::ilogb(val);
  return exponent;
}

} // namespace quantization
} // namespace glow
44 changes: 43 additions & 1 deletion lib/Quantization/Serialization.cpp
@@ -1,5 +1,6 @@
/**
* Copyright (c) 2017-present, Facebook, Inc.
* Copyright (c) 2019-present, NXP Semiconductor, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -28,11 +29,52 @@
namespace llvm {
namespace yaml {

/// The default behavior of YAML is to serialize floating point numbers
/// using the "%g" format specifier which is not guaranteed to print all
/// the decimals. During a round-trip (serialize, deserialize) decimals
/// might be lost and hence precision is lost. Although this might not be
/// critical for some quantization schema, for "SymmetricWithPower2Scale"
/// the round-trip must preserve the exact representation of the floating
/// point scale which is a power of 2. The code below is a workaround to
/// overwrite the behavior of the YAML serializer to print all the digits.
/// Thin wrapper around a float so that a dedicated ScalarTraits
/// specialization can control exactly how the value is serialized.
/// The constructor is intentionally non-explicit: the YAML normalization
/// code relies on implicit float -> FloatWrapper conversion.
struct FloatWrapper {
  float _val;
  FloatWrapper(float val) : _val(val) {}
};

template <> struct ScalarTraits<FloatWrapper> {
mciprian13 marked this conversation as resolved.
Show resolved Hide resolved
static void output(const FloatWrapper &value, void *ctxt,
                   llvm::raw_ostream &out) {
  // Emit the float with the maximum number of fractional digits so that a
  // serialize/deserialize round-trip preserves the exact value, then trim
  // trailing zeros while keeping at least one digit after the decimal point.
  char buffer[200];
  snprintf(buffer, sizeof(buffer), "%.126f", value._val);
  size_t len = strlen(buffer);
  while (len > 1 && buffer[len - 1] == '0' && buffer[len - 2] != '.') {
    buffer[--len] = '\0';
  }
  out << buffer;
}
static StringRef input(StringRef scalar, void *ctxt, FloatWrapper &value) {
  // to_float returns true on a successful parse; an empty StringRef is the
  // YAML traits convention for "no error".
  if (!to_float(scalar, value._val)) {
    return "invalid floating point number";
  }
  return StringRef();
}
// Numeric scalar text never needs YAML quoting.
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
};

/// Mapping for NodeQuantizationInfo yaml serializer.
template <> struct MappingTraits<glow::NodeQuantizationInfo> {
/// Normalization adapter exposing the plain float scale as a FloatWrapper
/// so the full-precision ScalarTraits serializer is used for it.
struct FloatNormalized {
  FloatNormalized(IO &io) : _val(0.0) {}
  FloatNormalized(IO &, float &val) : _val(val) {}
  // Convert back to the plain float stored in the quantization params.
  float denormalize(IO &) { return _val._val; }
  FloatWrapper _val;
};
/// Map NodeQuantizationInfo fields to/from YAML. The scale goes through
/// the FloatNormalized adapter so it is printed with full precision (a
/// requirement for exact power-of-2 scale round-trips).
static void mapping(IO &io, glow::NodeQuantizationInfo &info) {
  MappingNormalization<FloatNormalized, float> scale(
      io, info.tensorQuantizationParams_.scale);
  io.mapRequired("nodeOutputName", info.nodeOutputName_);
  io.mapRequired("scale", scale->_val);
  io.mapRequired("offset", info.tensorQuantizationParams_.offset);
}
};
Expand Down
56 changes: 54 additions & 2 deletions tests/unittests/QuantizationTest.cpp
Expand Up @@ -102,9 +102,21 @@ void testSerialization(const std::vector<NodeQuantizationInfo> &expected) {
}

// Round-trip a mix of integer and fractional scales through the YAML
// serializer and verify they are read back unchanged.
TEST(Quantization, Serialize) {
  std::vector<NodeQuantizationInfo> expected{{"first", {1, 10}},
                                             {"second", {-1, 3}},
                                             {"third", {-10, 30}},
                                             {"fourth", {0.1, -10}},
                                             {"fifth", {0.123, -30}}};
  testSerialization(expected);
}

// Every scale below is an exact power of two (2^0 down to 2^-9); the
// serialize/deserialize round-trip must reproduce each value bit-exactly.
TEST(Quantization, SerializePower2Scale) {
  std::vector<NodeQuantizationInfo> expected{
      {"pwr_neg_0", {1.0f, 0}},       {"pwr_neg_1", {1.0f / 2, 0}},
      {"pwr_neg_2", {1.0f / 4, 0}},   {"pwr_neg_3", {1.0f / 8, 0}},
      {"pwr_neg_4", {1.0f / 16, 0}},  {"pwr_neg_5", {1.0f / 32, 0}},
      {"pwr_neg_6", {1.0f / 64, 0}},  {"pwr_neg_7", {1.0f / 128, 0}},
      {"pwr_neg_8", {1.0f / 256, 0}}, {"pwr_neg_9", {1.0f / 512, 0}}};
  testSerialization(expected);
}

Expand Down Expand Up @@ -150,6 +162,34 @@ TEST(Quantization, quantScaleOffset) {
}
}

// Verify that quantizeScaleOffset32To8 takes the specialized power-of-2
// path (pre == 0, shift-only or integer-scale-only) and that transform()
// reproduces each input to within one quantization step.
TEST(Quantization, quantScaleOffsetPower2Scale) {
  // Test different power of 2 scale values (from 2^-10 to 2^1).
  float scales[] = {0.0009765625f, 0.0019531250f, 0.0039062500f, 0.0078125000f,
                    0.0156250000f, 0.0312500000f, 0.0625000000f, 0.1250000000f,
                    0.2500000000f, 0.5000000000f, 1.0000000000f, 2.0000000000f};

  // Try all scale factors:
  for (float scale : scales) {
    // The transform parameters only depend on the scale, so compute and
    // check them once per scale instead of once per input value.
    auto TR = quantization::quantizeScaleOffset32To8(scale, 0);
    EXPECT_EQ(quantization::isFloatPowerOf2(scale), true);
    EXPECT_EQ(TR.pre, 0);
    int exp = quantization::getFloat2Exp(scale);
    if (exp > 0) {
      EXPECT_EQ(TR.scale, (int)scale);
      EXPECT_EQ(TR.post, 0);
    } else {
      EXPECT_EQ(TR.scale, 1);
      EXPECT_EQ(TR.post, -exp);
    }
    // Try all legal integers within the int8 range [-128, 127]. Use a plain
    // int counter: the previous int8_t loop had to stop at 126 (incrementing
    // past 127 would wrap), so the value 127 was never exercised.
    for (int input = -128; input <= 127; input++) {
      int32_t sum32num = round(input / scale);
      int32_t computed = TR.transform(sum32num);
      EXPECT_NEAR(input, computed, 1);
    }
  }
}

template <class qtype>
void quantizeTensorTest(ElemKind qTy, quantization::Schema schema) {
// Map float [0.0; 6.0] to a quantized type using its entire value range.
Expand Down Expand Up @@ -233,6 +273,18 @@ TEST(Quantization, quantizeTensorSymmetricUInt32) {
quantizeTensorTest<int32_t>(ElemKind::Int32QTy,
quantization::Schema::SymmetricWithUnsigned);
}
// Verify the SymmetricWithPower2Scale schema end-to-end for int8 tensors.
TEST(Quantization, quantizeTensorSymmetricPwr2Int8) {
quantizeTensorTest<int8_t>(ElemKind::Int8QTy,
quantization::Schema::SymmetricWithPower2Scale);
}
// Same schema, int16 quantized precision.
TEST(Quantization, quantizeTensorSymmetricPwr2Int16) {
quantizeTensorTest<int16_t>(ElemKind::Int16QTy,
quantization::Schema::SymmetricWithPower2Scale);
}
// Same schema, int32 quantized precision.
TEST(Quantization, quantizeTensorSymmetricPwr2Int32) {
quantizeTensorTest<int32_t>(ElemKind::Int32QTy,
quantization::Schema::SymmetricWithPower2Scale);
}

/// Helper for quantizing a simple Conv with precision \p quantizationPrecision.
static void quantizeSimpleConvGraph(ElemKind quantizationPrecision) {
Expand Down
5 changes: 4 additions & 1 deletion tools/loader/Loader.cpp
Expand Up @@ -85,7 +85,10 @@ llvm::cl::opt<quantization::Schema> quantizationSchema(
"Use symmetric ranges"),
clEnumValN(quantization::Schema::SymmetricWithUnsigned,
"symmetric_with_uint8",
"Use symmetric ranges with potentially uint8 ranges")),
"Use symmetric ranges with potentially uint8 ranges"),
clEnumValN(quantization::Schema::SymmetricWithPower2Scale,
mciprian13 marked this conversation as resolved.
Show resolved Hide resolved
"symmetric_with_power2_scale",
"Use symmetric ranges with power of 2 scaling factor")),
llvm::cl::init(quantization::Schema::Asymmetric), llvm::cl::cat(loaderCat));

llvm::cl::opt<ElemKind> quantizationPrecision(
Expand Down