[oneDNN][Bug-fix] Add check for min-max tensors in quantized matmul OpKernel. #59437

Merged
Changes from all commits
60 changes: 52 additions & 8 deletions tensorflow/core/kernels/mkl/mkl_qmatmul_op.cc
@@ -317,8 +317,20 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Tweight, Toutput> {
// This is the case where the inner-product and requantization are fused.
// "min_freezed_output" and "max_freezed_output" are the requested range
// for the output.
min_output_value = context->input(7).flat<float>()(0);
max_output_value = context->input(8).flat<float>()(0);
const Tensor& min_freezed_tensor = context->input(7);
const Tensor& max_freezed_tensor = context->input(8);
OP_REQUIRES(context,
TensorShapeUtils::IsScalar(min_freezed_tensor.shape()),
errors::InvalidArgument(
"`min_freezed_output` must be rank 0 but is rank ",
min_freezed_tensor.dims()));
OP_REQUIRES(context,
TensorShapeUtils::IsScalar(max_freezed_tensor.shape()),
errors::InvalidArgument(
"`max_freezed_output` must be rank 0 but is rank ",
max_freezed_tensor.dims()));
min_output_value = min_freezed_tensor.scalar<float>()();
max_output_value = max_freezed_tensor.scalar<float>()();
} else {
ComputeOutputRangeForInt32(context, &min_output_value, &max_output_value);
}
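
For context, here is a sketch of how the new checks surface to a caller. It is illustrative only and not part of this PR: it assumes the QuantizedMatMulTest fixture and node setup from mkl_qmatmul_op_test.cc further down, and feeds a hypothetical rank-1 tensor where the scalar `min_freezed_output` is expected, which should now yield an InvalidArgument status instead of undefined behavior.

// Illustrative sketch, not part of the diff. Inside a hypothetical test body
// that otherwise sets up the op exactly like the tests below:
AddInputFromArray<float>(TensorShape({2}), {0.0f, 0.0f});  // rank 1, not rank 0
Status s = RunOpKernel();
EXPECT_FALSE(s.ok());
EXPECT_TRUE(absl::StrContains(s.ToString(),
                              "`min_freezed_output` must be rank 0"));
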
@@ -344,10 +356,10 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Tweight, Toutput> {
void ComputeOutputRangeForInt32(OpKernelContext* context,
float* min_output_value,
float* max_output_value) {
const float min_input = context->input(3).flat<float>()(0);
const float max_input = context->input(4).flat<float>()(0);
const float min_weight = context->input(5).flat<float>()(0);
const float max_weight = context->input(6).flat<float>()(0);
const float min_input = context->input(3).scalar<float>()();
const float max_input = context->input(4).scalar<float>()();
const float min_weight = context->input(5).scalar<float>()();
const float max_weight = context->input(6).scalar<float>()();
MklQuantizationRangeForMultiplication<quint8, qint8, qint32>(
min_input, max_input, min_weight, max_weight, min_output_value,
max_output_value);
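
For orientation, `MklQuantizationRangeForMultiplication<quint8, qint8, qint32>` derives the float range representable by the int32 accumulator from the input and weight ranges. The sketch below is an approximation of that idea, not the library helper itself; the actual function in TensorFlow's quantization utilities may differ in detail.

#include <cstdint>
#include <limits>

// Rough sketch: one quantized level of the quint8 input spans
// (max_input - min_input) / 255 in float, one level of the qint8 weight spans
// (max_weight - min_weight) / 255, so one level of the qint32 accumulator
// spans their product, and the full int32 range scales accordingly.
void ApproxRangeForMultiplication(float min_input, float max_input,
                                  float min_weight, float max_weight,
                                  float* min_output, float* max_output) {
  const float input_step = (max_input - min_input) / 255.0f;     // quint8 levels
  const float weight_step = (max_weight - min_weight) / 255.0f;  // qint8 levels
  const float output_step = input_step * weight_step;
  *min_output = output_step * std::numeric_limits<int32_t>::lowest();
  *max_output = output_step * std::numeric_limits<int32_t>::max();
}
// With the ranges used in the tests below (input [0, 255], weight [-127, 127]):
// input_step = 1.0 and weight_step ≈ 0.996, so the int32 accumulator covers
// roughly ±2.1e9 in float units.
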
@@ -361,6 +373,25 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Tweight, Toutput> {
params.dtypes.append(typeid(Tbias).name());
params.dtypes.append(typeid(Toutput).name());

// min-max values for input and weight should be scalar.
const Tensor& min_input_tensor = context->input(3);
const Tensor& max_input_tensor = context->input(4);
const Tensor& min_weight_tensor = context->input(5);
const Tensor& max_weight_tensor = context->input(6);

OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_input_tensor.shape()),
errors::InvalidArgument("`min_a` must be rank 0 but is rank ",
min_input_tensor.dims()));
OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_input_tensor.shape()),
errors::InvalidArgument("`max_a` must be rank 0 but is rank ",
max_input_tensor.dims()));
OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_weight_tensor.shape()),
errors::InvalidArgument("`min_b` must be rank 0 but is rank ",
min_weight_tensor.dims()));
OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_weight_tensor.shape()),
errors::InvalidArgument("`max_b` must be rank 0 but is rank ",
max_weight_tensor.dims()));

// When the output type is quint8, the output data is requantized into
// quint8. A post_op "output_scale" is added to do the conversion.
if (std::is_same<Toutput, quint8>::value ||
@@ -371,8 +402,21 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Tweight, Toutput> {
ComputeOutputRangeForInt32(context, &min_output_value, &max_output_value);
float scale_int32 =
std::max(std::abs(min_output_value), std::abs(max_output_value));
const float min_freezed_output = context->input(7).flat<float>()(0);
const float max_freezed_output = context->input(8).flat<float>()(0);
const Tensor& min_freezed_tensor = context->input(7);
const Tensor& max_freezed_tensor = context->input(8);
// min-max values of freezed output range should be scalar.
OP_REQUIRES(context,
TensorShapeUtils::IsScalar(min_freezed_tensor.shape()),
errors::InvalidArgument(
"`min_freezed_output` must be rank 0 but is rank ",
min_freezed_tensor.dims()));
OP_REQUIRES(context,
TensorShapeUtils::IsScalar(max_freezed_tensor.shape()),
errors::InvalidArgument(
"`max_freezed_output` must be rank 0 but is rank ",
max_freezed_tensor.dims()));
const float min_freezed_output = min_freezed_tensor.scalar<float>()();
const float max_freezed_output = max_freezed_tensor.scalar<float>()();
float scale_eightbit =
std::max(std::abs(min_freezed_output), std::abs(max_freezed_output));
float scale = 1.0;
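
The remainder of this hunk (not shown) derives the oneDNN `output_scale` post-op mentioned above from these two magnitudes. A hedged worked example, using the ranges that appear in the requantization tests below (input [0, 255], weight [-127, 127], requested output [0, 255]):

// Worked example, illustrative only:
//   ComputeOutputRangeForInt32 yields min/max_output_value of roughly
//   -2.1e9 / +2.1e9 (see the sketch after the previous hunk), so
//     scale_int32    = max(|min_output_value|, |max_output_value|) ≈ 2.1e9
//     scale_eightbit = max(|0.0f|, |255.0f|)                       = 255.0f
//   The lines below this hunk combine these two magnitudes into the
//   output_scale that maps the int32 accumulator into the requested
//   quint8 output range.
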
80 changes: 40 additions & 40 deletions tensorflow/core/kernels/mkl/mkl_qmatmul_op_test.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if defined(INTEL_MKL) && defined(ENABLE_MKL)
#if defined(INTEL_MKL)
#define EIGEN_USE_THREADS

#include <functional>
@@ -64,10 +64,10 @@ TEST_F(QuantizedMatMulTest, Small_withBias) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {1, 2, 3, 4});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
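
The hand calculation itself lies outside this hunk. For orientation, here is a worked version under one stated assumption: the hunk shows the qint8 weight B (3x4) and the qint32 bias, and the quint8 input A is assumed to be the 2x3 matrix [[1, 2, 3], [4, 5, 6]] (its AddInputFromArray call sits above the visible lines). The plain integer product A*B plus bias then works out to:

// Assumed A = [[1, 2, 3], [4, 5, 6]] -- not visible in this hunk.
// Row 0: [1 2 3] * B = [ 74  80  86  92]; + bias {1, 2, 3, 4} = [ 75  82  89  96]
// Row 1: [4 5 6] * B = [173 188 203 218]; + bias {1, 2, 3, 4} = [174 190 206 222]
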
@@ -116,10 +116,10 @@ TEST_F(QuantizedMatMulTest, Small_withNegBias) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {100, -200, 300, -400});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
@@ -178,10 +178,10 @@ TEST_F(QuantizedMatMulTest, Small_WithNegInp) {
AddInputFromArray<qint8>(TensorShape({3, 2}), {1, 4, 2, 5, 3, 6});
// Bias
AddInputFromArray<float>(TensorShape({2}), {10.0f, 20.0f});
AddInputFromArray<float>(TensorShape({1}), {-12.0f});
AddInputFromArray<float>(TensorShape({1}), {243.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {-12.0f});
AddInputFromArray<float>(TensorShape({}), {243.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});

TF_ASSERT_OK(RunOpKernel());
// First calculate C = A * B,
@@ -240,12 +240,12 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndReq) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {10, -20, 30, -40});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
@@ -308,12 +308,12 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndDeq) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {10, -20, 30, -40});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
@@ -375,10 +375,10 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndRelu) {
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<float>(TensorShape({4}),
{100.0f, -200.0f, 300.0f, -400.0f});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
@@ -431,12 +431,12 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndReluAndReq) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {10, -20, 30, -40});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
@@ -502,10 +502,10 @@ TEST_F(QuantizedMatMulTest, Small_withWeightCached) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {1, 2, 3, 4});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});

int64 start_time = Env::Default()->NowMicros();
TF_ASSERT_OK(RunOpKernel());
@@ -543,4 +543,4 @@

} // namespace tensorflow

#endif // INTEL_MKL && ENABLE_MKL
#endif // INTEL_MKL