Merge pull request #59437 from Intel-tensorflow:amin/fix-qmatmul
PiperOrigin-RevId: 504745468
tensorflower-gardener committed Jan 26, 2023
2 parents 6351f10 + 2648c81 commit 8a47a39
Showing 2 changed files with 92 additions and 48 deletions.
60 changes: 52 additions & 8 deletions tensorflow/core/kernels/mkl/mkl_qmatmul_op.cc
@@ -317,8 +317,20 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Tweight, Toutput> {
// This is the case where the inner-product and requantization are fused.
// "min_freezed_output" and "max_freezed_output" are the requested range
// for the output.
min_output_value = context->input(7).flat<float>()(0);
max_output_value = context->input(8).flat<float>()(0);
const Tensor& min_freezed_tensor = context->input(7);
const Tensor& max_freezed_tensor = context->input(8);
OP_REQUIRES(context,
TensorShapeUtils::IsScalar(min_freezed_tensor.shape()),
errors::InvalidArgument(
"`min_freezed_output` must be rank 0 but is rank ",
min_freezed_tensor.dims()));
OP_REQUIRES(context,
TensorShapeUtils::IsScalar(max_freezed_tensor.shape()),
errors::InvalidArgument(
"`max_freezed_output` must be rank 0 but is rank ",
max_freezed_tensor.dims()));
min_output_value = min_freezed_tensor.scalar<float>()();
max_output_value = max_freezed_tensor.scalar<float>()();
} else {
ComputeOutputRangeForInt32(context, &min_output_value, &max_output_value);
}
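
The hunk above replaces unchecked `flat<float>()(0)` reads of `min_freezed_output` / `max_freezed_output` with a rank check followed by `scalar<float>()()`. A minimal sketch of the same validate-then-read pattern in isolation (the helper name `ReadScalarFloatInput` is illustrative, not part of this commit):

// Illustrative helper (not from this commit): fail with InvalidArgument when
// the requested input is not a rank-0 tensor, otherwise read it as a float.
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/platform/errors.h"

tensorflow::Status ReadScalarFloatInput(tensorflow::OpKernelContext* context,
                                        int index, const char* name,
                                        float* out) {
  const tensorflow::Tensor& t = context->input(index);
  if (!tensorflow::TensorShapeUtils::IsScalar(t.shape())) {
    return tensorflow::errors::InvalidArgument(
        "`", name, "` must be rank 0 but is rank ", t.dims());
  }
  *out = t.scalar<float>()();  // Safe: shape verified to be rank 0.
  return tensorflow::OkStatus();
}
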
@@ -344,10 +356,10 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Tweight, Toutput> {
void ComputeOutputRangeForInt32(OpKernelContext* context,
float* min_output_value,
float* max_output_value) {
const float min_input = context->input(3).flat<float>()(0);
const float max_input = context->input(4).flat<float>()(0);
const float min_weight = context->input(5).flat<float>()(0);
const float max_weight = context->input(6).flat<float>()(0);
const float min_input = context->input(3).scalar<float>()();
const float max_input = context->input(4).scalar<float>()();
const float min_weight = context->input(5).scalar<float>()();
const float max_weight = context->input(6).scalar<float>()();
MklQuantizationRangeForMultiplication<quint8, qint8, qint32>(
min_input, max_input, min_weight, max_weight, min_output_value,
max_output_value);
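
`ComputeOutputRangeForInt32` delegates the range math to `MklQuantizationRangeForMultiplication`. As a hedged sketch of how such a range is commonly derived (the actual helper may differ in detail): the float value of one quantized step of the product is the product of one step of each operand, scaled by the int32 limits.

// Illustrative only -- not the actual MklQuantizationRangeForMultiplication.
// Assumes a quint8 input quantized over [min_a, max_a] and a qint8 weight
// quantized over [min_b, max_b]; the qint32 product range follows from the
// per-level step sizes.
#include <cstdint>
#include <limits>

void RangeForMultiplicationSketch(float min_a, float max_a, float min_b,
                                  float max_b, float* min_c, float* max_c) {
  const float a_step = (max_a - min_a) / 255.0f;  // quint8 spans 0..255
  const float b_step = (max_b - min_b) / 255.0f;  // qint8 spans -128..127
  const float c_step = a_step * b_step;           // one level of the product
  *min_c = c_step * static_cast<float>(std::numeric_limits<std::int32_t>::min());
  *max_c = c_step * static_cast<float>(std::numeric_limits<std::int32_t>::max());
}
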
@@ -361,6 +373,25 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Tweight, Toutput> {
params.dtypes.append(typeid(Tbias).name());
params.dtypes.append(typeid(Toutput).name());

// min-max values for input and weight should be scalar.
const Tensor& min_input_tensor = context->input(3);
const Tensor& max_input_tensor = context->input(4);
const Tensor& min_weight_tensor = context->input(5);
const Tensor& max_weight_tensor = context->input(6);

OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_input_tensor.shape()),
errors::InvalidArgument("`min_a` must be rank 0 but is rank ",
min_input_tensor.dims()));
OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_input_tensor.shape()),
errors::InvalidArgument("`max_a` must be rank 0 but is rank ",
max_input_tensor.dims()));
OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_weight_tensor.shape()),
errors::InvalidArgument("`min_b` must be rank 0 but is rank ",
min_weight_tensor.dims()));
OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_weight_tensor.shape()),
errors::InvalidArgument("`max_b` must be rank 0 but is rank ",
max_weight_tensor.dims()));

// When the output type is quint8, the output data is requantized into
// quint8. A post_op "output_scale" is added to do the conversion.
if (std::is_same<Toutput, quint8>::value ||
@@ -371,8 +402,21 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Tweight, Toutput> {
ComputeOutputRangeForInt32(context, &min_output_value, &max_output_value);
float scale_int32 =
std::max(std::abs(min_output_value), std::abs(max_output_value));
const float min_freezed_output = context->input(7).flat<float>()(0);
const float max_freezed_output = context->input(8).flat<float>()(0);
const Tensor& min_freezed_tensor = context->input(7);
const Tensor& max_freezed_tensor = context->input(8);
// min-max values of freezed output range should be scalar.
OP_REQUIRES(context,
TensorShapeUtils::IsScalar(min_freezed_tensor.shape()),
errors::InvalidArgument(
"`min_freezed_output` must be rank 0 but is rank ",
min_freezed_tensor.dims()));
OP_REQUIRES(context,
TensorShapeUtils::IsScalar(max_freezed_tensor.shape()),
errors::InvalidArgument(
"`max_freezed_output` must be rank 0 but is rank ",
max_freezed_tensor.dims()));
const float min_freezed_output = min_freezed_tensor.scalar<float>()();
const float max_freezed_output = max_freezed_tensor.scalar<float>()();
float scale_eightbit =
std::max(std::abs(min_freezed_output), std::abs(max_freezed_output));
float scale = 1.0;
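For the requantized path above (hedged, since the tail of that hunk is elided here): `scale_int32` and `scale_eightbit` capture the magnitudes of the int32-domain and requested eight-bit output ranges, and the "output_scale" mentioned in the comment is a oneDNN primitive attribute. A sketch of how a single output scale is typically attached with the oneDNN 2.x C++ API (illustrative plumbing, not this kernel's actual code path):

// Illustrative oneDNN (2.x API) usage, not taken from this kernel: attach one
// common output scale so the int32 accumulator is rescaled into the requested
// eight-bit output range when the primitive executes.
#include <vector>
#include "dnnl.hpp"

dnnl::primitive_attr MakeOutputScaleAttr(float scale) {
  dnnl::primitive_attr attr;
  // Mask 0 => a single scale applied to every output element.
  attr.set_output_scales(/*mask=*/0, std::vector<float>{scale});
  return attr;
}
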
80 changes: 40 additions & 40 deletions tensorflow/core/kernels/mkl/mkl_qmatmul_op_test.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if defined(INTEL_MKL) && defined(ENABLE_MKL)
#if defined(INTEL_MKL)
#define EIGEN_USE_THREADS

#include <functional>
@@ -64,10 +64,10 @@ TEST_F(QuantizedMatMulTest, Small_withBias) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {1, 2, 3, 4});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
@@ -116,10 +116,10 @@ TEST_F(QuantizedMatMulTest, Small_withNegBias) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {100, -200, 300, -400});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
@@ -178,10 +178,10 @@ TEST_F(QuantizedMatMulTest, Small_WithNegInp) {
AddInputFromArray<qint8>(TensorShape({3, 2}), {1, 4, 2, 5, 3, 6});
// Bias
AddInputFromArray<float>(TensorShape({2}), {10.0f, 20.0f});
AddInputFromArray<float>(TensorShape({1}), {-12.0f});
AddInputFromArray<float>(TensorShape({1}), {243.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {-12.0f});
AddInputFromArray<float>(TensorShape({}), {243.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});

TF_ASSERT_OK(RunOpKernel());
// First calculate C = A * B,
@@ -240,12 +240,12 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndReq) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {10, -20, 30, -40});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
@@ -308,12 +308,12 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndDeq) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {10, -20, 30, -40});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
@@ -375,10 +375,10 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndRelu) {
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<float>(TensorShape({4}),
{100.0f, -200.0f, 300.0f, -400.0f});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
@@ -431,12 +431,12 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndReluAndReq) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {10, -20, 30, -40});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
@@ -502,10 +502,10 @@ TEST_F(QuantizedMatMulTest, Small_withWeightCached) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {1, 2, 3, 4});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});

int64 start_time = Env::Default()->NowMicros();
TF_ASSERT_OK(RunOpKernel());
@@ -543,4 +543,4 @@ TEST_F(QuantizedMatMulTest, Small_withWeightCached) {

} // namespace tensorflow

#endif // INTEL_MKL && ENABLE_MKL
#endif // INTEL_MKL
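
The test updates above swap the rank-1 `TensorShape({1})` min/max inputs for rank-0 `TensorShape({})` tensors so they satisfy the new scalar checks. A standalone sketch of the distinction those checks rely on (hypothetical snippet, not part of this commit):

// Both shapes hold exactly one element, but only TensorShape({}) is rank 0
// and therefore passes TensorShapeUtils::IsScalar().
#include <cassert>
#include "tensorflow/core/framework/tensor_shape.h"

int main() {
  tensorflow::TensorShape scalar_shape({});   // rank 0
  tensorflow::TensorShape vector_shape({1});  // rank 1, one element
  assert(tensorflow::TensorShapeUtils::IsScalar(scalar_shape));
  assert(!tensorflow::TensorShapeUtils::IsScalar(vector_shape));
  assert(scalar_shape.num_elements() == 1);
  assert(vector_shape.num_elements() == 1);
  return 0;
}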
