[Intel MKL] support MKL Quantized Matmul With Bias and Requantize Op #33926
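
This change adds a fused QuantizedMatMulWithBiasAndRequantize op (plus its _Mkl variant): a quint8 matrix is multiplied by a qint8 matrix, a float or qint32 bias is added, and the int32 accumulator is requantized back to quint8 using the frozen output range. For reference only, here is a minimal sketch of that arithmetic on the qint32-bias path, mirroring the hand calculations in the new unit test; the function name and layout assumptions (row-major, no transpose) are illustrative and not part of the PR:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Illustrative reference (not part of the PR): quantized matmul with a
// qint32 bias, followed by requantization of the int32 accumulator to quint8.
std::vector<uint8_t> QuantizedMatMulBiasRequantizeRef(
    const std::vector<uint8_t>& a,     // m x k, quint8
    const std::vector<int8_t>& b,      // k x n, qint8
    const std::vector<int32_t>& bias,  // n,     qint32
    int m, int k, int n, float requant_scale) {
  std::vector<uint8_t> out(m * n);
  for (int i = 0; i < m; ++i) {
    for (int j = 0; j < n; ++j) {
      int32_t acc = bias[j];
      for (int p = 0; p < k; ++p) {
        acc += static_cast<int32_t>(a[i * k + p]) *
               static_cast<int32_t>(b[p * n + j]);
      }
      // Requantize; for the test below, requant_scale = 2^31 / 255 / 2^23 ~= 1.00392.
      float scaled = std::round(static_cast<float>(acc) * requant_scale);
      out[i * n + j] =
          static_cast<uint8_t>(std::min(255.0f, std::max(0.0f, scaled)));
    }
  }
  return out;
}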

8 changes: 8 additions & 0 deletions tensorflow/core/graph/mkl_layout_pass.cc
@@ -328,6 +328,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
"QuantizedMatMulWithBiasAndRelu";
csinfo_.quantized_matmul_with_bias_and_relu_and_requantize =
"QuantizedMatMulWithBiasAndReluAndRequantize";
csinfo_.quantized_matmul_with_bias_and_requantize =
"QuantizedMatMulWithBiasAndRequantize";
csinfo_.quantized_depthwise_conv2d = "QuantizedDepthwiseConv2D";
csinfo_.quantized_depthwise_conv2d_with_bias =
"QuantizedDepthwiseConv2DWithBias";
@@ -621,6 +623,10 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
mkl_op_registry::GetMklOpName(
csinfo_.quantized_matmul_with_bias_and_relu_and_requantize),
CopyAttrsQuantizedMatMulWithBias, AlwaysRewrite});
rinfo_.push_back({csinfo_.quantized_matmul_with_bias_and_requantize,
mkl_op_registry::GetMklOpName(
csinfo_.quantized_matmul_with_bias_and_requantize),
CopyAttrsQuantizedMatMulWithBias, AlwaysRewrite});
rinfo_.push_back(
{csinfo_.quantized_depthwise_conv2d,
mkl_op_registry::GetMklOpName(csinfo_.quantized_depthwise_conv2d),
@@ -955,6 +961,7 @@ rinfo_.push_back({csinfo_.tanh_grad,
string quantized_matmul_with_bias;
string quantized_matmul_with_bias_and_relu;
string quantized_matmul_with_bias_and_relu_and_requantize;
string quantized_matmul_with_bias_and_requantize;
string quantized_depthwise_conv2d;
string quantized_depthwise_conv2d_with_bias;
string quantized_depthwise_conv2d_with_bias_and_relu;
@@ -2218,6 +2225,7 @@ Status MklLayoutRewritePass::SetUpInputs(
"QuantizedConv2DWithBiasSumAndReluAndRequantize",
"QuantizedConv2DWithBiasSignedSumAndReluAndRequantize",
"QuantizedMatMulWithBias",
"QuantizedMatMulWithBiasAndRequantize",
"QuantizedMatMulWithBiasAndRelu",
"QuantizedMatMulWithBiasAndReluAndRequantize",
"QuantizedDepthwiseConv2D",
32 changes: 32 additions & 0 deletions tensorflow/core/kernels/mkl_qmatmul_op.cc
@@ -614,6 +614,17 @@ REGISTER_KERNEL_BUILDER(Name("QuantizedMatMulWithBiasAndReluAndRequantize")
.TypeConstraint<quint8>("Toutput"),
NoOp);

// Register NoOp kernel for QuantizedMatMulWithBiasAndRequantize
// to get a python interface. This kernel will be replaced by an MKL kernel
// during graph-optimization pass.
REGISTER_KERNEL_BUILDER(Name("QuantizedMatMulWithBiasAndRequantize")
.Device(DEVICE_CPU)
.TypeConstraint<quint8>("T1")
.TypeConstraint<qint8>("T2")
.TypeConstraint("Tbias", {DT_QINT32, DT_FLOAT})
.TypeConstraint<quint8>("Toutput"),
NoOp);

// Register a templatized implementation of _MklQuantizedMatMulWithBiasAndRelu.
REGISTER_KERNEL_BUILDER(
Name("_MklQuantizedMatMulWithBiasAndRelu")
@@ -644,6 +655,27 @@ REGISTER_KERNEL_BUILDER(
.Label(mkl_op_registry::kMklQuantizedOpLabel),
MklDnnQuantizedMatMulReluOp<CPUDevice, quint8, qint8, float, quint8>);

// Register a templatized implementation of
// _MklQuantizedMatMulWithBiasAndRequantize.
REGISTER_KERNEL_BUILDER(
Name("_MklQuantizedMatMulWithBiasAndRequantize")
.Device(DEVICE_CPU)
.TypeConstraint<quint8>("T1")
.TypeConstraint<qint8>("T2")
.TypeConstraint<qint32>("Tbias")
.TypeConstraint<quint8>("Toutput")
.Label(mkl_op_registry::kMklQuantizedOpLabel),
MklDnnQuantizedMatMulOp<CPUDevice, quint8, qint8, qint32, quint8>);
REGISTER_KERNEL_BUILDER(
Name("_MklQuantizedMatMulWithBiasAndRequantize")
.Device(DEVICE_CPU)
.TypeConstraint<quint8>("T1")
.TypeConstraint<qint8>("T2")
.TypeConstraint<float>("Tbias")
.TypeConstraint<quint8>("Toutput")
.Label(mkl_op_registry::kMklQuantizedOpLabel),
MklDnnQuantizedMatMulOp<CPUDevice, quint8, qint8, float, quint8>);

} // namespace tensorflow

#endif // INTEL_MKL
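
For orientation only: the NoOp registration above exists so the public op has a CPU kernel (and hence a Python interface), while the MKL layout pass rewrites the node to the MKL-labeled kernel registered in this file. A minimal sketch of the assumed name mapping, assuming mkl_op_registry::GetMklOpName simply prepends the "_Mkl" prefix as it is used in mkl_layout_pass.cc above:

#include <iostream>
#include <string>

int main() {
  const std::string public_op = "QuantizedMatMulWithBiasAndRequantize";
  // Assumed behavior of mkl_op_registry::GetMklOpName(): prepend "_Mkl".
  const std::string mkl_op = "_Mkl" + public_op;
  // The rewritten node also carries kMklQuantizedOpLabel ("QuantizedMklOp"),
  // so kernel lookup selects the MKL registrations above.
  std::cout << public_op << " -> " << mkl_op << std::endl;
  return 0;
}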
93 changes: 93 additions & 0 deletions tensorflow/core/kernels/mkl_qmatmul_op_test.cc
@@ -300,6 +300,99 @@ TEST_F(QuantizedMatMulTest, Small_WithNegInp) {
test::ExpectTensorEqual<qint32>(expected, output_quantized);
}

// Two small matrices A of type uint8 and B of type int8 are multiplied,
// an int32 bias is added to the result, and the output is requantized.
TEST_F(QuantizedMatMulTest, Small_withBiasAndReq) {
TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op",
"_MklQuantizedMatMulWithBiasAndRequantize")
.Input(FakeInput(DT_QUINT8))
.Input(FakeInput(DT_QINT8))
.Input(FakeInput(DT_QINT32))
.Input(FakeInput(DT_FLOAT))
.Input(FakeInput(DT_FLOAT))
.Input(FakeInput(DT_FLOAT))
.Input(FakeInput(DT_FLOAT))
.Input(FakeInput(DT_FLOAT))
.Input(FakeInput(DT_FLOAT))
.Input(FakeInput(DT_UINT8)) // MKL second tensor
.Input(FakeInput(DT_UINT8)) // MKL second tensor
.Input(FakeInput(DT_UINT8)) // MKL second tensor
.Input(FakeInput(DT_UINT8)) // MKL second tensor
.Input(FakeInput(DT_UINT8)) // MKL second tensor
.Input(FakeInput(DT_UINT8)) // MKL second tensor
.Input(FakeInput(DT_UINT8)) // MKL second tensor
.Input(FakeInput(DT_UINT8)) // MKL second tensor
.Input(FakeInput(DT_UINT8)) // MKL second tensor
.Attr("Toutput", DataTypeToEnum<quint8>::v())
.Attr("T", DataTypeToEnum<quint8>::v())
.Attr("_kernel", "QuantizedMklOp")
.Finalize(node_def()));
TF_ASSERT_OK(InitOp());
// A matrix is:
// | 1 | 2 | 3 |
// | 4 | 5 | 6 |
AddInputFromArray<quint8>(TensorShape({2, 3}), {1, 2, 3, 4, 5, 6});
// B matrix is:
// | 7 | 8 | 9 | 10 |
// | 11 | 12 | 13 | 14 |
// | 15 | 16 | 17 | 18 |
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {10, -20, 30, -40});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<uint8>(kDummyShape, kDummyTensor);
AddInputFromArray<uint8>(kDummyShape, kDummyTensor);
AddInputFromArray<uint8>(kDummyShape, kDummyTensor);
AddInputFromArray<uint8>(kDummyShape, kDummyTensor);
AddInputFromArray<uint8>(kDummyShape, kDummyTensor);
AddInputFromArray<uint8>(kDummyShape, kDummyTensor);
AddInputFromArray<uint8>(kDummyShape, kDummyTensor);
AddInputFromArray<uint8>(kDummyShape, kDummyTensor);
AddInputFromArray<uint8>(kDummyShape, kDummyTensor);

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
// (1 * 7) + (2 * 11) + (3 * 15) = 74
// (1 * 8) + (2 * 12) + (3 * 16) = 80
// (1 * 9) + (2 * 13) + (3 * 17) = 86
// (1 * 10) + (2 * 14) + (3 * 18) = 92
// (4 * 7) + (5 * 11) + (6 * 15) = 173
// (4 * 8) + (5 * 12) + (6 * 16) = 188
// (4 * 9) + (5 * 13) + (6 * 17) = 203
// (4 * 10) + (5 * 14) + (6 * 18) = 218
// After Bias addition
// 74+10=84, 80-20=60, 86+30=116, 92-40=52,
// 173+10=183, 188-20=168, 203+30=233, 218-40=178
// After Requantize
// requantscale = scale_int32 / scale_eightbit / static_cast<float>(1 << 23)
// requantscale = 2^31/255/2^23 ~= 1.00392
// 84 * 1.00392 ~= 84.329 ~= 84
// 60 * 1.00392 ~= 60.235 ~= 60
// 116 * 1.00392 ~= 116.454 ~= 116
// 52 * 1.00392 ~= 52.203 ~= 52
// 183 * 1.00392 ~= 183.717 ~= 184
// 168 * 1.00392 ~= 168.658 ~= 169
// 233 * 1.00392 ~= 233.913 ~= 234
// 178 * 1.00392 ~= 178.698 ~= 179

Tensor expected(allocator(), DT_QUINT8, TensorShape({2, 4}));
test::FillValues<quint8>(&expected, {84, 60, 116, 52, 184, 169, 234, 179});

const Tensor& output = *GetOutput(0);
const Tensor& mkl_shape_tensor = *GetOutput(3);
ConvMklToTF conv_comp;
Tensor output_quantized;
conv_comp.ConvertMKL2TF<quint8>(DT_QUINT8, output, mkl_shape_tensor,
output_quantized);

test::ExpectTensorEqual<quint8>(expected, output_quantized);
}

// Two small matrices A of type uint8 and B of type int8 are multiplied,
// a float bias is added to the result, and then ReLU is applied to the result.
TEST_F(QuantizedMatMulTest, Small_withBiasAndRelu) {
50 changes: 50 additions & 0 deletions tensorflow/core/ops/mkl_nn_ops.cc
@@ -974,6 +974,56 @@ REGISTER_OP("_MklQuantizedMatMulWithBiasAndReluAndRequantize")
return Status::OK();
});

REGISTER_OP("_MklQuantizedMatMulWithBiasAndRequantize")
.Input("a: T1")
.Input("b: T2")
.Input("bias: Tbias")
.Input("min_a: float")
.Input("max_a: float")
.Input("min_b: float")
.Input("max_b: float")
.Input("min_freezed_output: float")
.Input("max_freezed_output: float")
.Input("mkl_a: uint8") // MKL second tensor
.Input("mkl_b: uint8") // MKL second tensor
.Input("mkl_bias: uint8") // MKL second tensor
.Input("mkl_min_a: uint8") // MKL second tensor
.Input("mkl_max_a: uint8") // MKL second tensor
.Input("mkl_min_b: uint8") // MKL second tensor
.Input("mkl_max_b: uint8") // MKL second tensor
.Input("mkl_min_freezed_output: uint8") // MKL second tensor
.Input("mkl_max_freezed_output: uint8") // MKL second tensor
.Output("out: Toutput")
.Output("min_out: float")
.Output("max_out: float")
.Output("mkl_out: uint8") // MKL second tensor
.Output("mkl_min_out: uint8") // MKL second tensor
.Output("mkl_max_out: uint8") // MKL second tensor
.Attr("T1: quantizedtype")
.Attr("T2: quantizedtype")
.Attr("Tbias: {float, qint32}")
.Attr("T: quantizedtype") // Additional attr "T" for MklToTf conversion
.Attr("Toutput: quantizedtype = DT_QUINT8")
.Attr("transpose_a: bool = false")
.Attr("transpose_b: bool = false")
.Attr("input_quant_mode: {'MIN_FIRST', 'SCALED'} = 'MIN_FIRST'")
.Attr("is_weight_const: bool = true")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::MatMulShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(6), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(7), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(8), 0, &unused));

c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
});

REGISTER_OP("_MklQuantizedDepthwiseConv2D")
.Input("input: Tinput")
.Input("filter: Tfilter")
47 changes: 41 additions & 6 deletions tensorflow/core/ops/nn_ops.cc
@@ -1281,9 +1281,9 @@ Status TopKShapeFn(InferenceContext* c) {
DimensionHandle last_dim = c->Dim(input, -1);
if (c->ValueKnown(last_dim) && c->ValueKnown(k_dim) &&
c->Value(last_dim) < c->Value(k_dim)) {
return errors::InvalidArgument(
"input must have last dimension >= k = ", c->Value(k_dim), " but is ",
c->Value(last_dim));
return errors::InvalidArgument("input must have last dimension >= k = ",
c->Value(k_dim), " but is ",
c->Value(last_dim));
}

// Replace last_dim with k_dim.
@@ -1337,9 +1337,9 @@ REGISTER_OP("NthElement")
DimensionHandle last_dim = c->Dim(input, -1);
if (c->ValueKnown(last_dim) && c->ValueKnown(n_dim) &&
c->Value(last_dim) <= c->Value(n_dim)) {
return errors::InvalidArgument(
"Input must have last dimension > n = ", c->Value(n_dim),
" but is ", c->Value(last_dim));
return errors::InvalidArgument("Input must have last dimension > n = ",
c->Value(n_dim), " but is ",
c->Value(last_dim));
}

// Reduce last_dim for output tensor
@@ -3184,6 +3184,41 @@ REGISTER_OP("QuantizedMatMulWithBiasAndReluAndRequantize")
return Status::OK();
});

REGISTER_OP("QuantizedMatMulWithBiasAndRequantize")
.Input("a: T1")
.Input("b: T2")
.Input("bias: Tbias")
.Input("min_a: float")
.Input("max_a: float")
.Input("min_b: float")
.Input("max_b: float")
.Input("min_freezed_output: float")
.Input("max_freezed_output: float")
.Output("out: Toutput")
.Output("min_out: float")
.Output("max_out: float")
.Attr("T1: quantizedtype")
.Attr("T2: quantizedtype")
.Attr("Tbias: {float, qint32}")
.Attr("Toutput: quantizedtype = DT_QUINT8")
.Attr("transpose_a: bool = false")
.Attr("transpose_b: bool = false")
.Attr("input_quant_mode: {'MIN_FIRST', 'SCALED'} = 'MIN_FIRST'")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::MatMulShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(6), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(7), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(8), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
});

REGISTER_OP("QuantizedConv2DPerChannel")
.Input("input: Tinput")
.Input("filter: Tfilter")