[INTEL MKL] Add MKL-DNN quantized Matmul op with some fusions - Part1. #26909

Merged
6 changes: 4 additions & 2 deletions tensorflow/core/BUILD
@@ -1586,6 +1586,7 @@ cc_library(
"//tensorflow/core/kernels:quantized_ops",
"//tensorflow/core/kernels/neon:neon_depthwise_conv_op",
]) + if_mkl([
"//tensorflow/core/kernels:mkl_aggregate_ops",
"//tensorflow/core/kernels:mkl_concat_op",
"//tensorflow/core/kernels:mkl_dequantize_op",
"//tensorflow/core/kernels:mkl_conv_op",
@@ -1594,16 +1595,16 @@ cc_library(
"//tensorflow/core/kernels:mkl_identity_op",
"//tensorflow/core/kernels:mkl_input_conversion_op",
"//tensorflow/core/kernels:mkl_lrn_op",
"//tensorflow/core/kernels:mkl_requantize_ops",
"//tensorflow/core/kernels:mkl_pooling_ops",
"//tensorflow/core/kernels:mkl_qmatmul_op",
"//tensorflow/core/kernels:mkl_requantize_ops",
"//tensorflow/core/kernels:mkl_quantize_op",
"//tensorflow/core/kernels:mkl_relu_op",
"//tensorflow/core/kernels:mkl_reshape_op",
"//tensorflow/core/kernels:mkl_slice_op",
"//tensorflow/core/kernels:mkl_softmax_op",
"//tensorflow/core/kernels:mkl_transpose_op",
"//tensorflow/core/kernels:mkl_tfconv_op",
"//tensorflow/core/kernels:mkl_aggregate_ops",
]) + if_cuda([
"//tensorflow/core/grappler/optimizers:gpu_swapping_kernels",
"//tensorflow/core/grappler/optimizers:gpu_swapping_ops",
@@ -4414,6 +4415,7 @@ tf_cc_test_mkl(
"//tensorflow/core/kernels:mkl_input_conversion_op",
"//tensorflow/core/kernels:mkl_lrn_op",
"//tensorflow/core/kernels:mkl_pooling_ops",
"//tensorflow/core/kernels:mkl_qmatmul_op",
"//tensorflow/core/kernels:mkl_quantize_op",
"//tensorflow/core/kernels:mkl_relu_op",
"//tensorflow/core/kernels:mkl_reshape_op",
84 changes: 84 additions & 0 deletions tensorflow/core/api_def/base_api/api_def_QuantizedMatMulWithBias.pbtxt
@@ -0,0 +1,84 @@
op {
graph_op_name: "QuantizedMatMulWithBias"
visibility: HIDDEN
in_arg {
name: "a"
description: <<END
A matrix to be multiplied. Must be a two-dimensional tensor of type `quint8`.
END
}
in_arg {
name: "b"
description: <<END
A matrix to be multiplied. Must be a two-dimensional tensor of type `qint8`.
END
}
in_arg {
name: "bias"
description: <<END
A 1D bias tensor with size matching the inner dimension of `b` (after being
transposed if `transpose_b` is non-zero).
END
}
in_arg {
name: "min_a"
description: <<END
The float value that the lowest quantized `a` value represents.
END
}
in_arg {
name: "max_a"
description: <<END
The float value that the highest quantized `a` value represents.
END
}
in_arg {
name: "min_b"
description: <<END
The float value that the lowest quantized `b` value represents.
END
}
in_arg {
name: "max_b"
description: <<END
The float value that the highest quantized `b` value represents.
END
}
out_arg {
name: "min_out"
description: <<END
The float value that the lowest quantized output value represents.
END
}
out_arg {
name: "max_out"
description: <<END
The float value that the highest quantized output value represents.
END
}
attr {
name: "transpose_a"
description: "If true, `a` is transposed before multiplication."
}
attr {
name: "transpose_b"
description: "If true, `b` is transposed before multiplication."
}
attr {
name: "input_quant_mode"
description: <<END
Input data quantization mode. Either MIN_FIRST (default) or SCALED.
END
}
summary: <<END
Performs a quantized matrix multiplication of `a` by the matrix `b` with bias
add.
END
description: <<END
The inputs must be two-dimensional matrices and a 1D bias vector. The inner
dimension of `a` (after being transposed if `transpose_a` is non-zero) must
match the outer dimension of `b` (after being transposed if `transpose_b` is
non-zero). The bias values are then broadcast-added to the matrix
multiplication result; the bias size must match the inner dimension of `b`.
END
}
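For reference, the following is a minimal NumPy sketch of the documented semantics, assuming MIN_FIRST dequantization for the `quint8` input `a` and SCALED dequantization for the `qint8` weight `b`. The helper name and the float32 bias are illustrative only; the actual MKL-DNN kernel computes in the integer domain rather than via explicit dequantization.

```python
import numpy as np

def quantized_matmul_with_bias_ref(a_q, b_q, bias_f,
                                   min_a, max_a, min_b, max_b):
    # Dequantize `a` (quint8, MIN_FIRST): float = min_a + q * range / 255.
    a_f = min_a + a_q.astype(np.float32) * (max_a - min_a) / 255.0
    # Dequantize `b` (qint8, SCALED): float = q * max(|min_b|, |max_b|) / 127.
    b_f = b_q.astype(np.float32) * max(abs(min_b), abs(max_b)) / 127.0
    # Matrix multiplication followed by a broadcast bias add.
    return a_f @ b_f + bias_f

a_q = np.random.randint(0, 256, size=(2, 3)).astype(np.uint8)
b_q = np.random.randint(-128, 128, size=(3, 4)).astype(np.int8)
bias = np.zeros(4, dtype=np.float32)  # size matches the inner dimension of `b`
out = quantized_matmul_with_bias_ref(a_q, b_q, bias, 0.0, 1.0, -1.0, 1.0)
```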
85 changes: 85 additions & 0 deletions tensorflow/core/api_def/base_api/api_def_QuantizedMatMulWithBiasAndRelu.pbtxt
@@ -0,0 +1,85 @@
op {
graph_op_name: "QuantizedMatMulWithBiasAndRelu"
visibility: HIDDEN
in_arg {
name: "a"
description: <<END
A matrix to be multiplied. Must be a two-dimensional tensor of type `quint8`.
END
}
in_arg {
name: "b"
description: <<END
A matrix to be multiplied. Must be a two-dimensional tensor of type `qint8`.
END
}
in_arg {
name: "bias"
description: <<END
A 1D bias tensor with size matching the inner dimension of `b` (after being
transposed if `transpose_b` is non-zero).
END
}
in_arg {
name: "min_a"
description: <<END
The float value that the lowest quantized `a` value represents.
END
}
in_arg {
name: "max_a"
description: <<END
The float value that the highest quantized `a` value represents.
END
}
in_arg {
name: "min_b"
description: <<END
The float value that the lowest quantized `b` value represents.
END
}
in_arg {
name: "max_b"
description: <<END
The float value that the highest quantized `b` value represents.
END
}
out_arg {
name: "min_out"
description: <<END
The float value that the lowest quantized output value represents.
END
}
out_arg {
name: "max_out"
description: <<END
The float value that the highest quantized output value represents.
END
}
attr {
name: "transpose_a"
description: "If true, `a` is transposed before multiplication."
}
attr {
name: "transpose_b"
description: "If true, `b` is transposed before multiplication."
}
attr {
name: "input_quant_mode"
description: <<END
Input data quantization mode. Either MIN_FIRST (default) or SCALED.
END
}
summary: <<END
Performs a quantized matrix multiplication of `a` by the matrix `b` with bias
add and relu fusion.
END
description: <<END
The inputs must be two-dimensional matrices and a 1D bias vector. The inner
dimension of `a` (after being transposed if `transpose_a` is non-zero) must
match the outer dimension of `b` (after being transposed if `transpose_b` is
non-zero). The bias values are then broadcast-added to the matrix
multiplication result; the bias size must match the inner dimension of `b`.
A relu activation is then applied, yielding a non-negative result.
END
}
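Continuing the sketch above, the relu fusion only appends a rectification step to the same matmul-plus-bias result (again a hedged float reference, not the kernel's integer-domain implementation):

```python
# `out` is the matmul + bias result from the previous sketch.
out_relu = np.maximum(out, 0.0)
```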
99 changes: 99 additions & 0 deletions tensorflow/core/api_def/base_api/api_def_QuantizedMatMulWithBiasAndReluAndRequantize.pbtxt
@@ -0,0 +1,99 @@
op {
graph_op_name: "QuantizedMatMulWithBiasAndReluAndRequantize"
visibility: HIDDEN
in_arg {
name: "a"
description: <<END
A matrix to be multiplied. Must be a two-dimensional tensor of type `quint8`.
END
}
in_arg {
name: "b"
description: <<END
A matrix to be multiplied. Must be a two-dimensional tensor of type `qint8`.
END
}
in_arg {
name: "bias"
description: <<END
A 1D bias tensor with size matching the inner dimension of `b` (after being
transposed if `transpose_b` is non-zero).
END
}
in_arg {
name: "min_a"
description: <<END
The float value that the lowest quantized `a` value represents.
END
}
in_arg {
name: "max_a"
description: <<END
The float value that the highest quantized `a` value represents.
END
}
in_arg {
name: "min_b"
description: <<END
The float value that the lowest quantized `b` value represents.
END
}
in_arg {
name: "max_b"
description: <<END
The float value that the highest quantized `b` value represents.
END
}
in_arg {
name: "min_freezed_output"
description: <<END
The float value that the lowest quantized output value after requantization
represents.
END
}
in_arg {
name: "min_freezed_output"
description: <<END
The float value that the highest quantized output value after requantization
represents.
END
}

out_arg {
name: "min_out"
description: <<END
The float value that the lowest quantized output value represents.
END
}
out_arg {
name: "max_out"
description: <<END
The float value that the highest quantized output value represents.
END
}
attr {
name: "transpose_a"
description: "If true, `a` is transposed before multiplication."
}
attr {
name: "transpose_b"
description: "If true, `b` is transposed before multiplication."
}
attr {
name: "input_quant_mode"
description: <<END
Input data quantization mode. Either MIN_FIRST (default) or SCALED.
END
}
summary: <<END
Performs a quantized matrix multiplication of `a` by the matrix `b` with bias
add, relu, and requantize fusion.
END
description: <<END
The inputs must be two-dimensional matrices and a 1D bias vector. The inner
dimension of `a` (after being transposed if `transpose_a` is non-zero) must
match the outer dimension of `b` (after being transposed if `transpose_b` is
non-zero). The bias values are then broadcast-added to the matrix
multiplication result; the bias size must match the inner dimension of `b`.
A relu activation is then applied, and the result is requantized to produce
the final `quint8` output.
END
}
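The requantize fusion additionally maps the result back into `quint8` using the precomputed ("freezed") output range carried by `min_freezed_output`/`max_freezed_output`. Below is a hedged sketch of that final step, reusing NumPy and `out` from the first sketch; the real kernel requantizes the `qint32` accumulator directly.

```python
def requantize_to_quint8(out_f, min_freezed_output, max_freezed_output):
    # Affine-map the float result into [0, 255] over the frozen range,
    # then round, clip, and cast to quint8 (modeled here as uint8).
    scale = 255.0 / (max_freezed_output - min_freezed_output)
    q = np.round((out_f - min_freezed_output) * scale)
    return np.clip(q, 0, 255).astype(np.uint8)

out_q = requantize_to_quint8(np.maximum(out, 0.0), 0.0, 6.0)
```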
30 changes: 17 additions & 13 deletions tensorflow/core/api_def/excluded_ops.cc
@@ -19,21 +19,25 @@ namespace tensorflow {

 const std::unordered_set<std::string>* GetExcludedOps() {
   static std::unordered_set<std::string>* excluded_ops =
-      new std::unordered_set<std::string>(
-          {"BigQueryReader", "GenerateBigQueryReaderPartitions",
-           "GcsConfigureBlockCache", "GcsConfigureCredentials",
+      new std::unordered_set<std::string>({
+          "BigQueryReader", "GenerateBigQueryReaderPartitions",
+          "GcsConfigureBlockCache", "GcsConfigureCredentials",
 #ifdef INTEL_MKL
-           // QuantizedFusedOps for Intel CPU
-           "QuantizedConcatV2", "QuantizedConv2DAndRequantize",
-           "QuantizedConv2DWithBias", "QuantizedConv2DWithBiasAndRequantize",
-           "QuantizedConv2DAndRelu", "QuantizedConv2DAndReluAndRequantize",
-           "QuantizedConv2DWithBiasAndRelu",
-           "QuantizedConv2DWithBiasAndReluAndRequantize",
-           "QuantizedConv2DWithBiasSumAndRelu",
-           "QuantizedConv2DWithBiasSumAndReluAndRequantize",
-           "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize"
+          // QuantizedFusedOps for Intel CPU
+          "QuantizedConcatV2", "QuantizedConv2DAndRequantize",
+          "QuantizedConv2DWithBias", "QuantizedConv2DWithBiasAndRequantize",
+          "QuantizedConv2DAndRelu", "QuantizedConv2DAndReluAndRequantize",
+          "QuantizedConv2DWithBiasAndRelu",
+          "QuantizedConv2DWithBiasAndReluAndRequantize",
+          "QuantizedConv2DWithBiasSumAndRelu",
+          "QuantizedConv2DWithBiasSumAndReluAndRequantize",
+          "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize",
+          "QuantizedMatMulWithBias",
+          "QuantizedMatMulWithBiasAndRelu",
+          "QuantizedMatMulWithBiasAndReluAndRequantize",
+
 #endif  // INTEL_MKL
-          });
+      });
   return excluded_ops;
 }
 }  // namespace tensorflow
21 changes: 21 additions & 0 deletions tensorflow/core/kernels/BUILD
@@ -7221,6 +7221,27 @@ tf_cc_test(
     ],
 )
 
+tf_mkl_kernel_library(
+    name = "mkl_qmatmul_op",
+    srcs = ["mkl_qmatmul_op.cc"],
+    hdrs = [
+        "mkl_quantized_conv_ops.h",
+        "no_op.h",
+    ],
+    deps = [
+        ":bounds_check",
+        ":matmul_op",
+        ":ops_util",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:math_ops_op_lib",
+        "//tensorflow/core:mkl_nn_ops_op_lib",
+        "//tensorflow/core:nn_ops_op_lib",
+    ] + mkl_deps(),
+)
+
 tf_mkl_kernel_library(
     name = "mkl_conv_op",
     hdrs = [