[INTEL MKL] Add MKL-DNN quantized Matmul op with some fusions - Part1. #26909

Merged
6 changes: 4 additions & 2 deletions tensorflow/core/BUILD
@@ -1586,6 +1586,7 @@ cc_library(
"//tensorflow/core/kernels:quantized_ops",
"//tensorflow/core/kernels/neon:neon_depthwise_conv_op",
]) + if_mkl([
"//tensorflow/core/kernels:mkl_aggregate_ops",
"//tensorflow/core/kernels:mkl_concat_op",
"//tensorflow/core/kernels:mkl_dequantize_op",
"//tensorflow/core/kernels:mkl_conv_op",
@@ -1594,16 +1595,16 @@ cc_library(
"//tensorflow/core/kernels:mkl_identity_op",
"//tensorflow/core/kernels:mkl_input_conversion_op",
"//tensorflow/core/kernels:mkl_lrn_op",
"//tensorflow/core/kernels:mkl_requantize_ops",
"//tensorflow/core/kernels:mkl_pooling_ops",
"//tensorflow/core/kernels:mkl_qmatmul_op",
"//tensorflow/core/kernels:mkl_requantize_ops",
"//tensorflow/core/kernels:mkl_quantize_op",
"//tensorflow/core/kernels:mkl_relu_op",
"//tensorflow/core/kernels:mkl_reshape_op",
"//tensorflow/core/kernels:mkl_slice_op",
"//tensorflow/core/kernels:mkl_softmax_op",
"//tensorflow/core/kernels:mkl_transpose_op",
"//tensorflow/core/kernels:mkl_tfconv_op",
"//tensorflow/core/kernels:mkl_aggregate_ops",
]) + if_cuda([
"//tensorflow/core/grappler/optimizers:gpu_swapping_kernels",
"//tensorflow/core/grappler/optimizers:gpu_swapping_ops",
@@ -4414,6 +4415,7 @@ tf_cc_test_mkl(
"//tensorflow/core/kernels:mkl_input_conversion_op",
"//tensorflow/core/kernels:mkl_lrn_op",
"//tensorflow/core/kernels:mkl_pooling_ops",
"//tensorflow/core/kernels:mkl_qmatmul_op",
"//tensorflow/core/kernels:mkl_quantize_op",
"//tensorflow/core/kernels:mkl_relu_op",
"//tensorflow/core/kernels:mkl_reshape_op",
84 changes: 84 additions & 0 deletions tensorflow/core/api_def/base_api/api_def_QuantizedMatMulWithBias.pbtxt
@@ -0,0 +1,84 @@
op {
graph_op_name: "QuantizedMatMulWithBias"
visibility: HIDDEN
in_arg {
name: "a"
description: <<END
A matrix to be multiplied. Must be a two-dimensional tensor of type `quint8`.
END
}
in_arg {
name: "b"
description: <<END
A matrix to be multiplied. Must be a two-dimensional tensor of type `qint8`.
END
}
in_arg {
name: "bias"
description: <<END
A 1D bias tensor with size matching the inner dimension of `b` (after being
transposed if `transpose_b` is non-zero).
END
}
in_arg {
name: "min_a"
description: <<END
The float value that the lowest quantized `a` value represents.
END
}
in_arg {
name: "max_a"
description: <<END
The float value that the highest quantized `a` value represents.
END
}
in_arg {
name: "min_b"
description: <<END
The float value that the lowest quantized `b` value represents.
END
}
in_arg {
name: "max_b"
description: <<END
The float value that the highest quantized `b` value represents.
END
}
out_arg {
name: "min_out"
description: <<END
The float value that the lowest quantized output value represents.
END
}
out_arg {
name: "max_out"
description: <<END
The float value that the highest quantized output value represents.
END
}
attr {
name: "transpose_a"
description: "If true, `a` is transposed before multiplication."
}
attr {
name: "transpose_b"
description: "If true, `b` is transposed before multiplication."
}
attr {
name: "input_quant_mode"
description: <<END
Input data quantization mode. Either MIN_FIRST (default) or SCALED.
END
}
summary: <<END
Performs a quantized matrix multiplication of `a` by the matrix `b` with bias
add.
END
description: <<END
The inputs must be two-dimensional matrices and a 1D bias vector. The inner
dimension of `a` (after being transposed if `transpose_a` is non-zero) must
match the outer dimension of `b` (after being transposed if `transpose_b` is
non-zero). The bias values are then broadcast-added to the matrix
multiplication result; the bias size must match the inner dimension of `b`.
END
}
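For reference, the following is a minimal NumPy sketch of the documented semantics, assuming MIN_FIRST dequantization for the `quint8` input `a` and SCALED dequantization for the `qint8` weight `b`. The helper name and the float32 bias are illustrative only; the actual MKL-DNN kernel computes in the integer domain rather than via explicit dequantization.

```python
import numpy as np

def quantized_matmul_with_bias_ref(a_q, b_q, bias_f,
                                   min_a, max_a, min_b, max_b):
    # Dequantize `a` (quint8, MIN_FIRST): float = min_a + q * range / 255.
    a_f = min_a + a_q.astype(np.float32) * (max_a - min_a) / 255.0
    # Dequantize `b` (qint8, SCALED): float = q * max(|min_b|, |max_b|) / 127.
    b_f = b_q.astype(np.float32) * max(abs(min_b), abs(max_b)) / 127.0
    # Matrix multiplication followed by a broadcast bias add.
    return a_f @ b_f + bias_f

a_q = np.random.randint(0, 256, size=(2, 3)).astype(np.uint8)
b_q = np.random.randint(-128, 128, size=(3, 4)).astype(np.int8)
bias = np.zeros(4, dtype=np.float32)  # size matches the inner dimension of `b`
out = quantized_matmul_with_bias_ref(a_q, b_q, bias, 0.0, 1.0, -1.0, 1.0)
```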
85 changes: 85 additions & 0 deletions tensorflow/core/api_def/base_api/api_def_QuantizedMatMulWithBiasAndRelu.pbtxt
@@ -0,0 +1,85 @@
op {
graph_op_name: "QuantizedMatMulWithBiasAndRelu"
visibility: HIDDEN
in_arg {
name: "a"
description: <<END
A matrix to be multiplied. Must be a two-dimensional tensor of type `quint8`.
END
}
in_arg {
name: "b"
description: <<END
A matrix to be multiplied. Must be a two-dimensional tensor of type `qint8`.
END
}
in_arg {
name: "bias"
description: <<END
A 1D bias tensor with size matching the inner dimension of `b` (after being
transposed if `transpose_b` is non-zero).
END
}
in_arg {
name: "min_a"
description: <<END
The float value that the lowest quantized `a` value represents.
END
}
in_arg {
name: "max_a"
description: <<END
The float value that the highest quantized `a` value represents.
END
}
in_arg {
name: "min_b"
description: <<END
The float value that the lowest quantized `b` value represents.
END
}
in_arg {
name: "max_b"
description: <<END
The float value that the highest quantized `b` value represents.
END
}
out_arg {
name: "min_out"
description: <<END
The float value that the lowest quantized output value represents.
END
}
out_arg {
name: "max_out"
description: <<END
The float value that the highest quantized output value represents.
END
}
attr {
name: "transpose_a"
description: "If true, `a` is transposed before multiplication."
}
attr {
name: "transpose_b"
description: "If true, `b` is transposed before multiplication."
}
attr {
name: "input_quant_mode"
description: <<END
Input data quantization mode. Either MIN_FIRST (default) or SCALED.
END
}
summary: <<END
Performs a quantized matrix multiplication of `a` by the matrix `b` with bias
add and relu fusion.
END
description: <<END
The inputs must be two-dimensional matrices and a 1D bias vector. The inner
dimension of `a` (after being transposed if `transpose_a` is non-zero) must
match the outer dimension of `b` (after being transposed if `transpose_b` is
non-zero). The bias values are then broadcast-added to the matrix
multiplication result; the bias size must match the inner dimension of `b`.
A relu activation is then applied, yielding a non-negative result.
END
}
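Continuing the sketch above, the relu fusion only appends a rectification step to the same matmul-plus-bias result (again a hedged float reference, not the kernel's integer-domain implementation):

```python
# `out` is the matmul + bias result from the previous sketch.
out_relu = np.maximum(out, 0.0)
```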
99 changes: 99 additions & 0 deletions tensorflow/core/api_def/base_api/api_def_QuantizedMatMulWithBiasAndReluAndRequantize.pbtxt
@@ -0,0 +1,99 @@
op {
graph_op_name: "QuantizedMatMulWithBiasAndReluAndRequantize"
visibility: HIDDEN
in_arg {
name: "a"
description: <<END
A matrix to be multiplied. Must be a two-dimensional tensor of type `quint8`.
END
}
in_arg {
name: "b"
description: <<END
A matrix to be multiplied. Must be a two-dimensional tensor of type `qint8`.
END
}
in_arg {
name: "bias"
description: <<END
A 1D bias tensor with size matching the inner dimension of `b` (after being
transposed if `transpose_b` is non-zero).
END
}
in_arg {
name: "min_a"
description: <<END
The float value that the lowest quantized `a` value represents.
END
}
in_arg {
name: "max_a"
description: <<END
The float value that the highest quantized `a` value represents.
END
}
in_arg {
name: "min_b"
description: <<END
The float value that the lowest quantized `b` value represents.
END
}
in_arg {
name: "max_b"
description: <<END
The float value that the highest quantized `b` value represents.
END
}
in_arg {
name: "min_freezed_output"
description: <<END
The float value that the lowest quantized output value after requantization
represents.
END
}
in_arg {
name: "min_freezed_output"
description: <<END
The float value that the highest quantized output value after requantization
represents.
END
}

out_arg {
name: "min_out"
description: <<END
The float value that the lowest quantized output value represents.
END
}
out_arg {
name: "max_out"
description: <<END
The float value that the highest quantized output value represents.
END
}
attr {
name: "transpose_a"
description: "If true, `a` is transposed before multiplication."
}
attr {
name: "transpose_b"
description: "If true, `b` is transposed before multiplication."
}
attr {
name: "input_quant_mode"
description: <<END
Input data quantization mode. Either MIN_FIRST (default) or SCALED.
END
}
summary: <<END
Performs a quantized matrix multiplication of `a` by the matrix `b` with bias
add, relu, and requantize fusion.
END
description: <<END
The inputs must be two-dimensional matrices and a 1D bias vector. The inner
dimension of `a` (after being transposed if `transpose_a` is non-zero) must
match the outer dimension of `b` (after being transposed if `transpose_b` is
non-zero). The bias values are then broadcast-added to the matrix
multiplication result; the bias size must match the inner dimension of `b`.
A relu activation is then applied, and the result is requantized to produce
the final `quint8` output.
END
}
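The requantize fusion additionally maps the result back into `quint8` using the precomputed ("freezed") output range carried by `min_freezed_output`/`max_freezed_output`. Below is a hedged sketch of that final step, reusing NumPy and `out` from the first sketch; the real kernel requantizes the `qint32` accumulator directly.

```python
def requantize_to_quint8(out_f, min_freezed_output, max_freezed_output):
    # Affine-map the float result into [0, 255] over the frozen range,
    # then round, clip, and cast to quint8 (modeled here as uint8).
    scale = 255.0 / (max_freezed_output - min_freezed_output)
    q = np.round((out_f - min_freezed_output) * scale)
    return np.clip(q, 0, 255).astype(np.uint8)

out_q = requantize_to_quint8(np.maximum(out, 0.0), 0.0, 6.0)
```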
30 changes: 17 additions & 13 deletions tensorflow/core/api_def/excluded_ops.cc
@@ -19,21 +19,25 @@ namespace tensorflow {

 const std::unordered_set<std::string>* GetExcludedOps() {
   static std::unordered_set<std::string>* excluded_ops =
-      new std::unordered_set<std::string>(
-          {"BigQueryReader", "GenerateBigQueryReaderPartitions",
-           "GcsConfigureBlockCache", "GcsConfigureCredentials",
+      new std::unordered_set<std::string>({
+          "BigQueryReader", "GenerateBigQueryReaderPartitions",
+          "GcsConfigureBlockCache", "GcsConfigureCredentials",
 #ifdef INTEL_MKL
-           // QuantizedFusedOps for Intel CPU
-           "QuantizedConcatV2", "QuantizedConv2DAndRequantize",
-           "QuantizedConv2DWithBias", "QuantizedConv2DWithBiasAndRequantize",
-           "QuantizedConv2DAndRelu", "QuantizedConv2DAndReluAndRequantize",
-           "QuantizedConv2DWithBiasAndRelu",
-           "QuantizedConv2DWithBiasAndReluAndRequantize",
-           "QuantizedConv2DWithBiasSumAndRelu",
-           "QuantizedConv2DWithBiasSumAndReluAndRequantize",
-           "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize"
+          // QuantizedFusedOps for Intel CPU
+          "QuantizedConcatV2", "QuantizedConv2DAndRequantize",
+          "QuantizedConv2DWithBias", "QuantizedConv2DWithBiasAndRequantize",
+          "QuantizedConv2DAndRelu", "QuantizedConv2DAndReluAndRequantize",
+          "QuantizedConv2DWithBiasAndRelu",
+          "QuantizedConv2DWithBiasAndReluAndRequantize",
+          "QuantizedConv2DWithBiasSumAndRelu",
+          "QuantizedConv2DWithBiasSumAndReluAndRequantize",
+          "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize",
+          "QuantizedMatMulWithBias",
+          "QuantizedMatMulWithBiasAndRelu",
+          "QuantizedMatMulWithBiasAndReluAndRequantize",
+
 #endif  // INTEL_MKL
-          });
+      });
   return excluded_ops;
 }
 }  // namespace tensorflow
21 changes: 21 additions & 0 deletions tensorflow/core/kernels/BUILD
@@ -7221,6 +7221,27 @@ tf_cc_test(
     ],
 )
 
+tf_mkl_kernel_library(
+    name = "mkl_qmatmul_op",
+    srcs = ["mkl_qmatmul_op.cc"],
+    hdrs = [
+        "mkl_quantized_conv_ops.h",
+        "no_op.h",
+    ],
+    deps = [
+        ":bounds_check",
+        ":matmul_op",
+        ":ops_util",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:math_ops_op_lib",
+        "//tensorflow/core:mkl_nn_ops_op_lib",
+        "//tensorflow/core:nn_ops_op_lib",
+    ] + mkl_deps(),
+)
+
 tf_mkl_kernel_library(
     name = "mkl_conv_op",
     hdrs = [