[oneDNN][Bug-fix] Add check for min-max tensors in quantized matmul OpKernel. #59437

Merged
Changes from all commits
60 changes: 52 additions & 8 deletions tensorflow/core/kernels/mkl/mkl_qmatmul_op.cc
@@ -317,8 +317,20 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Tweight, Toutput> {
// This is the case where the inner-product and requantization are fused.
// "min_freezed_output" and "max_freezed_output" are the requested range
// for the output.
min_output_value = context->input(7).flat<float>()(0);
max_output_value = context->input(8).flat<float>()(0);
const Tensor& min_freezed_tensor = context->input(7);
const Tensor& max_freezed_tensor = context->input(8);
OP_REQUIRES(context,
TensorShapeUtils::IsScalar(min_freezed_tensor.shape()),
errors::InvalidArgument(
"`min_freezed_output` must be rank 0 but is rank ",
min_freezed_tensor.dims()));
OP_REQUIRES(context,
TensorShapeUtils::IsScalar(max_freezed_tensor.shape()),
errors::InvalidArgument(
"`max_freezed_output` must be rank 0 but is rank ",
max_freezed_tensor.dims()));
min_output_value = min_freezed_tensor.scalar<float>()();
max_output_value = max_freezed_tensor.scalar<float>()();
} else {
ComputeOutputRangeForInt32(context, &min_output_value, &max_output_value);
}
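
For context, here is a sketch of how the new checks surface to a caller. It is illustrative only and not part of this PR: it assumes the QuantizedMatMulTest fixture and node setup from mkl_qmatmul_op_test.cc further down, and feeds a hypothetical rank-1 tensor where the scalar `min_freezed_output` is expected, which should now yield an InvalidArgument status instead of undefined behavior.

// Illustrative sketch, not part of the diff. Inside a hypothetical test body
// that otherwise sets up the op exactly like the tests below:
AddInputFromArray<float>(TensorShape({2}), {0.0f, 0.0f});  // rank 1, not rank 0
Status s = RunOpKernel();
EXPECT_FALSE(s.ok());
EXPECT_TRUE(absl::StrContains(s.ToString(),
                              "`min_freezed_output` must be rank 0"));
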
@@ -344,10 +356,10 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Tweight, Toutput> {
void ComputeOutputRangeForInt32(OpKernelContext* context,
float* min_output_value,
float* max_output_value) {
const float min_input = context->input(3).flat<float>()(0);
const float max_input = context->input(4).flat<float>()(0);
const float min_weight = context->input(5).flat<float>()(0);
const float max_weight = context->input(6).flat<float>()(0);
const float min_input = context->input(3).scalar<float>()();
const float max_input = context->input(4).scalar<float>()();
const float min_weight = context->input(5).scalar<float>()();
const float max_weight = context->input(6).scalar<float>()();
MklQuantizationRangeForMultiplication<quint8, qint8, qint32>(
min_input, max_input, min_weight, max_weight, min_output_value,
max_output_value);
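
For orientation, `MklQuantizationRangeForMultiplication<quint8, qint8, qint32>` derives the float range representable by the int32 accumulator from the input and weight ranges. The sketch below is an approximation of that idea, not the library helper itself; the actual function in TensorFlow's quantization utilities may differ in detail.

#include <cstdint>
#include <limits>

// Rough sketch: one quantized level of the quint8 input spans
// (max_input - min_input) / 255 in float, one level of the qint8 weight spans
// (max_weight - min_weight) / 255, so one level of the qint32 accumulator
// spans their product, and the full int32 range scales accordingly.
void ApproxRangeForMultiplication(float min_input, float max_input,
                                  float min_weight, float max_weight,
                                  float* min_output, float* max_output) {
  const float input_step = (max_input - min_input) / 255.0f;     // quint8 levels
  const float weight_step = (max_weight - min_weight) / 255.0f;  // qint8 levels
  const float output_step = input_step * weight_step;
  *min_output = output_step * std::numeric_limits<int32_t>::lowest();
  *max_output = output_step * std::numeric_limits<int32_t>::max();
}
// With the ranges used in the tests below (input [0, 255], weight [-127, 127]):
// input_step = 1.0 and weight_step ≈ 0.996, so the int32 accumulator covers
// roughly ±2.1e9 in float units.
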
@@ -361,6 +373,25 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Tweight, Toutput> {
params.dtypes.append(typeid(Tbias).name());
params.dtypes.append(typeid(Toutput).name());

// min-max values for input and weight should be scalar.
const Tensor& min_input_tensor = context->input(3);
const Tensor& max_input_tensor = context->input(4);
const Tensor& min_weight_tensor = context->input(5);
const Tensor& max_weight_tensor = context->input(6);

OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_input_tensor.shape()),
errors::InvalidArgument("`min_a` must be rank 0 but is rank ",
min_input_tensor.dims()));
OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_input_tensor.shape()),
errors::InvalidArgument("`max_a` must be rank 0 but is rank ",
max_input_tensor.dims()));
OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_weight_tensor.shape()),
errors::InvalidArgument("`min_b` must be rank 0 but is rank ",
min_weight_tensor.dims()));
OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_weight_tensor.shape()),
errors::InvalidArgument("`max_b` must be rank 0 but is rank ",
max_weight_tensor.dims()));

// When the output type is quint8, the output data is requantized into
// quint8. A post_op "output_scale" is added to do the conversion.
if (std::is_same<Toutput, quint8>::value ||
@@ -371,8 +402,21 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Tweight, Toutput> {
ComputeOutputRangeForInt32(context, &min_output_value, &max_output_value);
float scale_int32 =
std::max(std::abs(min_output_value), std::abs(max_output_value));
const float min_freezed_output = context->input(7).flat<float>()(0);
const float max_freezed_output = context->input(8).flat<float>()(0);
const Tensor& min_freezed_tensor = context->input(7);
const Tensor& max_freezed_tensor = context->input(8);
// min-max values of freezed output range should be scalar.
OP_REQUIRES(context,
TensorShapeUtils::IsScalar(min_freezed_tensor.shape()),
errors::InvalidArgument(
"`min_freezed_output` must be rank 0 but is rank ",
min_freezed_tensor.dims()));
OP_REQUIRES(context,
TensorShapeUtils::IsScalar(max_freezed_tensor.shape()),
errors::InvalidArgument(
"`max_freezed_output` must be rank 0 but is rank ",
max_freezed_tensor.dims()));
const float min_freezed_output = min_freezed_tensor.scalar<float>()();
const float max_freezed_output = max_freezed_tensor.scalar<float>()();
float scale_eightbit =
std::max(std::abs(min_freezed_output), std::abs(max_freezed_output));
float scale = 1.0;
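
The remainder of this hunk (not shown) derives the oneDNN `output_scale` post-op mentioned above from these two magnitudes. A hedged worked example, using the ranges that appear in the requantization tests below (input [0, 255], weight [-127, 127], requested output [0, 255]):

// Worked example, illustrative only:
//   ComputeOutputRangeForInt32 yields min/max_output_value of roughly
//   -2.1e9 / +2.1e9 (see the sketch after the previous hunk), so
//     scale_int32    = max(|min_output_value|, |max_output_value|) ≈ 2.1e9
//     scale_eightbit = max(|0.0f|, |255.0f|)                       = 255.0f
//   The lines below this hunk combine these two magnitudes into the
//   output_scale that maps the int32 accumulator into the requested
//   quint8 output range.
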
80 changes: 40 additions & 40 deletions tensorflow/core/kernels/mkl/mkl_qmatmul_op_test.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if defined(INTEL_MKL) && defined(ENABLE_MKL)
#if defined(INTEL_MKL)
#define EIGEN_USE_THREADS

#include <functional>
@@ -64,10 +64,10 @@ TEST_F(QuantizedMatMulTest, Small_withBias) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {1, 2, 3, 4});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
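
The hand calculation itself lies outside this hunk. For orientation, here is a worked version under one stated assumption: the hunk shows the qint8 weight B (3x4) and the qint32 bias, and the quint8 input A is assumed to be the 2x3 matrix [[1, 2, 3], [4, 5, 6]] (its AddInputFromArray call sits above the visible lines). The plain integer product A*B plus bias then works out to:

// Assumed A = [[1, 2, 3], [4, 5, 6]] -- not visible in this hunk.
// Row 0: [1 2 3] * B = [ 74  80  86  92]; + bias {1, 2, 3, 4} = [ 75  82  89  96]
// Row 1: [4 5 6] * B = [173 188 203 218]; + bias {1, 2, 3, 4} = [174 190 206 222]
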
@@ -116,10 +116,10 @@ TEST_F(QuantizedMatMulTest, Small_withNegBias) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {100, -200, 300, -400});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
@@ -178,10 +178,10 @@ TEST_F(QuantizedMatMulTest, Small_WithNegInp) {
AddInputFromArray<qint8>(TensorShape({3, 2}), {1, 4, 2, 5, 3, 6});
// Bias
AddInputFromArray<float>(TensorShape({2}), {10.0f, 20.0f});
AddInputFromArray<float>(TensorShape({1}), {-12.0f});
AddInputFromArray<float>(TensorShape({1}), {243.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {-12.0f});
AddInputFromArray<float>(TensorShape({}), {243.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});

TF_ASSERT_OK(RunOpKernel());
// First calculate C = A * B,
@@ -240,12 +240,12 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndReq) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {10, -20, 30, -40});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
@@ -308,12 +308,12 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndDeq) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {10, -20, 30, -40});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
@@ -375,10 +375,10 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndRelu) {
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<float>(TensorShape({4}),
{100.0f, -200.0f, 300.0f, -400.0f});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
@@ -431,12 +431,12 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndReluAndReq) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {10, -20, 30, -40});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});

TF_ASSERT_OK(RunOpKernel());
// Here are the results we expect, from hand calculations:
@@ -502,10 +502,10 @@ TEST_F(QuantizedMatMulTest, Small_withWeightCached) {
AddInputFromArray<qint8>(TensorShape({3, 4}),
{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18});
AddInputFromArray<qint32>(TensorShape({4}), {1, 2, 3, 4});
AddInputFromArray<float>(TensorShape({1}), {0});
AddInputFromArray<float>(TensorShape({1}), {255.0f});
AddInputFromArray<float>(TensorShape({1}), {-127.0f});
AddInputFromArray<float>(TensorShape({1}), {127.0f});
AddInputFromArray<float>(TensorShape({}), {0});
AddInputFromArray<float>(TensorShape({}), {255.0f});
AddInputFromArray<float>(TensorShape({}), {-127.0f});
AddInputFromArray<float>(TensorShape({}), {127.0f});

int64 start_time = Env::Default()->NowMicros();
TF_ASSERT_OK(RunOpKernel());
@@ -543,4 +543,4 @@

} // namespace tensorflow

#endif // INTEL_MKL && ENABLE_MKL
#endif // INTEL_MKL