tensorflow · andrehentz · Apr 12, 2018 · Feb 19, 2018 · Feb 26, 2018 · Feb 26, 2018
diff --git a/tensorflow/contrib/lite/kernels/div.cc b/tensorflow/contrib/lite/kernels/div.cc
@@ -37,59 +37,130 @@ constexpr int kInputTensor1 = 0;
 constexpr int kInputTensor2 = 1;
 constexpr int kOutputTensor = 0;
 
+struct OpData {
+  bool requires_broadcast;
+};
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  auto* data = new OpData;
+  data->requires_broadcast = false;
+  return data;
+}
+
+void Free(TfLiteContext* context, void* buffer) {
+  delete reinterpret_cast<OpData*>(buffer);
+}
+
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  OpData* data = reinterpret_cast<OpData*>(node->user_data);
+
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
   TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
   TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
 
-  TF_LITE_ENSURE_EQ(context, NumDimensions(input1), NumDimensions(input2));
-  for (int i = 0; i < NumDimensions(input1); ++i) {
-    TF_LITE_ENSURE_EQ(context, SizeOfDimension(input1, i),
-                      SizeOfDimension(input2, i));
-  }
+  TF_LITE_ENSURE_EQ(context, input1->type, input2->type);
+  output->type = input2->type;
+
+  data->requires_broadcast = !HaveSameShapes(input1, input2);
 
-  TF_LITE_ENSURE_EQ(context, input1->type, output->type);
-  TF_LITE_ENSURE_EQ(context, input2->type, output->type);
+  TfLiteIntArray* output_size = nullptr;
+  if (data->requires_broadcast) {
+    TF_LITE_ENSURE_OK(context, CalculateShapeForBroadcast(
+                                   context, input1, input2, &output_size));
+  } else {
+    output_size = TfLiteIntArrayCopy(input1->dims);
+  }
 
-  TfLiteIntArray* output_size = TfLiteIntArrayCopy(input1->dims);
   return context->ResizeTensor(context, output, output_size);
 }
 
 template <KernelType kernel_type>
-void EvalDivFloat(TfLiteContext* context, TfLiteNode* node,
-                  TfLiteDivParams* params, TfLiteTensor* input1,
-                  TfLiteTensor* input2, TfLiteTensor* output) {
+void EvalFloat(TfLiteContext* context, TfLiteNode* node,
+               TfLiteDivParams* params, const OpData* data,
+               TfLiteTensor* input1, TfLiteTensor* input2,
+               TfLiteTensor* output) {
   float output_activation_min, output_activation_max;
   CalculateActivationRangeFloat(params->activation, &output_activation_min,
                                 &output_activation_max);
-#define TF_LITE_DIV(type)                                        \
-  type::Div(GetTensorData<float>(input1), GetTensorDims(input1), \
-            GetTensorData<float>(input2), GetTensorDims(input2), \
-            output_activation_min, output_activation_max,        \
-            GetTensorData<float>(output), GetTensorDims(output))
+#define TF_LITE_DIV(type, opname)                                   \
+  type::opname(GetTensorData<float>(input1), GetTensorDims(input1), \
+               GetTensorData<float>(input2), GetTensorDims(input2), \
+               output_activation_min, output_activation_max,        \
+               GetTensorData<float>(output), GetTensorDims(output))
+  if (kernel_type == kReference) {
+    if (data->requires_broadcast) {
+      TF_LITE_DIV(reference_ops, BroadcastDiv);
+    } else {
+      TF_LITE_DIV(reference_ops, Div);
+    }
+  } else {
+    if (data->requires_broadcast) {
+      TF_LITE_DIV(optimized_ops, BroadcastDiv);
+    } else {
+      TF_LITE_DIV(optimized_ops, Div);
+    }
+  }
+#undef TF_LITE_DIV
+}
+
+template <KernelType kernel_type>
+void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
+                   TfLiteDivParams* params, const OpData* data,
+                   TfLiteTensor* input1, TfLiteTensor* input2,
+                   TfLiteTensor* output) {
+  auto input1_offset = -input1->params.zero_point;
+  auto input2_offset = -input2->params.zero_point;
+  auto output_offset = output->params.zero_point;
+
+  int32_t output_multiplier;
+  int output_shift;
+
+  double real_multiplier =
+      input1->params.scale * input2->params.scale / output->params.scale;
+  QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier,
+                                   &output_shift);
+
+  int32 output_activation_min, output_activation_max;
+  CalculateActivationRangeUint8(params->activation, output,
+                                &output_activation_min, &output_activation_max);
+
+#define TF_LITE_DIV(type, opname)                                      \
+  type::opname(GetTensorData<uint8_t>(input1), GetTensorDims(input1),  \
+               input1_offset, GetTensorData<uint8_t>(input2),          \
+               GetTensorDims(input2), input2_offset, output_offset,    \
+               output_multiplier, output_shift, output_activation_min, \
+               output_activation_max, GetTensorData<uint8_t>(output),  \
+               GetTensorDims(output));
+  // The quantized version of Div doesn't support activations, so we
+  // always use BroadcastDiv.
   if (kernel_type == kReference) {
-    TF_LITE_DIV(reference_ops);
+    TF_LITE_DIV(reference_ops, BroadcastDiv);
   } else {
-    TF_LITE_DIV(optimized_ops);
+    TF_LITE_DIV(optimized_ops, BroadcastDiv);
   }
 #undef TF_LITE_DIV
 }
 
 template <KernelType kernel_type>
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   auto* params = reinterpret_cast<TfLiteDivParams*>(node->builtin_data);
+  OpData* data = reinterpret_cast<OpData*>(node->user_data);
 
   TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
   TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
 
   if (output->type == kTfLiteFloat32) {
-    EvalDivFloat<kernel_type>(context, node, params, input1, input2, output);
+    EvalFloat<kernel_type>(context, node, params, data, input1, input2, output);
+  } else if (output->type == kTfLiteUInt8) {
+    EvalQuantized<kernel_type>(context, node, params, data, input1, input2,
+                               output);
   } else {
-    context->ReportError(context, "Inputs and outputs not all float types.");
+    context->ReportError(context,
+                         "Div only supports FLOAT32 and quantized UINT8 now.");
     return kTfLiteError;
   }
 
@@ -99,19 +170,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace div
 
 TfLiteRegistration* Register_DIV_REF() {
-  static TfLiteRegistration r = {nullptr, nullptr, div::Prepare,
+  static TfLiteRegistration r = {div::Init, div::Free, div::Prepare,
                                  div::Eval<div::kReference>};
   return &r;
 }
 
 TfLiteRegistration* Register_DIV_GENERIC_OPT() {
-  static TfLiteRegistration r = {nullptr, nullptr, div::Prepare,
+  static TfLiteRegistration r = {div::Init, div::Free, div::Prepare,
                                  div::Eval<div::kGenericOptimized>};
   return &r;
 }
 
 TfLiteRegistration* Register_DIV_NEON_OPT() {
-  static TfLiteRegistration r = {nullptr, nullptr, div::Prepare,
+  static TfLiteRegistration r = {div::Init, div::Free, div::Prepare,
                                  div::Eval<div::kNeonOptimized>};
   return &r;
 }

diff --git a/tensorflow/contrib/lite/kernels/div_test.cc b/tensorflow/contrib/lite/kernels/div_test.cc
@@ -0,0 +1,174 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+#include "tensorflow/contrib/lite/model.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class BaseDivOpModel : public SingleOpModel {
+ public:
+  BaseDivOpModel(const TensorData& input1, const TensorData& input2,
+                 const TensorData& output,
+                 ActivationFunctionType activation_type) {
+    input1_ = AddInput(input1);
+    input2_ = AddInput(input2);
+    output_ = AddOutput(output);
+    SetBuiltinOp(BuiltinOperator_DIV, BuiltinOptions_DivOptions,
+                 CreateDivOptions(builder_, activation_type).Union());
+    BuildInterpreter({GetShape(input1_), GetShape(input2_)});
+  }
+
+  int input1() { return input1_; }
+  int input2() { return input2_; }
+
+ protected:
+  int input1_;
+  int input2_;
+  int output_;
+};
+
+class FloatDivOpModel : public BaseDivOpModel {
+ public:
+  using BaseDivOpModel::BaseDivOpModel;
+
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+};
+
+// For quantized Div, the error shouldn't exceed (2*step + step^2).
+// The param min=-1.0 & max=1.0 is used in the following tests.
+// The tolerance value is ~0.0157.
+const float kQuantizedStep = 2.0 / 255.0;
+const float kQuantizedTolerance =
+    2.0 * kQuantizedStep + kQuantizedStep * kQuantizedStep;
+
+class QuantizedDivOpModel : public BaseDivOpModel {
+ public:
+  using BaseDivOpModel::BaseDivOpModel;
+
+  std::vector<float> GetDequantizedOutput() {
+    return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
+                               GetScale(output_), GetZeroPoint(output_));
+  }
+};
+
+TEST(FloatDivOpTest, NoActivation) {
+  FloatDivOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}},
+                    {TensorType_FLOAT32, {1, 2, 2, 1}},
+                    {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
+  m.PopulateTensor<float>(m.input1(), {-0.2, 0.2, -1.2, 0.8});
+  m.PopulateTensor<float>(m.input2(), {0.5, 0.2, -1.5, 0.5});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(),
+              ElementsAreArray(ArrayFloatNear({-0.4, 1.0, 0.8, 1.6})));
+}
+
+TEST(FloatDivOpTest, ActivationRELU_N1_TO_1) {
+  FloatDivOpModel m(
+      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
+      {TensorType_FLOAT32, {}}, ActivationFunctionType_RELU_N1_TO_1);
+  m.PopulateTensor<float>(m.input1(), {-0.2, 0.2, -1.2, 0.8});
+  m.PopulateTensor<float>(m.input2(), {0.1, 0.2, -1.5, 0.5});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(),
+              ElementsAreArray(ArrayFloatNear({-1.0, 1.0, 0.8, 1.0})));
+}
+
+TEST(FloatDivOpTest, VariousInputShapes) {
+  std::vector<std::initializer_list<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    FloatDivOpModel m({TensorType_FLOAT32, test_shapes[i]},
+                      {TensorType_FLOAT32, test_shapes[i]},
+                      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<float>(m.input1(), {-2.0, 0.2, 0.3, 0.8, 1.1, -2.0});
+    m.PopulateTensor<float>(m.input2(), {0.1, 0.2, 0.6, 0.5, -1.1, -0.1});
+    m.Invoke();
+    EXPECT_THAT(
+        m.GetOutput(),
+        ElementsAreArray(ArrayFloatNear({-20.0, 1.0, 0.5, 1.6, -1.0, 20.0})))
+        << "With shape number " << i;
+  }
+}
+
+TEST(FloatDivOpTest, WithBroadcast) {
+  std::vector<std::initializer_list<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    FloatDivOpModel m({TensorType_FLOAT32, test_shapes[i]},
+                      {TensorType_FLOAT32, {}},  // always a scalar
+                      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<float>(m.input1(), {-0.2, 0.2, 0.07, 0.08, 0.11, -0.123});
+    m.PopulateTensor<float>(m.input2(), {0.1});
+    m.Invoke();
+    EXPECT_THAT(
+        m.GetOutput(),
+        ElementsAreArray(ArrayFloatNear({-2.0, 2.0, 0.7, 0.8, 1.1, -1.23})))
+        << "With shape number " << i;
+  }
+}
+
+TEST(QuantizedDivOpTest, NoActivation) {
+  QuantizedDivOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
+                        {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
+                        {TensorType_UINT8, {}, -1.0, 1.0},
+                        ActivationFunctionType_NONE);
+  m.QuantizeAndPopulate<uint8_t>(m.input1(), {-0.6, 0.2, 0.9, -0.7});
+  m.QuantizeAndPopulate<uint8_t>(m.input2(), {0.8, 0.4, 0.9, -0.8});
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput(),
+              ElementsAreArray(ArrayFloatNear({-0.75, 0.5, 1.0, 0.875},
+                                              kQuantizedTolerance)));
+}
+
+// for quantized Div, the error shouldn't exceed 2*step
+float GetTolerance(int min, int max) {
+  float kQuantizedStep = (max - min) / 255.0;
+  float kQuantizedTolerance = 2.0 * kQuantizedStep;
+  return kQuantizedTolerance;
+}
+
+TEST(QuantizedDivOpTest, WithBroadcast) {
+  float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
+  std::vector<std::initializer_list<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    QuantizedDivOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0},
+                          {TensorType_UINT8, {}, -3.0, 3.0},  // always a scalar
+                          {TensorType_UINT8, {}, -3.0, 3.0},
+                          ActivationFunctionType_NONE);
+    m.QuantizeAndPopulate<uint8_t>(m.input1(), {-0.2, 0.2, 0.07, 0.08, 0.11, -0.123});
+    m.QuantizeAndPopulate<uint8_t>(m.input2(), {0.1});
+    m.Invoke();
+    EXPECT_THAT(m.GetDequantizedOutput(),
+                ElementsAreArray(ArrayFloatNear(
+                    {-2.0, 2.0, 0.7, 0.8, 1.1, -1.23}, kQuantizedTolerance)))
+        << "With shape number " << i;
+  }
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}