Skip to content

Commit bbd388b

Browse files
committed
Updates based on discussion with Nat.
1 parent 0a469be commit bbd388b

File tree

6 files changed

+156
-171
lines changed

6 files changed

+156
-171
lines changed

tensorflow/lite/micro/kernels/BUILD

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,9 @@ cc_library(
4747

4848
cc_library(
4949
name = "fully_connected",
50-
srcs = select({
50+
srcs = [
51+
"fully_connected_common.cc",
52+
] + select({
5153
"//conditions:default": [
5254
"fully_connected.cc",
5355
],

tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc

Lines changed: 46 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -13,24 +13,24 @@ See the License for the specific language governing permissions and
1313
limitations under the License.
1414
==============================================================================*/
1515

16-
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
16+
#include "tensorflow/lite/micro/kernels/fully_connected.h"
1717

1818
#include "CMSIS/NN/Include/arm_nnfunctions.h"
1919
#include "tensorflow/lite/c/builtin_op_data.h"
2020
#include "tensorflow/lite/c/common.h"
2121
#include "tensorflow/lite/kernels/internal/common.h"
2222
#include "tensorflow/lite/kernels/internal/quantization_util.h"
23+
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
2324
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
2425
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
2526
#include "tensorflow/lite/kernels/kernel_util.h"
26-
#include "tensorflow/lite/micro/kernels/fully_connected.h"
2727
#include "tensorflow/lite/micro/kernels/kernel_util.h"
2828

2929
namespace tflite {
3030
namespace {
3131

3232
struct OpData {
33-
OpDataFullyConnectedReference reference_op_data;
33+
OpDataFullyConnected reference_op_data;
3434

3535
// Index to buffer for optimizations if applicable.
3636
int buffer_idx;
@@ -49,9 +49,9 @@ TfLiteStatus CalculateOpData(TfLiteContext* context,
4949
OpData* data) {
5050
// Set buffer index to a reset value
5151
data->buffer_idx = -1;
52-
return CalculateOpDataFullyConnectedReference(context, activation, data_type,
53-
input, filter, bias, output,
54-
&(data->reference_op_data));
52+
return CalculateOpDataFullyConnected(context, activation, data_type, input,
53+
filter, bias, output,
54+
&(data->reference_op_data));
5555
}
5656

5757
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
@@ -177,8 +177,16 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
177177
tflite::micro::GetTensorData<int8_t>(output)),
178178
ARM_MATH_SUCCESS);
179179
} else {
180-
return EvalQuantizedInt8FullyConnectedReference(
181-
context, node, data.reference_op_data, input, filter, bias, output);
180+
tflite::reference_integer_ops::FullyConnected(
181+
FullyConnectedParamsQuantized(data.reference_op_data),
182+
tflite::micro::GetTensorShape(input),
183+
tflite::micro::GetTensorData<int8_t>(input),
184+
tflite::micro::GetTensorShape(filter),
185+
tflite::micro::GetTensorData<int8_t>(filter),
186+
tflite::micro::GetTensorShape(bias),
187+
tflite::micro::GetTensorData<int32_t>(bias),
188+
tflite::micro::GetTensorShape(output),
189+
tflite::micro::GetTensorData<int8_t>(output));
182190
}
183191
return kTfLiteOk;
184192
}
@@ -202,21 +210,41 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
202210

203211
// Checks in Prepare ensure input, output and filter types are all the same.
204212
switch (input->type) {
205-
case kTfLiteFloat32:
206-
return EvalFloatFullyConnectedReference(context, node, params->activation,
207-
input, filter, bias, output);
208-
case kTfLiteInt8:
213+
case kTfLiteFloat32: {
214+
tflite::reference_ops::FullyConnected(
215+
FullyConnectedParamsFloat(params->activation),
216+
tflite::micro::GetTensorShape(input),
217+
tflite::micro::GetTensorData<float>(input),
218+
tflite::micro::GetTensorShape(filter),
219+
tflite::micro::GetTensorData<float>(filter),
220+
tflite::micro::GetTensorShape(bias),
221+
tflite::micro::GetTensorData<float>(bias),
222+
tflite::micro::GetTensorShape(output),
223+
tflite::micro::GetTensorData<float>(output));
224+
break;
225+
}
226+
case kTfLiteInt8: {
209227
return EvalQuantizedInt8(context, node, data, input, filter, bias,
210228
output);
211-
212-
case kTfLiteUInt8:
213-
return EvalQuantizedFullyConnectedReference(
214-
context, node, data.reference_op_data, input, filter, bias, output);
215-
216-
default:
229+
}
230+
case kTfLiteUInt8: {
231+
tflite::reference_ops::FullyConnected(
232+
FullyConnectedParamsQuantized(data.reference_op_data),
233+
tflite::micro::GetTensorShape(input),
234+
tflite::micro::GetTensorData<uint8_t>(input),
235+
tflite::micro::GetTensorShape(filter),
236+
tflite::micro::GetTensorData<uint8_t>(filter),
237+
tflite::micro::GetTensorShape(bias),
238+
tflite::micro::GetTensorData<int32_t>(bias),
239+
tflite::micro::GetTensorShape(output),
240+
tflite::micro::GetTensorData<uint8_t>(output));
241+
break;
242+
}
243+
default: {
217244
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
218245
TfLiteTypeGetName(input->type), input->type);
219246
return kTfLiteError;
247+
}
220248
}
221249
return kTfLiteOk;
222250
}

tensorflow/lite/micro/kernels/fully_connected.cc

Lines changed: 56 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -13,26 +13,32 @@ See the License for the specific language governing permissions and
1313
limitations under the License.
1414
==============================================================================*/
1515

16-
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
16+
#include "tensorflow/lite/micro/kernels/fully_connected.h"
1717

1818
#include "tensorflow/lite/c/builtin_op_data.h"
1919
#include "tensorflow/lite/c/common.h"
2020
#include "tensorflow/lite/kernels/internal/common.h"
2121
#include "tensorflow/lite/kernels/internal/quantization_util.h"
22+
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
2223
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
2324
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
2425
#include "tensorflow/lite/kernels/kernel_util.h"
25-
#include "tensorflow/lite/micro/kernels/fully_connected.h"
2626
#include "tensorflow/lite/micro/kernels/kernel_util.h"
2727

2828
namespace tflite {
2929
namespace {
3030

31+
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
32+
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
33+
return context->AllocatePersistentBuffer(context,
34+
sizeof(OpDataFullyConnected));
35+
}
36+
3137
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
3238
TFLITE_DCHECK(node->user_data != nullptr);
3339
TFLITE_DCHECK(node->builtin_data != nullptr);
3440

35-
auto* data = static_cast<OpDataFullyConnectedReference*>(node->user_data);
41+
auto* data = static_cast<OpDataFullyConnected*>(node->user_data);
3642
const auto params =
3743
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
3844

@@ -51,9 +57,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
5157
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
5258
"Hybrid models are not supported on TFLite Micro.");
5359

54-
return CalculateOpDataFullyConnectedReference(context, params->activation,
55-
input->type, input, filter,
56-
bias, output, data);
60+
return CalculateOpDataFullyConnected(context, params->activation, input->type,
61+
input, filter, bias, output, data);
5762
}
5863

5964
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
@@ -72,33 +77,64 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
7277

7378
TFLITE_DCHECK(node->user_data != nullptr);
7479
const auto& data =
75-
*(static_cast<const OpDataFullyConnectedReference*>(node->user_data));
80+
*(static_cast<const OpDataFullyConnected*>(node->user_data));
7681

7782
// Checks in Prepare ensure input, output and filter types are all the same.
7883
switch (input->type) {
79-
case kTfLiteFloat32:
80-
return EvalFloatFullyConnectedReference(context, node, params->activation,
81-
input, filter, bias, output);
82-
case kTfLiteInt8:
83-
return EvalQuantizedInt8FullyConnectedReference(
84-
context, node, data, input, filter, bias, output);
85-
86-
case kTfLiteUInt8:
87-
return EvalQuantizedFullyConnectedReference(context, node, data, input,
88-
filter, bias, output);
89-
90-
default:
84+
case kTfLiteFloat32: {
85+
tflite::reference_ops::FullyConnected(
86+
FullyConnectedParamsFloat(params->activation),
87+
tflite::micro::GetTensorShape(input),
88+
tflite::micro::GetTensorData<float>(input),
89+
tflite::micro::GetTensorShape(filter),
90+
tflite::micro::GetTensorData<float>(filter),
91+
tflite::micro::GetTensorShape(bias),
92+
tflite::micro::GetTensorData<float>(bias),
93+
tflite::micro::GetTensorShape(output),
94+
tflite::micro::GetTensorData<float>(output));
95+
break;
96+
}
97+
98+
case kTfLiteInt8: {
99+
tflite::reference_integer_ops::FullyConnected(
100+
FullyConnectedParamsQuantized(data),
101+
tflite::micro::GetTensorShape(input),
102+
tflite::micro::GetTensorData<int8_t>(input),
103+
tflite::micro::GetTensorShape(filter),
104+
tflite::micro::GetTensorData<int8_t>(filter),
105+
tflite::micro::GetTensorShape(bias),
106+
tflite::micro::GetTensorData<int32_t>(bias),
107+
tflite::micro::GetTensorShape(output),
108+
tflite::micro::GetTensorData<int8_t>(output));
109+
break;
110+
}
111+
112+
case kTfLiteUInt8: {
113+
tflite::reference_ops::FullyConnected(
114+
FullyConnectedParamsQuantized(data),
115+
tflite::micro::GetTensorShape(input),
116+
tflite::micro::GetTensorData<uint8_t>(input),
117+
tflite::micro::GetTensorShape(filter),
118+
tflite::micro::GetTensorData<uint8_t>(filter),
119+
tflite::micro::GetTensorShape(bias),
120+
tflite::micro::GetTensorData<int32_t>(bias),
121+
tflite::micro::GetTensorShape(output),
122+
tflite::micro::GetTensorData<uint8_t>(output));
123+
break;
124+
}
125+
default: {
91126
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
92127
TfLiteTypeGetName(input->type), input->type);
93128
return kTfLiteError;
129+
}
94130
}
95131
return kTfLiteOk;
96132
}
97133

98134
} // namespace
99135

100136
TfLiteRegistration Register_FULLY_CONNECTED() {
101-
return {/*init=*/InitFullyConnectedReference,
137+
return {/*init=*/Init,
102138
/*free=*/nullptr,
103139
/*prepare=*/Prepare,
104140
/*invoke=*/Eval,

tensorflow/lite/micro/kernels/fully_connected.h

Lines changed: 13 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ limitations under the License.
2323

2424
namespace tflite {
2525

26-
struct OpDataFullyConnectedReference {
26+
struct OpDataFullyConnected {
2727
// The scaling factor from input to output (aka the 'real multiplier') can
2828
// be represented as a fixed point multiplier plus a left shift.
2929
int32_t output_multiplier;
@@ -38,52 +38,27 @@ struct OpDataFullyConnectedReference {
3838
int32_t input_zero_point;
3939
int32_t filter_zero_point;
4040
int32_t output_zero_point;
41-
42-
// Returns a FullyConnectedParams struct with all the parameters needed for a
43-
// quantized fully connected computation.
44-
FullyConnectedParams ToQuantizedParams() const {
45-
FullyConnectedParams op_params;
46-
op_params.input_offset = -input_zero_point;
47-
op_params.weights_offset = -filter_zero_point;
48-
op_params.output_offset = output_zero_point;
49-
op_params.output_multiplier = output_multiplier;
50-
op_params.output_shift = output_shift;
51-
op_params.quantized_activation_min = output_activation_min;
52-
op_params.quantized_activation_max = output_activation_max;
53-
return op_params;
54-
}
5541
};
5642

5743
extern const int kFullyConnectedInputTensor;
5844
extern const int kFullyConnectedWeightsTensor;
5945
extern const int kFullyConnectedBiasTensor;
6046
extern const int kFullyConnectedOutputTensor;
6147

62-
TfLiteStatus CalculateOpDataFullyConnectedReference(
48+
// Returns a FullyConnectedParams struct with all the parameters needed for a
49+
// float computation.
50+
FullyConnectedParams FullyConnectedParamsFloat(
51+
TfLiteFusedActivation activation);
52+
53+
// Returns a FullyConnectedParams struct with all the parameters needed for a
54+
// quantized computation.
55+
FullyConnectedParams FullyConnectedParamsQuantized(
56+
const OpDataFullyConnected& op_data);
57+
58+
TfLiteStatus CalculateOpDataFullyConnected(
6359
TfLiteContext* context, TfLiteFusedActivation activation,
6460
TfLiteType data_type, const TfLiteTensor* input, const TfLiteTensor* filter,
65-
const TfLiteTensor* bias, TfLiteTensor* output,
66-
OpDataFullyConnectedReference* data);
67-
68-
void* InitFullyConnectedReference(TfLiteContext* context, const char* buffer,
69-
size_t length);
70-
71-
TfLiteStatus EvalFloatFullyConnectedReference(
72-
TfLiteContext* context, TfLiteNode* node, TfLiteFusedActivation activation,
73-
const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
74-
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output);
75-
76-
TfLiteStatus EvalQuantizedInt8FullyConnectedReference(
77-
TfLiteContext* context, TfLiteNode* node,
78-
const OpDataFullyConnectedReference& data, const TfLiteEvalTensor* input,
79-
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
80-
TfLiteEvalTensor* output);
81-
82-
TfLiteStatus EvalQuantizedFullyConnectedReference(
83-
TfLiteContext* context, TfLiteNode* node,
84-
const OpDataFullyConnectedReference& data, const TfLiteEvalTensor* input,
85-
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
86-
TfLiteEvalTensor* output);
61+
const TfLiteTensor* bias, TfLiteTensor* output, OpDataFullyConnected* data);
8762

8863
// This is the most generic TfLiteRegistration. The actual supported types may
8964
// still be target dependent. The only requirement is that every implementation

0 commit comments

Comments (0)