
Problems registering a custom op in TF Lite #50165


Description

@Lucy20211

@tensorflow/micro

System information

  • OS Platform and Distribution: Linux Ubuntu 20.10
  • TensorFlow version: 2.4.1
  • Python version: 3.8 (installed via pip)
  • STM32CubeIDE-Lnx: 1.3.0
  • STM32 Nucleo-64 development board with STM32F401RE MCU

Describe the problem

I'd like to forecast RMSE values for seven days from a household power consumption dataset, using seven input test values corresponding to certain power consumption values, with a model that uses a custom op (Conv1D) and two builtin ops (Reshape and Fully Connected). I'm using an STM32 Nucleo-64 development board. The code is the following:

      #include "main.h"
      #include <string.h>
     
      #include "tensorflow/lite/micro/kernels/micro_ops.h"
      #include "tensorflow/lite/micro/micro_error_reporter.h"
      #include "tensorflow/lite/micro/micro_interpreter.h"
      #include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
      #include "tensorflow/lite/version.h"
      #include "tensorflow/lite/schema/schema_generated.h"
      #include "power_consumption.h"
      #include "tensorflow/lite/micro/micro_allocator.h"
      #include "tensorflow/lite/micro/micro_op_resolver.h"
      #include "tensorflow/lite/core/api/flatbuffer_conversions.h"
      #include "flatbuffers/flatbuffers.h"  // from @flatbuffers
      #include "tensorflow/lite/c/builtin_op_data.h"
      #include "tensorflow/lite/c/common.h"
      #include "tensorflow/lite/core/api/error_reporter.h"
      #include "tensorflow/lite/kernels/internal/compatibility.h"
      #include "tensorflow/lite/kernels/internal/reference/conv_1d.h"
      #include "tensorflow/lite/schema/schema_generated.h"
      
      CRC_HandleTypeDef hcrc;
      TIM_HandleTypeDef htim11;
      UART_HandleTypeDef huart2;
      
      // TFLite globals
      namespace {
      tflite::ErrorReporter* error_reporter = nullptr;
      const tflite::Model* model = nullptr;
      tflite::MicroInterpreter* interpreter = nullptr;
      TfLiteTensor* model_input = nullptr;
      TfLiteTensor* model_output = nullptr;
      constexpr int kTensorArenaSize = 2 * 1024;
      __attribute__((aligned(16)))uint8_t tensor_arena[kTensorArenaSize];
      } // namespace


      void SystemClock_Config(void);
      static void MX_GPIO_Init(void);
      static void MX_USART2_UART_Init(void);
      static void MX_CRC_Init(void);
      static void MX_TIM11_Init(void);

      int main(void)
      {

        char buf[100];
        int buf_len = 0;
        TfLiteStatus tflite_status;
        uint32_t num_elements;
        uint32_t timestamp;
        float y_val[7];
        int i;

        HAL_Init();
        SystemClock_Config();
        MX_GPIO_Init();
        MX_USART2_UART_Init();
        MX_CRC_Init();
        MX_TIM11_Init();
        HAL_TIM_Base_Start(&htim11);

        static tflite::MicroErrorReporter micro_error_reporter;
        error_reporter = &micro_error_reporter;

        // Say something to test error reporter
        error_reporter->Report("STM32 TensorFlow Lite test");
    
       // Map the model into a usable data structure
       model = tflite::GetModel(power_consumption);
       if (model->version() != TFLITE_SCHEMA_VERSION)
       {
        error_reporter->Report("Model version does not match Schema");
        while(1);
       }

       // Pull in only needed operations (should match NN layers). 
       static tflite::MicroMutableOpResolver<3> micro_op_resolver;

       // Add custom neural network layer operation
       tflite_status = micro_op_resolver.AddCustom(
       "cd1", tflite::ops::custom::Register_CONV_1D());

       if (tflite_status != kTfLiteOk) {
  
        error_reporter->Report("Could not add Conv op");
        while(1);
       }

       tflite_status = micro_op_resolver.AddReshape();

       if (tflite_status != kTfLiteOk) {
         error_reporter->Report("Could not add RESHAPE op");
         while(1);
       }

      tflite_status = micro_op_resolver.AddFullyConnected();

      if (tflite_status != kTfLiteOk) {
      error_reporter->Report("Could not add FULLY CONNECTED op");
      while(1);
      }

      // Build an interpreter to run the model with.
      static tflite::MicroInterpreter static_interpreter(
      model, micro_op_resolver, tensor_arena, kTensorArenaSize, error_reporter);
      interpreter = &static_interpreter;

      // Allocate memory from the tensor_arena for the model's tensors.
      tflite_status = interpreter->AllocateTensors();
      if (tflite_status != kTfLiteOk) {
      error_reporter->Report("AllocateTensors() failed");
      while(1);
      }

     // Assign model input and output buffers (tensors) to pointers
     model_input = interpreter->input(0);
     model_output = interpreter->output(0);

     // Get number of elements in input tensor
     num_elements = model_input->bytes / sizeof(float);
     buf_len = sprintf(buf, "Number of input elements: %lu\r\n", num_elements);
     HAL_UART_Transmit(&huart2, (uint8_t *)buf, buf_len, 100);


      /* Infinite loop */
       while (1)
       {
        // Fill input buffer (use test value)
        for (uint32_t i = 0; i < num_elements; i++)
        {
          model_input->data.f[i] = 150.0f;
        }

        // Get current timestamp
        timestamp = htim11.Instance->CNT;

        // Run inference
        tflite_status = interpreter->Invoke();
        if (tflite_status != kTfLiteOk)
        {
          error_reporter->Report("Invoke failed");
        }

       // Read output RMSE (predicted y) of neural network
       for(i=0; i<7; i++) {
       y_val[i] = model_output->data.f[i];
       }

       // Print output of neural network along with inference time (microseconds)
       for(i=0; i<7; i++) {
       buf_len = sprintf(buf,
                    "Output: %f | Duration: %lu\r\n",
                    y_val[i],
                    htim11.Instance->CNT - timestamp);
       HAL_UART_Transmit(&huart2, (uint8_t *)buf, buf_len, 100);
       }
      // Wait before doing it again
      HAL_Delay(500);

   }
  }
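One thing I'm not sure about is whether the string "cd1" that I pass to AddCustom() actually matches the custom op name the converter stored in the .tflite flatbuffer. As a sketch (my own idea, using the accessors generated in schema_generated.h), something like this after GetModel() should print the custom op names the model really contains:

    // My own sketch (not from the tutorial): list the operator codes stored
    // in the flatbuffer, so the string passed to AddCustom() can be checked
    // against the model's custom_code.
    for (uint32_t i = 0; i < model->operator_codes()->size(); ++i) {
      const tflite::OperatorCode* opcode = model->operator_codes()->Get(i);
      // Only custom ops carry a custom_code string; builtins have none.
      if (opcode->custom_code() != nullptr) {
        error_reporter->Report("Custom op in model: %s",
                               opcode->custom_code()->c_str());
      }
    }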

I can compile it, but when I open PuTTY, I can't read anything.
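As a sanity check for the serial link itself (an idea of mine, reusing the huart2 handle from above), a bare-bones transmit loop like this should show whether the UART and PuTTY settings are fine, independent of TensorFlow:

    // Minimal UART test: if this string never appears in PuTTY either,
    // the problem is the UART/terminal configuration, not the TFLite code.
    while (1)
    {
      const char *msg = "UART alive\r\n";
      HAL_UART_Transmit(&huart2, (uint8_t *)msg, (uint16_t)strlen(msg), 100);
      HAL_Delay(1000);
    }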
For the implementation of the custom op Conv1D, I added a .cpp file and a .h file in tensorflow/lite/kernels.

The header file for the custom op is like this:

    #ifndef TENSORFLOW_LITE_KERNELS_CONV1D_H_
    #define TENSORFLOW_LITE_KERNELS_CONV1D_H_

    #include "tensorflow/lite/kernels/internal/types.h"
    #include "tensorflow/lite/kernels/kernel_util.h"

    namespace tflite {
    namespace ops {
    namespace custom {

    TfLiteRegistration* Register_CONV_1D();

    }  // namespace custom
    }  // namespace ops
    }  // namespace tflite

    #endif  // TENSORFLOW_LITE_KERNELS_CONV1D_H_

The .cpp file looks like this (it's very simple and there are probably errors; for now I use fixed weights because I don't know how to read the real ones from the trained model):

            #include "tensorflow/lite/kernels/conv_1d.h"

            #include <math.h>
            #include <stddef.h>
            #include <stdint.h>
            
            #include <vector>
            
            #include "tensorflow/lite/c/common.h"
            
            #include "tensorflow/lite/kernels/internal/common.h"
            #include "tensorflow/lite/kernels/internal/tensor.h"
            #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
            #include "tensorflow/lite/kernels/kernel_util.h"

            namespace tflite {
            namespace ops {
            namespace custom {
            namespace conv_1d {

            const int dim = 5;
            int dim_in;  
            int dim_out;  
            int dim_k = 3;    //kernel dimension
            float copy[dim];

            constexpr float kernel[3] = {1.2,2.0,4.2};
            constexpr int dilation = 2;   //dilation

            TfLiteStatus Conv1dPrepare(TfLiteContext* context, TfLiteNode* node) {

            TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
            TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

            const TfLiteTensor* input = GetInput(context, node, 0);
            TfLiteTensor* output = GetOutput(context, node, 0);

             int num_dims = NumDimensions(input);

             TfLiteIntArray* output_size = TfLiteIntArrayCreate(num_dims);
              for (int i=0; i<num_dims; ++i) {
              output_size->data[i] = input->dims->data[i];
              }

              return context->ResizeTensor(context, output, output_size);
             }

             TfLiteStatus Conv1dEval(TfLiteContext* context, TfLiteNode* node) {

              const TfLiteTensor* input = GetInput(context, node,0);
              TfLiteTensor* output = GetOutput(context, node,0);

              float* input_data = input->data.f;
              float* output_data = output->data.f;

              if (output->dims->data[0] > 1) 
              dim_out = output->dims->data[0];

              else dim_out = output->dims->data[1];

              if (input->dims->data[0] > 1) 
              dim_in = input->dims->data[0];

              else dim_in = input->dims->data[1];

              float copy0[4+dim_in];

              for (int i=0; i<4; i++) {
              copy0[i] = 0;
              }

              for (int i=0; i<dim_in; i++) {
              copy0[i+4] = input_data[i];
              }

               for (int i=0; i<dim_out; i++) {
               for (int m=0; m<dim; m++) {
               copy[m] = copy0[m+i];
               } 
               for (int j=0; j<dim_k; j++) {
               output_data[i] = output_data[i] + copy[j*dilation]*kernel[j];
               }

               }
               return kTfLiteOk;
               }


           }  // namespace conv_1d

          TfLiteRegistration* Register_CONV_1D() {
          static TfLiteRegistration r = {nullptr, nullptr, conv_1d::Conv1dPrepare, conv_1d::Conv1dEval};
          return &r;
          }

          }  // namespace custom
          }  // namespace ops
          }  // namespace tflite
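About the hard-coded weights: my understanding (which may well be wrong) is that the converter can store the trained kernel as a second, constant input tensor of the custom op. In that case the eval function could fetch the real values at runtime with a helper like this sketch, and the NumInputs check in Conv1dPrepare would become 2:

    // Hypothetical helper for the same .cpp file: read the kernel from a
    // second (constant) input tensor instead of the fixed array above.
    // Assumes the converter exported the custom op with two inputs.
    TfLiteStatus ReadKernelFromModel(TfLiteContext* context, TfLiteNode* node,
                                     const float** kernel_data,
                                     int* kernel_size) {
      const TfLiteTensor* weights = GetInput(context, node, 1);
      TF_LITE_ENSURE(context, weights != nullptr);
      *kernel_data = weights->data.f;         // trained weight values
      *kernel_size = weights->dims->data[0];  // number of taps
      return kTfLiteOk;
    }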

I think the problem may also be that I don't have the register.cpp and register_ref.cpp files (I can't find them in tensorflow/lite/kernels), so I don't know whether I can create these files myself (in that case I would have to be careful with the various header files).
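From what I can tell, register.cc in the full (non-micro) TF Lite runtime mostly just adds each kernel to an op resolver, so a minimal version for my op alone might look like the sketch below (my own guess, not the real file); with TFLite Micro, the AddCustom() call on MicroMutableOpResolver in main() should already play this role:

    #include "tensorflow/lite/kernels/conv_1d.h"
    #include "tensorflow/lite/mutable_op_resolver.h"

    namespace tflite {
    namespace ops {
    namespace custom {

    // Hypothetical registration helper: adds the custom op under the same
    // name ("cd1") that main() uses with the micro resolver.
    void AddConv1dOp(MutableOpResolver* resolver) {
      resolver->AddCustom("cd1", Register_CONV_1D());
    }

    }  // namespace custom
    }  // namespace ops
    }  // namespace tflite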

In the BUILD file (which I don't have either) I would write something like this:

    cc_library(
        name = "builtin_op_kernels",
        srcs = BUILTIN_KERNEL_SRCS + [
            "conv_1d.cc",
        ],
        hdrs = [
            "dequantize.h",
            "conv_1d.h",
        ],
        copts = tflite_copts() + tf_opts_nortti_if_android() + EXTRA_EIGEN_COPTS,
        visibility = ["//visibility:private"],
        deps = BUILTIN_KERNEL_DEPS + [
            "@ruy//ruy/profiler:instrumentation",
            "//tensorflow/lite/kernels/internal:cppmath",
            "//tensorflow/lite:string",
            "@farmhash_archive//:farmhash",
        ],
    )

I'm not sure if I'm missing some other steps to register the custom op.
I used this tutorial as a reference: https://www.digikey.com/en/maker/projects/tinyml-getting-started-with-tensorflow-lite-for-microcontrollers/c0cdd850f5004b098d263400aa294023.

Metadata

Labels

TF 2.4 (for issues related to TF 2.4), comp:lite (TF Lite related issues), type:others (issues not falling in bug, performance, support, build and install or feature)
