
Problems registering a custom op in TF Lite #50165


Description

@Lucy20211

@tensorflow/micro

System information

  • OS Platform and Distribution: Linux Ubuntu 20.10
  • TensorFlow version: 2.4.1
  • Python version: 3.8 (installed via pip)
  • STM32CubeIDE-Lnx: 1.3.0
  • STM32 Nucleo-64 development board with STM32F401RE MCU

Describe the problem

I'd like to forecast RMSE values for seven days from a household power consumption dataset, using seven input test values corresponding to certain power consumption values, with a model that uses a custom op (Conv1D) and two builtin ops (Reshape and Fully Connected). I'm using an STM32 Nucleo-64 development board. The code is the following:

      #include "main.h"
      #include <string.h>
     
      #include "tensorflow/lite/micro/kernels/micro_ops.h"
      #include "tensorflow/lite/micro/micro_error_reporter.h"
      #include "tensorflow/lite/micro/micro_interpreter.h"
      #include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
      #include "tensorflow/lite/version.h"
      #include "tensorflow/lite/schema/schema_generated.h"
      #include "power_consumption.h"
      #include "tensorflow/lite/micro/micro_allocator.h"
      #include "tensorflow/lite/micro/micro_op_resolver.h"
      #include "tensorflow/lite/core/api/flatbuffer_conversions.h"
      #include "flatbuffers/flatbuffers.h"  // from @flatbuffers
      #include "tensorflow/lite/c/builtin_op_data.h"
      #include "tensorflow/lite/c/common.h"
      #include "tensorflow/lite/core/api/error_reporter.h"
      #include "tensorflow/lite/kernels/internal/compatibility.h"
      #include "tensorflow/lite/kernels/internal/reference/conv_1d.h"
      #include "tensorflow/lite/schema/schema_generated.h"
      
      CRC_HandleTypeDef hcrc;
      TIM_HandleTypeDef htim11;
      UART_HandleTypeDef huart2;
      
      // TFLite globals
      namespace {
      tflite::ErrorReporter* error_reporter = nullptr;
      const tflite::Model* model = nullptr;
      tflite::MicroInterpreter* interpreter = nullptr;
      TfLiteTensor* model_input = nullptr;
      TfLiteTensor* model_output = nullptr;
      constexpr int kTensorArenaSize = 2 * 1024;
      __attribute__((aligned(16)))uint8_t tensor_arena[kTensorArenaSize];
      } // namespace


      void SystemClock_Config(void);
      static void MX_GPIO_Init(void);
      static void MX_USART2_UART_Init(void);
      static void MX_CRC_Init(void);
      static void MX_TIM11_Init(void);

      int main(void)
      {

        char buf[100];
        int buf_len = 0;
        TfLiteStatus tflite_status;
        uint32_t num_elements;
        uint32_t timestamp;
        float y_val[7];
        int i;

        HAL_Init();
        SystemClock_Config();
        MX_GPIO_Init();
        MX_USART2_UART_Init();
        MX_CRC_Init();
        MX_TIM11_Init();
        HAL_TIM_Base_Start(&htim11);

        static tflite::MicroErrorReporter micro_error_reporter;
        error_reporter = &micro_error_reporter;

        // Say something to test error reporter
        error_reporter->Report("STM32 TensorFlow Lite test");
    
       // Map the model into a usable data structure
       model = tflite::GetModel(power_consumption);
       if (model->version() != TFLITE_SCHEMA_VERSION)
       {
        error_reporter->Report("Model version does not match Schema");
        while(1);
       }

       // Pull in only needed operations (should match NN layers). 
       static tflite::MicroMutableOpResolver<3> micro_op_resolver;

       // Add custom neural network layer operation
       tflite_status = micro_op_resolver.AddCustom(
       "cd1", tflite::ops::custom::Register_CONV_1D());

       if (tflite_status != kTfLiteOk) {
  
        error_reporter->Report("Could not add Conv op");
        while(1);
       }

       tflite_status = micro_op_resolver.AddReshape();

       if (tflite_status != kTfLiteOk) {
         error_reporter->Report("Could not add RESHAPE op");
         while(1);
       }

      tflite_status = micro_op_resolver.AddFullyConnected();

      if (tflite_status != kTfLiteOk) {
      error_reporter->Report("Could not add FULLY CONNECTED op");
      while(1);
      }

      // Build an interpreter to run the model with.
      static tflite::MicroInterpreter static_interpreter(
      model, micro_op_resolver, tensor_arena, kTensorArenaSize, error_reporter);
      interpreter = &static_interpreter;

      // Allocate memory from the tensor_arena for the model's tensors.
      tflite_status = interpreter->AllocateTensors();
      if (tflite_status != kTfLiteOk) {
      error_reporter->Report("AllocateTensors() failed");
      while(1);
      }

     // Assign model input and output buffers (tensors) to pointers
     model_input = interpreter->input(0);
     model_output = interpreter->output(0);

     // Get number of elements in input tensor
     num_elements = model_input->bytes / sizeof(float);
     buf_len = sprintf(buf, "Number of input elements: %lu\r\n", num_elements);
     HAL_UART_Transmit(&huart2, (uint8_t *)buf, buf_len, 100);


      /* Infinite loop */
       while (1)
       {
        // Fill input buffer (use test value)
        for (uint32_t i = 0; i < num_elements; i++)
        {
          model_input->data.f[i] = 150.0f;
        }

        // Get current timestamp
        timestamp = htim11.Instance->CNT;

        // Run inference
        tflite_status = interpreter->Invoke();
        if (tflite_status != kTfLiteOk)
        {
          error_reporter->Report("Invoke failed");
        }

       // Read output RMSE (predicted y) of neural network
       for(i=0; i<7; i++) {
       y_val[i] = model_output->data.f[i];
       }

       // Print output of neural network along with inference time (microseconds)
       for(i=0; i<7; i++) {
       buf_len = sprintf(buf,
                    "Output: %f | Duration: %lu\r\n",
                    y_val[i],
                    htim11.Instance->CNT - timestamp);
       HAL_UART_Transmit(&huart2, (uint8_t *)buf, buf_len, 100);
       }
      // Wait before doing it again
      HAL_Delay(500);

   }
  }
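One thing I'm not sure about is whether the string "cd1" that I pass to AddCustom() actually matches the custom op name the converter stored in the .tflite flatbuffer. As a sketch (my own idea, using the accessors generated in schema_generated.h), something like this after GetModel() should print the custom op names the model really contains:

    // My own sketch (not from the tutorial): list the operator codes stored
    // in the flatbuffer, so the string passed to AddCustom() can be checked
    // against the model's custom_code.
    for (uint32_t i = 0; i < model->operator_codes()->size(); ++i) {
      const tflite::OperatorCode* opcode = model->operator_codes()->Get(i);
      // Only custom ops carry a custom_code string; builtins have none.
      if (opcode->custom_code() != nullptr) {
        error_reporter->Report("Custom op in model: %s",
                               opcode->custom_code()->c_str());
      }
    }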

I can compile it, but when I open PuTTY, I can't read anything.
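As a sanity check for the serial link itself (an idea of mine, reusing the huart2 handle from above), a bare-bones transmit loop like this should show whether the UART and PuTTY settings are fine, independent of TensorFlow:

    // Minimal UART test: if this string never appears in PuTTY either,
    // the problem is the UART/terminal configuration, not the TFLite code.
    while (1)
    {
      const char *msg = "UART alive\r\n";
      HAL_UART_Transmit(&huart2, (uint8_t *)msg, (uint16_t)strlen(msg), 100);
      HAL_Delay(1000);
    }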
For the implementation of the custom op Conv1D, I added a .cpp file and a .h file in tensorflow/lite/kernels.

The header file for the custom op is like this:

    #ifndef TENSORFLOW_LITE_KERNELS_CONV1D_H_
    #define TENSORFLOW_LITE_KERNELS_CONV1D_H_

    #include "tensorflow/lite/kernels/internal/types.h"
    #include "tensorflow/lite/kernels/kernel_util.h"

    namespace tflite {
    namespace ops {
    namespace custom {

    TfLiteRegistration* Register_CONV_1D();

    }  // namespace custom
    }  // namespace ops
    }  // namespace tflite

    #endif  // TENSORFLOW_LITE_KERNELS_CONV1D_H_

The .cpp file looks like this (it's very simple and there are probably errors; for now I use fixed weights because I don't know how to read the real ones from the trained model):

            #include "tensorflow/lite/kernels/conv_1d.h"

            #include <math.h>
            #include <stddef.h>
            #include <stdint.h>
            
            #include <vector>
            
            #include "tensorflow/lite/c/common.h"
            
            #include "tensorflow/lite/kernels/internal/common.h"
            #include "tensorflow/lite/kernels/internal/tensor.h"
            #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
            #include "tensorflow/lite/kernels/kernel_util.h"

            namespace tflite {
            namespace ops {
            namespace custom {
            namespace conv_1d {

            const int dim = 5;
            int dim_in;  
            int dim_out;  
            int dim_k = 3;    //kernel dimension
            float copy[dim];

            constexpr float kernel[3] = {1.2,2.0,4.2};
            constexpr int dilation = 2;   //dilation

            TfLiteStatus Conv1dPrepare(TfLiteContext* context, TfLiteNode* node) {

            TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
            TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

            const TfLiteTensor* input = GetInput(context, node, 0);
            TfLiteTensor* output = GetOutput(context, node, 0);

             int num_dims = NumDimensions(input);

             TfLiteIntArray* output_size = TfLiteIntArrayCreate(num_dims);
              for (int i=0; i<num_dims; ++i) {
              output_size->data[i] = input->dims->data[i];
              }

              return context->ResizeTensor(context, output, output_size);
             }

             TfLiteStatus Conv1dEval(TfLiteContext* context, TfLiteNode* node) {

              const TfLiteTensor* input = GetInput(context, node,0);
              TfLiteTensor* output = GetOutput(context, node,0);

              float* input_data = input->data.f;
              float* output_data = output->data.f;

              if (output->dims->data[0] > 1) 
              dim_out = output->dims->data[0];

              else dim_out = output->dims->data[1];

              if (input->dims->data[0] > 1) 
              dim_in = input->dims->data[0];

              else dim_in = input->dims->data[1];

              float copy0[4+dim_in];

              for (int i=0; i<4; i++) {
              copy0[i] = 0;
              }

              for (int i=0; i<dim_in; i++) {
              copy0[i+4] = input_data[i];
              }

               for (int i=0; i<dim_out; i++) {
               for (int m=0; m<dim; m++) {
               copy[m] = copy0[m+i];
               } 
               for (int j=0; j<dim_k; j++) {
               output_data[i] = output_data[i] + copy[j*dilation]*kernel[j];
               }

               }
               return kTfLiteOk;
               }


           }  // namespace conv_1d

          TfLiteRegistration* Register_CONV_1D() {
          static TfLiteRegistration r = {nullptr, nullptr, conv_1d::Conv1dPrepare, conv_1d::Conv1dEval};
          return &r;
          }

          }  // namespace custom
          }  // namespace ops
          }  // namespace tflite
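About the hard-coded weights: my understanding (which may well be wrong) is that the converter can store the trained kernel as a second, constant input tensor of the custom op. In that case the eval function could fetch the real values at runtime with a helper like this sketch, and the NumInputs check in Conv1dPrepare would become 2:

    // Hypothetical helper for the same .cpp file: read the kernel from a
    // second (constant) input tensor instead of the fixed array above.
    // Assumes the converter exported the custom op with two inputs.
    TfLiteStatus ReadKernelFromModel(TfLiteContext* context, TfLiteNode* node,
                                     const float** kernel_data,
                                     int* kernel_size) {
      const TfLiteTensor* weights = GetInput(context, node, 1);
      TF_LITE_ENSURE(context, weights != nullptr);
      *kernel_data = weights->data.f;         // trained weight values
      *kernel_size = weights->dims->data[0];  // number of taps
      return kTfLiteOk;
    }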

I think the problem may also be that I don't have the register.cpp and register_ref.cpp files (I can't find them in tensorflow/lite/kernels), so I don't know whether I can create these files myself (in that case I would have to be careful with the various header files).
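From what I can tell, register.cc in the full (non-micro) TF Lite runtime mostly just adds each kernel to an op resolver, so a minimal version for my op alone might look like the sketch below (my own guess, not the real file); with TFLite Micro, the AddCustom() call on MicroMutableOpResolver in main() should already play this role:

    #include "tensorflow/lite/kernels/conv_1d.h"
    #include "tensorflow/lite/mutable_op_resolver.h"

    namespace tflite {
    namespace ops {
    namespace custom {

    // Hypothetical registration helper: adds the custom op under the same
    // name ("cd1") that main() uses with the micro resolver.
    void AddConv1dOp(MutableOpResolver* resolver) {
      resolver->AddCustom("cd1", Register_CONV_1D());
    }

    }  // namespace custom
    }  // namespace ops
    }  // namespace tflite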

In the BUILD file (which I don't have either) I would write something like this:

    cc_library(
        name = "builtin_op_kernels",
        srcs = BUILTIN_KERNEL_SRCS + [
            "conv_1d.cc",
        ],
        hdrs = [
            "dequantize.h",
            "conv_1d.h",
        ],
        copts = tflite_copts() + tf_opts_nortti_if_android() + EXTRA_EIGEN_COPTS,
        visibility = ["//visibility:private"],
        deps = BUILTIN_KERNEL_DEPS + [
            "@ruy//ruy/profiler:instrumentation",
            "//tensorflow/lite/kernels/internal:cppmath",
            "//tensorflow/lite:string",
            "@farmhash_archive//:farmhash",
        ],
    )

I'm not sure if I'm missing some other steps to register the custom op.
I used this tutorial as a reference: https://www.digikey.com/en/maker/projects/tinyml-getting-started-with-tensorflow-lite-for-microcontrollers/c0cdd850f5004b098d263400aa294023.

Metadata

Labels

TF 2.4 (for issues related to TF 2.4), comp:lite (TF Lite related issues), type:others (issues not falling in bug, performance, support, build and install or feature)
