Custom deep learning layers support

opencv · Apr 6, 2018 · b5a27e0 · b5a27e0
1 parent 875b4e2
commit b5a27e0
Show file tree

Hide file tree

Showing 17 changed files with 848 additions and 138 deletions.
diff --git a/3rdparty/protobuf/src/google/protobuf/text_format.cc b/3rdparty/protobuf/src/google/protobuf/text_format.cc
@@ -469,8 +469,9 @@ class TextFormat::Parser::ParserImpl {
                       "\" has no field named \"" + field_name + "\".");
           return false;
         } else {
-          ReportWarning("Message type \"" + descriptor->full_name() +
-                        "\" has no field named \"" + field_name + "\".");
+          // No warnings to let user define custom layers (see https://github.com/opencv/opencv/pull/11129)
+          // ReportWarning("Message type \"" + descriptor->full_name() +
+          //               "\" has no field named \"" + field_name + "\".");
         }
       }
     }
@@ -485,10 +486,13 @@ class TextFormat::Parser::ParserImpl {
       // start with "{" or "<" which indicates the beginning of a message body.
       // If there is no ":" or there is a "{" or "<" after ":", this field has
       // to be a message or the input is ill-formed.
+      UnknownFieldSet* unknown_fields = reflection->MutableUnknownFields(message);
       if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
-        return SkipFieldValue();
+        UnknownFieldSet* unknown_field = unknown_fields->AddGroup(unknown_fields->field_count());
+        unknown_field->AddLengthDelimited(0, field_name);  // Add a field's name.
+        return SkipFieldValue(unknown_field);
       } else {
-        return SkipFieldMessage();
+        return SkipFieldMessage(unknown_fields);
       }
     }
 
@@ -571,7 +575,7 @@ class TextFormat::Parser::ParserImpl {
   }
 
   // Skips the next field including the field's name and value.
-  bool SkipField() {
+  bool SkipField(UnknownFieldSet* unknown_fields) {
     string field_name;
     if (TryConsume("[")) {
       // Extension name.
@@ -588,9 +592,11 @@ class TextFormat::Parser::ParserImpl {
     // If there is no ":" or there is a "{" or "<" after ":", this field has
     // to be a message or the input is ill-formed.
     if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
-      DO(SkipFieldValue());
+      UnknownFieldSet* unknown_field = unknown_fields->AddGroup(unknown_fields->field_count());
+      unknown_field->AddLengthDelimited(0, field_name);  // Add a field's name.
+      DO(SkipFieldValue(unknown_field));
     } else {
-      DO(SkipFieldMessage());
+      DO(SkipFieldMessage(unknown_fields));
     }
     // For historical reasons, fields may optionally be separated by commas or
     // semicolons.
@@ -625,11 +631,11 @@ class TextFormat::Parser::ParserImpl {
 
   // Skips the whole body of a message including the beginning delimiter and
   // the ending delimiter.
-  bool SkipFieldMessage() {
+  bool SkipFieldMessage(UnknownFieldSet* unknown_fields) {
     string delimiter;
     DO(ConsumeMessageDelimiter(&delimiter));
     while (!LookingAt(">") &&  !LookingAt("}")) {
-      DO(SkipField());
+      DO(SkipField(unknown_fields));
     }
     DO(Consume(delimiter));
     return true;
@@ -769,7 +775,7 @@ class TextFormat::Parser::ParserImpl {
     return true;
   }
 
-  bool SkipFieldValue() {
+  bool SkipFieldValue(UnknownFieldSet* unknown_field) {
     if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
       while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
         tokenizer_.Next();
@@ -779,9 +785,9 @@ class TextFormat::Parser::ParserImpl {
     if (TryConsume("[")) {
       while (true) {
         if (!LookingAt("{") && !LookingAt("<")) {
-          DO(SkipFieldValue());
+          DO(SkipFieldValue(unknown_field));
         } else {
-          DO(SkipFieldMessage());
+          DO(SkipFieldMessage(unknown_field));
         }
         if (TryConsume("]")) {
           break;
@@ -833,6 +839,8 @@ class TextFormat::Parser::ParserImpl {
         return false;
       }
     }
+    // Use a tag 1 because tag 0 is used for field's name.
+    unknown_field->AddLengthDelimited(1, tokenizer_.current().text);
     tokenizer_.Next();
     return true;
   }
@@ -1298,13 +1306,13 @@ class TextFormat::Printer::TextGenerator
 TextFormat::Finder::~Finder() {
 }
 
-TextFormat::Parser::Parser()
+TextFormat::Parser::Parser(bool allow_unknown_field)
   : error_collector_(NULL),
     finder_(NULL),
     parse_info_tree_(NULL),
     allow_partial_(false),
     allow_case_insensitive_field_(false),
-    allow_unknown_field_(false),
+    allow_unknown_field_(allow_unknown_field),
     allow_unknown_enum_(false),
     allow_field_number_(false),
     allow_relaxed_whitespace_(false),

diff --git a/3rdparty/protobuf/src/google/protobuf/text_format.h b/3rdparty/protobuf/src/google/protobuf/text_format.h
@@ -457,7 +457,7 @@ class LIBPROTOBUF_EXPORT TextFormat {
   // For more control over parsing, use this class.
   class LIBPROTOBUF_EXPORT Parser {
    public:
-    Parser();
+    Parser(bool allow_unknown_field = false);
     ~Parser();
 
     // Like TextFormat::Parse().

diff --git a/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md b/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md
@@ -0,0 +1,192 @@
+# Custom deep learning layers support {#tutorial_dnn_custom_layers}
+
+## Introduction
+Deep learning is a fast-growing area. New approaches to building neural networks
+usually introduce new types of layers. They could be modifications of existing
+ones or implementations of outstanding research ideas.
+
+OpenCV lets you import and run networks from different deep learning
+frameworks. It implements a number of the most popular layers. However, you may find
+that your network cannot be imported using OpenCV because of unimplemented layers.
+
+The first solution is to create a feature request at https://github.com/opencv/opencv/issues
+mentioning details such as the source of the model and the type of the new layer. A new layer could
+be implemented if the OpenCV community shares this need.
+
+The second way is to define a **custom layer** so OpenCV's deep learning engine
+will know how to use it. This tutorial shows you how to customize the import of
+deep learning models.
+
+## Define a custom layer in C++
+A deep learning layer is a building block of a network's pipeline.
+It has connections to **input blobs** and produces results to **output blobs**.
+There are trained **weights** and **hyper-parameters**.
+Layers' names, types, weights and hyper-parameters are stored in files that are generated by
+native frameworks during training. If OpenCV meets an unknown layer type, it throws an
+exception while trying to read the model:
+
+```
+Unspecified error: Can't create layer "layer_name" of type "MyType" in function getLayerInstance
+```
+
+To import the model correctly you have to derive a class from cv::dnn::Layer with
+the following methods:
+
+@snippet dnn/custom_layers.cpp A custom layer interface
+
+And register it before the import:
+
+@snippet dnn/custom_layers.cpp Register a custom layer
+
+@note `MyType` is a type of unimplemented layer from the thrown exception.
+
+Let's see what all the methods do:
+
+- Constructor
+
+@snippet dnn/custom_layers.cpp MyLayer::MyLayer
+
+Retrieves hyper-parameters from cv::dnn::LayerParams. If your layer has trainable
+weights, they will already be stored in the Layer's member cv::dnn::Layer::blobs.
+
+- A static method `create`
+
+@snippet dnn/custom_layers.cpp MyLayer::create
+
+This method should create an instance of your layer and return a cv::Ptr to it.
+
+- Output blobs' shape computation
+
+@snippet dnn/custom_layers.cpp MyLayer::getMemoryShapes
+
+Returns the layer's output shapes, which depend on the input shapes. You may request
+extra memory using `internals`.
+
+- Run a layer
+
+@snippet dnn/custom_layers.cpp MyLayer::forward
+
+Implement a layer's logic here. Compute outputs for given inputs.
+
+@note OpenCV manages memory allocated for layers. In most cases the same memory
+can be reused between layers. So your `forward` implementation should not rely on
+the second invocation of `forward` having the same data in `outputs` and `internals`.
+
+- Optional `finalize` method
+
+@snippet dnn/custom_layers.cpp MyLayer::finalize
+
+The chain of method calls is the following: the OpenCV deep learning engine calls the `create`
+method once, then it calls `getMemoryShapes` for every created layer, and then you
+can make some preparations that depend on the known input dimensions in cv::dnn::Layer::finalize.
+After the network has been initialized, only the `forward` method is called for every network input.
+
+@note If you vary the input blobs' sizes, such as height, width or batch size, you make OpenCV
+reallocate all the internal memory. That leads to efficiency gaps. Try to initialize
+and deploy models using a fixed batch size and image dimensions.
+
+## Example: custom layer from Caffe
+Let's create a custom layer `Interp` from https://github.com/cdmh/deeplab-public.
+It's just a simple resize that takes an input blob of size `N x C x Hi x Wi` and returns
+an output blob of size `N x C x Ho x Wo` where `N` is a batch size, `C` is a number of channels,
+`Hi x Wi` and `Ho x Wo` are the input and output `height x width` respectively.
+This layer has no trainable weights but it has hyper-parameters to specify an output size.
+
+For example,
+~~~~~~~~~~~~~
+layer {
+  name: "output"
+  type: "Interp"
+  bottom: "input"
+  top: "output"
+  interp_param {
+    height: 9
+    width: 8
+  }
+}
+~~~~~~~~~~~~~
+
+This way our implementation can look like:
+
+@snippet dnn/custom_layers.cpp InterpLayer
+
+Next we need to register a new layer type and try to import the model.
+
+@snippet dnn/custom_layers.cpp Register InterpLayer
+
+## Example: custom layer from TensorFlow
+This is an example of how to import a network with [tf.image.resize_bilinear](https://www.tensorflow.org/versions/master/api_docs/python/tf/image/resize_bilinear)
+operation. This is also a resize but with an implementation different from OpenCV's or `Interp` above.
+
+Let's create a single layer network:
+~~~~~~~~~~~~~{.py}
+inp = tf.placeholder(tf.float32, [2, 3, 4, 5], 'input')
+resized = tf.image.resize_bilinear(inp, size=[9, 8], name='resize_bilinear')
+~~~~~~~~~~~~~
+OpenCV sees this TensorFlow graph in the following way:
+
+```
+node {
+  name: "input"
+  op: "Placeholder"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+node {
+  name: "resize_bilinear/size"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\t\000\000\000\010\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "resize_bilinear"
+  op: "ResizeBilinear"
+  input: "input:0"
+  input: "resize_bilinear/size"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "align_corners"
+    value {
+      b: false
+    }
+  }
+}
+library {
+}
+```
+The import of custom layers from TensorFlow is designed to put all of a layer's `attr` into
+cv::dnn::LayerParams but input `Const` blobs into cv::dnn::Layer::blobs.
+In our case the resize's output shape will be stored in the layer's `blobs[0]`.
+
+@snippet dnn/custom_layers.cpp ResizeBilinearLayer
+
+Next we register a layer and try to import the model.
+
+@snippet dnn/custom_layers.cpp Register ResizeBilinearLayer
diff --git a/doc/tutorials/dnn/table_of_content_dnn.markdown b/doc/tutorials/dnn/table_of_content_dnn.markdown
@@ -48,3 +48,11 @@ Deep Neural Networks (dnn module) {#tutorial_table_of_content_dnn}
     *Author:* Dmitry Kurtaev
 
     In this tutorial we'll run deep learning models in browser using OpenCV.js.
+
+-   @subpage tutorial_dnn_custom_layers
+
+    *Compatibility:* \> OpenCV 3.4.1
+
+    *Author:* Dmitry Kurtaev
+
+    How to define custom layers to import networks.
diff --git a/modules/dnn/include/opencv2/dnn/dict.hpp b/modules/dnn/include/opencv2/dnn/dict.hpp
@@ -142,6 +142,10 @@ class CV_EXPORTS Dict
     const T &set(const String &key, const T &value);
 
     friend std::ostream &operator<<(std::ostream &stream, const Dict &dict);
+
+    std::map<String, DictValue>::const_iterator begin() const;
+
+    std::map<String, DictValue>::const_iterator end() const;
 };
 
 //! @}

diff --git a/modules/dnn/include/opencv2/dnn/dnn.inl.hpp b/modules/dnn/include/opencv2/dnn/dnn.inl.hpp
@@ -102,9 +102,13 @@ inline int64 DictValue::get<int64>(int idx) const
 
         return (int64)doubleValue;
     }
+    else if (type == Param::STRING)
+    {
+        return std::atoi((*ps)[idx].c_str());
+    }
     else
     {
-        CV_Assert(isInt() || isReal());
+        CV_Assert(isInt() || isReal() || isString());
         return 0;
     }
 }
@@ -146,9 +150,13 @@ inline double DictValue::get<double>(int idx) const
     {
         return (double)(*pi)[idx];
     }
+    else if (type == Param::STRING)
+    {
+        return std::atof((*ps)[idx].c_str());
+    }
     else
     {
-        CV_Assert(isReal() || isInt());
+        CV_Assert(isReal() || isInt() || isString());
         return 0;
     }
 }
@@ -366,6 +374,16 @@ inline std::ostream &operator<<(std::ostream &stream, const Dict &dict)
     return stream;
 }
 
+inline std::map<String, DictValue>::const_iterator Dict::begin() const
+{
+    return dict.begin();
+}
+
+inline std::map<String, DictValue>::const_iterator Dict::end() const
+{
+    return dict.end();
+}
+
 CV__DNN_EXPERIMENTAL_NS_END
 }
 }