Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DNN: supports NonMaxSuppression operator from ONNX #22473

Draft
wants to merge 4 commits into
base: 4.x
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
21 changes: 21 additions & 0 deletions modules/dnn/include/opencv2/dnn/all_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1057,6 +1057,27 @@ CV__DNN_INLINE_NS_BEGIN
static Ptr<CumSumLayer> create(const LayerParams& params);
};

/**
* @brief Non-maximum-suppression layer from https://github.com/onnx/onnx/blob/master/docs/Operators.md#NonMaxSuppression.
*
* Takes two inputs, boxes `[num_batches, num_boxes, 4]` and scores
* `[num_batches, num_classes, num_boxes]`, and produces one output of
* `keep_top_k` rows of `[batch_index, class_index, box_index]`.
*
* It works with only one class of bboxes.
*/
class CV_EXPORTS NonMaxSuppressionLayer : public Layer
{
public:
// attributes
// Box layout selector: 0 means [y1, x1, y2, x2], 1 means [cx, cy, w, h].
int center_point_box;
// inputs
// Values of the optional operator inputs, stored as layer parameters.
// Per-class cap on selected boxes as defined by the ONNX operator.
int max_output_boxes_per_class;
// Overlap (IoU) threshold handed to the NMS routine.
float iou_threshold;
// Score threshold handed to the NMS routine.
float score_threshold;
// limits output
// Number of rows of the fixed-size output blob.
int keep_top_k;
// Candidate limit handed to the NMS routine.
int top_k;

// Builds the layer implementation from the given parameters.
static Ptr<NonMaxSuppressionLayer> create(const LayerParams& params);
};

//! @}
//! @}
CV__DNN_INLINE_NS_END
Expand Down
1 change: 1 addition & 0 deletions modules/dnn/src/init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ void initializeLayerFactory()
CV_DNN_REGISTER_LAYER_CLASS(Const, ConstLayer);
CV_DNN_REGISTER_LAYER_CLASS(Arg, ArgLayer);
CV_DNN_REGISTER_LAYER_CLASS(Reciprocal, ReciprocalLayer);
CV_DNN_REGISTER_LAYER_CLASS(NonMaxSuppression, NonMaxSuppressionLayer);

CV_DNN_REGISTER_LAYER_CLASS(Crop, CropLayer);
CV_DNN_REGISTER_LAYER_CLASS(Eltwise, EltwiseLayer);
Expand Down
154 changes: 154 additions & 0 deletions modules/dnn/src/layers/nms_layer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
#include "../precomp.hpp"
#include "layers_common.hpp"

#include <limits.h> // for INT_MAX
#include <algorithm> // for std::min
#include <cmath> // for std::abs
#include <string>
#include "../nms.inl.hpp"

namespace cv
{
namespace dnn
{

/**
 * CPU implementation of the ONNX NonMaxSuppression operator.
 *
 * Inputs:
 *   - inputs[0]: boxes,  [num_batches, num_boxes, 4]
 *   - inputs[1]: scores, [num_batches, num_classes, num_boxes]
 * Output:
 *   - outputs[0]: [keep_top_k, 3] rows of [batch_index, class_index, box_index];
 *     rows that do not hold a selected box are filled with -1.
 *
 * Only a single batch and a single class are supported.
 */
class NonMaxSuppressionLayerImpl CV_FINAL : public NonMaxSuppressionLayer
{
public:
    /**
     * Reads the operator configuration from @p params.
     *
     * `top_k` and `keep_top_k` may be overridden through @p params; they
     * default to the values that were previously hard-coded.
     */
    NonMaxSuppressionLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);

        // 0: [y1, x1, y2, x2] for TF models; 1: [cx, cy, w, h] for PyTorch models
        center_point_box = params.get<int>("center_point_box", 0);
        max_output_boxes_per_class = params.get<int>("max_output_boxes_per_class", INT_MAX);
        iou_threshold = params.get<float>("iou_threshold", 0.f);
        score_threshold = params.get<float>("score_threshold", 0.f);

        // WARNING: the defaults are magic numbers that work for most cases.
        // top_k limits the NMS candidates, keep_top_k fixes the output row count.
        top_k = params.get<int>("top_k", 5000);
        keep_top_k = params.get<int>("keep_top_k", 650);
        CV_Assert(keep_top_k > 0); // it is used as an output dimension
    }

    /** Only the default OpenCV backend is implemented. */
    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV;
    }

    /**
     * Validates the input shapes and reports the fixed output shape
     * [keep_top_k, 3].
     *
     * @return false, i.e. the layer does not work in-place.
     */
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == 2); // support with boxes & scores as inputs only

        const MatShape& boxes_shape = inputs[0];  // [num_batches, num_boxes, 4]
        const MatShape& scores_shape = inputs[1]; // [num_batches, num_classes, num_boxes]
        // Check the rank before indexing into the shapes.
        CV_Assert(boxes_shape.size() == 3 && boxes_shape[2] == 4);
        CV_Assert(scores_shape.size() == 3);
        CV_Assert(boxes_shape[0] == scores_shape[0]); // same batch size
        CV_Assert(boxes_shape[1] == scores_shape[2]); // same number of boxes
        // forward() handles exactly one batch and one class; reject anything
        // else here instead of failing later with a size mismatch.
        CV_Assert(boxes_shape[0] == 1 && scores_shape[1] == 1);

        // outputs[0]: selected_indices, keep_top_k * [batch_index, class_index, box_index]
        outputs.assign(1, shape(keep_top_k, 3));

        return false;
    }

    /** Intersection-over-union of two boxes (1 - Jaccard distance). */
    static inline float rect2dOverlap(const Rect2d& a, const Rect2d& b)
    {
        return 1.f - static_cast<float>(jaccardDistance(a, b));
    }

    /**
     * Runs NMS over the single batch / single class and writes the indices of
     * the kept boxes into the fixed-size output.
     */
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        CV_Assert(inputs.size() == 2);
        CV_Assert(!outputs.empty());
        // The data is accessed through float pointers below.
        CV_Assert(inputs[0].depth() == CV_32F && inputs[1].depth() == CV_32F);
        CV_Assert(outputs[0].depth() == CV_32F);
        CV_Assert(inputs[0].dims == 3 && inputs[0].size[0] == 1 && inputs[0].size[2] == 4);

        const int num_boxes = inputs[0].size[1];
        CV_Assert(inputs[1].total() == static_cast<size_t>(num_boxes));

        // Linear pointer access requires continuous storage.
        const Mat boxes_blob = inputs[0].isContinuous() ? inputs[0] : inputs[0].clone();
        const Mat scores_blob = inputs[1].isContinuous() ? inputs[1] : inputs[1].clone();

        // Retrieve bboxes
        std::vector<Rect2d> boxes;
        decodeBoxes(boxes_blob.ptr<float>(), num_boxes, center_point_box == 1, boxes);

        // Retrieve scores: exactly one score per box.
        std::vector<float> scores;
        if (num_boxes > 0)
        {
            const float* ptr_scores = scores_blob.ptr<float>();
            scores.assign(ptr_scores, ptr_scores + num_boxes);
        }

        // The output cannot hold more rows than it was allocated with, and the
        // operator additionally caps the selection per class.
        const int max_rows = outputs[0].size[0];
        CV_Assert(outputs[0].total() == static_cast<size_t>(max_rows) * 3);
        const int limit = std::min(std::min(keep_top_k, max_output_boxes_per_class), max_rows);

        // NMS
        std::vector<int> keep_indices;
        if (limit > 0 && num_boxes > 0)
        {
            NMSFast_(boxes, scores, score_threshold, iou_threshold, 1.0, top_k, keep_indices, rect2dOverlap, limit);
        }

        // Store to output.
        // dnn does not support dynamic shapes for now, so the number of output
        // rows is fixed (keep_top_k, 650 by default; a single image typically
        // yields far fewer boxes). Rows without a selected box are set to -1.
        // NOTE(review): when the operator is exported from torchvision.ops.nms
        // it is always followed by a Gather operator (see the visualization in
        // opencv/opencv_extra#1005); whether Gather should drop these invalid
        // indices is an open question raised with @rogday.
        outputs[0].setTo(-1);

        float* outputsData = outputs[0].ptr<float>();
        const int num_kept = std::min(static_cast<int>(keep_indices.size()), max_rows);
        for (int i = 0; i < num_kept; i++)
        {
            outputsData[i * 3] = 0;     // batch_index (single batch)
            outputsData[i * 3 + 1] = 0; // class_index (single class)
            outputsData[i * 3 + 2] = static_cast<float>(keep_indices[i]);
        }
        outputs_arr.assign(outputs);
    }

private:
    /**
     * Converts @p num_boxes raw 4-float boxes at @p data into Rect2d.
     *
     * @param center_format true: [cx, cy, w, h]; false: [y1, x1, y2, x2], where
     *        the two points may be any diagonal pair of corners.
     */
    static void decodeBoxes(const float* data, int num_boxes, bool center_format, std::vector<Rect2d>& boxes)
    {
        boxes.resize(num_boxes);
        for (int i = 0; i < num_boxes; i++)
        {
            const float* b = data + static_cast<size_t>(i) * 4;
            Rect2d& box = boxes[i];
            if (center_format)
            {
                box.x = b[0] - 0.5 * b[2];
                box.y = b[1] - 0.5 * b[3];
                box.width = b[2];
                box.height = b[3];
            }
            else
            {
                // Normalize so that a reversed diagonal pair does not produce a
                // negative width or height.
                box.x = std::min(b[1], b[3]);
                box.y = std::min(b[0], b[2]);
                box.width = std::abs(b[3] - b[1]);
                box.height = std::abs(b[2] - b[0]);
            }
        }
    }
};

// Factory entry point: wraps a freshly constructed implementation object
// in a smart pointer to the public layer interface.
Ptr<NonMaxSuppressionLayer> NonMaxSuppressionLayer::create(const LayerParams& params)
{
    Ptr<NonMaxSuppressionLayer> layer(new NonMaxSuppressionLayerImpl(params));
    return layer;
}

}
}
25 changes: 25 additions & 0 deletions modules/dnn/src/onnx/onnx_importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ class ONNXImporter
void parseElementWise (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseDepthToSpace (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseSimpleLayers (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseNonMaxSuppression (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);

// Domain: com.microsoft
// URL: https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md
Expand Down Expand Up @@ -3080,6 +3081,29 @@ void ONNXImporter::parseDepthToSpace(LayerParams& layerParams, const opencv_onnx
addLayer(layerParams, node_proto);
}

/**
 * Imports an ONNX NonMaxSuppression node.
 *
 * Operator inputs (the first two are required, the rest are optional):
 *   0. boxes
 *   1. scores
 *   2. max_output_boxes_per_class
 *   3. iou_threshold
 *   4. score_threshold
 *
 * The node converted from torchvision.ops.nms typically carries the first
 * four, with max_output_boxes_per_class set to INT64_MAX. Optional inputs
 * that are constant blobs are folded into layer parameters; any that is
 * missing or empty leaves the layer default in place.
 */
void ONNXImporter::parseNonMaxSuppression(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    const int num_inputs = node_proto.input_size();
    CV_Assert(num_inputs >= 2 && num_inputs <= 5);

    layerParams.type = "NonMaxSuppression";
    if (num_inputs > 2 && constBlobs.find(node_proto.input(2)) != constBlobs.end())
    {
        Mat max_output_boxes_per_class = getBlob(node_proto, 2);
        if (!max_output_boxes_per_class.empty())
            layerParams.set("max_output_boxes_per_class", max_output_boxes_per_class.at<int>(0)); // INT64_MAX -> INT32_MAX
    }
    if (num_inputs > 3 && constBlobs.find(node_proto.input(3)) != constBlobs.end())
    {
        Mat iou_threshold = getBlob(node_proto, 3);
        if (!iou_threshold.empty())
            layerParams.set("iou_threshold", iou_threshold.at<float>(0));
    }
    if (num_inputs > 4 && constBlobs.find(node_proto.input(4)) != constBlobs.end())
    {
        Mat score_threshold = getBlob(node_proto, 4);
        if (!score_threshold.empty())
            layerParams.set("score_threshold", score_threshold.at<float>(0));
    }
    addLayer(layerParams, node_proto);
}

void ONNXImporter::parseSimpleLayers(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
for (int j = 0; j < node_proto.input_size(); j++) {
Expand Down Expand Up @@ -3685,6 +3709,7 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version)
dispatch["Equal"] = dispatch["Greater"] = dispatch["Less"] = dispatch["Pow"] = dispatch["Add"] =
dispatch["Sub"] = dispatch["Mul"] = dispatch["Div"] = &ONNXImporter::parseElementWise;
dispatch["Sum"] = dispatch["Min"] = dispatch["Max"] = &ONNXImporter::parseElementWise;
dispatch["NonMaxSuppression"] = &ONNXImporter::parseNonMaxSuppression;

std::vector<std::string> simpleLayers{"Acos", "Acosh", "Asin", "Asinh", "Atan", "Atanh", "Ceil", "Celu", "Cos",
"Cosh", "Dropout", "Erf", "Exp", "Floor", "HardSigmoid", "HardSwish",
Expand Down
5 changes: 5 additions & 0 deletions modules/dnn/test/test_onnx_importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,11 @@ class Test_ONNX_layers : public DNNTestLayer
}
};

// Runs the "nms" ONNX test model through the importer and compares against
// the stored reference data.
TEST_P(Test_ONNX_layers, NonMaxSuppression)
{
    // NOTE(review): the last argument is presumably the number of model
    // inputs (boxes and scores) -- confirm against testONNXModels.
    const int numInputs = 2;
    testONNXModels("nms", npy, 0, 0, false, false, numInputs);
}

TEST_P(Test_ONNX_layers, InstanceNorm)
{
if(backend == DNN_BACKEND_CUDA)
Expand Down