Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GSoC Add ONNX Support for GatherElements #24092

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 16 additions & 0 deletions modules/dnn/include/opencv2/dnn/all_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,22 @@ CV__DNN_INLINE_NS_BEGIN
static Ptr<GatherLayer> create(const LayerParams& params);
};

/** @brief GatherElements layer
* GatherElements takes two inputs data and indices of the same rank r >= 1 and an optional attribute axis and works such that:
* output[i][j][k] = data[index[i][j][k]][j][k] if axis = 0 and r = 3
* output[i][j][k] = data[i][index[i][j][k]][k] if axis = 1 and r = 3
* output[i][j][k] = data[i][j][index[i][j][k]] if axis = 2 and r = 3
*
* Gather, on the other hand, takes a data tensor of rank r >= 1, and indices tensor of rank q, and works such that:
* it gathers the enteries along axis dimension of the input data indexed by indices and concatenates them in an output tensor of rank q + (r - 1)
* e.g. If axis = 0, let k = indices[i_{0}, ..., i_{q-1}] then output[i_{0}, ..., i_{q-1}, j_{0}, ..., j_{r-2}] = input[k , j_{0}, ..., j_{r-2}]:
**/
class CV_EXPORTS GatherElementsLayer : public Layer
{
public:
static Ptr<GatherElementsLayer> create(const LayerParams& params);
};

dkurt marked this conversation as resolved.
Show resolved Hide resolved
class CV_EXPORTS PoolingLayer : public Layer
{
public:
Expand Down
51 changes: 51 additions & 0 deletions modules/dnn/perf/perf_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,56 @@ PERF_TEST_P_(Layer_LayerNormExpanded, DISABLED_LayerNormExpanded)
test_layer({N, H ,W});
}

struct Layer_GatherElements : public TestBaseWithParam<tuple<Backend, Target> >
{
void test_layer(const std::vector<int>& data_shape, const std::vector<int>& indices_shape, int axis = 0)
{
int backendId = get<0>(GetParam());
int targetId = get<1>(GetParam());

Mat data(data_shape, CV_32FC1);
Mat indices(indices_shape, CV_32FC1);

randu(data, 0.f, 1.f);
randu(indices, 0, data_shape[axis]);

Net net;
LayerParams lp;
lp.type = "GatherElements";
lp.name = "testLayer";
lp.set("axis", axis);
int id = net.addLayerToPrev(lp.name, lp.type, lp);
net.connect(0, 0, id, 0);
net.connect(0, 1, id, 1);

// warmup
{
std::vector<String> inpNames(3);
inpNames[0] = "data";
inpNames[1] = "indices";
net.setInputsNames(inpNames);
net.setInput(data, inpNames[0]);
net.setInput(indices, inpNames[1]);

net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);
Mat out = net.forward();
}

TEST_CYCLE()
{
Mat res = net.forward();
}

SANITY_CHECK_NOTHING();
}
};

PERF_TEST_P_(Layer_GatherElements, GatherElements)
{
test_layer({2700, 1, 2914}, {2700, 1, 81}, 2);
}

INSTANTIATE_TEST_CASE_P(/**/, Layer_Slice, dnnBackendsAndTargets(false, false));
INSTANTIATE_TEST_CASE_P(/**/, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
#ifdef HAVE_CUDA
Expand All @@ -642,6 +692,7 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_Scatter, testing::Values(std::make_tuple(DNN
INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
INSTANTIATE_TEST_CASE_P(/**/, Layer_GatherElements, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));


typedef TestBaseWithParam<tuple<Vec4i, int, bool, tuple<Backend, Target> > > Layer_FullyConnected;
Expand Down
1 change: 1 addition & 0 deletions modules/dnn/src/init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ void initializeLayerFactory()
CV_DNN_REGISTER_LAYER_CLASS(Arg, ArgLayer);
CV_DNN_REGISTER_LAYER_CLASS(Reciprocal, ReciprocalLayer);
CV_DNN_REGISTER_LAYER_CLASS(Gather, GatherLayer);
CV_DNN_REGISTER_LAYER_CLASS(GatherElements, GatherElementsLayer);
CV_DNN_REGISTER_LAYER_CLASS(LayerNormalization, LayerNormLayer);
CV_DNN_REGISTER_LAYER_CLASS(Expand, ExpandLayer);

Expand Down
154 changes: 154 additions & 0 deletions modules/dnn/src/layers/gather_elements_layer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "../precomp.hpp"
#include <opencv2/dnn/shape_utils.hpp>

namespace cv { namespace dnn {

static inline int calculateOffset(int outer_dim, const MatShape &shape_indices, int axis_skip, const MatStep &step_data) {
int offset = 0;
for (int axis = static_cast<int>(shape_indices.size()) - 2; axis >= 0; axis--) {
int dim = shape_indices[axis];
if (axis != axis_skip) {
offset += (outer_dim % dim) * step_data[axis];
}
outer_dim /= dim;
}
return offset;
}

class GatherElementsLayerImpl CV_FINAL : public GatherElementsLayer
{
public:
GatherElementsLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
axis = params.get<int>("axis", 0);
}

virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV;
}

virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_CheckEQ(inputs.size(), 2ull, "GatherElements: requires two inputs");

const auto &data = inputs[0];
const auto &indices = inputs[1];
CV_CheckEQ(data.size(), indices.size(), "GatherElements: data and indices should have the same dimension");

int normalized_axis = normalize_axis(axis, static_cast<int>(data.size()));
CV_CheckGE(normalized_axis, 0, "GatherElements: axis out of range");
CV_CheckLT(normalized_axis, static_cast<int>(data.size()), "GatherElements: axis out of range");
for (size_t i = 0; i < data.size(); i++) {
if (i != normalized_axis) {
CV_CheckEQ(data[i], indices[i], "GatherElements: shape mismatched");
}
}

outputs.assign(1, inputs[1]); // shape of output is same as indices
return false;
}

virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE {
std::vector<Mat> inputs;
inputs_arr.getMatVector(inputs);

const auto &data = inputs[0];
axis = normalize_axis(axis, data.dims);
}

void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());

std::vector<Mat> inputs, outputs;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Implementation should handle FP16 datatype too

[  FAILED  ] 3 tests, listed below:
[  FAILED  ] Test_ONNX_conformance.Layer_Test/test_gather_elements_0_OCV_OCL_FP16, where GetParam() = (test_gather_elements_0, OCV/OCL_FP16)
[  FAILED  ] Test_ONNX_conformance.Layer_Test/test_gather_elements_1_OCV_OCL_FP16, where GetParam() = (test_gather_elements_1, OCV/OCL_FP16)
[  FAILED  ] Test_ONNX_conformance.Layer_Test/test_gather_elements_negative_indices_OCV_OCL_FP16, where GetParam() = (test_gather_elements_negative_indices, OCV/OCL_FP16)

https://pullrequest.opencv.org/buildbot/builders/4_x-lin64/builds/100283

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I will do this.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See #24427

inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);

const Mat& data = inputs[0];
const Mat& indices = inputs[1];
Mat& out = outputs[0];

typeDispatch(outputs[0].type(), data, indices, out);
}

template <typename T>
void forward_impl(const Mat& data_, const Mat& indices_, Mat& out_)
{
const auto *ptr_data = data_.ptr<const T>();
const auto *ptr_indices = indices_.ptr<const T>();
auto *ptr_out = out_.ptr<T>();

const auto shape_data = shape(data_);
const auto &step_data = data_.step;
const auto shape_indices = shape(indices_);

int inner_most_dim = shape_indices.back();
int axis_dim = shape_data[axis];
size_t axis_step = static_cast<size_t>(step_data[axis] / sizeof(T));

bool innermost_axis = axis == static_cast<int>(shape_data.size() - 1);

auto fn = [&](const Range &r) {
for (int i = r.start; i < r.end; i++) {
auto *data = ptr_data + static_cast<size_t>(calculateOffset(i, shape_indices, axis, step_data) / sizeof(T));
auto *indices = ptr_indices + i * inner_most_dim;
auto *out = ptr_out + i * inner_most_dim;

if (innermost_axis) {
for (int j = 0; j < inner_most_dim; j++) {
int index = static_cast<int>((indices[j] + axis_dim)) % axis_dim; // TODO: Check out-of-range index
out[j] = data[index];
}
} else {
for (int j = 0; j < inner_most_dim; j++) {
int index = static_cast<int>(indices[j] + axis_dim) % axis_dim; // TODO: Check out-of-range index
out[j] = data[index * axis_step + j];
}
}
}
};

int outer_dims = total(shape_indices, 0, shape_indices.size() - 1);
double nstripes = static_cast<size_t>(outer_dims * inner_most_dim * (1 / 1024.0));
parallel_for_(Range(0, outer_dims), fn, nstripes);
}

template<typename... Args>
inline void typeDispatch(const int type, Args&&... args)
{
switch (type)
{
case CV_8U:
forward_impl<uint8_t>(std::forward<Args>(args)...);
break;
case CV_32S:
forward_impl<int32_t>(std::forward<Args>(args)...);
break;
case CV_32F:
forward_impl<float>(std::forward<Args>(args)...);
break;
default:
CV_Error(cv::Error::BadDepth, "DNN/GatherElements: Unsupported type.");
};
}

private:
int axis;
};

Ptr<GatherElementsLayer> GatherElementsLayer::create(const LayerParams& params)
{
return makePtr<GatherElementsLayerImpl>(params);
}

}} // namespace cv::dnn
49 changes: 49 additions & 0 deletions modules/dnn/src/onnx/onnx_importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ class ONNXImporter
void parseCast (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseConstantFill (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseGather (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseGatherElements (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseConcat (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseResize (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseUpsample (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
Expand Down Expand Up @@ -2553,6 +2554,53 @@ void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::Node
addLayer(layerParams, node_proto);
}

void ONNXImporter::parseGatherElements(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
CV_CheckEQ(node_proto.input_size(), 2, "GatherElements: two inputs are required");

fengyuentau marked this conversation as resolved.
Show resolved Hide resolved
size_t num_const = 0;
for (size_t i = 0; i < node_proto.input_size(); ++i){
if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
++num_const;
}

if (num_const == node_proto.input_size())
{
std::vector<Mat> inputs, output;
for (size_t i = 0; i < node_proto.input_size(); i++) {
Mat blob = getBlob(node_proto, i);
if (i == 1) { // indices, from int32/int64 to float32 for compatibility
blob.convertTo(blob, CV_32F);
}
inputs.push_back(blob);
}
runLayer(layerParams, inputs, output);
CV_Assert(output.size() == 1);
addConstant(node_proto.output(0), output[0]);
return;
} else if (num_const > 0) {
for (size_t i = 0; i < node_proto.input_size(); i++) {
if (constBlobs.find(node_proto.input(i)) != constBlobs.end()) {
Mat blob = getBlob(node_proto, i);
if (i == 1) { // indices, from int32/int64 to float32 for compatibility
blob.convertTo(blob, CV_32F);
}

LayerParams constParams;
constParams.name = node_proto.input(i);
constParams.type = "Const";
constParams.blobs.push_back(blob);

opencv_onnx::NodeProto proto;
proto.add_output(constParams.name);
addLayer(constParams, proto);
}
}
}
dkurt marked this conversation as resolved.
Show resolved Hide resolved

addLayer(layerParams, node_proto);
}

void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
bool hasVariableInps = false;
Expand Down Expand Up @@ -3901,6 +3949,7 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version)
dispatch["Cast"] = &ONNXImporter::parseCast;
dispatch["ConstantFill"] = dispatch["ConstantOfShape"] = &ONNXImporter::parseConstantFill;
dispatch["Gather"] = &ONNXImporter::parseGather;
dispatch["GatherElements"] = &ONNXImporter::parseGatherElements;
dispatch["Concat"] = &ONNXImporter::parseConcat;
dispatch["Resize"] = &ONNXImporter::parseResize;
dispatch["Upsample"] = &ONNXImporter::parseUpsample;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@
"test_flatten_negative_axis1",
"test_flatten_negative_axis2",
"test_flatten_negative_axis4",
"test_gather_elements_0",
"test_gather_elements_1",
"test_gather_elements_negative_indices",
"test_logsoftmax_default_axis",
"test_maxpool_2d_dilations",
"test_maxpool_2d_same_lower",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,6 @@
"test_gather_0",
"test_gather_1",
"test_gather_2d_indices",
"test_gather_elements_0",
"test_gather_elements_1",
"test_gather_elements_negative_indices",
"test_gather_negative_indices",
"test_gathernd_example_float32",
"test_gathernd_example_int32",
Expand Down
29 changes: 29 additions & 0 deletions modules/dnn/test/test_onnx_importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "test_precomp.hpp"
#include "npy_blob.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <numeric>
namespace opencv_test { namespace {

template<typename TString>
Expand Down Expand Up @@ -2129,6 +2130,34 @@ TEST_P(Test_ONNX_nets, Alexnet)
expectNoFallbacksFromIE(net);
}

TEST_P(Test_ONNX_nets, RAFT)
{
applyTestTag(CV_TEST_TAG_LONG, CV_TEST_TAG_DEBUG_VERYLONG, CV_TEST_TAG_MEMORY_2GB);

std::string weight_path = _tf("models/optical_flow_estimation_raft_2023aug.onnx", false);
std::string img0_path = findDataFile(std::string("gpu/opticalflow/frame0.png"));
std::string img1_path = findDataFile(std::string("gpu/opticalflow/frame1.png"));

Size target_size{480, 360};
auto img0 = imread(img0_path);
auto img1 = imread(img1_path);
auto blob0 = blobFromImage(img0, 1.0, target_size, 0, true);
auto blob1 = blobFromImage(img1, 1.0, target_size, 0, true);

auto net = readNet(weight_path);
net.setInput(blob0, "0");
net.setInput(blob1, "1");
std::vector<std::string> outnames{"12007", "12006"};
std::vector<Mat> outs;
net.forward(outs, outnames);

// output 12006 is not checked to save space in opencv_extra since its ref is > 1MB,
// and output 12006 is calculated from 12007 so checking 12007 is sufficient.
std::string ref_12700_path = _tf("data/output_optical_flow_estimation_raft_2023aug.npy");
auto ref0 = blobFromNPY(ref_12700_path);
normAssert(ref0, outs[0], "", 1e-5, 1.8e-4);
}

TEST_P(Test_ONNX_nets, Squeezenet)
{
testONNXModels("squeezenet", pb);
Expand Down