diff --git a/modules/dnn/perf/perf_layer.cpp b/modules/dnn/perf/perf_layer.cpp
index 1e7552c86c44..d5a9bb34af14 100644
--- a/modules/dnn/perf/perf_layer.cpp
+++ b/modules/dnn/perf/perf_layer.cpp
@@ -66,8 +66,13 @@ struct Layer_NaryEltwise : public TestBaseWithParam<tuple<Backend, Target> >
 
         if (!isRef && backendId == DNN_BACKEND_CUDA)
         {
-            if (a_shape != b_shape)
-                throw SkipTestException("The test is skipped because inputs with different shapes are not supported.");
+            if (a_shape.size() != b_shape.size())
+                throw SkipTestException("The test is skipped because inputs with different shape size are not supported.");
+
+            for(int i = 0; i < a_shape.size(); i++)
+                if (a_shape[i] != b_shape[i] && a_shape[i] != 1 && b_shape[i] != 1)
+                    throw SkipTestException("The test is skipped because inputs are not supported.");
+
             if (nary_eltwise_cuda_deny_ops.find(op) != nary_eltwise_cuda_deny_ops.end())
                 throw SkipTestException("The operator '" + op + "' is skipped because is not support with cuda currently.");
         }
@@ -215,6 +220,11 @@ PERF_TEST_P_(Layer_NaryEltwise, NHWC_C)
     test_layer({N, H, W, C}, {1, C}, "sum");
 }
 
+PERF_TEST_P_(Layer_NaryEltwise, NHWC_H)
+{
+    test_layer({N, H, W, C}, {1, H, 1, 1}, "sum");
+}
+
 PERF_TEST_P_(Layer_Slice, YOLOv4_tiny_1)
 {
     const int inputShape[4] = {1, 64, 104, 104};
diff --git a/modules/dnn/src/layers/nary_eltwise_layers.cpp b/modules/dnn/src/layers/nary_eltwise_layers.cpp
index 2c46b1165b82..5a8d56adee57 100644
--- a/modules/dnn/src/layers/nary_eltwise_layers.cpp
+++ b/modules/dnn/src/layers/nary_eltwise_layers.cpp
@@ -673,12 +673,17 @@ class NaryEltwiseLayerImpl CV_FINAL : public NaryEltwiseLayer
     {
         auto context = reinterpret_cast<csl::CSLContext*>(context_);
 
-        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
+        auto input_0_shape = inputs[0].dynamicCast<CUDABackendWrapper>()->getShape();
         for (int i = 1; i < inputs.size(); i++)
         {
-            auto from_wrapper = inputs[i].dynamicCast<CUDABackendWrapper>();
-            if (input_wrapper->getShape() != from_wrapper->getShape())
+            auto input_i_shape = inputs[i].dynamicCast<CUDABackendWrapper>()->getShape();
+            if (input_0_shape.size() != input_i_shape.size())
                 return Ptr<BackendNode>();
+            // check if the shape can be supported by `eltwise_ops.cu`, or return the default BackendNode
+            for (int j = 0; j < input_0_shape.size(); j++)
+                if (input_0_shape[j] != input_i_shape[j] &&
+                    input_0_shape[j] != 1 && input_i_shape[j] != 1)
+                    return Ptr<BackendNode>();
         }
 
         cuda4dnn::EltwiseOpType op_ = cuda4dnn::EltwiseOpType::SUM;