From f19a58242beac3bf3ed329ae5d4189a5caa1f1ad Mon Sep 17 00:00:00 2001 From: chacha21 Date: Tue, 12 Dec 2023 11:17:36 +0100 Subject: [PATCH 1/6] add interleaved versions of phase/cartToPolar/polarToCart This PR is for performance only (at the cost of more template code and increased GPU code size) The additional variants can help the caller skip the creation of temporary GPU mats (where memory is more likely to be a critical resource), and can even allow in-place processing. magnitude/angles/x/y are often already interleaved when dealing with DFTs. --- .../cudaarithm/include/opencv2/cudaarithm.hpp | 53 +++++ modules/cudaarithm/src/cuda/polar_cart.cu | 208 ++++++++++++++++++ .../test/test_element_operations.cpp | 149 +++++++++++++ .../opencv2/cudev/functional/functional.hpp | 48 ++++ 4 files changed, 458 insertions(+) diff --git a/modules/cudaarithm/include/opencv2/cudaarithm.hpp b/modules/cudaarithm/include/opencv2/cudaarithm.hpp index bb74ea18918..a16c271881e 100644 --- a/modules/cudaarithm/include/opencv2/cudaarithm.hpp +++ b/modules/cudaarithm/include/opencv2/cudaarithm.hpp @@ -433,6 +433,17 @@ CV_EXPORTS_W void magnitudeSqr(InputArray x, InputArray y, OutputArray magnitude */ CV_EXPORTS_W void phase(InputArray x, InputArray y, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); +/** @brief Computes polar angles of complex matrix elements. + +@param xy Source matrix containing real and imaginary components ( CV_32FC2 ). +@param angle Destination matrix of angles ( CV_32FC1 ). +@param angleInDegrees Flag for angles that must be evaluated in degrees. +@param stream Stream for the asynchronous version. + +@sa phase +*/ +CV_EXPORTS_W void phase(InputArray xy, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); + /** @brief Converts Cartesian coordinates into polar. @param x Source matrix containing real components ( CV_32FC1 ). @@ -446,6 +457,29 @@ CV_EXPORTS_W void phase(InputArray x, InputArray y, OutputArray angle, bool angl */ CV_EXPORTS_W void cartToPolar(InputArray x, InputArray y, OutputArray magnitude, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); +/** @brief Converts Cartesian coordinates into polar. + +@param xy Source matrix containing real and imaginary components ( CV_32FC2 ). +@param magnitude Destination matrix of float magnitudes ( CV_32FC1 ). +@param angle Destination matrix of angles ( CV_32FC1 ). +@param angleInDegrees Flag for angles that must be evaluated in degrees. +@param stream Stream for the asynchronous version. + +@sa cartToPolar +*/ +CV_EXPORTS_W void cartToPolar(InputArray xy, OutputArray magnitude, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); + +/** @brief Converts Cartesian coordinates into polar. + +@param xy Source matrix containing real and imaginary components ( CV_32FC2 ). +@param magnitudeAngle Destination matrix of float magnitudes and angles ( CV_32FC2 ). +@param angleInDegrees Flag for angles that must be evaluated in degrees. +@param stream Stream for the asynchronous version. + +@sa cartToPolar +*/ +CV_EXPORTS_W void cartToPolar(InputArray xy, OutputArray magnitudeAngle, bool angleInDegrees = false, Stream& stream = Stream::Null()); + /** @brief Converts polar coordinates into Cartesian. @param magnitude Source matrix containing magnitudes ( CV_32FC1 or CV_64FC1 ). @@ -457,6 +491,25 @@ CV_EXPORTS_W void cartToPolar(InputArray x, InputArray y, OutputArray magnitude, */ CV_EXPORTS_W void polarToCart(InputArray magnitude, InputArray angle, OutputArray x, OutputArray y, bool angleInDegrees = false, Stream& stream = Stream::Null()); +/** @brief Converts polar coordinates into Cartesian. + +@param magnitude Source matrix containing magnitudes ( CV_32FC1 or CV_64FC1 ). +@param angle Source matrix containing angles ( same type as magnitude ). +@param xy Destination matrix of real and imaginary components ( same depth as magnitude, i.e. CV_32FC2 or CV_64FC2 ). +@param angleInDegrees Flag that indicates angles in degrees. +@param stream Stream for the asynchronous version. +*/ +CV_EXPORTS_W void polarToCart(InputArray magnitude, InputArray angle, OutputArray xy, bool angleInDegrees = false, Stream& stream = Stream::Null()); + +/** @brief Converts polar coordinates into Cartesian. + +@param magnitudeAngle Source matrix containing magnitudes and angles ( CV_32FC2 or CV_64FC2 ). +@param xy Destination matrix of real and imaginary components ( same depth as source ). +@param angleInDegrees Flag that indicates angles in degrees. +@param stream Stream for the asynchronous version. +*/ +CV_EXPORTS_W void polarToCart(InputArray magnitudeAngle, OutputArray xy, bool angleInDegrees = false, Stream& stream = Stream::Null()); + //! @} cudaarithm_elem //! @addtogroup cudaarithm_core diff --git a/modules/cudaarithm/src/cuda/polar_cart.cu b/modules/cudaarithm/src/cuda/polar_cart.cu index 2fb1315e619..879aed0e4a1 100644 --- a/modules/cudaarithm/src/cuda/polar_cart.cu +++ b/modules/cudaarithm/src/cuda/polar_cart.cu @@ -116,6 +116,25 @@ void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleI syncOutput(dst, _dst, stream); } +void cv::cuda::phase(InputArray _xy, OutputArray _dst, bool angleInDegrees, Stream& stream) +{ + GpuMat xy = getInputMat(_xy, stream); + + CV_Assert( xy.type() == CV_32FC2 ); + + GpuMat dst = getOutputMat(_dst, xy.size(), CV_32FC1, stream); + + GpuMat_ xyc(xy.reshape(2)); + GpuMat_ anglec(dst.reshape(1)); + + if (angleInDegrees) + gridTransformUnary(xyc, anglec, direction_interleaved_func(), stream); + else + gridTransformUnary(xyc, anglec, direction_interleaved_func(), stream); + + syncOutput(dst, _dst, stream); +} + void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream) { GpuMat x = getInputMat(_x, stream); @@ -155,6 +174,71 @@ void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Outpu syncOutput(angle, _angle, stream); } +void cv::cuda::cartToPolar(InputArray _xy, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream) +{ + GpuMat xy = getInputMat(_xy, stream); + + CV_Assert( xy.type() == CV_32FC2 ); + + GpuMat mag = getOutputMat(_mag, xy.size(), CV_32FC1, stream); + GpuMat angle = getOutputMat(_angle, xy.size(), CV_32FC1, stream); + + GpuMat_ xyc(xy.reshape(2)); + GpuMat_ magc(mag.reshape(1)); + GpuMat_ anglec(angle.reshape(1)); + + if (angleInDegrees) + { + gridTransformTuple(xyc, + tie(magc, anglec), + make_tuple( + magnitude_interleaved_func(), + direction_interleaved_func()), + stream); + } + else + { + gridTransformTuple(xyc, + tie(magc, anglec), + make_tuple( + magnitude_interleaved_func(), + direction_interleaved_func()), + stream); + } + + syncOutput(mag, _mag, stream); + syncOutput(angle, _angle, stream); +} + +void cv::cuda::cartToPolar(InputArray _xy, OutputArray _magAngle, bool angleInDegrees, Stream& stream) +{ + GpuMat xy = getInputMat(_xy, stream); + + CV_Assert( xy.type() == CV_32FC2 ); + + GpuMat magAngle = getOutputMat(_magAngle, xy.size(), CV_32FC2, stream); + + GpuMat_ xyc(xy.reshape(2)); + GpuMat_ magAnglec(magAngle.reshape(2)); + + if (angleInDegrees) + { + gridTransformUnary(xyc, + magAnglec, + magnitude_direction_interleaved_func(), + stream); + } + else + { + gridTransformUnary(xyc, + magAnglec, + magnitude_direction_interleaved_func(), + stream); + } + + syncOutput(magAngle, _magAngle, stream); +} + namespace { template struct sincos_op @@ -192,6 +276,49 @@ namespace ymat(y, x) = mag_val * sin_a; } + template + __global__ void polarToCartDstInterleavedImpl_(const GlobPtr mag, const GlobPtr angle, GlobPtr::type > xymat, const T scale, const int rows, const int cols) + { + typedef typename MakeVec::type T2; + const int x = blockDim.x * blockIdx.x + threadIdx.x; + const int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (x >= cols || y >= rows) + return; + + const T mag_val = useMag ? mag(y, x) : static_cast(1.0); + const T angle_val = angle(y, x); + + T sin_a, cos_a; + sincos_op op; + op(scale * angle_val, &sin_a, &cos_a); + + const T2 xy = {mag_val * cos_a, mag_val * sin_a}; + xymat(y, x) = xy; + } + + template + __global__ void polarToCartInterleavedImpl_(const GlobPtr::type > magAngle, GlobPtr::type > xymat, const T scale, const int rows, const int cols) + { + typedef typename MakeVec::type T2; + const int x = blockDim.x * blockIdx.x + threadIdx.x; + const int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (x >= cols || y >= rows) + return; + + const T2 magAngle_val = magAngle(y, x); + const T mag_val = useMag ? magAngle_val.x : static_cast(1.0); + const T angle_val = magAngle_val.y; + + T sin_a, cos_a; + sincos_op op; + op(scale * angle_val, &sin_a, &cos_a); + + const T2 xy = {mag_val * cos_a, mag_val * sin_a}; + xymat(y, x) = xy; + } + template void polarToCartImpl(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t& stream) { @@ -210,6 +337,43 @@ namespace else polarToCartImpl_ << > >(shrinkPtr(magc), shrinkPtr(anglec), shrinkPtr(xc), shrinkPtr(yc), scale, anglec.rows, anglec.cols); } + + template + void polarToCartDstInterleavedImpl(const GpuMat& mag, const GpuMat& angle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream) + { + typedef typename MakeVec::type T2; + GpuMat_ xyc(xy.reshape(2)); + GpuMat_ magc(mag.reshape(1)); + GpuMat_ anglec(angle.reshape(1)); + + const dim3 block(32, 8); + const dim3 grid(divUp(anglec.cols, block.x), divUp(anglec.rows, block.y)); + + const T scale = angleInDegrees ? static_cast(CV_PI / 180.0) : static_cast(1.0); + + if (magc.empty()) + polarToCartDstInterleavedImpl_ << > >(shrinkPtr(magc), shrinkPtr(anglec), shrinkPtr(xyc), scale, anglec.rows, anglec.cols); + else + polarToCartDstInterleavedImpl_ << > >(shrinkPtr(magc), shrinkPtr(anglec), shrinkPtr(xyc), scale, anglec.rows, anglec.cols); + } + + template + void polarToCartInterleavedImpl(const GpuMat& magAngle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream) + { + typedef typename MakeVec::type T2; + GpuMat_ xyc(xy.reshape(2)); + GpuMat_ magAnglec(magAngle.reshape(2)); + + const dim3 block(32, 8); + const dim3 grid(divUp(magAnglec.cols, block.x), divUp(magAnglec.rows, block.y)); + + const T scale = angleInDegrees ? static_cast(CV_PI / 180.0) : static_cast(1.0); + + if (magAnglec.empty()) + polarToCartInterleavedImpl_ << > >(shrinkPtr(magAnglec), shrinkPtr(xyc), scale, magAnglec.rows, magAnglec.cols); + else + polarToCartInterleavedImpl_ << > >(shrinkPtr(magAnglec), shrinkPtr(xyc), scale, magAnglec.rows, magAnglec.cols); + } } void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, OutputArray _y, bool angleInDegrees, Stream& _stream) @@ -237,4 +401,48 @@ void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, O CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); } +void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _xy, bool angleInDegrees, Stream& _stream) +{ + typedef void(*func_t)(const GpuMat& mag, const GpuMat& angle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream); + static const func_t funcs[7] = { 0, 0, 0, 0, 0, polarToCartDstInterleavedImpl, polarToCartDstInterleavedImpl }; + + GpuMat mag = getInputMat(_mag, _stream); + GpuMat angle = getInputMat(_angle, _stream); + + CV_Assert(angle.depth() == CV_32F || angle.depth() == CV_64F); + CV_Assert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) ); + + GpuMat xy = getOutputMat(_xy, angle.size(), CV_MAKETYPE(angle.depth(), 2), _stream); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + funcs[angle.depth()](mag, angle, xy, angleInDegrees, stream); + CV_CUDEV_SAFE_CALL( cudaGetLastError() ); + + syncOutput(xy, _xy, _stream); + + if (stream == 0) + CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); +} + +void cv::cuda::polarToCart(InputArray _magAngle, OutputArray _xy, bool angleInDegrees, Stream& _stream) +{ + typedef void(*func_t)(const GpuMat& magAngle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream); + static const func_t funcs[7] = { 0, 0, 0, 0, 0, polarToCartInterleavedImpl, polarToCartInterleavedImpl }; + + GpuMat magAngle = getInputMat(_magAngle, _stream); + + CV_Assert(magAngle.type() == CV_32FC2 || magAngle.type() == CV_64FC2); + + GpuMat xy = getOutputMat(_xy, magAngle.size(), magAngle.type(), _stream); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + funcs[magAngle.depth()](magAngle, xy, angleInDegrees, stream); + CV_CUDEV_SAFE_CALL( cudaGetLastError() ); + + syncOutput(xy, _xy, _stream); + + if (stream == 0) + CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); +} + #endif diff --git a/modules/cudaarithm/test/test_element_operations.cpp b/modules/cudaarithm/test/test_element_operations.cpp index d2e314b10d9..f02a7a30cbf 100644 --- a/modules/cudaarithm/test/test_element_operations.cpp +++ b/modules/cudaarithm/test/test_element_operations.cpp @@ -2765,6 +2765,48 @@ INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Phase, testing::Combine( testing::Values(AngleInDegrees(false), AngleInDegrees(true)), WHOLE_SUBMAT)); +PARAM_TEST_CASE(PhaseInterleaved, cv::cuda::DeviceInfo, cv::Size, AngleInDegrees, UseRoi) +{ + cv::cuda::DeviceInfo devInfo; + cv::Size size; + bool angleInDegrees; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + angleInDegrees = GET_PARAM(2); + useRoi = GET_PARAM(3); + + cv::cuda::setDevice(devInfo.deviceID()); + } +}; + +CUDA_TEST_P(PhaseInterleaved, Accuracy) +{ + cv::Mat xyChannels[2]; + cv::Mat& x = xyChannels[0]; + cv::Mat& y = xyChannels[1]; + x = randomMat(size, CV_32FC1); + y = randomMat(size, CV_32FC1); + cv::Mat xy; + cv::merge(xyChannels, 2, xy); + + cv::cuda::GpuMat dstX1Y1 = createMat(size, CV_32FC1, useRoi); + cv::cuda::GpuMat dstXY2 = createMat(size, CV_32FC1, useRoi); + cv::cuda::phase(loadMat(x, useRoi), loadMat(y, useRoi), dstX1Y1, angleInDegrees); + cv::cuda::phase(loadMat(xy, useRoi), dstXY2, angleInDegrees); + + EXPECT_MAT_NEAR(dstX1Y1, dstXY2, angleInDegrees ? 1e-2 : 1e-3); +} + +INSTANTIATE_TEST_CASE_P(CUDA_Arithm, PhaseInterleaved, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(AngleInDegrees(false), AngleInDegrees(true)), + WHOLE_SUBMAT)); + //////////////////////////////////////////////////////////////////////////////// // CartToPolar @@ -2809,6 +2851,60 @@ INSTANTIATE_TEST_CASE_P(CUDA_Arithm, CartToPolar, testing::Combine( testing::Values(AngleInDegrees(false), AngleInDegrees(true)), WHOLE_SUBMAT)); +PARAM_TEST_CASE(CartToPolarInterleaved, cv::cuda::DeviceInfo, cv::Size, AngleInDegrees, UseRoi) +{ + cv::cuda::DeviceInfo devInfo; + cv::Size size; + bool angleInDegrees; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + angleInDegrees = GET_PARAM(2); + useRoi = GET_PARAM(3); + + cv::cuda::setDevice(devInfo.deviceID()); + } +}; + +CUDA_TEST_P(CartToPolarInterleaved, Accuracy) +{ + cv::Mat xyChannels[2]; + cv::Mat& x = xyChannels[0]; + cv::Mat& y = xyChannels[1]; + x = randomMat(size, CV_32FC1); + y = randomMat(size, CV_32FC1); + cv::Mat xy; + cv::merge(xyChannels, 2, xy); + + cv::cuda::GpuMat mag1 = createMat(size, CV_32FC1, useRoi); + cv::cuda::GpuMat angle1 = createMat(size, CV_32FC1, useRoi); + cv::cuda::cartToPolar(loadMat(x, useRoi), loadMat(y, useRoi), mag1, angle1, angleInDegrees); + + cv::cuda::GpuMat mag2 = createMat(size, CV_32FC1, useRoi); + cv::cuda::GpuMat angle2 = createMat(size, CV_32FC1, useRoi); + cv::cuda::cartToPolar(loadMat(xy, useRoi), mag2, angle2, angleInDegrees); + + cv::cuda::GpuMat magAngle = createMat(size, CV_32FC2, useRoi); + cv::cuda::cartToPolar(loadMat(xy, useRoi), magAngle, angleInDegrees); + cv::cuda::GpuMat magAngleChannels[2]; + cv::cuda::split(magAngle, magAngleChannels); + + EXPECT_MAT_NEAR(mag1, mag2, 1e-4); + EXPECT_MAT_NEAR(angle1, angle2, angleInDegrees ? 1e-2 : 1e-3); + EXPECT_MAT_NEAR(angle1, angle2, angleInDegrees ? 1e-2 : 1e-3); + EXPECT_MAT_NEAR(mag1, magAngleChannels[0], 1e-4); + EXPECT_MAT_NEAR(angle1, magAngleChannels[1], angleInDegrees ? 1e-2 : 1e-3); +} + +INSTANTIATE_TEST_CASE_P(CUDA_Arithm, CartToPolarInterleaved, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(AngleInDegrees(false), AngleInDegrees(true)), + WHOLE_SUBMAT)); + //////////////////////////////////////////////////////////////////////////////// // polarToCart @@ -2857,5 +2953,58 @@ INSTANTIATE_TEST_CASE_P(CUDA_Arithm, PolarToCart, testing::Combine( testing::Values(AngleInDegrees(false), AngleInDegrees(true)), WHOLE_SUBMAT)); +PARAM_TEST_CASE(PolarToCartInterleaved, cv::cuda::DeviceInfo, cv::Size, MatType, AngleInDegrees, UseRoi) +{ + cv::cuda::DeviceInfo devInfo; + cv::Size size; + int type; + bool angleInDegrees; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + type = GET_PARAM(2); + angleInDegrees = GET_PARAM(3); + useRoi = GET_PARAM(4); + + cv::cuda::setDevice(devInfo.deviceID()); + } +}; + +CUDA_TEST_P(PolarToCartInterleaved, Accuracy) +{ + cv::Mat magnitudeAngleChannels[2]; + cv::Mat& magnitude = magnitudeAngleChannels[0]; + cv::Mat& angle = magnitudeAngleChannels[1]; + magnitude = randomMat(size, type); + angle = randomMat(size, type); + cv::Mat magnitudeAngle; + cv::merge(magnitudeAngleChannels, 2, magnitudeAngle); + const double tol = (type == CV_32FC1 ? 1.6e-4 : 1e-4) * (angleInDegrees ? 1.0 : 19.47); + + cv::cuda::GpuMat x = createMat(size, type, useRoi); + cv::cuda::GpuMat y = createMat(size, type, useRoi); + cv::cuda::GpuMat xy = createMat(size, CV_MAKETYPE(CV_MAT_DEPTH(type), 2), useRoi); + cv::cuda::GpuMat xy2 = createMat(size, CV_MAKETYPE(CV_MAT_DEPTH(type), 2), useRoi); + cv::cuda::polarToCart(loadMat(magnitude, useRoi), loadMat(angle, useRoi), x, y, angleInDegrees); + cv::cuda::polarToCart(loadMat(magnitude, useRoi), loadMat(angle, useRoi), xy2, angleInDegrees); + cv::cuda::polarToCart(loadMat(magnitudeAngle, useRoi), xy, angleInDegrees); + cv::cuda::GpuMat xyChannels[2]; + cv::cuda::split(xy, xyChannels); + + EXPECT_MAT_NEAR(x, xyChannels[0], tol); + EXPECT_MAT_NEAR(y, xyChannels[1], tol); + EXPECT_MAT_NEAR(xy.reshape(1), xy2.reshape(1), tol); +} + +INSTANTIATE_TEST_CASE_P(CUDA_Arithm, PolarToCartInterleaved, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(CV_32FC1, CV_64FC1), + testing::Values(AngleInDegrees(false), AngleInDegrees(true)), + WHOLE_SUBMAT)); + }} // namespace #endif // HAVE_CUDA diff --git a/modules/cudev/include/opencv2/cudev/functional/functional.hpp b/modules/cudev/include/opencv2/cudev/functional/functional.hpp index e4165358c28..3bef053ad71 100644 --- a/modules/cudev/include/opencv2/cudev/functional/functional.hpp +++ b/modules/cudev/include/opencv2/cudev/functional/functional.hpp @@ -619,6 +619,16 @@ template struct magnitude_func : binary_function struct magnitude_interleaved_func : unary_function::elem_type> +{ + typedef VecTraits::elem_type T; + __device__ __forceinline__ typename T operator ()(typename TypeTraits::parameter_type ab) const + { + sqrt_func::type> f; + return f(ab.x * ab.x + ab.y * ab.y); + } +}; + template struct magnitude_sqr_func : binary_function::type> { __device__ __forceinline__ typename functional_detail::FloatType::type operator ()(typename TypeTraits::parameter_type a, typename TypeTraits::parameter_type b) const @@ -627,6 +637,15 @@ template struct magnitude_sqr_func : binary_function struct magnitude_sqr_interleaved_func : unary_function::elem_type> +{ + typedef VecTraits::elem_type T; + __device__ __forceinline__ typename T operator ()(typename TypeTraits::parameter_type ab) const + { + return ab.x * ab.x + ab.y * ab.y; + } +}; + template struct direction_func : binary_function { __device__ T operator ()(T x, T y) const @@ -643,6 +662,35 @@ template struct direction_func : binary_functi } }; +template struct direction_interleaved_func : unary_function::elem_type> +{ + typedef typename VecTraits::elem_type T; + __device__ T operator ()(T2 xy) const + { + atan2_func f; + typename atan2_func::result_type angle = f(xy.y, xy.x); + + angle += (angle < 0) * (2.0f * CV_PI_F); + + if (angleInDegrees) + angle *= (180.0f / CV_PI_F); + + return saturate_cast(angle); + } +}; + +template struct magnitude_direction_interleaved_func : unary_function::elem_type> +{ + typedef typename VecTraits::elem_type T; + __device__ T2 operator ()(T2 xy) const + { + const T mag = magnitude_interleaved_func()(xy); + const T angle = direction_interleaved_func()(xy); + const T2 magAngle = {saturate_cast(mag), saturate_cast(angle)}; + return magAngle; + } +}; + template struct pow_func : binary_function { __device__ __forceinline__ float operator ()(T val, float power) const From b330b6c5a00cbddabb463356f18e66ad3875bf3b Mon Sep 17 00:00:00 2001 From: chacha21 Date: Tue, 26 Dec 2023 10:11:43 +0100 Subject: [PATCH 2/6] fixed compilation additional "typename" disambiguifiers are required by some compilers --- modules/cudev/include/opencv2/cudev/functional/functional.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/cudev/include/opencv2/cudev/functional/functional.hpp b/modules/cudev/include/opencv2/cudev/functional/functional.hpp index 3bef053ad71..9dd42c48c15 100644 --- a/modules/cudev/include/opencv2/cudev/functional/functional.hpp +++ b/modules/cudev/include/opencv2/cudev/functional/functional.hpp @@ -621,7 +621,7 @@ template struct magnitude_func : binary_function struct magnitude_interleaved_func : unary_function::elem_type> { - typedef VecTraits::elem_type T; + typedef typename VecTraits::elem_type T; __device__ __forceinline__ typename T operator ()(typename TypeTraits::parameter_type ab) const { sqrt_func::type> f; @@ -639,7 +639,7 @@ template struct magnitude_sqr_func : binary_function struct magnitude_sqr_interleaved_func : unary_function::elem_type> { - typedef VecTraits::elem_type T; + typedef typename VecTraits::elem_type T; __device__ __forceinline__ typename T operator ()(typename TypeTraits::parameter_type ab) const { return ab.x * ab.x + ab.y * ab.y; From 7e1435b3b0bf8df3477819261aee37a506b657f4 Mon Sep 17 00:00:00 2001 From: chacha21 Date: Mon, 8 Jan 2024 13:31:25 +0100 Subject: [PATCH 3/6] simplifications as suggested use globPtr() and PtrStepSz<> to bypass confusing reshape() refactor tests --- modules/cudaarithm/src/cuda/polar_cart.cu | 85 +++----- .../test/test_element_operations.cpp | 186 +++++++++++++----- 2 files changed, 162 insertions(+), 109 deletions(-) diff --git a/modules/cudaarithm/src/cuda/polar_cart.cu b/modules/cudaarithm/src/cuda/polar_cart.cu index 879aed0e4a1..cab3ff5df74 100644 --- a/modules/cudaarithm/src/cuda/polar_cart.cu +++ b/modules/cudaarithm/src/cuda/polar_cart.cu @@ -66,11 +66,7 @@ void cv::cuda::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream); - GpuMat_ xc(x.reshape(1)); - GpuMat_ yc(y.reshape(1)); - GpuMat_ magc(dst.reshape(1)); - - gridTransformBinary(xc, yc, magc, magnitude_func(), stream); + gridTransformBinary(globPtr(x), globPtr(y), globPtr(dst), magnitude_func(), stream); syncOutput(dst, _dst, stream); } @@ -85,11 +81,7 @@ void cv::cuda::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Stre GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream); - GpuMat_ xc(x.reshape(1)); - GpuMat_ yc(y.reshape(1)); - GpuMat_ magc(dst.reshape(1)); - - gridTransformBinary(xc, yc, magc, magnitude_sqr_func(), stream); + gridTransformBinary(globPtr(x), globPtr(y), globPtr(dst), magnitude_sqr_func(), stream); syncOutput(dst, _dst, stream); } @@ -104,14 +96,10 @@ void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleI GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream); - GpuMat_ xc(x.reshape(1)); - GpuMat_ yc(y.reshape(1)); - GpuMat_ anglec(dst.reshape(1)); - if (angleInDegrees) - gridTransformBinary(xc, yc, anglec, direction_func(), stream); + gridTransformBinary(globPtr(x), globPtr(y), globPtr(dst), direction_func(), stream); else - gridTransformBinary(xc, yc, anglec, direction_func(), stream); + gridTransformBinary(globPtr(x), globPtr(y), globPtr(dst), direction_func(), stream); syncOutput(dst, _dst, stream); } @@ -124,13 +112,10 @@ void cv::cuda::phase(InputArray _xy, OutputArray _dst, bool angleInDegrees, Stre GpuMat dst = getOutputMat(_dst, xy.size(), CV_32FC1, stream); - GpuMat_ xyc(xy.reshape(2)); - GpuMat_ anglec(dst.reshape(1)); - if (angleInDegrees) - gridTransformUnary(xyc, anglec, direction_interleaved_func(), stream); + gridTransformUnary(globPtr(xy), globPtr(dst), direction_interleaved_func(), stream); else - gridTransformUnary(xyc, anglec, direction_interleaved_func(), stream); + gridTransformUnary(globPtr(xy), globPtr(dst), direction_interleaved_func(), stream); syncOutput(dst, _dst, stream); } @@ -183,13 +168,12 @@ void cv::cuda::cartToPolar(InputArray _xy, OutputArray _mag, OutputArray _angle, GpuMat mag = getOutputMat(_mag, xy.size(), CV_32FC1, stream); GpuMat angle = getOutputMat(_angle, xy.size(), CV_32FC1, stream); - GpuMat_ xyc(xy.reshape(2)); GpuMat_ magc(mag.reshape(1)); GpuMat_ anglec(angle.reshape(1)); if (angleInDegrees) { - gridTransformTuple(xyc, + gridTransformTuple(globPtr(xy), tie(magc, anglec), make_tuple( magnitude_interleaved_func(), @@ -198,7 +182,7 @@ void cv::cuda::cartToPolar(InputArray _xy, OutputArray _mag, OutputArray _angle, } else { - gridTransformTuple(xyc, + gridTransformTuple(globPtr(xy), tie(magc, anglec), make_tuple( magnitude_interleaved_func(), @@ -217,21 +201,18 @@ void cv::cuda::cartToPolar(InputArray _xy, OutputArray _magAngle, bool angleInDe CV_Assert( xy.type() == CV_32FC2 ); GpuMat magAngle = getOutputMat(_magAngle, xy.size(), CV_32FC2, stream); - - GpuMat_ xyc(xy.reshape(2)); - GpuMat_ magAnglec(magAngle.reshape(2)); - + if (angleInDegrees) { - gridTransformUnary(xyc, - magAnglec, + gridTransformUnary(globPtr(xy), + globPtr(magAngle), magnitude_direction_interleaved_func(), stream); } else { - gridTransformUnary(xyc, - magAnglec, + gridTransformUnary(globPtr(xy), + globPtr(magAngle), magnitude_direction_interleaved_func(), stream); } @@ -257,7 +238,7 @@ namespace }; template - __global__ void polarToCartImpl_(const GlobPtr mag, const GlobPtr angle, GlobPtr xmat, GlobPtr ymat, const T scale, const int rows, const int cols) + __global__ void polarToCartImpl_(const PtrStepSz mag, const PtrStepSz angle, PtrStepSz xmat, PtrStepSz ymat, const T scale, const int rows, const int cols) { const int x = blockDim.x * blockIdx.x + threadIdx.x; const int y = blockDim.y * blockIdx.y + threadIdx.y; @@ -277,7 +258,7 @@ namespace } template - __global__ void polarToCartDstInterleavedImpl_(const GlobPtr mag, const GlobPtr angle, GlobPtr::type > xymat, const T scale, const int rows, const int cols) + __global__ void polarToCartDstInterleavedImpl_(const PtrStepSz mag, const PtrStepSz angle, PtrStepSz::type > xymat, const T scale, const int rows, const int cols) { typedef typename MakeVec::type T2; const int x = blockDim.x * blockIdx.x + threadIdx.x; @@ -298,7 +279,7 @@ namespace } template - __global__ void polarToCartInterleavedImpl_(const GlobPtr::type > magAngle, GlobPtr::type > xymat, const T scale, const int rows, const int cols) + __global__ void polarToCartInterleavedImpl_(const PtrStepSz::type > magAngle, PtrStepSz::type > xymat, const T scale, const int rows, const int cols) { typedef typename MakeVec::type T2; const int x = blockDim.x * blockIdx.x + threadIdx.x; @@ -322,57 +303,47 @@ namespace template void polarToCartImpl(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t& stream) { - GpuMat_ xc(x.reshape(1)); - GpuMat_ yc(y.reshape(1)); - GpuMat_ magc(mag.reshape(1)); - GpuMat_ anglec(angle.reshape(1)); - const dim3 block(32, 8); - const dim3 grid(divUp(anglec.cols, block.x), divUp(anglec.rows, block.y)); + const dim3 grid(divUp(angle.cols, block.x), divUp(angle.rows, block.y)); const T scale = angleInDegrees ? static_cast(CV_PI / 180.0) : static_cast(1.0); - if (magc.empty()) - polarToCartImpl_ << > >(shrinkPtr(magc), shrinkPtr(anglec), shrinkPtr(xc), shrinkPtr(yc), scale, anglec.rows, anglec.cols); + if (mag.empty()) + polarToCartImpl_ << > >(mag, angle, x, y, scale, angle.rows, angle.cols); else - polarToCartImpl_ << > >(shrinkPtr(magc), shrinkPtr(anglec), shrinkPtr(xc), shrinkPtr(yc), scale, anglec.rows, anglec.cols); + polarToCartImpl_ << > >(mag, angle, x, y, scale, angle.rows, angle.cols); } template void polarToCartDstInterleavedImpl(const GpuMat& mag, const GpuMat& angle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream) { typedef typename MakeVec::type T2; - GpuMat_ xyc(xy.reshape(2)); - GpuMat_ magc(mag.reshape(1)); - GpuMat_ anglec(angle.reshape(1)); const dim3 block(32, 8); - const dim3 grid(divUp(anglec.cols, block.x), divUp(anglec.rows, block.y)); + const dim3 grid(divUp(angle.cols, block.x), divUp(angle.rows, block.y)); const T scale = angleInDegrees ? static_cast(CV_PI / 180.0) : static_cast(1.0); - if (magc.empty()) - polarToCartDstInterleavedImpl_ << > >(shrinkPtr(magc), shrinkPtr(anglec), shrinkPtr(xyc), scale, anglec.rows, anglec.cols); + if (mag.empty()) + polarToCartDstInterleavedImpl_ << > >(mag, angle, xy, scale, angle.rows, angle.cols); else - polarToCartDstInterleavedImpl_ << > >(shrinkPtr(magc), shrinkPtr(anglec), shrinkPtr(xyc), scale, anglec.rows, anglec.cols); + polarToCartDstInterleavedImpl_ << > >(mag, angle, xy, scale, angle.rows, angle.cols); } template void polarToCartInterleavedImpl(const GpuMat& magAngle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream) { typedef typename MakeVec::type T2; - GpuMat_ xyc(xy.reshape(2)); - GpuMat_ magAnglec(magAngle.reshape(2)); const dim3 block(32, 8); - const dim3 grid(divUp(magAnglec.cols, block.x), divUp(magAnglec.rows, block.y)); + const dim3 grid(divUp(magAngle.cols, block.x), divUp(magAngle.rows, block.y)); const T scale = angleInDegrees ? static_cast(CV_PI / 180.0) : static_cast(1.0); - if (magAnglec.empty()) - polarToCartInterleavedImpl_ << > >(shrinkPtr(magAnglec), shrinkPtr(xyc), scale, magAnglec.rows, magAnglec.cols); + if (magAngle.empty()) + polarToCartInterleavedImpl_ << > >(magAngle, xy, scale, magAngle.rows, magAngle.cols); else - polarToCartInterleavedImpl_ << > >(shrinkPtr(magAnglec), shrinkPtr(xyc), scale, magAnglec.rows, magAnglec.cols); + polarToCartInterleavedImpl_ << > >(magAngle, xy, scale, magAngle.rows, magAngle.cols); } } diff --git a/modules/cudaarithm/test/test_element_operations.cpp b/modules/cudaarithm/test/test_element_operations.cpp index f02a7a30cbf..6c2166bc845 100644 --- a/modules/cudaarithm/test/test_element_operations.cpp +++ b/modules/cudaarithm/test/test_element_operations.cpp @@ -2785,20 +2785,19 @@ PARAM_TEST_CASE(PhaseInterleaved, cv::cuda::DeviceInfo, cv::Size, AngleInDegrees CUDA_TEST_P(PhaseInterleaved, Accuracy) { - cv::Mat xyChannels[2]; - cv::Mat& x = xyChannels[0]; - cv::Mat& y = xyChannels[1]; - x = randomMat(size, CV_32FC1); - y = randomMat(size, CV_32FC1); + cv::Mat x = randomMat(size, CV_32FC1); + cv::Mat y = randomMat(size, CV_32FC1); cv::Mat xy; - cv::merge(xyChannels, 2, xy); + std::vector xyChannels = {x, y}; + cv::merge(xyChannels, xy); - cv::cuda::GpuMat dstX1Y1 = createMat(size, CV_32FC1, useRoi); - cv::cuda::GpuMat dstXY2 = createMat(size, CV_32FC1, useRoi); - cv::cuda::phase(loadMat(x, useRoi), loadMat(y, useRoi), dstX1Y1, angleInDegrees); - cv::cuda::phase(loadMat(xy, useRoi), dstXY2, angleInDegrees); + cv::cuda::GpuMat dst = createMat(size, CV_32FC1, useRoi); + cv::cuda::phase(loadMat(xy, useRoi), dst, angleInDegrees); - EXPECT_MAT_NEAR(dstX1Y1, dstXY2, angleInDegrees ? 1e-2 : 1e-3); + cv::Mat dst_gold; + cv::phase(x, y, dst_gold, angleInDegrees); + + EXPECT_MAT_NEAR(dst_gold, dst, angleInDegrees ? 1e-2 : 1e-3); } INSTANTIATE_TEST_CASE_P(CUDA_Arithm, PhaseInterleaved, testing::Combine( @@ -2851,7 +2850,7 @@ INSTANTIATE_TEST_CASE_P(CUDA_Arithm, CartToPolar, testing::Combine( testing::Values(AngleInDegrees(false), AngleInDegrees(true)), WHOLE_SUBMAT)); -PARAM_TEST_CASE(CartToPolarInterleaved, cv::cuda::DeviceInfo, cv::Size, AngleInDegrees, UseRoi) +PARAM_TEST_CASE(CartToPolarInterleaved1, cv::cuda::DeviceInfo, cv::Size, AngleInDegrees, UseRoi) { cv::cuda::DeviceInfo devInfo; cv::Size size; @@ -2869,37 +2868,74 @@ PARAM_TEST_CASE(CartToPolarInterleaved, cv::cuda::DeviceInfo, cv::Size, AngleInD } }; -CUDA_TEST_P(CartToPolarInterleaved, Accuracy) +CUDA_TEST_P(CartToPolarInterleaved1, Accuracy) { - cv::Mat xyChannels[2]; - cv::Mat& x = xyChannels[0]; - cv::Mat& y = xyChannels[1]; - x = randomMat(size, CV_32FC1); - y = randomMat(size, CV_32FC1); + cv::Mat x = randomMat(size, CV_32FC1); + cv::Mat y = randomMat(size, CV_32FC1); cv::Mat xy; - cv::merge(xyChannels, 2, xy); + std::vector xyChannels = {x, y}; + cv::merge(xyChannels, xy); + + cv::cuda::GpuMat mag = createMat(size, CV_32FC1, useRoi); + cv::cuda::GpuMat angle = createMat(size, CV_32FC1, useRoi); + cv::cuda::cartToPolar(loadMat(xy, useRoi), mag, angle, angleInDegrees); + + cv::Mat mag_gold; + cv::Mat angle_gold; + cv::cartToPolar(x, y, mag_gold, angle_gold, angleInDegrees); + + EXPECT_MAT_NEAR(mag_gold, mag, 1e-4); + EXPECT_MAT_NEAR(angle_gold, angle, angleInDegrees ? 1e-2 : 1e-3); +} + +INSTANTIATE_TEST_CASE_P(CUDA_Arithm, CartToPolarInterleaved1, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(AngleInDegrees(false), AngleInDegrees(true)), + WHOLE_SUBMAT)); + +PARAM_TEST_CASE(CartToPolarInterleaved2, cv::cuda::DeviceInfo, cv::Size, AngleInDegrees, UseRoi) +{ + cv::cuda::DeviceInfo devInfo; + cv::Size size; + bool angleInDegrees; + bool useRoi; - cv::cuda::GpuMat mag1 = createMat(size, CV_32FC1, useRoi); - cv::cuda::GpuMat angle1 = createMat(size, CV_32FC1, useRoi); - cv::cuda::cartToPolar(loadMat(x, useRoi), loadMat(y, useRoi), mag1, angle1, angleInDegrees); + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + angleInDegrees = GET_PARAM(2); + useRoi = GET_PARAM(3); + + cv::cuda::setDevice(devInfo.deviceID()); + } +}; - cv::cuda::GpuMat mag2 = createMat(size, CV_32FC1, useRoi); - cv::cuda::GpuMat angle2 = createMat(size, CV_32FC1, useRoi); - cv::cuda::cartToPolar(loadMat(xy, useRoi), mag2, angle2, angleInDegrees); +CUDA_TEST_P(CartToPolarInterleaved2, Accuracy) +{ + cv::Mat x = randomMat(size, CV_32FC1); + cv::Mat y = randomMat(size, CV_32FC1); + cv::Mat xy; + std::vector xyChannels = {x, y}; + cv::merge(xyChannels, xy); cv::cuda::GpuMat magAngle = createMat(size, CV_32FC2, useRoi); cv::cuda::cartToPolar(loadMat(xy, useRoi), magAngle, angleInDegrees); - cv::cuda::GpuMat magAngleChannels[2]; + std::vector magAngleChannels; cv::cuda::split(magAngle, magAngleChannels); + cv::cuda::GpuMat& mag = magAngleChannels[0]; + cv::cuda::GpuMat& angle = magAngleChannels[1]; - EXPECT_MAT_NEAR(mag1, mag2, 1e-4); - EXPECT_MAT_NEAR(angle1, angle2, angleInDegrees ? 1e-2 : 1e-3); - EXPECT_MAT_NEAR(angle1, angle2, angleInDegrees ? 1e-2 : 1e-3); - EXPECT_MAT_NEAR(mag1, magAngleChannels[0], 1e-4); - EXPECT_MAT_NEAR(angle1, magAngleChannels[1], angleInDegrees ? 1e-2 : 1e-3); + cv::Mat mag_gold; + cv::Mat angle_gold; + cv::cartToPolar(x, y, mag_gold, angle_gold, angleInDegrees); + + EXPECT_MAT_NEAR(mag_gold, mag, 1e-4); + EXPECT_MAT_NEAR(angle_gold, angle, angleInDegrees ? 1e-2 : 1e-3); } -INSTANTIATE_TEST_CASE_P(CUDA_Arithm, CartToPolarInterleaved, testing::Combine( +INSTANTIATE_TEST_CASE_P(CUDA_Arithm, CartToPolarInterleaved2, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, testing::Values(AngleInDegrees(false), AngleInDegrees(true)), @@ -2953,7 +2989,7 @@ INSTANTIATE_TEST_CASE_P(CUDA_Arithm, PolarToCart, testing::Combine( testing::Values(AngleInDegrees(false), AngleInDegrees(true)), WHOLE_SUBMAT)); -PARAM_TEST_CASE(PolarToCartInterleaved, cv::cuda::DeviceInfo, cv::Size, MatType, AngleInDegrees, UseRoi) +PARAM_TEST_CASE(PolarToCartInterleave1, cv::cuda::DeviceInfo, cv::Size, MatType, AngleInDegrees, UseRoi) { cv::cuda::DeviceInfo devInfo; cv::Size size; @@ -2973,33 +3009,79 @@ PARAM_TEST_CASE(PolarToCartInterleaved, cv::cuda::DeviceInfo, cv::Size, MatType, } }; -CUDA_TEST_P(PolarToCartInterleaved, Accuracy) +CUDA_TEST_P(PolarToCartInterleave1, Accuracy) { - cv::Mat magnitudeAngleChannels[2]; - cv::Mat& magnitude = magnitudeAngleChannels[0]; - cv::Mat& angle = magnitudeAngleChannels[1]; - magnitude = randomMat(size, type); - angle = randomMat(size, type); - cv::Mat magnitudeAngle; - cv::merge(magnitudeAngleChannels, 2, magnitudeAngle); + cv::Mat magnitude = randomMat(size, type); + cv::Mat angle = randomMat(size, type); const double tol = (type == CV_32FC1 ? 1.6e-4 : 1e-4) * (angleInDegrees ? 1.0 : 19.47); - cv::cuda::GpuMat x = createMat(size, type, useRoi); - cv::cuda::GpuMat y = createMat(size, type, useRoi); cv::cuda::GpuMat xy = createMat(size, CV_MAKETYPE(CV_MAT_DEPTH(type), 2), useRoi); - cv::cuda::GpuMat xy2 = createMat(size, CV_MAKETYPE(CV_MAT_DEPTH(type), 2), useRoi); - cv::cuda::polarToCart(loadMat(magnitude, useRoi), loadMat(angle, useRoi), x, y, angleInDegrees); - cv::cuda::polarToCart(loadMat(magnitude, useRoi), loadMat(angle, useRoi), xy2, angleInDegrees); - cv::cuda::polarToCart(loadMat(magnitudeAngle, useRoi), xy, angleInDegrees); - cv::cuda::GpuMat xyChannels[2]; + cv::cuda::polarToCart(loadMat(magnitude, useRoi), loadMat(angle, useRoi), xy, angleInDegrees); + std::vector xyChannels; cv::cuda::split(xy, xyChannels); + cv::cuda::GpuMat& x = xyChannels[0]; + cv::cuda::GpuMat& y = xyChannels[1]; + + cv::Mat x_gold; + cv::Mat y_gold; + cv::polarToCart(magnitude, angle, x_gold, y_gold, angleInDegrees); + + EXPECT_MAT_NEAR(x_gold, x, tol); + EXPECT_MAT_NEAR(y_gold, y, tol); +} + +INSTANTIATE_TEST_CASE_P(CUDA_Arithm, PolarToCartInterleave1, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(CV_32FC1, CV_64FC1), + testing::Values(AngleInDegrees(false), AngleInDegrees(true)), + WHOLE_SUBMAT)); - EXPECT_MAT_NEAR(x, xyChannels[0], tol); - EXPECT_MAT_NEAR(y, xyChannels[1], tol); - EXPECT_MAT_NEAR(xy.reshape(1), xy2.reshape(1), tol); +PARAM_TEST_CASE(PolarToCartInterleave2, cv::cuda::DeviceInfo, cv::Size, MatType, AngleInDegrees, UseRoi) +{ + cv::cuda::DeviceInfo devInfo; + cv::Size size; + int type; + bool angleInDegrees; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + type = GET_PARAM(2); + angleInDegrees = GET_PARAM(3); + useRoi = GET_PARAM(4); + + cv::cuda::setDevice(devInfo.deviceID()); + } +}; + +CUDA_TEST_P(PolarToCartInterleave2, Accuracy) +{ + cv::Mat magnitude = randomMat(size, type); + cv::Mat angle = randomMat(size, type); + std::vector magAngleChannels = {magnitude, angle}; + cv::Mat magAngle; + cv::merge(magAngleChannels, magAngle); + const double tol = (type == CV_32FC1 ? 1.6e-4 : 1e-4) * (angleInDegrees ? 1.0 : 19.47); + + cv::cuda::GpuMat xy = createMat(size, CV_MAKETYPE(CV_MAT_DEPTH(type), 2), useRoi); + cv::cuda::polarToCart(loadMat(magAngle, useRoi), xy, angleInDegrees); + std::vector xyChannels; + cv::cuda::split(xy, xyChannels); + cv::cuda::GpuMat& x = xyChannels[0]; + cv::cuda::GpuMat& y = xyChannels[1]; + + cv::Mat x_gold; + cv::Mat y_gold; + cv::polarToCart(magnitude, angle, x_gold, y_gold, angleInDegrees); + + EXPECT_MAT_NEAR(x_gold, x, tol); + EXPECT_MAT_NEAR(y_gold, y, tol); } -INSTANTIATE_TEST_CASE_P(CUDA_Arithm, PolarToCartInterleaved, testing::Combine( +INSTANTIATE_TEST_CASE_P(CUDA_Arithm, PolarToCartInterleave2, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, testing::Values(CV_32FC1, CV_64FC1), From 0552aedbb71795ece95ab337e5ee24a01bbe9a85 Mon Sep 17 00:00:00 2001 From: chacha21 Date: Mon, 8 Jan 2024 13:41:51 +0100 Subject: [PATCH 4/6] more simplifications as suggested --- modules/cudaarithm/src/cuda/polar_cart.cu | 24 +++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/modules/cudaarithm/src/cuda/polar_cart.cu b/modules/cudaarithm/src/cuda/polar_cart.cu index cab3ff5df74..bfae5276aa3 100644 --- a/modules/cudaarithm/src/cuda/polar_cart.cu +++ b/modules/cudaarithm/src/cuda/polar_cart.cu @@ -238,12 +238,12 @@ namespace }; template - __global__ void polarToCartImpl_(const PtrStepSz mag, const PtrStepSz angle, PtrStepSz xmat, PtrStepSz ymat, const T scale, const int rows, const int cols) + __global__ void polarToCartImpl_(const PtrStepSz mag, const PtrStepSz angle, PtrStepSz xmat, PtrStepSz ymat, const T scale) { const int x = blockDim.x * blockIdx.x + threadIdx.x; const int y = blockDim.y * blockIdx.y + threadIdx.y; - if (x >= cols || y >= rows) + if (x >= mag.cols || y >= mag.rows) return; const T mag_val = useMag ? mag(y, x) : static_cast(1.0); @@ -258,13 +258,13 @@ namespace } template - __global__ void polarToCartDstInterleavedImpl_(const PtrStepSz mag, const PtrStepSz angle, PtrStepSz::type > xymat, const T scale, const int rows, const int cols) + __global__ void polarToCartDstInterleavedImpl_(const PtrStepSz mag, const PtrStepSz angle, PtrStepSz::type > xymat, const T scale) { typedef typename MakeVec::type T2; const int x = blockDim.x * blockIdx.x + threadIdx.x; const int y = blockDim.y * blockIdx.y + threadIdx.y; - if (x >= cols || y >= rows) + if (x >= xymat.cols || y >= xymat.rows) return; const T mag_val = useMag ? mag(y, x) : static_cast(1.0); @@ -279,13 +279,13 @@ namespace } template - __global__ void polarToCartInterleavedImpl_(const PtrStepSz::type > magAngle, PtrStepSz::type > xymat, const T scale, const int rows, const int cols) + __global__ void polarToCartInterleavedImpl_(const PtrStepSz::type > magAngle, PtrStepSz::type > xymat, const T scale) { typedef typename MakeVec::type T2; const int x = blockDim.x * blockIdx.x + threadIdx.x; const int y = blockDim.y * blockIdx.y + threadIdx.y; - if (x >= cols || y >= rows) + if (x >= magAngle.cols || y >= magAngle.rows) return; const T2 magAngle_val = magAngle(y, x); @@ -309,9 +309,9 @@ namespace const T scale = angleInDegrees ? static_cast(CV_PI / 180.0) : static_cast(1.0); if (mag.empty()) - polarToCartImpl_ << > >(mag, angle, x, y, scale, angle.rows, angle.cols); + polarToCartImpl_ << > >(mag, angle, x, y, scale); else - polarToCartImpl_ << > >(mag, angle, x, y, scale, angle.rows, angle.cols); + polarToCartImpl_ << > >(mag, angle, x, y, scale); } template @@ -325,9 +325,9 @@ namespace const T scale = angleInDegrees ? static_cast(CV_PI / 180.0) : static_cast(1.0); if (mag.empty()) - polarToCartDstInterleavedImpl_ << > >(mag, angle, xy, scale, angle.rows, angle.cols); + polarToCartDstInterleavedImpl_ << > >(mag, angle, xy, scale); else - polarToCartDstInterleavedImpl_ << > >(mag, angle, xy, scale, angle.rows, angle.cols); + polarToCartDstInterleavedImpl_ << > >(mag, angle, xy, scale); } template @@ -341,9 +341,9 @@ namespace const T scale = angleInDegrees ? static_cast(CV_PI / 180.0) : static_cast(1.0); if (magAngle.empty()) - polarToCartInterleavedImpl_ << > >(magAngle, xy, scale, magAngle.rows, magAngle.cols); + polarToCartInterleavedImpl_ << > >(magAngle, xy, scale); else - polarToCartInterleavedImpl_ << > >(magAngle, xy, scale, magAngle.rows, magAngle.cols); + polarToCartInterleavedImpl_ << > >(magAngle, xy, scale); } } From 094d5176b54760d5f60459f182d6d65beea097d4 Mon Sep 17 00:00:00 2001 From: chacha21 Date: Tue, 9 Jan 2024 08:35:13 +0100 Subject: [PATCH 5/6] fixed bug the "empty mag" feature is useless for interleaved case get row/col size from angle mat rather than mag mat than could be empty in other cases --- modules/cudaarithm/src/cuda/polar_cart.cu | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/modules/cudaarithm/src/cuda/polar_cart.cu b/modules/cudaarithm/src/cuda/polar_cart.cu index bfae5276aa3..a4ab3c0235f 100644 --- a/modules/cudaarithm/src/cuda/polar_cart.cu +++ b/modules/cudaarithm/src/cuda/polar_cart.cu @@ -243,7 +243,7 @@ namespace const int x = blockDim.x * blockIdx.x + threadIdx.x; const int y = blockDim.y * blockIdx.y + threadIdx.y; - if (x >= mag.cols || y >= mag.rows) + if (x >= angle.cols || y >= angle.rows) return; const T mag_val = useMag ? mag(y, x) : static_cast(1.0); @@ -278,7 +278,7 @@ namespace xymat(y, x) = xy; } - template + template __global__ void polarToCartInterleavedImpl_(const PtrStepSz::type > magAngle, PtrStepSz::type > xymat, const T scale) { typedef typename MakeVec::type T2; @@ -289,7 +289,7 @@ namespace return; const T2 magAngle_val = magAngle(y, x); - const T mag_val = useMag ? magAngle_val.x : static_cast(1.0); + const T mag_val = magAngle_val.x; const T angle_val = magAngle_val.y; T sin_a, cos_a; @@ -340,10 +340,7 @@ namespace const T scale = angleInDegrees ? static_cast(CV_PI / 180.0) : static_cast(1.0); - if (magAngle.empty()) - polarToCartInterleavedImpl_ << > >(magAngle, xy, scale); - else - polarToCartInterleavedImpl_ << > >(magAngle, xy, scale); + polarToCartInterleavedImpl_ << > >(magAngle, xy, scale); } } From 19c772f04843472b44e02cd15e2091f249c54a4a Mon Sep 17 00:00:00 2001 From: chacha21 Date: Tue, 9 Jan 2024 14:36:34 +0100 Subject: [PATCH 6/6] modifications as suggested code style and simplifications --- modules/cudaarithm/src/cuda/polar_cart.cu | 20 ++++++++-------- .../test/test_element_operations.cpp | 24 +++++++++---------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/modules/cudaarithm/src/cuda/polar_cart.cu b/modules/cudaarithm/src/cuda/polar_cart.cu index a4ab3c0235f..8150184c43d 100644 --- a/modules/cudaarithm/src/cuda/polar_cart.cu +++ b/modules/cudaarithm/src/cuda/polar_cart.cu @@ -131,10 +131,10 @@ void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Outpu GpuMat mag = getOutputMat(_mag, x.size(), CV_32FC1, stream); GpuMat angle = getOutputMat(_angle, x.size(), CV_32FC1, stream); - GpuMat_ xc(x.reshape(1)); - GpuMat_ yc(y.reshape(1)); - GpuMat_ magc(mag.reshape(1)); - GpuMat_ anglec(angle.reshape(1)); + GpuMat_ xc(x); + GpuMat_ yc(y); + GpuMat_ magc(mag); + GpuMat_ anglec(angle); if (angleInDegrees) { @@ -168,8 +168,8 @@ void cv::cuda::cartToPolar(InputArray _xy, OutputArray _mag, OutputArray _angle, GpuMat mag = getOutputMat(_mag, xy.size(), CV_32FC1, stream); GpuMat angle = getOutputMat(_angle, xy.size(), CV_32FC1, stream); - GpuMat_ magc(mag.reshape(1)); - GpuMat_ anglec(angle.reshape(1)); + GpuMat_ magc(mag); + GpuMat_ anglec(angle); if (angleInDegrees) { @@ -238,7 +238,7 @@ namespace }; template - __global__ void polarToCartImpl_(const PtrStepSz mag, const PtrStepSz angle, PtrStepSz xmat, PtrStepSz ymat, const T scale) + __global__ void polarToCartImpl_(const PtrStep mag, const PtrStepSz angle, PtrStep xmat, PtrStep ymat, const T scale) { const int x = blockDim.x * blockIdx.x + threadIdx.x; const int y = blockDim.y * blockIdx.y + threadIdx.y; @@ -258,13 +258,13 @@ namespace } template - __global__ void polarToCartDstInterleavedImpl_(const PtrStepSz mag, const PtrStepSz angle, PtrStepSz::type > xymat, const T scale) + __global__ void polarToCartDstInterleavedImpl_(const PtrStep mag, const PtrStepSz angle, PtrStep::type > xymat, const T scale) { typedef typename MakeVec::type T2; const int x = blockDim.x * blockIdx.x + threadIdx.x; const int y = blockDim.y * blockIdx.y + threadIdx.y; - if (x >= xymat.cols || y >= xymat.rows) + if (x >= angle.cols || y >= angle.rows) return; const T mag_val = useMag ? mag(y, x) : static_cast(1.0); @@ -279,7 +279,7 @@ namespace } template - __global__ void polarToCartInterleavedImpl_(const PtrStepSz::type > magAngle, PtrStepSz::type > xymat, const T scale) + __global__ void polarToCartInterleavedImpl_(const PtrStepSz::type > magAngle, PtrStep::type > xymat, const T scale) { typedef typename MakeVec::type T2; const int x = blockDim.x * blockIdx.x + threadIdx.x; diff --git a/modules/cudaarithm/test/test_element_operations.cpp b/modules/cudaarithm/test/test_element_operations.cpp index 6c2166bc845..a15ad7b3ee5 100644 --- a/modules/cudaarithm/test/test_element_operations.cpp +++ b/modules/cudaarithm/test/test_element_operations.cpp @@ -2850,7 +2850,7 @@ INSTANTIATE_TEST_CASE_P(CUDA_Arithm, CartToPolar, testing::Combine( testing::Values(AngleInDegrees(false), AngleInDegrees(true)), WHOLE_SUBMAT)); -PARAM_TEST_CASE(CartToPolarInterleaved1, cv::cuda::DeviceInfo, cv::Size, AngleInDegrees, UseRoi) +PARAM_TEST_CASE(CartToPolarInterleavedXY, cv::cuda::DeviceInfo, cv::Size, AngleInDegrees, UseRoi) { cv::cuda::DeviceInfo devInfo; cv::Size size; @@ -2868,7 +2868,7 @@ PARAM_TEST_CASE(CartToPolarInterleaved1, cv::cuda::DeviceInfo, cv::Size, AngleIn } }; -CUDA_TEST_P(CartToPolarInterleaved1, Accuracy) +CUDA_TEST_P(CartToPolarInterleavedXY, Accuracy) { cv::Mat x = randomMat(size, CV_32FC1); cv::Mat y = randomMat(size, CV_32FC1); @@ -2888,13 +2888,13 @@ CUDA_TEST_P(CartToPolarInterleaved1, Accuracy) EXPECT_MAT_NEAR(angle_gold, angle, angleInDegrees ? 1e-2 : 1e-3); } -INSTANTIATE_TEST_CASE_P(CUDA_Arithm, CartToPolarInterleaved1, testing::Combine( +INSTANTIATE_TEST_CASE_P(CUDA_Arithm, CartToPolarInterleavedXY, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, testing::Values(AngleInDegrees(false), AngleInDegrees(true)), WHOLE_SUBMAT)); -PARAM_TEST_CASE(CartToPolarInterleaved2, cv::cuda::DeviceInfo, cv::Size, AngleInDegrees, UseRoi) +PARAM_TEST_CASE(CartToPolarInterleavedXYMagAngle, cv::cuda::DeviceInfo, cv::Size, AngleInDegrees, UseRoi) { cv::cuda::DeviceInfo devInfo; cv::Size size; @@ -2912,7 +2912,7 @@ PARAM_TEST_CASE(CartToPolarInterleaved2, cv::cuda::DeviceInfo, cv::Size, AngleIn } }; -CUDA_TEST_P(CartToPolarInterleaved2, Accuracy) +CUDA_TEST_P(CartToPolarInterleavedXYMagAngle, Accuracy) { cv::Mat x = randomMat(size, CV_32FC1); cv::Mat y = randomMat(size, CV_32FC1); @@ -2935,7 +2935,7 @@ CUDA_TEST_P(CartToPolarInterleaved2, Accuracy) EXPECT_MAT_NEAR(angle_gold, angle, angleInDegrees ? 1e-2 : 1e-3); } -INSTANTIATE_TEST_CASE_P(CUDA_Arithm, CartToPolarInterleaved2, testing::Combine( +INSTANTIATE_TEST_CASE_P(CUDA_Arithm, CartToPolarInterleavedXYMagAngle, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, testing::Values(AngleInDegrees(false), AngleInDegrees(true)), @@ -2989,7 +2989,7 @@ INSTANTIATE_TEST_CASE_P(CUDA_Arithm, PolarToCart, testing::Combine( testing::Values(AngleInDegrees(false), AngleInDegrees(true)), WHOLE_SUBMAT)); -PARAM_TEST_CASE(PolarToCartInterleave1, cv::cuda::DeviceInfo, cv::Size, MatType, AngleInDegrees, UseRoi) +PARAM_TEST_CASE(PolarToCartInterleaveXY, cv::cuda::DeviceInfo, cv::Size, MatType, AngleInDegrees, UseRoi) { cv::cuda::DeviceInfo devInfo; cv::Size size; @@ -3009,7 +3009,7 @@ PARAM_TEST_CASE(PolarToCartInterleave1, cv::cuda::DeviceInfo, cv::Size, MatType, } }; -CUDA_TEST_P(PolarToCartInterleave1, Accuracy) +CUDA_TEST_P(PolarToCartInterleaveXY, Accuracy) { cv::Mat magnitude = randomMat(size, type); cv::Mat angle = randomMat(size, type); @@ -3030,14 +3030,14 @@ CUDA_TEST_P(PolarToCartInterleave1, Accuracy) EXPECT_MAT_NEAR(y_gold, y, tol); } -INSTANTIATE_TEST_CASE_P(CUDA_Arithm, PolarToCartInterleave1, testing::Combine( +INSTANTIATE_TEST_CASE_P(CUDA_Arithm, PolarToCartInterleaveXY, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, testing::Values(CV_32FC1, CV_64FC1), testing::Values(AngleInDegrees(false), AngleInDegrees(true)), WHOLE_SUBMAT)); -PARAM_TEST_CASE(PolarToCartInterleave2, cv::cuda::DeviceInfo, cv::Size, MatType, AngleInDegrees, UseRoi) +PARAM_TEST_CASE(PolarToCartInterleaveMagAngleXY, cv::cuda::DeviceInfo, cv::Size, MatType, AngleInDegrees, UseRoi) { cv::cuda::DeviceInfo devInfo; cv::Size size; @@ -3057,7 +3057,7 @@ PARAM_TEST_CASE(PolarToCartInterleave2, cv::cuda::DeviceInfo, cv::Size, MatType, } }; -CUDA_TEST_P(PolarToCartInterleave2, Accuracy) +CUDA_TEST_P(PolarToCartInterleaveMagAngleXY, Accuracy) { cv::Mat magnitude = randomMat(size, type); cv::Mat angle = randomMat(size, type); @@ -3081,7 +3081,7 @@ CUDA_TEST_P(PolarToCartInterleave2, Accuracy) EXPECT_MAT_NEAR(y_gold, y, tol); } -INSTANTIATE_TEST_CASE_P(CUDA_Arithm, PolarToCartInterleave2, testing::Combine( +INSTANTIATE_TEST_CASE_P(CUDA_Arithm, PolarToCartInterleaveMagAngleXY, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, testing::Values(CV_32FC1, CV_64FC1),