diff --git a/modules/cudaarithm/CMakeLists.txt b/modules/cudaarithm/CMakeLists.txt new file mode 100644 index 00000000000..d552bb4ebe9 --- /dev/null +++ b/modules/cudaarithm/CMakeLists.txt @@ -0,0 +1,27 @@ +if(IOS OR WINRT OR (NOT HAVE_CUDA AND NOT BUILD_CUDA_STUBS)) + ocv_module_disable(cudaarithm) +endif() + +set(the_description "CUDA-accelerated Operations on Matrices") + +ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow) + +ocv_add_module(cudaarithm opencv_core OPTIONAL opencv_cudev WRAP python) + +ocv_module_include_directories() +ocv_glob_module_sources() + +set(extra_libs "") + +if(HAVE_CUBLAS) + list(APPEND extra_libs ${CUDA_cublas_LIBRARY}) +endif() + +if(HAVE_CUFFT) + list(APPEND extra_libs ${CUDA_cufft_LIBRARY}) +endif() + +ocv_create_module(${extra_libs}) + +ocv_add_accuracy_tests(DEPENDS_ON opencv_imgproc) +ocv_add_perf_tests(DEPENDS_ON opencv_imgproc) diff --git a/modules/cudaarithm/include/opencv2/cudaarithm.hpp b/modules/cudaarithm/include/opencv2/cudaarithm.hpp new file mode 100644 index 00000000000..c357f77b4f1 --- /dev/null +++ b/modules/cudaarithm/include/opencv2/cudaarithm.hpp @@ -0,0 +1,878 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifndef OPENCV_CUDAARITHM_HPP +#define OPENCV_CUDAARITHM_HPP + +#ifndef __cplusplus +# error cudaarithm.hpp header must be compiled as C++ +#endif + +#include "opencv2/core/cuda.hpp" + +/** + @addtogroup cuda + @{ + @defgroup cudaarithm Operations on Matrices + @{ + @defgroup cudaarithm_core Core Operations on Matrices + @defgroup cudaarithm_elem Per-element Operations + @defgroup cudaarithm_reduce Matrix Reductions + @defgroup cudaarithm_arithm Arithm Operations on Matrices + @} + @} + */ + +namespace cv { namespace cuda { + +//! @addtogroup cudaarithm +//! @{ + +//! @addtogroup cudaarithm_elem +//! @{ + +/** @brief Computes a matrix-matrix or matrix-scalar sum. + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . +@param dst Destination matrix that has the same size and number of channels as the input array(s). +The depth is defined by dtype or src1 depth. +@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the +destination array to be changed. The mask can be used only with single channel images. +@param dtype Optional depth of the output array. +@param stream Stream for the asynchronous version. + +@sa add + */ +CV_EXPORTS_W void add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null()); + +/** @brief Computes a matrix-matrix or matrix-scalar difference. + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . +@param dst Destination matrix that has the same size and number of channels as the input array(s). +The depth is defined by dtype or src1 depth. +@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the +destination array to be changed. The mask can be used only with single channel images. +@param dtype Optional depth of the output array. +@param stream Stream for the asynchronous version. + +@sa subtract + */ +CV_EXPORTS_W void subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null()); + +/** @brief Computes a matrix-matrix or matrix-scalar per-element product. + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and number of channels as the input array(s). +The depth is defined by dtype or src1 depth. +@param scale Optional scale factor. +@param dtype Optional depth of the output array. +@param stream Stream for the asynchronous version. + +@sa multiply + */ +CV_EXPORTS_W void multiply(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); + +/** @brief Computes a matrix-matrix or matrix-scalar division. + +@param src1 First source matrix or a scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and number of channels as the input array(s). +The depth is defined by dtype or src1 depth. +@param scale Optional scale factor. +@param dtype Optional depth of the output array. +@param stream Stream for the asynchronous version. + +This function, in contrast to divide, uses a round-down rounding mode. 
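As a quick orientation for the add/subtract/multiply/divide family documented here, the following host-side sketch (illustrative only; the helper name and the same-size CV_8UC1 inputs are assumptions, not part of this patch) shows the typical upload, per-element operation, and download flow:

#include <opencv2/core.hpp>
#include <opencv2/cudaarithm.hpp>

// Illustrative sketch: masked matrix-matrix add and matrix-scalar divide on the GPU.
void elementwiseSketch(const cv::Mat& a, const cv::Mat& b, const cv::Mat& mask) // same-size CV_8UC1 inputs assumed
{
    cv::cuda::GpuMat d_a(a), d_b(b), d_mask(mask), d_sum, d_ratio;

    // Only pixels where the mask is non-zero are written to the destination.
    cv::cuda::add(d_a, d_b, d_sum, d_mask);

    // Matrix-scalar division with the default scale and output depth.
    cv::cuda::divide(d_a, cv::Scalar(2), d_ratio);

    cv::Mat sum, ratio;
    d_sum.download(sum);
    d_ratio.download(ratio);
}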
+ +@sa divide + */ +CV_EXPORTS_W void divide(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); + +/** @brief Computes per-element absolute difference of two matrices (or of a matrix and scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param stream Stream for the asynchronous version. + +@sa absdiff + */ +CV_EXPORTS_W void absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes an absolute value of each matrix element. + +@param src Source matrix. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + +@sa abs + */ +CV_EXPORTS_W void abs(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes a square value of each matrix element. + +@param src Source matrix. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void sqr(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes a square root of each matrix element. + +@param src Source matrix. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + +@sa sqrt + */ +CV_EXPORTS_W void sqrt(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes an exponent of each matrix element. + +@param src Source matrix. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + +@sa exp + */ +CV_EXPORTS_W void exp(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes a natural logarithm of absolute value of each matrix element. + +@param src Source matrix. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + +@sa log + */ +CV_EXPORTS_W void log(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Raises every matrix element to a power. + +@param src Source matrix. +@param power Exponent of power. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + +The function pow raises every element of the input matrix to power : + +\f[\texttt{dst} (I) = \fork{\texttt{src}(I)^power}{if \texttt{power} is integer}{|\texttt{src}(I)|^power}{otherwise}\f] + +@sa pow + */ +CV_EXPORTS_W void pow(InputArray src, double power, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Compares elements of two matrices (or of a matrix and scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param cmpop Flag specifying the relation between the elements to be checked: +- **CMP_EQ:** a(.) == b(.) +- **CMP_GT:** a(.) \> b(.) +- **CMP_GE:** a(.) \>= b(.) +- **CMP_LT:** a(.) \< b(.) +- **CMP_LE:** a(.) \<= b(.) +- **CMP_NE:** a(.) != b(.) +@param stream Stream for the asynchronous version. + +@sa compare + */ +CV_EXPORTS_W void compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream = Stream::Null()); + +/** @brief Performs a per-element bitwise inversion. 
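A short sketch of how the per-element math and comparison functions above compose (the helper name and the CV_32FC1 inputs are assumptions for illustration):

#include <opencv2/cudaarithm.hpp>

// Illustrative sketch: square each element, then flag elements whose square exceeds a reference.
void powCompareSketch(const cv::cuda::GpuMat& src,   // assumed CV_32FC1
                      const cv::cuda::GpuMat& ref,   // same size/type as src
                      cv::cuda::GpuMat& flags)       // CV_8UC1: 255 where src(I)^2 > ref(I)
{
    cv::cuda::GpuMat squared;
    cv::cuda::pow(src, 2.0, squared);
    cv::cuda::compare(squared, ref, flags, cv::CMP_GT);
}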
+ +@param src Source matrix. +@param dst Destination matrix with the same size and type as src . +@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the +destination array to be changed. The mask can be used only with single channel images. +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void bitwise_not(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Performs a per-element bitwise disjunction of two matrices (or of matrix and scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the +destination array to be changed. The mask can be used only with single channel images. +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Performs a per-element bitwise conjunction of two matrices (or of matrix and scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the +destination array to be changed. The mask can be used only with single channel images. +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Performs a per-element bitwise exclusive or operation of two matrices (or of matrix and scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the +destination array to be changed. The mask can be used only with single channel images. +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Performs pixel-by-pixel right shift of an image by a constant value. + +@param src Source matrix. Supports 1, 3 and 4 channel images with integer elements. +@param val Constant values, one per channel. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS void rshift(InputArray src, Scalar_<int> val, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Performs pixel-by-pixel left shift of an image by a constant value. + +@param src Source matrix. Supports 1, 3 and 4 channel images with CV_8U , CV_16U or CV_32S +depth. +@param val Constant values, one per channel. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS void lshift(InputArray src, Scalar_<int> val, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes the per-element minimum of two matrices (or a matrix and a scalar). + +@param src1 First source matrix or scalar.
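For the bitwise and shift operations above, a minimal sketch (8-bit single-channel inputs assumed; the helper name is hypothetical):

#include <opencv2/cudaarithm.hpp>

// Illustrative sketch: keep only masked pixels, then halve intensities with a per-pixel right shift.
void maskAndShiftSketch(const cv::cuda::GpuMat& image,  // CV_8UC1
                        const cv::cuda::GpuMat& mask,   // CV_8UC1, non-zero = keep
                        cv::cuda::GpuMat& out)
{
    cv::cuda::GpuMat masked;
    cv::cuda::bitwise_and(image, image, masked, mask);   // copy-through where the mask is set
    cv::cuda::rshift(masked, cv::Scalar_<int>(1), out);  // divide by 2 for integer images
}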
+@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param stream Stream for the asynchronous version. + +@sa min + */ +CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes the per-element maximum of two matrices (or a matrix and a scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param stream Stream for the asynchronous version. + +@sa max + */ +CV_EXPORTS_W void max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes the weighted sum of two arrays. + +@param src1 First source array. +@param alpha Weight for the first array elements. +@param src2 Second source array of the same size and channel number as src1 . +@param beta Weight for the second array elements. +@param dst Destination array that has the same size and number of channels as the input arrays. +@param gamma Scalar added to each sum. +@param dtype Optional depth of the destination array. When both input arrays have the same depth, +dtype can be set to -1, which will be equivalent to src1.depth(). +@param stream Stream for the asynchronous version. + +The function addWeighted calculates the weighted sum of two arrays as follows: + +\f[\texttt{dst} (I)= \texttt{saturate} ( \texttt{src1} (I)* \texttt{alpha} + \texttt{src2} (I)* \texttt{beta} + \texttt{gamma} )\f] + +where I is a multi-dimensional index of array elements. In case of multi-channel arrays, each +channel is processed independently. + +@sa addWeighted + */ +CV_EXPORTS_W void addWeighted(InputArray src1, double alpha, InputArray src2, double beta, double gamma, OutputArray dst, + int dtype = -1, Stream& stream = Stream::Null()); + +//! adds scaled array to another one (dst = alpha*src1 + src2) +static inline void scaleAdd(InputArray src1, double alpha, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()) +{ + addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream); +} + +/** @brief Applies a fixed-level threshold to each array element. + +@param src Source array (single-channel). +@param dst Destination array with the same size and type as src . +@param thresh Threshold value. +@param maxval Maximum value to use with THRESH_BINARY and THRESH_BINARY_INV threshold types. +@param type Threshold type. For details, see threshold . The THRESH_OTSU and THRESH_TRIANGLE +threshold types are not supported. +@param stream Stream for the asynchronous version. + +@sa threshold + */ +CV_EXPORTS_W double threshold(InputArray src, OutputArray dst, double thresh, double maxval, int type, Stream& stream = Stream::Null()); + +/** @brief Computes magnitudes of complex matrix elements. + +@param xy Source complex matrix in the interleaved format ( CV_32FC2 ). +@param magnitude Destination matrix of float magnitudes ( CV_32FC1 ). +@param stream Stream for the asynchronous version. + +@sa magnitude + */ +CV_EXPORTS_W void magnitude(InputArray xy, OutputArray magnitude, Stream& stream = Stream::Null()); + +/** @brief Computes squared magnitudes of complex matrix elements. + +@param xy Source complex matrix in the interleaved format ( CV_32FC2 ). +@param magnitude Destination matrix of float magnitude squares ( CV_32FC1 ). +@param stream Stream for the asynchronous version. 
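The following sketch chains addWeighted and threshold on one stream, which is the intended use of the Stream parameter documented above (the helper name and CV_8UC1 inputs are assumptions):

#include <opencv2/cudaarithm.hpp>

// Illustrative sketch: 50/50 blend followed by a fixed-level binary threshold,
// both enqueued on the same stream so no host synchronization happens in between.
void blendThresholdSketch(const cv::cuda::GpuMat& a, const cv::cuda::GpuMat& b, // CV_8UC1
                          cv::cuda::GpuMat& binary, cv::cuda::Stream& stream)
{
    cv::cuda::GpuMat blended;
    cv::cuda::addWeighted(a, 0.5, b, 0.5, 0.0, blended, -1, stream);
    cv::cuda::threshold(blended, binary, 128.0, 255.0, cv::THRESH_BINARY, stream);
}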
+ */ +CV_EXPORTS_W void magnitudeSqr(InputArray xy, OutputArray magnitude, Stream& stream = Stream::Null()); + +/** @overload + computes magnitude of each (x(i), y(i)) vector + supports only floating-point source +@param x Source matrix containing real components ( CV_32FC1 ). +@param y Source matrix containing imaginary components ( CV_32FC1 ). +@param magnitude Destination matrix of float magnitudes ( CV_32FC1 ). +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void magnitude(InputArray x, InputArray y, OutputArray magnitude, Stream& stream = Stream::Null()); + +/** @overload + computes squared magnitude of each (x(i), y(i)) vector + supports only floating-point source +@param x Source matrix containing real components ( CV_32FC1 ). +@param y Source matrix containing imaginary components ( CV_32FC1 ). +@param magnitude Destination matrix of float magnitude squares ( CV_32FC1 ). +@param stream Stream for the asynchronous version. +*/ +CV_EXPORTS_W void magnitudeSqr(InputArray x, InputArray y, OutputArray magnitude, Stream& stream = Stream::Null()); + +/** @brief Computes polar angles of complex matrix elements. + +@param x Source matrix containing real components ( CV_32FC1 ). +@param y Source matrix containing imaginary components ( CV_32FC1 ). +@param angle Destination matrix of angles ( CV_32FC1 ). +@param angleInDegrees Flag for angles that must be evaluated in degrees. +@param stream Stream for the asynchronous version. + +@sa phase + */ +CV_EXPORTS_W void phase(InputArray x, InputArray y, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); + +/** @brief Converts Cartesian coordinates into polar. + +@param x Source matrix containing real components ( CV_32FC1 ). +@param y Source matrix containing imaginary components ( CV_32FC1 ). +@param magnitude Destination matrix of float magnitudes ( CV_32FC1 ). +@param angle Destination matrix of angles ( CV_32FC1 ). +@param angleInDegrees Flag for angles that must be evaluated in degrees. +@param stream Stream for the asynchronous version. + +@sa cartToPolar + */ +CV_EXPORTS_W void cartToPolar(InputArray x, InputArray y, OutputArray magnitude, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); + +/** @brief Converts polar coordinates into Cartesian. + +@param magnitude Source matrix containing magnitudes ( CV_32FC1 ). +@param angle Source matrix containing angles ( CV_32FC1 ). +@param x Destination matrix of real components ( CV_32FC1 ). +@param y Destination matrix of imaginary components ( CV_32FC1 ). +@param angleInDegrees Flag that indicates angles in degrees. +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void polarToCart(InputArray magnitude, InputArray angle, OutputArray x, OutputArray y, bool angleInDegrees = false, Stream& stream = Stream::Null()); + +//! @} cudaarithm_elem + +//! @addtogroup cudaarithm_core +//! @{ + +/** @brief Makes a multi-channel matrix out of several single-channel matrices. + +@param src Array/vector of source matrices. +@param n Number of source matrices. +@param dst Destination matrix. +@param stream Stream for the asynchronous version. + +@sa merge + */ +CV_EXPORTS_W void merge(const GpuMat* src, size_t n, OutputArray dst, Stream& stream = Stream::Null()); +/** @overload */ +CV_EXPORTS_W void merge(const std::vector& src, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Copies each plane of a multi-channel matrix into an array. + +@param src Source matrix. 
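A small sketch of the coordinate-conversion and channel-composition functions above (gradient planes assumed to be CV_32FC1; the helper is illustrative):

#include <vector>
#include <opencv2/cudaarithm.hpp>

// Illustrative sketch: magnitude/angle from x/y gradient planes, plus an interleaved CV_32FC2 copy.
void gradientPolarSketch(const cv::cuda::GpuMat& gx, const cv::cuda::GpuMat& gy,
                         cv::cuda::GpuMat& mag, cv::cuda::GpuMat& angle)
{
    cv::cuda::cartToPolar(gx, gy, mag, angle, true /* angleInDegrees */);

    // Interleave the planes, e.g. to feed the xy overloads of magnitude/magnitudeSqr.
    std::vector<cv::cuda::GpuMat> planes = {gx, gy};
    cv::cuda::GpuMat xy;
    cv::cuda::merge(planes, xy);
}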
+@param dst Destination array/vector of single-channel matrices. +@param stream Stream for the asynchronous version. + +@sa split + */ +CV_EXPORTS_W void split(InputArray src, GpuMat* dst, Stream& stream = Stream::Null()); +/** @overload */ +CV_EXPORTS_W void split(InputArray src, std::vector<GpuMat>& dst, Stream& stream = Stream::Null()); + +/** @brief Transposes a matrix. + +@param src1 Source matrix. 1-, 4-, 8-byte element sizes are supported for now. +@param dst Destination matrix. +@param stream Stream for the asynchronous version. + +@sa transpose + */ +CV_EXPORTS_W void transpose(InputArray src1, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Flips a 2D matrix around vertical, horizontal, or both axes. + +@param src Source matrix. Supports 1, 3 and 4 channel images with CV_8U, CV_16U, CV_32S or +CV_32F depth. +@param dst Destination matrix. +@param flipCode Flip mode for the source: +- 0 Flips around x-axis. +- \> 0 Flips around y-axis. +- \< 0 Flips around both axes. +@param stream Stream for the asynchronous version. + +@sa flip + */ +CV_EXPORTS_W void flip(InputArray src, OutputArray dst, int flipCode, Stream& stream = Stream::Null()); + +/** @brief Base class for transforms using a lookup table. + */ +class CV_EXPORTS_W LookUpTable : public Algorithm +{ +public: + /** @brief Transforms the source matrix into the destination matrix using the given look-up table: + dst(I) = lut(src(I)) . + + @param src Source matrix. CV_8UC1 and CV_8UC3 matrices are supported for now. + @param dst Destination matrix. + @param stream Stream for the asynchronous version. + */ + CV_WRAP virtual void transform(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) = 0; +}; + +/** @brief Creates implementation for cuda::LookUpTable . + +@param lut Look-up table of 256 elements. It is a continuous CV_8U matrix. + */ +CV_EXPORTS_W Ptr<LookUpTable> createLookUpTable(InputArray lut); + +/** @brief Forms a border around an image. + +@param src Source image. CV_8UC1 , CV_8UC4 , CV_32SC1 , and CV_32FC1 types are supported. +@param dst Destination image with the same type as src. The size is +Size(src.cols+left+right, src.rows+top+bottom) . +@param top +@param bottom +@param left +@param right Number of pixels in each direction from the source image rectangle to extrapolate. +For example: top=1, bottom=1, left=1, right=1 mean that 1 pixel-wide border needs to be built. +@param borderType Border type. See borderInterpolate for details. BORDER_REFLECT101 , +BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supported for now. +@param value Border value. +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void copyMakeBorder(InputArray src, OutputArray dst, int top, int bottom, int left, int right, int borderType, + Scalar value = Scalar(), Stream& stream = Stream::Null()); + +//! @} cudaarithm_core + +//! @addtogroup cudaarithm_reduce +//! @{ + +/** @brief Returns the norm of a matrix (or difference of two matrices). + +@param src1 Source matrix. Any matrices except 64F are supported. +@param normType Norm type. NORM_L1 , NORM_L2 , and NORM_INF are supported for now. +@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type. + +@sa norm + */ +CV_EXPORTS_W double norm(InputArray src1, int normType, InputArray mask = noArray()); +/** @overload */ +CV_EXPORTS_W void calcNorm(InputArray src, OutputArray dst, int normType, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Returns the norm of the difference of two matrices.
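Usage of the LookUpTable interface above, sketched under the assumption of a CV_8UC1 input (the inversion table is just an example):

#include <opencv2/core.hpp>
#include <opencv2/cudaarithm.hpp>

// Illustrative sketch: 8-bit intensity inversion through cuda::LookUpTable.
// The table object is built once and can be reused for every frame.
void lutSketch(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst)  // src assumed CV_8UC1
{
    cv::Mat lut(1, 256, CV_8UC1);
    for (int i = 0; i < 256; ++i)
        lut.at<uchar>(i) = static_cast<uchar>(255 - i);

    cv::Ptr<cv::cuda::LookUpTable> table = cv::cuda::createLookUpTable(lut);
    table->transform(src, dst);
}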
+ +@param src1 Source matrix. Any matrices except 64F are supported. +@param src2 Second source matrix (if any) with the same size and type as src1. +@param normType Norm type. NORM_L1 , NORM_L2 , and NORM_INF are supported for now. + +@sa norm + */ +CV_EXPORTS_W double norm(InputArray src1, InputArray src2, int normType=NORM_L2); +/** @overload */ +CV_EXPORTS_W void calcNormDiff(InputArray src1, InputArray src2, OutputArray dst, int normType=NORM_L2, Stream& stream = Stream::Null()); + +/** @brief Returns the sum of matrix elements. + +@param src Source image of any depth except for CV_64F . +@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type. + +@sa sum + */ +CV_EXPORTS_W Scalar sum(InputArray src, InputArray mask = noArray()); +/** @overload */ +CV_EXPORTS_W void calcSum(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Returns the sum of absolute values for matrix elements. + +@param src Source image of any depth except for CV_64F . +@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type. + */ +CV_EXPORTS_W Scalar absSum(InputArray src, InputArray mask = noArray()); +/** @overload */ +CV_EXPORTS_W void calcAbsSum(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Returns the squared sum of matrix elements. + +@param src Source image of any depth except for CV_64F . +@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type. + */ +CV_EXPORTS_W Scalar sqrSum(InputArray src, InputArray mask = noArray()); +/** @overload */ +CV_EXPORTS_W void calcSqrSum(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Finds global minimum and maximum matrix elements and returns their values. + +@param src Single-channel source image. +@param minVal Pointer to the returned minimum value. Use NULL if not required. +@param maxVal Pointer to the returned maximum value. Use NULL if not required. +@param mask Optional mask to select a sub-matrix. + +The function does not work with CV_64F images on GPUs with the compute capability \< 1.3. + +@sa minMaxLoc + */ +CV_EXPORTS_W void minMax(InputArray src, double* minVal, double* maxVal, InputArray mask = noArray()); +/** @overload */ +CV_EXPORTS_W void findMinMax(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Finds global minimum and maximum matrix elements and returns their values with locations. + +@param src Single-channel source image. +@param minVal Pointer to the returned minimum value. Use NULL if not required. +@param maxVal Pointer to the returned maximum value. Use NULL if not required. +@param minLoc Pointer to the returned minimum location. Use NULL if not required. +@param maxLoc Pointer to the returned maximum location. Use NULL if not required. +@param mask Optional mask to select a sub-matrix. + +The function does not work with CV_64F images on GPU with the compute capability \< 1.3. + +@sa minMaxLoc + */ +CV_EXPORTS_W void minMaxLoc(InputArray src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, + InputArray mask = noArray()); +/** @overload */ +CV_EXPORTS_W void findMinMaxLoc(InputArray src, OutputArray minMaxVals, OutputArray loc, + InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Counts non-zero matrix elements. + +@param src Single-channel source image. 
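To illustrate the difference between the blocking reductions and their calc*/find* counterparts documented above (the helper name and a single-channel input are assumptions):

#include <opencv2/cudaarithm.hpp>

// Illustrative sketch: blocking minMax vs. the stream-friendly findMinMax variant.
void reductionSketch(const cv::cuda::GpuMat& src, cv::cuda::Stream& stream)
{
    // Blocking form: results are returned directly on the host.
    double lo = 0.0, hi = 0.0;
    cv::cuda::minMax(src, &lo, &hi);

    // Asynchronous form: the min/max pair stays in a small device matrix
    // until it is explicitly downloaded on the chosen stream.
    cv::cuda::GpuMat d_minmax;
    cv::cuda::findMinMax(src, d_minmax, cv::noArray(), stream);

    cv::Mat minmax;
    d_minmax.download(minmax, stream);
    stream.waitForCompletion();
}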
+ +The function does not work with CV_64F images on GPUs with the compute capability \< 1.3. + +@sa countNonZero + */ +CV_EXPORTS_W int countNonZero(InputArray src); +/** @overload */ +CV_EXPORTS_W void countNonZero(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Reduces a matrix to a vector. + +@param mtx Source 2D matrix. +@param vec Destination vector. Its size and type is defined by dim and dtype parameters. +@param dim Dimension index along which the matrix is reduced. 0 means that the matrix is reduced +to a single row. 1 means that the matrix is reduced to a single column. +@param reduceOp Reduction operation that could be one of the following: +- **CV_REDUCE_SUM** The output is the sum of all rows/columns of the matrix. +- **CV_REDUCE_AVG** The output is the mean vector of all rows/columns of the matrix. +- **CV_REDUCE_MAX** The output is the maximum (column/row-wise) of all rows/columns of the +matrix. +- **CV_REDUCE_MIN** The output is the minimum (column/row-wise) of all rows/columns of the +matrix. +@param dtype When it is negative, the destination vector will have the same type as the source +matrix. Otherwise, its type will be CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), mtx.channels()) . +@param stream Stream for the asynchronous version. + +The function reduce reduces the matrix to a vector by treating the matrix rows/columns as a set of +1D vectors and performing the specified operation on the vectors until a single row/column is +obtained. For example, the function can be used to compute horizontal and vertical projections of a +raster image. In case of CV_REDUCE_SUM and CV_REDUCE_AVG , the output may have a larger element +bit-depth to preserve accuracy. And multi-channel arrays are also supported in these two reduction +modes. + +@sa reduce + */ +CV_EXPORTS_W void reduce(InputArray mtx, OutputArray vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null()); + +/** @brief Computes a mean value and a standard deviation of matrix elements. + +@param mtx Source matrix. CV_8UC1 matrices are supported for now. +@param mean Mean value. +@param stddev Standard deviation value. + +@sa meanStdDev + */ +CV_EXPORTS_W void meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev); +/** @overload */ +CV_EXPORTS_W void meanStdDev(InputArray mtx, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes a standard deviation of integral images. + +@param src Source image. Only the CV_32SC1 type is supported. +@param sqr Squared source image. Only the CV_32FC1 type is supported. +@param dst Destination image with the same type and size as src . +@param rect Rectangular window. +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void rectStdDev(InputArray src, InputArray sqr, OutputArray dst, Rect rect, Stream& stream = Stream::Null()); + +/** @brief Normalizes the norm or value range of an array. + +@param src Input array. +@param dst Output array of the same size as src . +@param alpha Norm value to normalize to or the lower range boundary in case of the range +normalization. +@param beta Upper range boundary in case of the range normalization; it is not used for the norm +normalization. +@param norm_type Normalization type ( NORM_MINMAX , NORM_L2 , NORM_L1 or NORM_INF ). +@param dtype When negative, the output array has the same type as src; otherwise, it has the same +number of channels as src and the depth =CV_MAT_DEPTH(dtype). +@param mask Optional operation mask. 
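A sketch of reduce combined with normalize, as documented above (a CV_32FC1 input is assumed; the helper is illustrative):

#include <opencv2/cudaarithm.hpp>

// Illustrative sketch: column-wise means, then min-max scaling of the result into [0, 1].
void columnMeansSketch(const cv::cuda::GpuMat& mtx, cv::cuda::GpuMat& scaled)
{
    cv::cuda::GpuMat means;
    cv::cuda::reduce(mtx, means, 0 /* collapse to a single row */, cv::REDUCE_AVG, CV_32F);
    cv::cuda::normalize(means, scaled, 0.0, 1.0, cv::NORM_MINMAX, CV_32F);
}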
+@param stream Stream for the asynchronous version. + +@sa normalize + */ +CV_EXPORTS_W void normalize(InputArray src, OutputArray dst, double alpha, double beta, + int norm_type, int dtype, InputArray mask = noArray(), + Stream& stream = Stream::Null()); + +/** @brief Computes an integral image. + +@param src Source image. Only CV_8UC1 images are supported for now. +@param sum Integral image containing 32-bit unsigned integer values packed into CV_32SC1 . +@param stream Stream for the asynchronous version. + +@sa integral + */ +CV_EXPORTS_W void integral(InputArray src, OutputArray sum, Stream& stream = Stream::Null()); + +/** @brief Computes a squared integral image. + +@param src Source image. Only CV_8UC1 images are supported for now. +@param sqsum Squared integral image containing 64-bit unsigned integer values packed into +CV_64FC1 . +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void sqrIntegral(InputArray src, OutputArray sqsum, Stream& stream = Stream::Null()); + +//! @} cudaarithm_reduce + +//! @addtogroup cudaarithm_arithm +//! @{ + +/** @brief Performs generalized matrix multiplication. + +@param src1 First multiplied input matrix that should have CV_32FC1 , CV_64FC1 , CV_32FC2 , or +CV_64FC2 type. +@param src2 Second multiplied input matrix of the same type as src1 . +@param alpha Weight of the matrix product. +@param src3 Third optional delta matrix added to the matrix product. It should have the same type +as src1 and src2 . +@param beta Weight of src3 . +@param dst Destination matrix. It has the proper size and the same type as input matrices. +@param flags Operation flags: +- **GEMM_1_T** transpose src1 +- **GEMM_2_T** transpose src2 +- **GEMM_3_T** transpose src3 +@param stream Stream for the asynchronous version. + +The function performs generalized matrix multiplication similar to the gemm functions in BLAS level +3. For example, gemm(src1, src2, alpha, src3, beta, dst, GEMM_1_T + GEMM_3_T) corresponds to + +\f[\texttt{dst} = \texttt{alpha} \cdot \texttt{src1} ^T \cdot \texttt{src2} + \texttt{beta} \cdot \texttt{src3} ^T\f] + +@note Transposition operation doesn't support CV_64FC2 input type. + +@sa gemm + */ +CV_EXPORTS_W void gemm(InputArray src1, InputArray src2, double alpha, + InputArray src3, double beta, OutputArray dst, int flags = 0, Stream& stream = Stream::Null()); + +/** @brief Performs a per-element multiplication of two Fourier spectrums. + +@param src1 First spectrum. +@param src2 Second spectrum with the same size and type as a . +@param dst Destination spectrum. +@param flags Mock parameter used for CPU/CUDA interfaces similarity. +@param conjB Optional flag to specify if the second spectrum needs to be conjugated before the +multiplication. +@param stream Stream for the asynchronous version. + +Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now. + +@sa mulSpectrums + */ +CV_EXPORTS_W void mulSpectrums(InputArray src1, InputArray src2, OutputArray dst, int flags, bool conjB=false, Stream& stream = Stream::Null()); + +/** @brief Performs a per-element multiplication of two Fourier spectrums and scales the result. + +@param src1 First spectrum. +@param src2 Second spectrum with the same size and type as a . +@param dst Destination spectrum. +@param flags Mock parameter used for CPU/CUDA interfaces similarity, simply add a `0` value. +@param scale Scale constant. +@param conjB Optional flag to specify if the second spectrum needs to be conjugated before the +multiplication. 
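The gemm call documented above maps to BLAS-style notation as in this sketch; it assumes CV_32FC1 inputs with compatible shapes and a build with cuBLAS enabled (HAVE_CUBLAS):

#include <opencv2/cudaarithm.hpp>

// Illustrative sketch: dst = A^T * B + C on the GPU.
void gemmSketch(const cv::cuda::GpuMat& A, const cv::cuda::GpuMat& B,
                const cv::cuda::GpuMat& C, cv::cuda::GpuMat& dst)
{
    cv::cuda::gemm(A, B, 1.0, C, 1.0, dst, cv::GEMM_1_T);
}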
+@param stream Stream for the asynchronous version. + +Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now. + +@sa mulSpectrums + */ +CV_EXPORTS_W void mulAndScaleSpectrums(InputArray src1, InputArray src2, OutputArray dst, int flags, float scale, bool conjB=false, Stream& stream = Stream::Null()); + +/** @brief Performs a forward or inverse discrete Fourier transform (1D or 2D) of the floating point matrix. + +@param src Source matrix (real or complex). +@param dst Destination matrix (real or complex). +@param dft_size Size of a discrete Fourier transform. +@param flags Optional flags: +- **DFT_ROWS** transforms each individual row of the source matrix. +- **DFT_SCALE** scales the result: divide it by the number of elements in the transform +(obtained from dft_size ). +- **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real +cases are always forward and inverse, respectively). +- **DFT_COMPLEX_INPUT** Specifies that input is complex input with 2 channels. +- **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of +real-complex transform, so the destination matrix must be real. +@param stream Stream for the asynchronous version. + +Use to handle real matrices ( CV32FC1 ) and complex matrices in the interleaved format ( CV32FC2 ). + +The source matrix should be continuous, otherwise reallocation and data copying is performed. The +function chooses an operation mode depending on the flags, size, and channel count of the source +matrix: + +- If the source matrix is complex and the output is not specified as real, the destination +matrix is complex and has the dft_size size and CV_32FC2 type. The destination matrix +contains a full result of the DFT (forward or inverse). +- If the source matrix is complex and the output is specified as real, the function assumes that +its input is the result of the forward transform (see the next item). The destination matrix +has the dft_size size and CV_32FC1 type. It contains the result of the inverse DFT. +- If the source matrix is real (its type is CV_32FC1 ), forward DFT is performed. The result of +the DFT is packed into complex ( CV_32FC2 ) matrix. So, the width of the destination matrix +is dft_size.width / 2 + 1 . But if the source is a single column, the height is reduced +instead of the width. + +@sa dft + */ +CV_EXPORTS_W void dft(InputArray src, OutputArray dst, Size dft_size, int flags=0, Stream& stream = Stream::Null()); + +/** @brief Base class for DFT operator as a cv::Algorithm. : + */ +class CV_EXPORTS_W DFT : public Algorithm +{ +public: + /** @brief Computes an FFT of a given image. + + @param image Source image. Only CV_32FC1 images are supported for now. + @param result Result image. + @param stream Stream for the asynchronous version. + */ + CV_WRAP virtual void compute(InputArray image, OutputArray result, Stream& stream = Stream::Null()) = 0; +}; + +/** @brief Creates implementation for cuda::DFT. + +@param dft_size The image size. +@param flags Optional flags: +- **DFT_ROWS** transforms each individual row of the source matrix. +- **DFT_SCALE** scales the result: divide it by the number of elements in the transform +(obtained from dft_size ). +- **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real +cases are always forward and inverse, respectively). +- **DFT_COMPLEX_INPUT** Specifies that inputs will be complex with 2 channels. +- **DFT_REAL_OUTPUT** specifies the output as real. 
The source matrix is the result of +real-complex transform, so the destination matrix must be real. + */ +CV_EXPORTS_W Ptr createDFT(Size dft_size, int flags); + +/** @brief Base class for convolution (or cross-correlation) operator. : + */ +class CV_EXPORTS_W Convolution : public Algorithm +{ +public: + /** @brief Computes a convolution (or cross-correlation) of two images. + + @param image Source image. Only CV_32FC1 images are supported for now. + @param templ Template image. The size is not greater than the image size. The type is the same as + image . + @param result Result image. If image is *W x H* and templ is *w x h*, then result must be *W-w+1 x + H-h+1*. + @param ccorr Flags to evaluate cross-correlation instead of convolution. + @param stream Stream for the asynchronous version. + */ + virtual void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null()) = 0; +}; + +/** @brief Creates implementation for cuda::Convolution . + +@param user_block_size Block size. If you leave default value Size(0,0) then automatic +estimation of block size will be used (which is optimized for speed). By varying user_block_size +you can reduce memory requirements at the cost of speed. + */ +CV_EXPORTS_W Ptr createConvolution(Size user_block_size = Size()); + +//! @} cudaarithm_arithm + +//! @} cudaarithm + +}} // namespace cv { namespace cuda { + +#endif /* OPENCV_CUDAARITHM_HPP */ diff --git a/modules/cudaarithm/perf/perf_arithm.cpp b/modules/cudaarithm/perf/perf_arithm.cpp new file mode 100644 index 00000000000..ca23e19dc14 --- /dev/null +++ b/modules/cudaarithm/perf/perf_arithm.cpp @@ -0,0 +1,254 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
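Before moving on to the performance tests, here is a usage sketch for the DFT and Convolution interfaces declared in cudaarithm.hpp above; it assumes CV_32FC1 inputs and a cuFFT-enabled build, and the helper name is illustrative only:

#include <opencv2/cudaarithm.hpp>

// Illustrative sketch: a reusable DFT operator plus frequency-domain cross-correlation.
void fftSketch(const cv::cuda::GpuMat& image,   // CV_32FC1
               const cv::cuda::GpuMat& templ,   // CV_32FC1, not larger than image
               cv::cuda::GpuMat& spectrum, cv::cuda::GpuMat& response)
{
    cv::Ptr<cv::cuda::DFT> dft = cv::cuda::createDFT(image.size(), 0);
    dft->compute(image, spectrum);              // packed CV_32FC2 spectrum of a real input

    cv::Ptr<cv::cuda::Convolution> conv = cv::cuda::createConvolution();
    conv->convolve(image, templ, response, /*ccorr=*/true);  // result is (W-w+1) x (H-h+1)
}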
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "perf_precomp.hpp" + +namespace opencv_test { namespace { + +////////////////////////////////////////////////////////////////////// +// GEMM + +#ifdef HAVE_CUBLAS + +CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T) +#define ALL_GEMM_FLAGS Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), \ + GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T)) + +DEF_PARAM_TEST(Sz_Type_Flags, cv::Size, MatType, GemmFlags); + +PERF_TEST_P(Sz_Type_Flags, GEMM, + Combine(Values(cv::Size(512, 512), cv::Size(1024, 1024)), + Values(CV_32FC1, CV_32FC2, CV_64FC1), + ALL_GEMM_FLAGS)) +{ + const cv::Size size = GET_PARAM(0); + const int type = GET_PARAM(1); + const int flags = GET_PARAM(2); + + cv::Mat src1(size, type); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, type); + declare.in(src2, WARMUP_RNG); + + cv::Mat src3(size, type); + declare.in(src3, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + declare.time(5.0); + + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + const cv::cuda::GpuMat d_src3(src3); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, dst, flags); + + CUDA_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); + } + else + { + declare.time(50.0); + + cv::Mat dst; + + TEST_CYCLE() cv::gemm(src1, src2, 1.0, src3, 1.0, dst, flags); + + CPU_SANITY_CHECK(dst); + } +} + +#endif + +////////////////////////////////////////////////////////////////////// +// MulSpectrums + +CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT) + +DEF_PARAM_TEST(Sz_Flags, cv::Size, DftFlags); + +PERF_TEST_P(Sz_Flags, MulSpectrums, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(0, DftFlags(cv::DFT_ROWS)))) +{ + const cv::Size size = GET_PARAM(0); + const int flag = GET_PARAM(1); + + cv::Mat a(size, CV_32FC2); + cv::Mat b(size, CV_32FC2); + declare.in(a, b, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_a(a); + const cv::cuda::GpuMat d_b(b); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::mulSpectrums(d_a, d_b, dst, flag); + + CUDA_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::mulSpectrums(a, b, dst, flag); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MulAndScaleSpectrums + +PERF_TEST_P(Sz, MulAndScaleSpectrums, + CUDA_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + const float scale = 1.f / size.area(); + + cv::Mat src1(size, CV_32FC2); + cv::Mat src2(size, CV_32FC2); + declare.in(src1,src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::mulAndScaleSpectrums(d_src1, d_src2, dst, cv::DFT_ROWS, scale, false); + + CUDA_SANITY_CHECK(dst, 1e-6, 
ERROR_RELATIVE); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// Dft + +PERF_TEST_P(Sz_Flags, Dft, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(0, DftFlags(cv::DFT_ROWS), DftFlags(cv::DFT_INVERSE)))) +{ + declare.time(10.0); + + const cv::Size size = GET_PARAM(0); + const int flag = GET_PARAM(1); + + cv::Mat src(size, CV_32FC2); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::dft(d_src, dst, size, flag); + + CUDA_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::dft(src, dst, flag); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Convolve + +DEF_PARAM_TEST(Sz_KernelSz_Ccorr, cv::Size, int, bool); + +PERF_TEST_P(Sz_KernelSz_Ccorr, Convolve, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(17, 27, 32, 64), + Bool())) +{ + declare.time(10.0); + + const cv::Size size = GET_PARAM(0); + const int templ_size = GET_PARAM(1); + const bool ccorr = GET_PARAM(2); + + const cv::Mat image(size, CV_32FC1); + const cv::Mat templ(templ_size, templ_size, CV_32FC1); + declare.in(image, templ, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + cv::cuda::GpuMat d_image = cv::cuda::createContinuous(size, CV_32FC1); + d_image.upload(image); + + cv::cuda::GpuMat d_templ = cv::cuda::createContinuous(templ_size, templ_size, CV_32FC1); + d_templ.upload(templ); + + cv::Ptr convolution = cv::cuda::createConvolution(); + + cv::cuda::GpuMat dst; + + TEST_CYCLE() convolution->convolve(d_image, d_templ, dst, ccorr); + + CUDA_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); + } + else + { + if (ccorr) + FAIL_NO_CPU(); + + cv::Mat dst; + + TEST_CYCLE() cv::filter2D(image, dst, image.depth(), templ); + + CPU_SANITY_CHECK(dst); + } +} + +}} // namespace diff --git a/modules/cudaarithm/perf/perf_core.cpp b/modules/cudaarithm/perf/perf_core.cpp new file mode 100644 index 00000000000..bc9f0e2f715 --- /dev/null +++ b/modules/cudaarithm/perf/perf_core.cpp @@ -0,0 +1,323 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "perf_precomp.hpp" + +namespace opencv_test { namespace { + +#define ARITHM_MAT_DEPTH Values(CV_8U, CV_16U, CV_32F, CV_64F) + +////////////////////////////////////////////////////////////////////// +// Merge + +DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, MatDepth, MatCn); + +PERF_TEST_P(Sz_Depth_Cn, Merge, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH, + Values(2, 3, 4))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + std::vector src(channels); + for (int i = 0; i < channels; ++i) + { + src[i].create(size, depth); + declare.in(src[i], WARMUP_RNG); + } + + if (PERF_RUN_CUDA()) + { + std::vector d_src(channels); + for (int i = 0; i < channels; ++i) + d_src[i].upload(src[i]); + + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::merge(d_src, dst); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::merge(src, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Split + +PERF_TEST_P(Sz_Depth_Cn, Split, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH, + Values(2, 3, 4))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + cv::Mat src(size, CV_MAKE_TYPE(depth, channels)); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + std::vector dst; + + TEST_CYCLE() cv::cuda::split(d_src, dst); + + const cv::cuda::GpuMat& dst0 = dst[0]; + const cv::cuda::GpuMat& dst1 = dst[1]; + + CUDA_SANITY_CHECK(dst0, 1e-10); + CUDA_SANITY_CHECK(dst1, 1e-10); + } + else + { + std::vector dst; + + TEST_CYCLE() cv::split(src, dst); + + const cv::Mat& dst0 = dst[0]; + const cv::Mat& dst1 = dst[1]; + + CPU_SANITY_CHECK(dst0); + CPU_SANITY_CHECK(dst1); + } +} + +////////////////////////////////////////////////////////////////////// +// Transpose + +PERF_TEST_P(Sz_Type, Transpose, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8UC1, CV_8UC4, CV_16UC2, CV_16SC2, CV_32SC1, CV_32SC2, CV_64FC1))) +{ + const cv::Size size = GET_PARAM(0); + const int type = GET_PARAM(1); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::transpose(d_src, dst); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::transpose(src, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Flip + +enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1}; +CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y) + +DEF_PARAM_TEST(Sz_Depth_Cn_Code, cv::Size, MatDepth, MatCn, 
FlipCode); + +PERF_TEST_P(Sz_Depth_Cn_Code, Flip, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + CUDA_CHANNELS_1_3_4, + FlipCode::all())) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + const int flipCode = GET_PARAM(3); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::flip(d_src, dst, flipCode); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::flip(src, dst, flipCode); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// LutOneChannel + +PERF_TEST_P(Sz_Type, LutOneChannel, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8UC1, CV_8UC3))) +{ + const cv::Size size = GET_PARAM(0); + const int type = GET_PARAM(1); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + cv::Mat lut(1, 256, CV_8UC1); + declare.in(lut, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + cv::Ptr lutAlg = cv::cuda::createLookUpTable(lut); + + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() lutAlg->transform(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::LUT(src, lut, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// LutMultiChannel + +PERF_TEST_P(Sz_Type, LutMultiChannel, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8UC3))) +{ + const cv::Size size = GET_PARAM(0); + const int type = GET_PARAM(1); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + cv::Mat lut(1, 256, CV_MAKE_TYPE(CV_8U, src.channels())); + declare.in(lut, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + cv::Ptr lutAlg = cv::cuda::createLookUpTable(lut); + + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() lutAlg->transform(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::LUT(src, lut, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// CopyMakeBorder + +DEF_PARAM_TEST(Sz_Depth_Cn_Border, cv::Size, MatDepth, MatCn, BorderMode); + +PERF_TEST_P(Sz_Depth_Cn_Border, CopyMakeBorder, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + CUDA_CHANNELS_1_3_4, + ALL_BORDER_MODES)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + const int borderMode = GET_PARAM(3); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::copyMakeBorder(d_src, dst, 5, 5, 5, 5, borderMode); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::copyMakeBorder(src, dst, 5, 5, 5, 5, borderMode); + + CPU_SANITY_CHECK(dst); + } +} + +}} // namespace diff --git a/modules/cudaarithm/perf/perf_element_operations.cpp b/modules/cudaarithm/perf/perf_element_operations.cpp new file mode 100644 index 00000000000..02f412d9949 --- /dev/null +++ b/modules/cudaarithm/perf/perf_element_operations.cpp @@ -0,0 +1,1501 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
+// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "perf_precomp.hpp" + +namespace opencv_test { namespace { + +#define ARITHM_MAT_DEPTH Values(CV_8U, CV_16U, CV_32F, CV_64F) + +////////////////////////////////////////////////////////////////////// +// AddMat + +DEF_PARAM_TEST(Sz_Depth, cv::Size, MatDepth); + +PERF_TEST_P(Sz_Depth, AddMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::add(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::add(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// AddScalar + +PERF_TEST_P(Sz_Depth, AddScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::add(d_src, s, dst); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::add(src, s, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// SubtractMat + +PERF_TEST_P(Sz_Depth, SubtractMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::subtract(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::subtract(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// SubtractScalar + +PERF_TEST_P(Sz_Depth, SubtractScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::subtract(d_src, s, dst); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::subtract(src, s, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MultiplyMat + +PERF_TEST_P(Sz_Depth, MultiplyMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::multiply(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst, 1e-6); + } + else + { + cv::Mat dst; + + 
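+            // TEST_CYCLE() executes the statement that follows repeatedly while the perf framework collects timing samples; the same CUDA-vs-CPU pattern is used by every test in this file.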
TEST_CYCLE() cv::multiply(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MultiplyScalar + +PERF_TEST_P(Sz_Depth, MultiplyScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::multiply(d_src, s, dst); + + CUDA_SANITY_CHECK(dst, 1e-6); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::multiply(src, s, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// DivideMat + +PERF_TEST_P(Sz_Depth, DivideMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::divide(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst, 1e-6); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::divide(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// DivideScalar + +PERF_TEST_P(Sz_Depth, DivideScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::divide(d_src, s, dst); + + CUDA_SANITY_CHECK(dst, 1e-6); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::divide(src, s, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// DivideScalarInv + +PERF_TEST_P(Sz_Depth, DivideScalarInv, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::divide(s[0], d_src, dst); + + CUDA_SANITY_CHECK(dst, 1e-6); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::divide(s, src, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// AbsDiffMat + +PERF_TEST_P(Sz_Depth, AbsDiffMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::absdiff(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::absdiff(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// 
AbsDiffScalar + +PERF_TEST_P(Sz_Depth, AbsDiffScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::absdiff(d_src, s, dst); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::absdiff(src, s, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Abs + +PERF_TEST_P(Sz_Depth, Abs, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_16S, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::abs(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// Sqr + +PERF_TEST_P(Sz_Depth, Sqr, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::sqr(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// Sqrt + +PERF_TEST_P(Sz_Depth, Sqrt, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + cv::randu(src, 0, 100000); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::sqrt(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::sqrt(src, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Log + +PERF_TEST_P(Sz_Depth, Log, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + cv::randu(src, 0, 100000); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::log(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::log(src, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Exp + +PERF_TEST_P(Sz_Depth, Exp, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + cv::randu(src, 0, 10); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::exp(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::exp(src, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Pow + +DEF_PARAM_TEST(Sz_Depth_Power, cv::Size, MatDepth, double); + +PERF_TEST_P(Sz_Depth_Power, Pow, + Combine(CUDA_TYPICAL_MAT_SIZES, + 
Values(CV_8U, CV_16S, CV_32F), + Values(0.3, 2.0, 2.4))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const double power = GET_PARAM(2); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::pow(d_src, power, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::pow(src, power, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// CompareMat + +CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE) + +DEF_PARAM_TEST(Sz_Depth_Code, cv::Size, MatDepth, CmpCode); + +PERF_TEST_P(Sz_Depth_Code, CompareMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH, + CmpCode::all())) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int cmp_code = GET_PARAM(2); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::compare(d_src1, d_src2, dst, cmp_code); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::compare(src1, src2, dst, cmp_code); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// CompareScalar + +PERF_TEST_P(Sz_Depth_Code, CompareScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH, + CmpCode::all())) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int cmp_code = GET_PARAM(2); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::compare(d_src, s, dst, cmp_code); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::compare(src, s, dst, cmp_code); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// BitwiseNot + +PERF_TEST_P(Sz_Depth, BitwiseNot, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::bitwise_not(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::bitwise_not(src, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// BitwiseAndMat + +PERF_TEST_P(Sz_Depth, BitwiseAndMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::bitwise_and(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::bitwise_and(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + 
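+// The *Scalar bitwise tests that follow convert the randomly generated cv::Scalar to an integer-typed cv::Scalar_ before calling bitwise_and/or/xor, since only integer depths (CV_8U, CV_16U, CV_32S) are exercised.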
+////////////////////////////////////////////////////////////////////// +// BitwiseAndScalar + +DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, MatDepth, MatCn); + +PERF_TEST_P(Sz_Depth_Cn, BitwiseAndScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S), + CUDA_CHANNELS_1_3_4)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + cv::Scalar_ is = s; + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::bitwise_and(d_src, is, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::bitwise_and(src, is, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// BitwiseOrMat + +PERF_TEST_P(Sz_Depth, BitwiseOrMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::bitwise_or(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::bitwise_or(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// BitwiseOrScalar + +PERF_TEST_P(Sz_Depth_Cn, BitwiseOrScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S), + CUDA_CHANNELS_1_3_4)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + cv::Scalar_ is = s; + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::bitwise_or(d_src, is, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::bitwise_or(src, is, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// BitwiseXorMat + +PERF_TEST_P(Sz_Depth, BitwiseXorMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::bitwise_xor(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::bitwise_xor(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// BitwiseXorScalar + +PERF_TEST_P(Sz_Depth_Cn, BitwiseXorScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S), + CUDA_CHANNELS_1_3_4)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat 
src(size, type); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + cv::Scalar_ is = s; + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::bitwise_xor(d_src, is, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::bitwise_xor(src, is, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// RShift + +PERF_TEST_P(Sz_Depth_Cn, RShift, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S), + CUDA_CHANNELS_1_3_4)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + const cv::Scalar_ val = cv::Scalar_::all(4); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::rshift(d_src, val, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// LShift + +PERF_TEST_P(Sz_Depth_Cn, LShift, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S), + CUDA_CHANNELS_1_3_4)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + const cv::Scalar_ val = cv::Scalar_::all(4); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::lshift(d_src, val, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// MinMat + +PERF_TEST_P(Sz_Depth, MinMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::min(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::min(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MinScalar + +PERF_TEST_P(Sz_Depth, MinScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar val; + declare.in(val, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::min(d_src, val[0], dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::min(src, val[0], dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MaxMat + +PERF_TEST_P(Sz_Depth, MaxMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if 
(PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::max(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::max(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MaxScalar + +PERF_TEST_P(Sz_Depth, MaxScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar val; + declare.in(val, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::max(d_src, val[0], dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::max(src, val[0], dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// AddWeighted + +DEF_PARAM_TEST(Sz_3Depth, cv::Size, MatDepth, MatDepth, MatDepth); + +PERF_TEST_P(Sz_3Depth, AddWeighted, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F), + Values(CV_8U, CV_16U, CV_32F, CV_64F), + Values(CV_8U, CV_16U, CV_32F, CV_64F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth1 = GET_PARAM(1); + const int depth2 = GET_PARAM(2); + const int dst_depth = GET_PARAM(3); + + cv::Mat src1(size, depth1); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth2); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::addWeighted(d_src1, 0.5, d_src2, 0.5, 10.0, dst, dst_depth); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::addWeighted(src1, 0.5, src2, 0.5, 10.0, dst, dst_depth); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MagnitudeComplex + +PERF_TEST_P(Sz, MagnitudeComplex, + CUDA_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + cv::Mat src(size, CV_32FC2); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::magnitude(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat xy[2]; + cv::split(src, xy); + + cv::Mat dst; + + TEST_CYCLE() cv::magnitude(xy[0], xy[1], dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MagnitudeSqrComplex + +PERF_TEST_P(Sz, MagnitudeSqrComplex, + CUDA_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + cv::Mat src(size, CV_32FC2); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::magnitudeSqr(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// Magnitude + +PERF_TEST_P(Sz, Magnitude, + CUDA_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + cv::Mat src1(size, CV_32FC1); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, CV_32FC1); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::magnitude(d_src1, 
d_src2, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::magnitude(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MagnitudeSqr + +PERF_TEST_P(Sz, MagnitudeSqr, + CUDA_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + cv::Mat src1(size, CV_32FC1); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, CV_32FC1); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::magnitudeSqr(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// Phase + +DEF_PARAM_TEST(Sz_AngleInDegrees, cv::Size, bool); + +PERF_TEST_P(Sz_AngleInDegrees, Phase, + Combine(CUDA_TYPICAL_MAT_SIZES, + Bool())) +{ + const cv::Size size = GET_PARAM(0); + const bool angleInDegrees = GET_PARAM(1); + + cv::Mat src1(size, CV_32FC1); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, CV_32FC1); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::phase(d_src1, d_src2, dst, angleInDegrees); + + CUDA_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::phase(src1, src2, dst, angleInDegrees); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// CartToPolar + +PERF_TEST_P(Sz_AngleInDegrees, CartToPolar, + Combine(CUDA_TYPICAL_MAT_SIZES, + Bool())) +{ + const cv::Size size = GET_PARAM(0); + const bool angleInDegrees = GET_PARAM(1); + + cv::Mat src1(size, CV_32FC1); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, CV_32FC1); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat magnitude; + cv::cuda::GpuMat angle; + + TEST_CYCLE() cv::cuda::cartToPolar(d_src1, d_src2, magnitude, angle, angleInDegrees); + + CUDA_SANITY_CHECK(magnitude); + CUDA_SANITY_CHECK(angle, 1e-6, ERROR_RELATIVE); + } + else + { + cv::Mat magnitude; + cv::Mat angle; + + TEST_CYCLE() cv::cartToPolar(src1, src2, magnitude, angle, angleInDegrees); + + CPU_SANITY_CHECK(magnitude); + CPU_SANITY_CHECK(angle); + } +} + +////////////////////////////////////////////////////////////////////// +// PolarToCart + +PERF_TEST_P(Sz_AngleInDegrees, PolarToCart, + Combine(CUDA_TYPICAL_MAT_SIZES, + Bool())) +{ + const cv::Size size = GET_PARAM(0); + const bool angleInDegrees = GET_PARAM(1); + + cv::Mat magnitude(size, CV_32FC1); + declare.in(magnitude, WARMUP_RNG); + + cv::Mat angle(size, CV_32FC1); + declare.in(angle, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_magnitude(magnitude); + const cv::cuda::GpuMat d_angle(angle); + cv::cuda::GpuMat x; + cv::cuda::GpuMat y; + + TEST_CYCLE() cv::cuda::polarToCart(d_magnitude, d_angle, x, y, angleInDegrees); + + CUDA_SANITY_CHECK(x); + CUDA_SANITY_CHECK(y); + } + else + { + cv::Mat x; + cv::Mat y; + + TEST_CYCLE() cv::polarToCart(magnitude, angle, x, y, angleInDegrees); + + CPU_SANITY_CHECK(x); + CPU_SANITY_CHECK(y); + } +} + +////////////////////////////////////////////////////////////////////// +// Threshold + +CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, 
cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV) + +DEF_PARAM_TEST(Sz_Depth_Op, cv::Size, MatDepth, ThreshOp); + +PERF_TEST_P(Sz_Depth_Op, Threshold, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F), + ThreshOp::all())) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int threshOp = GET_PARAM(2); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::threshold(d_src, dst, 100.0, 255.0, threshOp); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::threshold(src, dst, 100.0, 255.0, threshOp); + + CPU_SANITY_CHECK(dst); + } +} + +}} // namespace diff --git a/modules/cudaarithm/perf/perf_main.cpp b/modules/cudaarithm/perf/perf_main.cpp new file mode 100644 index 00000000000..118d7596ac2 --- /dev/null +++ b/modules/cudaarithm/perf/perf_main.cpp @@ -0,0 +1,47 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "perf_precomp.hpp" + +using namespace perf; + +CV_PERF_TEST_CUDA_MAIN(cudaarithm) diff --git a/modules/cudaarithm/perf/perf_precomp.hpp b/modules/cudaarithm/perf/perf_precomp.hpp new file mode 100644 index 00000000000..071ac946537 --- /dev/null +++ b/modules/cudaarithm/perf/perf_precomp.hpp @@ -0,0 +1,55 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +#ifndef __OPENCV_PERF_PRECOMP_HPP__ +#define __OPENCV_PERF_PRECOMP_HPP__ + +#include "opencv2/ts.hpp" +#include "opencv2/ts/cuda_perf.hpp" + +#include "opencv2/cudaarithm.hpp" + +namespace opencv_test { +using namespace perf; +using namespace testing; +} + +#endif diff --git a/modules/cudaarithm/perf/perf_reductions.cpp b/modules/cudaarithm/perf/perf_reductions.cpp new file mode 100644 index 00000000000..71bb5524a63 --- /dev/null +++ b/modules/cudaarithm/perf/perf_reductions.cpp @@ -0,0 +1,520 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. 
+// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "perf_precomp.hpp" + +namespace opencv_test { namespace { + +////////////////////////////////////////////////////////////////////// +// Norm + +DEF_PARAM_TEST(Sz_Depth_Norm, cv::Size, MatDepth, NormType); + +PERF_TEST_P(Sz_Depth_Norm, Norm, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S, CV_32F), + Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2)))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int normType = GET_PARAM(2); + + cv::Mat src(size, depth); + if (depth == CV_8U) + cv::randu(src, 0, 254); + else + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat d_buf; + double gpu_dst; + + TEST_CYCLE() gpu_dst = cv::cuda::norm(d_src, normType, d_buf); + + SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE); + } + else + { + double cpu_dst; + + TEST_CYCLE() cpu_dst = cv::norm(src, normType); + + SANITY_CHECK(cpu_dst, 1e-6, ERROR_RELATIVE); + } +} + +////////////////////////////////////////////////////////////////////// +// NormDiff + +DEF_PARAM_TEST(Sz_Norm, cv::Size, NormType); + +PERF_TEST_P(Sz_Norm, NormDiff, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2)))) +{ + const cv::Size size = GET_PARAM(0); + const int normType = GET_PARAM(1); + + cv::Mat src1(size, CV_8UC1); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, CV_8UC1); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + double gpu_dst; + + TEST_CYCLE() gpu_dst = cv::cuda::norm(d_src1, d_src2, normType); + + SANITY_CHECK(gpu_dst); + + } + else + { + double cpu_dst; + + TEST_CYCLE() cpu_dst = cv::norm(src1, src2, normType); + + SANITY_CHECK(cpu_dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Sum + +DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, MatDepth, MatCn); + 
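+// Sum-style reductions: cv::cuda::sum is benchmarked against cv::sum on the CPU, while cv::cuda::absSum and cv::cuda::sqrSum have no direct CPU counterpart here and report FAIL_NO_CPU() instead.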
+PERF_TEST_P(Sz_Depth_Cn, Sum, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + CUDA_CHANNELS_1_3_4)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::Scalar gpu_dst; + + TEST_CYCLE() gpu_dst = cv::cuda::sum(d_src); + + SANITY_CHECK(gpu_dst, 1e-5, ERROR_RELATIVE); + } + else + { + cv::Scalar cpu_dst; + + TEST_CYCLE() cpu_dst = cv::sum(src); + + SANITY_CHECK(cpu_dst, 1e-6, ERROR_RELATIVE); + } +} + +////////////////////////////////////////////////////////////////////// +// SumAbs + +PERF_TEST_P(Sz_Depth_Cn, SumAbs, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + CUDA_CHANNELS_1_3_4)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::Scalar gpu_dst; + + TEST_CYCLE() gpu_dst = cv::cuda::absSum(d_src); + + SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// SumSqr + +PERF_TEST_P(Sz_Depth_Cn, SumSqr, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + CUDA_CHANNELS_1_3_4)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::Scalar gpu_dst; + + TEST_CYCLE() gpu_dst = cv::cuda::sqrSum(d_src); + + SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// MinMax + +DEF_PARAM_TEST(Sz_Depth, cv::Size, MatDepth); + +PERF_TEST_P(Sz_Depth, MinMax, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + if (depth == CV_8U) + cv::randu(src, 0, 254); + else + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + double gpu_minVal, gpu_maxVal; + + TEST_CYCLE() cv::cuda::minMax(d_src, &gpu_minVal, &gpu_maxVal, cv::cuda::GpuMat()); + + SANITY_CHECK(gpu_minVal, 1e-10); + SANITY_CHECK(gpu_maxVal, 1e-10); + } + else + { + double cpu_minVal, cpu_maxVal; + + TEST_CYCLE() cv::minMaxLoc(src, &cpu_minVal, &cpu_maxVal); + + SANITY_CHECK(cpu_minVal); + SANITY_CHECK(cpu_maxVal); + } +} + +////////////////////////////////////////////////////////////////////// +// MinMaxLoc + +PERF_TEST_P(Sz_Depth, MinMaxLoc, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + if (depth == CV_8U) + cv::randu(src, 0, 254); + else + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + double gpu_minVal, gpu_maxVal; + cv::Point gpu_minLoc, gpu_maxLoc; + + TEST_CYCLE() cv::cuda::minMaxLoc(d_src, &gpu_minVal, &gpu_maxVal, &gpu_minLoc, &gpu_maxLoc); + + SANITY_CHECK(gpu_minVal, 1e-10); + SANITY_CHECK(gpu_maxVal, 
1e-10); + } + else + { + double cpu_minVal, cpu_maxVal; + cv::Point cpu_minLoc, cpu_maxLoc; + + TEST_CYCLE() cv::minMaxLoc(src, &cpu_minVal, &cpu_maxVal, &cpu_minLoc, &cpu_maxLoc); + + SANITY_CHECK(cpu_minVal); + SANITY_CHECK(cpu_maxVal); + } +} + +////////////////////////////////////////////////////////////////////// +// CountNonZero + +PERF_TEST_P(Sz_Depth, CountNonZero, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + int gpu_dst = 0; + + TEST_CYCLE() gpu_dst = cv::cuda::countNonZero(d_src); + + SANITY_CHECK(gpu_dst); + } + else + { + int cpu_dst = 0; + + TEST_CYCLE() cpu_dst = cv::countNonZero(src); + + SANITY_CHECK(cpu_dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Reduce + +CV_ENUM(ReduceCode, REDUCE_SUM, REDUCE_AVG, REDUCE_MAX, REDUCE_MIN) + +enum {Rows = 0, Cols = 1}; +CV_ENUM(ReduceDim, Rows, Cols) + +DEF_PARAM_TEST(Sz_Depth_Cn_Code_Dim, cv::Size, MatDepth, MatCn, ReduceCode, ReduceDim); + +PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Reduce, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_16S, CV_32F), + Values(1, 2, 3, 4), + ReduceCode::all(), + ReduceDim::all())) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + const int reduceOp = GET_PARAM(3); + const int dim = GET_PARAM(4); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::reduce(d_src, dst, dim, reduceOp, CV_32F); + + dst = dst.reshape(dst.channels(), 1); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::reduce(src, dst, dim, reduceOp, CV_32F); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Normalize + +DEF_PARAM_TEST(Sz_Depth_NormType, cv::Size, MatDepth, NormType); + +PERF_TEST_P(Sz_Depth_NormType, Normalize, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F), + Values(NormType(cv::NORM_INF), + NormType(cv::NORM_L1), + NormType(cv::NORM_L2), + NormType(cv::NORM_MINMAX)))) +{ + const cv::Size size = GET_PARAM(0); + const int type = GET_PARAM(1); + const int norm_type = GET_PARAM(2); + + const double alpha = 1; + const double beta = 0; + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::normalize(d_src, dst, alpha, beta, norm_type, type, cv::cuda::GpuMat()); + + CUDA_SANITY_CHECK(dst, 1e-6); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::normalize(src, dst, alpha, beta, norm_type, type); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MeanStdDev + +PERF_TEST_P(Sz, MeanStdDev, + CUDA_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + cv::Mat src(size, CV_8UC1); + declare.in(src, WARMUP_RNG); + + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::Scalar gpu_mean; + cv::Scalar gpu_stddev; + + TEST_CYCLE() cv::cuda::meanStdDev(d_src, gpu_mean, gpu_stddev); + + SANITY_CHECK(gpu_mean); + SANITY_CHECK(gpu_stddev); + } + else + { + cv::Scalar cpu_mean; + cv::Scalar cpu_stddev; + + 
TEST_CYCLE() cv::meanStdDev(src, cpu_mean, cpu_stddev); + + SANITY_CHECK(cpu_mean); + SANITY_CHECK(cpu_stddev); + } +} + +////////////////////////////////////////////////////////////////////// +// Integral + +PERF_TEST_P(Sz, Integral, + CUDA_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + cv::Mat src(size, CV_8UC1); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::integral(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::integral(src, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// IntegralSqr + +PERF_TEST_P(Sz, IntegralSqr, + CUDA_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + cv::Mat src(size, CV_8UC1); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::sqrIntegral(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + FAIL_NO_CPU(); + } +} + +}} // namespace diff --git a/modules/cudaarithm/src/arithm.cpp b/modules/cudaarithm/src/arithm.cpp new file mode 100644 index 00000000000..381580cff43 --- /dev/null +++ b/modules/cudaarithm/src/arithm.cpp @@ -0,0 +1,582 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" + +using namespace cv; +using namespace cv::cuda; + +#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) + +void cv::cuda::gemm(InputArray, InputArray, double, InputArray, double, OutputArray, int, Stream&) { throw_no_cuda(); } + +void cv::cuda::mulSpectrums(InputArray, InputArray, OutputArray, int, bool, Stream&) { throw_no_cuda(); } +void cv::cuda::mulAndScaleSpectrums(InputArray, InputArray, OutputArray, int, float, bool, Stream&) { throw_no_cuda(); } + +void cv::cuda::dft(InputArray, OutputArray, Size, int, Stream&) { throw_no_cuda(); } + +Ptr cv::cuda::createConvolution(Size) { throw_no_cuda(); return Ptr(); } + +#else /* !defined (HAVE_CUDA) */ + +namespace +{ + #define error_entry(entry) { entry, #entry } + + struct ErrorEntry + { + int code; + const char* str; + }; + + struct ErrorEntryComparer + { + int code; + ErrorEntryComparer(int code_) : code(code_) {} + bool operator()(const ErrorEntry& e) const { return e.code == code; } + }; + + String getErrorString(int code, const ErrorEntry* errors, size_t n) + { + size_t idx = std::find_if(errors, errors + n, ErrorEntryComparer(code)) - errors; + + const char* msg = (idx != n) ? errors[idx].str : "Unknown error code"; + String str = cv::format("%s [Code = %d]", msg, code); + + return str; + } +} + +#ifdef HAVE_CUBLAS + namespace + { + const ErrorEntry cublas_errors[] = + { + error_entry( CUBLAS_STATUS_SUCCESS ), + error_entry( CUBLAS_STATUS_NOT_INITIALIZED ), + error_entry( CUBLAS_STATUS_ALLOC_FAILED ), + error_entry( CUBLAS_STATUS_INVALID_VALUE ), + error_entry( CUBLAS_STATUS_ARCH_MISMATCH ), + error_entry( CUBLAS_STATUS_MAPPING_ERROR ), + error_entry( CUBLAS_STATUS_EXECUTION_FAILED ), + error_entry( CUBLAS_STATUS_INTERNAL_ERROR ) + }; + + const size_t cublas_error_num = sizeof(cublas_errors) / sizeof(cublas_errors[0]); + + static inline void ___cublasSafeCall(cublasStatus_t err, const char* file, const int line, const char* func) + { + if (CUBLAS_STATUS_SUCCESS != err) + { + String msg = getErrorString(err, cublas_errors, cublas_error_num); + cv::error(cv::Error::GpuApiCallError, msg, func, file, line); + } + } + } + + #define cublasSafeCall(expr) ___cublasSafeCall(expr, __FILE__, __LINE__, CV_Func) +#endif // HAVE_CUBLAS + +#ifdef HAVE_CUFFT + namespace + { + ////////////////////////////////////////////////////////////////////////// + // CUFFT errors + + const ErrorEntry cufft_errors[] = + { + error_entry( CUFFT_INVALID_PLAN ), + error_entry( CUFFT_ALLOC_FAILED ), + error_entry( CUFFT_INVALID_TYPE ), + error_entry( CUFFT_INVALID_VALUE ), + error_entry( CUFFT_INTERNAL_ERROR ), + error_entry( CUFFT_EXEC_FAILED ), + error_entry( CUFFT_SETUP_FAILED ), + error_entry( CUFFT_INVALID_SIZE ), + error_entry( CUFFT_UNALIGNED_DATA ) + }; + + const int cufft_error_num = sizeof(cufft_errors) / sizeof(cufft_errors[0]); + + void ___cufftSafeCall(int err, const char* file, const int line, const char* func) + { + if (CUFFT_SUCCESS != err) + { + String msg = getErrorString(err, cufft_errors, cufft_error_num); + cv::error(cv::Error::GpuApiCallError, msg, func, file, line); + } + } + } + + #define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__, CV_Func) + +#endif + +//////////////////////////////////////////////////////////////////////// +// gemm + +void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray _src3, double beta, OutputArray _dst, int flags, Stream& stream) +{ +#ifndef HAVE_CUBLAS + CV_UNUSED(_src1); + CV_UNUSED(_src2); + CV_UNUSED(alpha); + CV_UNUSED(_src3); + 
CV_UNUSED(beta); + CV_UNUSED(_dst); + CV_UNUSED(flags); + CV_UNUSED(stream); + CV_Error(Error::StsNotImplemented, "The library was build without CUBLAS"); +#else + // CUBLAS works with column-major matrices + + GpuMat src1 = getInputMat(_src1, stream); + GpuMat src2 = getInputMat(_src2, stream); + GpuMat src3 = getInputMat(_src3, stream); + + CV_Assert( src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2 ); + CV_Assert( src2.type() == src1.type() && (src3.empty() || src3.type() == src1.type()) ); + + if (src1.depth() == CV_64F) + { + if (!deviceSupports(NATIVE_DOUBLE)) + CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); + } + + bool tr1 = (flags & GEMM_1_T) != 0; + bool tr2 = (flags & GEMM_2_T) != 0; + bool tr3 = (flags & GEMM_3_T) != 0; + + if (src1.type() == CV_64FC2) + { + if (tr1 || tr2 || tr3) + CV_Error(cv::Error::StsNotImplemented, "transpose operation doesn't implemented for CV_64FC2 type"); + } + + Size src1Size = tr1 ? Size(src1.rows, src1.cols) : src1.size(); + Size src2Size = tr2 ? Size(src2.rows, src2.cols) : src2.size(); + Size src3Size = tr3 ? Size(src3.rows, src3.cols) : src3.size(); + Size dstSize(src2Size.width, src1Size.height); + + CV_Assert( src1Size.width == src2Size.height ); + CV_Assert( src3.empty() || src3Size == dstSize ); + + GpuMat dst = getOutputMat(_dst, dstSize, src1.type(), stream); + + if (beta != 0) + { + if (src3.empty()) + { + dst.setTo(Scalar::all(0), stream); + } + else + { + if (tr3) + { + cuda::transpose(src3, dst, stream); + } + else + { + src3.copyTo(dst, stream); + } + } + } + + cublasHandle_t handle; + cublasSafeCall( cublasCreate_v2(&handle) ); + + cublasSafeCall( cublasSetStream_v2(handle, StreamAccessor::getStream(stream)) ); + + cublasSafeCall( cublasSetPointerMode_v2(handle, CUBLAS_POINTER_MODE_HOST) ); + + const float alphaf = static_cast(alpha); + const float betaf = static_cast(beta); + + const cuComplex alphacf = make_cuComplex(alphaf, 0); + const cuComplex betacf = make_cuComplex(betaf, 0); + + const cuDoubleComplex alphac = make_cuDoubleComplex(alpha, 0); + const cuDoubleComplex betac = make_cuDoubleComplex(beta, 0); + + cublasOperation_t transa = tr2 ? CUBLAS_OP_T : CUBLAS_OP_N; + cublasOperation_t transb = tr1 ? CUBLAS_OP_T : CUBLAS_OP_N; + + switch (src1.type()) + { + case CV_32FC1: + cublasSafeCall( cublasSgemm_v2(handle, transa, transb, tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows, + &alphaf, + src2.ptr(), static_cast(src2.step / sizeof(float)), + src1.ptr(), static_cast(src1.step / sizeof(float)), + &betaf, + dst.ptr(), static_cast(dst.step / sizeof(float))) ); + break; + + case CV_64FC1: + cublasSafeCall( cublasDgemm_v2(handle, transa, transb, tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows, + &alpha, + src2.ptr(), static_cast(src2.step / sizeof(double)), + src1.ptr(), static_cast(src1.step / sizeof(double)), + &beta, + dst.ptr(), static_cast(dst.step / sizeof(double))) ); + break; + + case CV_32FC2: + cublasSafeCall( cublasCgemm_v2(handle, transa, transb, tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows, + &alphacf, + src2.ptr(), static_cast(src2.step / sizeof(cuComplex)), + src1.ptr(), static_cast(src1.step / sizeof(cuComplex)), + &betacf, + dst.ptr(), static_cast(dst.step / sizeof(cuComplex))) ); + break; + + case CV_64FC2: + cublasSafeCall( cublasZgemm_v2(handle, transa, transb, tr2 ? src2.rows : src2.cols, tr1 ? 
src1.cols : src1.rows, tr2 ? src2.cols : src2.rows, + &alphac, + src2.ptr(), static_cast(src2.step / sizeof(cuDoubleComplex)), + src1.ptr(), static_cast(src1.step / sizeof(cuDoubleComplex)), + &betac, + dst.ptr(), static_cast(dst.step / sizeof(cuDoubleComplex))) ); + break; + } + + cublasSafeCall( cublasDestroy_v2(handle) ); + + syncOutput(dst, _dst, stream); +#endif +} + +////////////////////////////////////////////////////////////////////////////// +// DFT function + +void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, Stream& stream) +{ + if (getInputMat(_src, stream).channels() == 2) + flags |= DFT_COMPLEX_INPUT; + + Ptr dft = createDFT(dft_size, flags); + dft->compute(_src, _dst, stream); +} + +////////////////////////////////////////////////////////////////////////////// +// DFT algorithm + +#ifdef HAVE_CUFFT + +namespace +{ + + class DFTImpl : public DFT + { + Size dft_size, dft_size_opt; + bool is_1d_input, is_row_dft, is_scaled_dft, is_inverse, is_complex_input, is_complex_output; + + cufftType dft_type; + cufftHandle plan; + + public: + DFTImpl(Size dft_size, int flags) + : dft_size(dft_size), + dft_size_opt(dft_size), + is_1d_input((dft_size.height == 1) || (dft_size.width == 1)), + is_row_dft((flags & DFT_ROWS) != 0), + is_scaled_dft((flags & DFT_SCALE) != 0), + is_inverse((flags & DFT_INVERSE) != 0), + is_complex_input((flags & DFT_COMPLEX_INPUT) != 0), + is_complex_output(!(flags & DFT_REAL_OUTPUT)), + dft_type(!is_complex_input ? CUFFT_R2C : (is_complex_output ? CUFFT_C2C : CUFFT_C2R)) + { + // We don't support unpacked output (in the case of real input) + CV_Assert( !(flags & DFT_COMPLEX_OUTPUT) ); + + // We don't support real-to-real transform + CV_Assert( is_complex_input || is_complex_output ); + + if (is_1d_input && !is_row_dft) + { + // If the source matrix is single column handle it as single row + dft_size_opt.width = std::max(dft_size.width, dft_size.height); + dft_size_opt.height = std::min(dft_size.width, dft_size.height); + } + + CV_Assert( dft_size_opt.width > 1 ); + + if (is_1d_input || is_row_dft) + cufftSafeCall( cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height) ); + else + cufftSafeCall( cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type) ); + } + + ~DFTImpl() + { + cufftSafeCall( cufftDestroy(plan) ); + } + + void compute(InputArray _src, OutputArray _dst, Stream& stream) + { + GpuMat src = getInputMat(_src, stream); + + CV_Assert( src.type() == CV_32FC1 || src.type() == CV_32FC2 ); + CV_Assert( is_complex_input == (src.channels() == 2) ); + + // Make sure here we work with the continuous input, + // as CUFFT can't handle gaps + GpuMat src_cont; + if (src.isContinuous()) + { + src_cont = src; + } + else + { + BufferPool pool(stream); + src_cont.allocator = pool.getAllocator(); + createContinuous(src.rows, src.cols, src.type(), src_cont); + src.copyTo(src_cont, stream); + } + + cufftSafeCall( cufftSetStream(plan, StreamAccessor::getStream(stream)) ); + + if (is_complex_input) + { + if (is_complex_output) + { + createContinuous(dft_size, CV_32FC2, _dst); + GpuMat dst = _dst.getGpuMat(); + + cufftSafeCall(cufftExecC2C( + plan, src_cont.ptr(), dst.ptr(), + is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD)); + } + else + { + createContinuous(dft_size, CV_32F, _dst); + GpuMat dst = _dst.getGpuMat(); + + cufftSafeCall(cufftExecC2R( + plan, src_cont.ptr(), dst.ptr())); + } + } + else + { + // We could swap dft_size for efficiency. 
Here we must reflect it + if (dft_size == dft_size_opt) + createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, _dst); + else + createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, _dst); + + GpuMat dst = _dst.getGpuMat(); + + cufftSafeCall(cufftExecR2C( + plan, src_cont.ptr(), dst.ptr())); + } + + if (is_scaled_dft) + cuda::multiply(_dst, Scalar::all(1. / dft_size.area()), _dst, 1, -1, stream); + } + }; +} + +#endif + +Ptr cv::cuda::createDFT(Size dft_size, int flags) +{ +#ifndef HAVE_CUFFT + CV_UNUSED(dft_size); + CV_UNUSED(flags); + CV_Error(Error::StsNotImplemented, "The library was build without CUFFT"); + return Ptr(); +#else + return makePtr(dft_size, flags); +#endif +} + +////////////////////////////////////////////////////////////////////////////// +// Convolution + +#ifdef HAVE_CUFFT + +namespace +{ + class ConvolutionImpl : public Convolution + { + public: + explicit ConvolutionImpl(Size user_block_size_) : user_block_size(user_block_size_) {} + + void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null()); + + private: + void create(Size image_size, Size templ_size); + static Size estimateBlockSize(Size result_size); + + Size result_size; + Size block_size; + Size user_block_size; + Size dft_size; + + GpuMat image_spect, templ_spect, result_spect; + GpuMat image_block, templ_block, result_data; + }; + + void ConvolutionImpl::create(Size image_size, Size templ_size) + { + result_size = Size(image_size.width - templ_size.width + 1, + image_size.height - templ_size.height + 1); + + block_size = user_block_size; + if (user_block_size.width == 0 || user_block_size.height == 0) + block_size = estimateBlockSize(result_size); + + dft_size.width = 1 << int(ceil(std::log(block_size.width + templ_size.width - 1.) / std::log(2.))); + dft_size.height = 1 << int(ceil(std::log(block_size.height + templ_size.height - 1.) 
/ std::log(2.))); + + // CUFFT has hard-coded kernels for power-of-2 sizes (up to 8192), + // see CUDA Toolkit 4.1 CUFFT Library Programming Guide + if (dft_size.width > 8192) + dft_size.width = getOptimalDFTSize(block_size.width + templ_size.width - 1); + if (dft_size.height > 8192) + dft_size.height = getOptimalDFTSize(block_size.height + templ_size.height - 1); + + // To avoid wasting time doing small DFTs + dft_size.width = std::max(dft_size.width, 512); + dft_size.height = std::max(dft_size.height, 512); + + createContinuous(dft_size, CV_32F, image_block); + createContinuous(dft_size, CV_32F, templ_block); + createContinuous(dft_size, CV_32F, result_data); + + int spect_len = dft_size.height * (dft_size.width / 2 + 1); + createContinuous(1, spect_len, CV_32FC2, image_spect); + createContinuous(1, spect_len, CV_32FC2, templ_spect); + createContinuous(1, spect_len, CV_32FC2, result_spect); + + // Use maximum result matrix block size for the estimated DFT block size + block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width); + block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height); + } + + Size ConvolutionImpl::estimateBlockSize(Size result_size) + { + int width = (result_size.width + 2) / 3; + int height = (result_size.height + 2) / 3; + width = std::min(width, result_size.width); + height = std::min(height, result_size.height); + return Size(width, height); + } + + void ConvolutionImpl::convolve(InputArray _image, InputArray _templ, OutputArray _result, bool ccorr, Stream& _stream) + { + GpuMat image = getInputMat(_image, _stream); + GpuMat templ = getInputMat(_templ, _stream); + + CV_Assert( image.type() == CV_32FC1 ); + CV_Assert( templ.type() == CV_32FC1 ); + + create(image.size(), templ.size()); + + GpuMat result = getOutputMat(_result, result_size, CV_32FC1, _stream); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + + cufftHandle planR2C, planC2R; + cufftSafeCall( cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R) ); + cufftSafeCall( cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C) ); + + cufftSafeCall( cufftSetStream(planR2C, stream) ); + cufftSafeCall( cufftSetStream(planC2R, stream) ); + + GpuMat templ_roi(templ.size(), CV_32FC1, templ.data, templ.step); + cuda::copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0, + templ_block.cols - templ_roi.cols, 0, Scalar(), _stream); + + cufftSafeCall( cufftExecR2C(planR2C, templ_block.ptr(), templ_spect.ptr()) ); + + // Process all blocks of the result matrix + for (int y = 0; y < result.rows; y += block_size.height) + { + for (int x = 0; x < result.cols; x += block_size.width) + { + Size image_roi_size(std::min(x + dft_size.width, image.cols) - x, + std::min(y + dft_size.height, image.rows) - y); + GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr(y) + x), + image.step); + cuda::copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows, + 0, image_block.cols - image_roi.cols, 0, Scalar(), _stream); + + cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr(), + image_spect.ptr())); + cuda::mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0, + 1.f / dft_size.area(), ccorr, _stream); + cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr(), + result_data.ptr())); + + Size result_roi_size(std::min(x + block_size.width, result.cols) - x, + std::min(y + block_size.height, result.rows) - y); + GpuMat result_roi(result_roi_size, result.type(), + (void*)(result.ptr(y) + x), 
result.step); + GpuMat result_block(result_roi_size, result_data.type(), + result_data.ptr(), result_data.step); + + result_block.copyTo(result_roi, _stream); + } + } + + cufftSafeCall( cufftDestroy(planR2C) ); + cufftSafeCall( cufftDestroy(planC2R) ); + + syncOutput(result, _result, _stream); + } +} + +#endif + +Ptr cv::cuda::createConvolution(Size user_block_size) +{ +#ifndef HAVE_CUFFT + CV_UNUSED(user_block_size); + CV_Error(Error::StsNotImplemented, "The library was build without CUFFT"); + return Ptr(); +#else + return makePtr(user_block_size); +#endif +} + +#endif /* !defined (HAVE_CUDA) */ diff --git a/modules/cudaarithm/src/core.cpp b/modules/cudaarithm/src/core.cpp new file mode 100644 index 00000000000..7dd51f97816 --- /dev/null +++ b/modules/cudaarithm/src/core.cpp @@ -0,0 +1,135 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
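Note on the FFT-based Convolution implementation above (an illustrative usage sketch, not part of the patch): the result is tiled into blocks, each image block and the template are zero-padded to a shared power-of-two dft_size, the spectra are multiplied with mulAndScaleSpectrums, and each block is inverse-transformed back into its place in the result. Assuming CV_32FC1 inputs, as the asserts above require, a minimal caller looks roughly like:

    #include <opencv2/cudaarithm.hpp>

    cv::Ptr<cv::cuda::Convolution> conv = cv::cuda::createConvolution();
    cv::cuda::GpuMat image, templ, result;                 // CV_32FC1, filled elsewhere
    conv->convolve(image, templ, result, /*ccorr=*/true);  // cross-correlation instead of true convolution
    // result has size (image.cols - templ.cols + 1) x (image.rows - templ.rows + 1)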
+// +//M*/ + +#include "precomp.hpp" + +using namespace cv; +using namespace cv::cuda; + +#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) + +void cv::cuda::merge(const GpuMat*, size_t, OutputArray, Stream&) { throw_no_cuda(); } +void cv::cuda::merge(const std::vector&, OutputArray, Stream&) { throw_no_cuda(); } + +void cv::cuda::split(InputArray, GpuMat*, Stream&) { throw_no_cuda(); } +void cv::cuda::split(InputArray, std::vector&, Stream&) { throw_no_cuda(); } + +void cv::cuda::transpose(InputArray, OutputArray, Stream&) { throw_no_cuda(); } + +void cv::cuda::flip(InputArray, OutputArray, int, Stream&) { throw_no_cuda(); } + +Ptr cv::cuda::createLookUpTable(InputArray) { throw_no_cuda(); return Ptr(); } + +void cv::cuda::copyMakeBorder(InputArray, OutputArray, int, int, int, int, int, Scalar, Stream&) { throw_no_cuda(); } + +#else /* !defined (HAVE_CUDA) */ + +//////////////////////////////////////////////////////////////////////// +// flip + +namespace +{ + template struct NppTypeTraits; + template<> struct NppTypeTraits { typedef Npp8u npp_t; }; + template<> struct NppTypeTraits { typedef Npp8s npp_t; }; + template<> struct NppTypeTraits { typedef Npp16u npp_t; }; + template<> struct NppTypeTraits { typedef Npp16s npp_t; }; + template<> struct NppTypeTraits { typedef Npp32s npp_t; }; + template<> struct NppTypeTraits { typedef Npp32f npp_t; }; + template<> struct NppTypeTraits { typedef Npp64f npp_t; }; + + template struct NppMirrorFunc + { + typedef typename NppTypeTraits::npp_t npp_t; + + typedef NppStatus (*func_t)(const npp_t* pSrc, int nSrcStep, npp_t* pDst, int nDstStep, NppiSize oROI, NppiAxis flip); + }; + + template ::func_t func> struct NppMirror + { + typedef typename NppMirrorFunc::npp_t npp_t; + + static void call(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream) + { + NppStreamHandler h(stream); + + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + + nppSafeCall( func(src.ptr(), static_cast(src.step), + dst.ptr(), static_cast(dst.step), sz, + (flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? 
NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; +} + +void cv::cuda::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& stream) +{ + typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream); + static const func_t funcs[6][4] = + { + {NppMirror::call, 0, NppMirror::call, NppMirror::call}, + {0,0,0,0}, + {NppMirror::call, 0, NppMirror::call, NppMirror::call}, + {0,0,0,0}, + {NppMirror::call, 0, NppMirror::call, NppMirror::call}, + {NppMirror::call, 0, NppMirror::call, NppMirror::call} + }; + + GpuMat src = getInputMat(_src, stream); + + CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F); + CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4); + + _dst.create(src.size(), src.type()); + GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream); + + funcs[src.depth()][src.channels() - 1](src, dst, flipCode, StreamAccessor::getStream(stream)); + + syncOutput(dst, _dst, stream); +} + +#endif /* !defined (HAVE_CUDA) */ diff --git a/modules/cudaarithm/src/cuda/absdiff_mat.cu b/modules/cudaarithm/src/cuda/absdiff_mat.cu new file mode 100644 index 00000000000..ec04f122845 --- /dev/null +++ b/modules/cudaarithm/src/cuda/absdiff_mat.cu @@ -0,0 +1,188 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
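Note on the flip wrapper above (illustrative only, not part of the patch): flipCode keeps the usual OpenCV convention and is translated to an NPP mirror axis right before the nppiMirror call:

    #include <opencv2/cudaarithm.hpp>

    cv::cuda::GpuMat src, dst;        // e.g. CV_8UC3
    cv::cuda::flip(src, dst,  0);     // around the x-axis  -> NPP_HORIZONTAL_AXIS
    cv::cuda::flip(src, dst,  1);     // around the y-axis  -> NPP_VERTICAL_AXIS
    cv::cuda::flip(src, dst, -1);     // around both axes   -> NPP_BOTH_AXIS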
+// +//M*/ + +#include "opencv2/opencv_modules.hpp" + +#ifndef HAVE_OPENCV_CUDEV + +#error "opencv_cudev is required" + +#else + +#include "opencv2/cudev.hpp" + +using namespace cv::cudev; + +void absDiffMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int); + +namespace +{ + __device__ __forceinline__ int _abs(int a) + { + return ::abs(a); + } + __device__ __forceinline__ float _abs(float a) + { + return ::fabsf(a); + } + __device__ __forceinline__ double _abs(double a) + { + return ::fabs(a); + } + + template struct AbsDiffOp1 : binary_function + { + __device__ __forceinline__ T operator ()(T a, T b) const + { + return saturate_cast(_abs(a - b)); + } + }; + + template struct TransformPolicy : DefaultTransformPolicy + { + }; + template <> struct TransformPolicy : DefaultTransformPolicy + { + enum { + shift = 1 + }; + }; + + template + void absDiffMat_v1(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) + { + gridTransformBinary_< TransformPolicy >(globPtr(src1), globPtr(src2), globPtr(dst), AbsDiffOp1(), stream); + } + + struct AbsDiffOp2 : binary_function + { + __device__ __forceinline__ uint operator ()(uint a, uint b) const + { + return vabsdiff2(a, b); + } + }; + + void absDiffMat_v2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) + { + const int vcols = src1.cols >> 1; + + GlobPtrSz src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols); + GlobPtrSz src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols); + GlobPtrSz dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols); + + gridTransformBinary(src1_, src2_, dst_, AbsDiffOp2(), stream); + } + + struct AbsDiffOp4 : binary_function + { + __device__ __forceinline__ uint operator ()(uint a, uint b) const + { + return vabsdiff4(a, b); + } + }; + + void absDiffMat_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) + { + const int vcols = src1.cols >> 2; + + GlobPtrSz src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols); + GlobPtrSz src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols); + GlobPtrSz dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols); + + gridTransformBinary(src1_, src2_, dst_, AbsDiffOp4(), stream); + } +} + +void absDiffMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int) +{ + typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream); + static const func_t funcs[] = + { + absDiffMat_v1, + absDiffMat_v1, + absDiffMat_v1, + absDiffMat_v1, + absDiffMat_v1, + absDiffMat_v1, + absDiffMat_v1 + }; + + const int depth = src1.depth(); + + CV_DbgAssert( depth <= CV_64F ); + + GpuMat src1_ = src1.reshape(1); + GpuMat src2_ = src2.reshape(1); + GpuMat dst_ = dst.reshape(1); + + if (depth == CV_8U || depth == CV_16U) + { + const intptr_t src1ptr = reinterpret_cast(src1_.data); + const intptr_t src2ptr = reinterpret_cast(src2_.data); + const intptr_t dstptr = reinterpret_cast(dst_.data); + + const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0; + + if (isAllAligned) + { + if (depth == CV_8U && (src1_.cols & 3) == 0) + { + absDiffMat_v4(src1_, src2_, dst_, stream); + return; + } + else if (depth == CV_16U && (src1_.cols & 1) == 0) + { + absDiffMat_v2(src1_, src2_, dst_, stream); + return; + } + } + } + + const func_t func = funcs[depth]; + + if (!func) + CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + 
func(src1_, src2_, dst_, stream); +} + +#endif diff --git a/modules/cudaarithm/src/cuda/absdiff_scalar.cu b/modules/cudaarithm/src/cuda/absdiff_scalar.cu new file mode 100644 index 00000000000..0955e40c8b1 --- /dev/null +++ b/modules/cudaarithm/src/cuda/absdiff_scalar.cu @@ -0,0 +1,133 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
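Note on the absDiffMat dispatcher above (a sketch of the fast path, not part of the patch): when src1, src2 and dst are all 32-byte aligned, CV_8U data with a column count divisible by 4 takes the vabsdiff4 route and CV_16U data with an even column count takes vabsdiff2, so several elements are processed per 32-bit word. Conceptually the 8-bit path does the following; the patch itself uses the cv::cudev wrapper, which is assumed here to map onto CUDA's per-byte SIMD intrinsic:

    __device__ unsigned int absdiff4_sketch(unsigned int a, unsigned int b)
    {
        return __vabsdiff4(a, b);   // |a - b| computed independently on each of the four packed bytes
    }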
+// +//M*/ + +#include "opencv2/opencv_modules.hpp" + +#ifndef HAVE_OPENCV_CUDEV + +#error "opencv_cudev is required" + +#else + +#include "opencv2/cudev.hpp" + +using namespace cv::cudev; + +void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int); + +namespace +{ + template struct AbsDiffScalarOp : unary_function + { + ScalarType val; + + __device__ __forceinline__ DstType operator ()(SrcType a) const + { + abs_func f; + return saturate_cast(f(saturate_cast(a) - val)); + } + }; + + template struct TransformPolicy : DefaultTransformPolicy + { + }; + template <> struct TransformPolicy : DefaultTransformPolicy + { + enum { + shift = 1 + }; + }; + + template + void absDiffScalarImpl(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& stream) + { + typedef typename MakeVec::cn>::type ScalarType; + + cv::Scalar_ value_ = value; + + AbsDiffScalarOp op; + op.val = VecTraits::make(value_.val); + gridTransformUnary_< TransformPolicy >(globPtr(src), globPtr(dst), op, stream); + } +} + +void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int) +{ + typedef void (*func_t)(const GpuMat& src, cv::Scalar val, GpuMat& dst, Stream& stream); + static const func_t funcs[7][4] = + { + { + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + }, + { + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + }, + { + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + }, + { + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + }, + { + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + }, + { + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + }, + { + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + } + }; + + const int sdepth = src.depth(); + const int cn = src.channels(); + + CV_DbgAssert( sdepth <= CV_64F && cn <= 4 && src.type() == dst.type()); + + const func_t func = funcs[sdepth][cn - 1]; + if (!func) + CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + func(src, val, dst, stream); +} + +#endif diff --git a/modules/cudaarithm/src/cuda/add_mat.cu b/modules/cudaarithm/src/cuda/add_mat.cu new file mode 100644 index 00000000000..4166cc104e0 --- /dev/null +++ b/modules/cudaarithm/src/cuda/add_mat.cu @@ -0,0 +1,225 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
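Note on AbsDiffScalarOp above (host-side sketch with a hypothetical helper name, not part of the patch): each element is widened to the float/double ScalarType, the scalar is subtracted, and abs_func plus saturate_cast bring the value back to the source depth. For an 8-bit, single-channel input this amounts to:

    #include <opencv2/core.hpp>
    #include <cmath>

    // dst(x) = saturate_cast<uchar>( | float(src(x)) - s | )
    static uchar absDiffScalarSketch(uchar v, float s)
    {
        return cv::saturate_cast<uchar>(std::fabs(static_cast<float>(v) - s));
    }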
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/opencv_modules.hpp" + +#ifndef HAVE_OPENCV_CUDEV + +#error "opencv_cudev is required" + +#else + +#include "opencv2/cudev.hpp" + +using namespace cv::cudev; + +void addMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int); + +namespace +{ + template struct AddOp1 : binary_function + { + __device__ __forceinline__ D operator ()(T a, T b) const + { + return saturate_cast(a + b); + } + }; + + template + void addMat_v1(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream) + { + if (mask.data) + gridTransformBinary(globPtr(src1), globPtr(src2), globPtr(dst), AddOp1(), globPtr(mask), stream); + else + gridTransformBinary(globPtr(src1), globPtr(src2), globPtr(dst), AddOp1(), stream); + } + + struct AddOp2 : binary_function + { + __device__ __forceinline__ uint operator ()(uint a, uint b) const + { + return vadd2(a, b); + } + }; + + void addMat_v2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) + { + const int vcols = src1.cols >> 1; + + GlobPtrSz src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols); + GlobPtrSz src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols); + GlobPtrSz dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols); + + gridTransformBinary(src1_, src2_, dst_, AddOp2(), stream); + } + + struct AddOp4 : binary_function + { + __device__ __forceinline__ uint operator ()(uint a, uint b) const + { + return vadd4(a, b); + } + }; + + void addMat_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) + { + const int vcols = src1.cols >> 2; + + GlobPtrSz src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols); + GlobPtrSz src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols); + GlobPtrSz dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols); + + gridTransformBinary(src1_, src2_, dst_, AddOp4(), stream); + } +} + +void addMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int) +{ + typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream); + static const func_t funcs[7][7] = + { + { + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1 + }, + { + addMat_v1, + addMat_v1, + addMat_v1, 
+ addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1 + }, + { + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1 + }, + { + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1 + }, + { + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + addMat_v1, + addMat_v1, + addMat_v1 + }, + { + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + addMat_v1, + addMat_v1 + }, + { + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + addMat_v1 + } + }; + + const int sdepth = src1.depth(); + const int ddepth = dst.depth(); + + CV_DbgAssert( sdepth <= CV_64F && ddepth <= CV_64F ); + + GpuMat src1_ = src1.reshape(1); + GpuMat src2_ = src2.reshape(1); + GpuMat dst_ = dst.reshape(1); + + if (mask.empty() && (sdepth == CV_8U || sdepth == CV_16U) && ddepth == sdepth) + { + const intptr_t src1ptr = reinterpret_cast(src1_.data); + const intptr_t src2ptr = reinterpret_cast(src2_.data); + const intptr_t dstptr = reinterpret_cast(dst_.data); + + const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0; + + if (isAllAligned) + { + if (sdepth == CV_8U && (src1_.cols & 3) == 0) + { + addMat_v4(src1_, src2_, dst_, stream); + return; + } + else if (sdepth == CV_16U && (src1_.cols & 1) == 0) + { + addMat_v2(src1_, src2_, dst_, stream); + return; + } + } + } + + const func_t func = funcs[sdepth][ddepth]; + + if (!func) + CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + func(src1_, src2_, dst_, mask, stream); +} + +#endif diff --git a/modules/cudaarithm/src/cuda/add_scalar.cu b/modules/cudaarithm/src/cuda/add_scalar.cu new file mode 100644 index 00000000000..92838a2a57d --- /dev/null +++ b/modules/cudaarithm/src/cuda/add_scalar.cu @@ -0,0 +1,180 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
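Note on the addMat dispatcher above (sketch, not part of the patch): the table is indexed as funcs[src depth][dst depth], with narrowing combinations left as null pointers, and the vadd2/vadd4 fast paths are taken only when there is no mask, the source and destination depths match, and all three buffers pass the 32-byte alignment test. That test is equivalent to:

    #include <cstdint>

    static bool allAligned32(const void* a, const void* b, const void* c)
    {
        const std::uintptr_t mask = 31;   // low 5 address bits must be zero -> 32-byte alignment
        return ((std::uintptr_t)a & mask) == 0 &&
               ((std::uintptr_t)b & mask) == 0 &&
               ((std::uintptr_t)c & mask) == 0;
    }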
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/opencv_modules.hpp" + +#ifndef HAVE_OPENCV_CUDEV + +#error "opencv_cudev is required" + +#else + +#include "opencv2/cudev.hpp" + +using namespace cv::cudev; + +void addScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int); + +namespace +{ + template struct AddScalarOp : unary_function + { + ScalarType val; + + __device__ __forceinline__ DstType operator ()(SrcType a) const + { + return saturate_cast(saturate_cast(a) + val); + } + }; + + template struct TransformPolicy : DefaultTransformPolicy + { + }; + template <> struct TransformPolicy : DefaultTransformPolicy + { + enum { + shift = 1 + }; + }; + + template + void addScalarImpl(const GpuMat& src, cv::Scalar value, GpuMat& dst, const GpuMat& mask, Stream& stream) + { + typedef typename MakeVec::cn>::type ScalarType; + + cv::Scalar_ value_ = value; + + AddScalarOp op; + op.val = VecTraits::make(value_.val); + + if (mask.data) + gridTransformUnary_< TransformPolicy >(globPtr(src), globPtr(dst), op, globPtr(mask), stream); + else + gridTransformUnary_< TransformPolicy >(globPtr(src), globPtr(dst), op, stream); + } +} + +void addScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int) +{ + typedef void (*func_t)(const GpuMat& src, cv::Scalar val, GpuMat& dst, const GpuMat& mask, Stream& stream); + static const func_t funcs[7][7][4] = + { + { + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl} + }, + { + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl} + }, + { + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl} + }, + { + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 
/*addScalarImpl*/, 0 /*addScalarImpl*/}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl} + }, + { + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl} + }, + { + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl} + }, + { + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl} + } + }; + + const int sdepth = src.depth(); + const int ddepth = dst.depth(); + const int cn = src.channels(); + + CV_DbgAssert( sdepth <= CV_64F && ddepth <= CV_64F && cn <= 4 ); + + const func_t func = funcs[sdepth][ddepth][cn - 1]; + + if (!func) + CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + func(src, val, dst, mask, stream); +} + +#endif diff --git a/modules/cudaarithm/src/cuda/add_weighted.cu b/modules/cudaarithm/src/cuda/add_weighted.cu new file mode 100644 index 00000000000..929301076d3 --- /dev/null +++ b/modules/cudaarithm/src/cuda/add_weighted.cu @@ -0,0 +1,596 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
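Note on the addScalar dispatcher above (usage sketch, not part of the patch): the table is indexed as funcs[src depth][dst depth][channels - 1], so widening output depths are available while narrowing ones stay null. A call that exercises the CV_8U to CV_16U entry would look roughly like:

    #include <opencv2/cudaarithm.hpp>

    cv::cuda::GpuMat src;   // CV_8UC3, filled elsewhere
    cv::cuda::GpuMat dst;
    // per-channel scalar added with a wider output depth to avoid saturation
    cv::cuda::add(src, cv::Scalar(10, 20, 30), dst, cv::noArray(), CV_16U);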
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/opencv_modules.hpp" + +#ifndef HAVE_OPENCV_CUDEV + +#error "opencv_cudev is required" + +#else + +#include "opencv2/cudaarithm.hpp" +#include "opencv2/cudev.hpp" +#include "opencv2/core/private.cuda.hpp" + +using namespace cv; +using namespace cv::cuda; +using namespace cv::cudev; + +namespace +{ + template struct AddWeightedOp : binary_function + { + S alpha; + S beta; + S gamma; + + __device__ __forceinline__ D operator ()(T1 a, T2 b) const + { + return cudev::saturate_cast(a * alpha + b * beta + gamma); + } + }; + + template struct TransformPolicy : DefaultTransformPolicy + { + }; + template <> struct TransformPolicy : DefaultTransformPolicy + { + enum { + shift = 1 + }; + }; + + template + void addWeightedImpl(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, Stream& stream) + { + typedef typename LargerType::type larger_type1; + typedef typename LargerType::type larger_type2; + typedef typename LargerType::type scalar_type; + + AddWeightedOp op; + op.alpha = static_cast(alpha); + op.beta = static_cast(beta); + op.gamma = static_cast(gamma); + + gridTransformBinary_< TransformPolicy >(globPtr(src1), globPtr(src2), globPtr(dst), op, stream); + } +} + +void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, double beta, double gamma, OutputArray _dst, int ddepth, Stream& stream) +{ + typedef void (*func_t)(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, Stream& stream); + static const func_t funcs[7][7][7] = + { + { + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + 
addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + } + }, + { + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + } + }, + { + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + } + }, + { + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, 
+ { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + } + }, + { + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + } + }, + { + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + } + }, + { + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + addWeightedImpl, + addWeightedImpl, + 
addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + } + } + }; + + GpuMat src1 = getInputMat(_src1, stream); + GpuMat src2 = getInputMat(_src2, stream); + + int sdepth1 = src1.depth(); + int sdepth2 = src2.depth(); + + ddepth = ddepth >= 0 ? CV_MAT_DEPTH(ddepth) : std::max(sdepth1, sdepth2); + const int cn = src1.channels(); + + CV_Assert( src2.size() == src1.size() && src2.channels() == cn ); + CV_Assert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F ); + + GpuMat dst = getOutputMat(_dst, src1.size(), CV_MAKE_TYPE(ddepth, cn), stream); + + GpuMat src1_single = src1.reshape(1); + GpuMat src2_single = src2.reshape(1); + GpuMat dst_single = dst.reshape(1); + + if (sdepth1 > sdepth2) + { + src1_single.swap(src2_single); + std::swap(alpha, beta); + std::swap(sdepth1, sdepth2); + } + + const func_t func = funcs[sdepth1][sdepth2][ddepth]; + + if (!func) + CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + func(src1_single, alpha, src2_single, beta, gamma, dst_single, stream); + + syncOutput(dst, _dst, stream); +} + +#endif diff --git a/modules/cudaarithm/src/cuda/bitwise_mat.cu b/modules/cudaarithm/src/cuda/bitwise_mat.cu new file mode 100644 index 00000000000..f151c1a4862 --- /dev/null +++ b/modules/cudaarithm/src/cuda/bitwise_mat.cu @@ -0,0 +1,230 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
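Note on cv::cuda::addWeighted above (usage sketch, not part of the patch): each element becomes saturate_cast<D>(src1*alpha + src2*beta + gamma), and when src1 has the deeper type the operands (together with alpha and beta) are swapped so that only the sdepth1 <= sdepth2 half of the dispatch table needs to be instantiated. A typical blending call:

    #include <opencv2/cudaarithm.hpp>

    cv::cuda::GpuMat a, b, blended;   // a, b: CV_8UC1, same size
    cv::cuda::addWeighted(a, 0.7, b, 0.3, 0.0, blended, CV_32F);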
+// +//M*/ + +#include "opencv2/opencv_modules.hpp" + +#ifndef HAVE_OPENCV_CUDEV + +#error "opencv_cudev is required" + +#else + +#include "opencv2/cudaarithm.hpp" +#include "opencv2/cudev.hpp" +#include "opencv2/core/private.cuda.hpp" + +using namespace cv; +using namespace cv::cuda; +using namespace cv::cudev; + +void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op); + +////////////////////////////////////////////////////////////////////////////// +/// bitwise_not + +void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream) +{ + GpuMat src = getInputMat(_src, stream); + GpuMat mask = getInputMat(_mask, stream); + + const int depth = src.depth(); + + CV_DbgAssert( depth <= CV_32F ); + CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) ); + + GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream); + + if (mask.empty()) + { + const int bcols = (int) (src.cols * src.elemSize()); + + if ((bcols & 3) == 0) + { + const int vcols = bcols >> 2; + + GlobPtrSz vsrc = globPtr((uint*) src.data, src.step, src.rows, vcols); + GlobPtrSz vdst = globPtr((uint*) dst.data, dst.step, src.rows, vcols); + + gridTransformUnary(vsrc, vdst, bit_not(), stream); + } + else if ((bcols & 1) == 0) + { + const int vcols = bcols >> 1; + + GlobPtrSz vsrc = globPtr((ushort*) src.data, src.step, src.rows, vcols); + GlobPtrSz vdst = globPtr((ushort*) dst.data, dst.step, src.rows, vcols); + + gridTransformUnary(vsrc, vdst, bit_not(), stream); + } + else + { + GlobPtrSz vsrc = globPtr((uchar*) src.data, src.step, src.rows, bcols); + GlobPtrSz vdst = globPtr((uchar*) dst.data, dst.step, src.rows, bcols); + + gridTransformUnary(vsrc, vdst, bit_not(), stream); + } + } + else + { + if (depth == CV_32F || depth == CV_32S) + { + GlobPtrSz vsrc = globPtr((uint*) src.data, src.step, src.rows, src.cols * src.channels()); + GlobPtrSz vdst = globPtr((uint*) dst.data, dst.step, src.rows, src.cols * src.channels()); + + gridTransformUnary(vsrc, vdst, bit_not(), singleMaskChannels(globPtr(mask), src.channels()), stream); + } + else if (depth == CV_16S || depth == CV_16U) + { + GlobPtrSz vsrc = globPtr((ushort*) src.data, src.step, src.rows, src.cols * src.channels()); + GlobPtrSz vdst = globPtr((ushort*) dst.data, dst.step, src.rows, src.cols * src.channels()); + + gridTransformUnary(vsrc, vdst, bit_not(), singleMaskChannels(globPtr(mask), src.channels()), stream); + } + else + { + GlobPtrSz vsrc = globPtr((uchar*) src.data, src.step, src.rows, src.cols * src.channels()); + GlobPtrSz vdst = globPtr((uchar*) dst.data, dst.step, src.rows, src.cols * src.channels()); + + gridTransformUnary(vsrc, vdst, bit_not(), singleMaskChannels(globPtr(mask), src.channels()), stream); + } + } + + syncOutput(dst, _dst, stream); +} + +////////////////////////////////////////////////////////////////////////////// +/// Binary bitwise logical operations + +namespace +{ + template