cuda: move CUDA modules to opencv_contrib

OpenCV 4.0+
opencv · Sep 19, 2018 · 82733fe · 82733fe
1 parent 136baab
commit 82733fe
Show file tree

Hide file tree

Showing 458 changed files with 120,709 additions and 0 deletions.
diff --git a/modules/cudaarithm/CMakeLists.txt b/modules/cudaarithm/CMakeLists.txt
@@ -0,0 +1,27 @@
+if(IOS OR WINRT OR (NOT HAVE_CUDA AND NOT BUILD_CUDA_STUBS))  # build script for the cudaarithm module; this guard fires on iOS/WinRT, or when neither HAVE_CUDA nor BUILD_CUDA_STUBS is set
+  ocv_module_disable(cudaarithm)  # OpenCV helper defined outside this file; presumably stops further processing of this module -- confirm
+endif()
+
+set(the_description "CUDA-accelerated Operations on Matrices")  # presumably consumed by the ocv_* module macros below -- confirm
+
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow)  # list mixes MSVC-style (/wdNNNN) and GCC/Clang-style (-W...) warning names
+
+ocv_add_module(cudaarithm opencv_core OPTIONAL opencv_cudev WRAP python)  # opencv_core precedes OPTIONAL, opencv_cudev follows it; WRAP python presumably requests Python bindings
+
+ocv_module_include_directories()  # called with no arguments
+ocv_glob_module_sources()  # called with no arguments; name suggests sources are discovered by globbing
+
+set(extra_libs "")  # start from an empty list so only the libraries appended below are passed on
+
+if(HAVE_CUBLAS)
+  list(APPEND extra_libs ${CUDA_cublas_LIBRARY})  # cuBLAS library is added only when HAVE_CUBLAS is set
+endif()
+
+if(HAVE_CUFFT)
+  list(APPEND extra_libs ${CUDA_cufft_LIBRARY})  # cuFFT library is added only when HAVE_CUFFT is set
+endif()
+
+ocv_create_module(${extra_libs})  # unquoted expansion: each collected library becomes a separate argument (none when the list is empty)
+
+ocv_add_accuracy_tests(DEPENDS_ON opencv_imgproc)  # accuracy tests declare a dependency on opencv_imgproc
+ocv_add_perf_tests(DEPENDS_ON opencv_imgproc)  # perf tests declare the same dependency
diff --git a/modules/cudaarithm/include/opencv2/cudaarithm.hpp b/modules/cudaarithm/include/opencv2/cudaarithm.hpp
diff --git a/modules/cudaarithm/perf/perf_arithm.cpp b/modules/cudaarithm/perf/perf_arithm.cpp
@@ -0,0 +1,254 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+namespace opencv_test { namespace {
+
+//////////////////////////////////////////////////////////////////////
+// GEMM: times cv::cuda::gemm against cv::gemm; the whole section is compiled only when HAVE_CUBLAS is defined
+
+#ifdef HAVE_CUBLAS
+
+CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T) // flag wrapper type used as the third test parameter
+#define ALL_GEMM_FLAGS Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), \
+                              GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T))
+
+DEF_PARAM_TEST(Sz_Type_Flags, cv::Size, MatType, GemmFlags); // fixture parameterised by (matrix size, matrix type, gemm flags)
+
+PERF_TEST_P(Sz_Type_Flags, GEMM,
+            Combine(Values(cv::Size(512, 512), cv::Size(1024, 1024)),
+                    Values(CV_32FC1, CV_32FC2, CV_64FC1),
+                    ALL_GEMM_FLAGS)) // seven flag combinations; GEMM_2_T | GEMM_3_T on its own is not in the list
+{
+    const cv::Size size = GET_PARAM(0);
+    const int type = GET_PARAM(1);
+    const int flags = GET_PARAM(2);
+
+    cv::Mat src1(size, type); // all three operands share the same size and type
+    declare.in(src1, WARMUP_RNG); // WARMUP_RNG is defined outside this file; presumably fills the input with random data -- confirm
+
+    cv::Mat src2(size, type);
+    declare.in(src2, WARMUP_RNG);
+
+    cv::Mat src3(size, type);
+    declare.in(src3, WARMUP_RNG);
+
+    if (PERF_RUN_CUDA())
+    {
+        declare.time(5.0); // presumably a time budget for this test case -- confirm units in the perf framework
+
+        const cv::cuda::GpuMat d_src1(src1); // device matrices are constructed from the host inputs before the timed loop
+        const cv::cuda::GpuMat d_src2(src2);
+        const cv::cuda::GpuMat d_src3(src3);
+        cv::cuda::GpuMat dst;
+
+        TEST_CYCLE() cv::cuda::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, dst, flags); // alpha and beta are both 1.0
+
+        CUDA_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); // checks dst with 1e-6 and ERROR_RELATIVE (presumably a relative tolerance)
+    }
+    else
+    {
+        declare.time(50.0); // ten times the value used in the CUDA branch
+
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::gemm(src1, src2, 1.0, src3, 1.0, dst, flags); // CPU counterpart called with the same scalars and flags
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+#endif // HAVE_CUBLAS
+
+//////////////////////////////////////////////////////////////////////
+// MulSpectrums: times cv::cuda::mulSpectrums against cv::mulSpectrums on two CV_32FC2 inputs
+
+CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT) // flag wrapper type for the DFT-related tests
+
+DEF_PARAM_TEST(Sz_Flags, cv::Size, DftFlags); // fixture parameterised by (matrix size, DFT flags); the Dft test below uses it too
+
+PERF_TEST_P(Sz_Flags, MulSpectrums,
+            Combine(CUDA_TYPICAL_MAT_SIZES,
+                    Values(0, DftFlags(cv::DFT_ROWS)))) // only "no flag" and DFT_ROWS are exercised
+{
+    const cv::Size size = GET_PARAM(0);
+    const int flag = GET_PARAM(1);
+
+    cv::Mat a(size, CV_32FC2); // two-channel float matrices
+    cv::Mat b(size, CV_32FC2);
+    declare.in(a, b, WARMUP_RNG); // WARMUP_RNG is defined outside this file; presumably fills both inputs with random data -- confirm
+
+    if (PERF_RUN_CUDA())
+    {
+        const cv::cuda::GpuMat d_a(a); // device matrices are constructed from the host inputs before the timed loop
+        const cv::cuda::GpuMat d_b(b);
+        cv::cuda::GpuMat dst;
+
+        TEST_CYCLE() cv::cuda::mulSpectrums(d_a, d_b, dst, flag);
+
+        CUDA_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); // checks dst with 1e-6 and ERROR_RELATIVE (presumably a relative tolerance)
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::mulSpectrums(a, b, dst, flag); // CPU counterpart called with the same flag
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// MulAndScaleSpectrums: times cv::cuda::mulAndScaleSpectrums; only the CUDA branch performs a measurement
+
+PERF_TEST_P(Sz, MulAndScaleSpectrums,
+            CUDA_TYPICAL_MAT_SIZES) // the Sz fixture and the size list are both defined outside this file
+{
+    const cv::Size size = GetParam(); // single-parameter fixture, so GetParam() is used rather than GET_PARAM(i)
+
+    const float scale = 1.f / size.area(); // scale factor is the reciprocal of size.area()
+
+    cv::Mat src1(size, CV_32FC2); // two-channel float matrices
+    cv::Mat src2(size, CV_32FC2);
+    declare.in(src1,src2, WARMUP_RNG); // WARMUP_RNG is defined outside this file; presumably fills both inputs with random data -- confirm
+
+    if (PERF_RUN_CUDA())
+    {
+        const cv::cuda::GpuMat d_src1(src1); // device matrices are constructed from the host inputs before the timed loop
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat dst;
+
+        TEST_CYCLE() cv::cuda::mulAndScaleSpectrums(d_src1, d_src2, dst, cv::DFT_ROWS, scale, false); // always called with DFT_ROWS and a trailing false argument
+
+        CUDA_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); // checks dst with 1e-6 and ERROR_RELATIVE (presumably a relative tolerance)
+    }
+    else
+    {
+        FAIL_NO_CPU(); // macro defined outside this file; the name suggests the test fails because no CPU counterpart is called here
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Dft: times cv::cuda::dft against cv::dft on a CV_32FC2 input
+
+PERF_TEST_P(Sz_Flags, Dft,
+            Combine(CUDA_TYPICAL_MAT_SIZES,
+                    Values(0, DftFlags(cv::DFT_ROWS), DftFlags(cv::DFT_INVERSE)))) // exercised flags: none, DFT_ROWS, DFT_INVERSE
+{
+    declare.time(10.0); // set before the branch, so it applies to both the CUDA and the CPU path
+
+    const cv::Size size = GET_PARAM(0);
+    const int flag = GET_PARAM(1);
+
+    cv::Mat src(size, CV_32FC2); // two-channel float matrix
+    declare.in(src, WARMUP_RNG); // WARMUP_RNG is defined outside this file; presumably fills the input with random data -- confirm
+
+    if (PERF_RUN_CUDA())
+    {
+        const cv::cuda::GpuMat d_src(src); // device matrix is constructed from the host input before the timed loop
+        cv::cuda::GpuMat dst;
+
+        TEST_CYCLE() cv::cuda::dft(d_src, dst, size, flag); // the CUDA call takes the size explicitly, unlike the CPU call below
+
+        CUDA_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); // checks dst with 1e-6 and ERROR_RELATIVE (presumably a relative tolerance)
+    }
+    else
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::dft(src, dst, flag); // CPU counterpart called with the same flag
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// Convolve: times cv::cuda::Convolution::convolve; the CPU branch times cv::filter2D instead
+
+DEF_PARAM_TEST(Sz_KernelSz_Ccorr, cv::Size, int, bool); // fixture parameterised by (image size, template side length, ccorr flag)
+
+PERF_TEST_P(Sz_KernelSz_Ccorr, Convolve,
+            Combine(CUDA_TYPICAL_MAT_SIZES,
+                    Values(17, 27, 32, 64),
+                    Bool())) // Bool() presumably yields both false and true for ccorr
+{
+    declare.time(10.0); // set before the branch, so it applies to both the CUDA and the CPU path
+
+    const cv::Size size = GET_PARAM(0);
+    const int templ_size = GET_PARAM(1);
+    const bool ccorr = GET_PARAM(2);
+
+    const cv::Mat image(size, CV_32FC1); // single-channel float image
+    const cv::Mat templ(templ_size, templ_size, CV_32FC1); // square single-channel float template
+    declare.in(image, templ, WARMUP_RNG); // WARMUP_RNG is defined outside this file; presumably fills both inputs with random data -- confirm
+
+    if (PERF_RUN_CUDA())
+    {
+        cv::cuda::GpuMat d_image = cv::cuda::createContinuous(size, CV_32FC1); // allocated through createContinuous, then filled by upload
+        d_image.upload(image);
+
+        cv::cuda::GpuMat d_templ = cv::cuda::createContinuous(templ_size, templ_size, CV_32FC1); // same allocate-then-upload pattern as the image
+        d_templ.upload(templ);
+
+        cv::Ptr<cv::cuda::Convolution> convolution = cv::cuda::createConvolution(); // created once, outside the timed loop
+
+        cv::cuda::GpuMat dst;
+
+        TEST_CYCLE() convolution->convolve(d_image, d_templ, dst, ccorr);
+
+        CUDA_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); // checks dst with 1e-6 and ERROR_RELATIVE (presumably a relative tolerance)
+    }
+    else
+    {
+        if (ccorr)
+            FAIL_NO_CPU(); // macro defined outside this file; presumably fails and leaves the test when ccorr is requested -- confirm
+
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::filter2D(image, dst, image.depth(), templ); // output depth is taken from the input image
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+}} // namespace