opencv · alalek · Sep 26, 2018 · Sep 17, 2018 · Sep 19, 2018 · Sep 19, 2018
diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp
@@ -59,22 +59,22 @@ CV_EXPORTS_W void finish();
 CV_EXPORTS bool haveSVM();
 
 class CV_EXPORTS Context;
-class CV_EXPORTS Device;
+class CV_EXPORTS_W_SIMPLE Device;
 class CV_EXPORTS Kernel;
 class CV_EXPORTS Program;
 class CV_EXPORTS ProgramSource;
 class CV_EXPORTS Queue;
 class CV_EXPORTS PlatformInfo;
 class CV_EXPORTS Image2D;
 
-class CV_EXPORTS Device
+class CV_EXPORTS_W_SIMPLE Device
 {
 public:
-    Device();
+    CV_WRAP Device();
     explicit Device(void* d);
     Device(const Device& d);
     Device& operator = (const Device& d);
-    ~Device();
+    CV_WRAP ~Device();
 
     void set(void* d);
 
@@ -89,24 +89,24 @@ class CV_EXPORTS Device
         TYPE_ALL         = 0xFFFFFFFF
     };
 
-    String name() const;
-    String extensions() const;
-    bool isExtensionSupported(const String& extensionName) const;
-    String version() const;
-    String vendorName() const;
-    String OpenCL_C_Version() const;
-    String OpenCLVersion() const;
-    int deviceVersionMajor() const;
-    int deviceVersionMinor() const;
-    String driverVersion() const;
+    CV_WRAP String name() const;
+    CV_WRAP String extensions() const;
+    CV_WRAP bool isExtensionSupported(const String& extensionName) const;
+    CV_WRAP String version() const;
+    CV_WRAP String vendorName() const;
+    CV_WRAP String OpenCL_C_Version() const;
+    CV_WRAP String OpenCLVersion() const;
+    CV_WRAP int deviceVersionMajor() const;
+    CV_WRAP int deviceVersionMinor() const;
+    CV_WRAP String driverVersion() const;
     void* ptr() const;
 
-    int type() const;
+    CV_WRAP int type() const;
 
-    int addressBits() const;
-    bool available() const;
-    bool compilerAvailable() const;
-    bool linkerAvailable() const;
+    CV_WRAP int addressBits() const;
+    CV_WRAP bool available() const;
+    CV_WRAP bool compilerAvailable() const;
+    CV_WRAP bool linkerAvailable() const;
 
     enum
     {
@@ -119,60 +119,60 @@ class CV_EXPORTS Device
         FP_SOFT_FLOAT=(1 << 6),
         FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7)
     };
-    int doubleFPConfig() const;
-    int singleFPConfig() const;
-    int halfFPConfig() const;
+    CV_WRAP int doubleFPConfig() const;
+    CV_WRAP int singleFPConfig() const;
+    CV_WRAP int halfFPConfig() const;
 
-    bool endianLittle() const;
-    bool errorCorrectionSupport() const;
+    CV_WRAP bool endianLittle() const;
+    CV_WRAP bool errorCorrectionSupport() const;
 
     enum
     {
         EXEC_KERNEL=(1 << 0),
         EXEC_NATIVE_KERNEL=(1 << 1)
     };
-    int executionCapabilities() const;
+    CV_WRAP int executionCapabilities() const;
 
-    size_t globalMemCacheSize() const;
+    CV_WRAP size_t globalMemCacheSize() const;
 
     enum
     {
         NO_CACHE=0,
         READ_ONLY_CACHE=1,
         READ_WRITE_CACHE=2
     };
-    int globalMemCacheType() const;
-    int globalMemCacheLineSize() const;
-    size_t globalMemSize() const;
+    CV_WRAP int globalMemCacheType() const;
+    CV_WRAP int globalMemCacheLineSize() const;
+    CV_WRAP size_t globalMemSize() const;
 
-    size_t localMemSize() const;
+    CV_WRAP size_t localMemSize() const;
     enum
     {
         NO_LOCAL_MEM=0,
         LOCAL_IS_LOCAL=1,
         LOCAL_IS_GLOBAL=2
     };
-    int localMemType() const;
-    bool hostUnifiedMemory() const;
+    CV_WRAP int localMemType() const;
+    CV_WRAP bool hostUnifiedMemory() const;
 
-    bool imageSupport() const;
+    CV_WRAP bool imageSupport() const;
 
-    bool imageFromBufferSupport() const;
+    CV_WRAP bool imageFromBufferSupport() const;
     uint imagePitchAlignment() const;
     uint imageBaseAddressAlignment() const;
 
     /// deprecated, use isExtensionSupported() method (probably with "cl_khr_subgroups" value)
-    bool intelSubgroupsSupport() const;
+    CV_WRAP bool intelSubgroupsSupport() const;
 
-    size_t image2DMaxWidth() const;
-    size_t image2DMaxHeight() const;
+    CV_WRAP size_t image2DMaxWidth() const;
+    CV_WRAP size_t image2DMaxHeight() const;
 
-    size_t image3DMaxWidth() const;
-    size_t image3DMaxHeight() const;
-    size_t image3DMaxDepth() const;
+    CV_WRAP size_t image3DMaxWidth() const;
+    CV_WRAP size_t image3DMaxHeight() const;
+    CV_WRAP size_t image3DMaxDepth() const;
 
-    size_t imageMaxBufferSize() const;
-    size_t imageMaxArraySize() const;
+    CV_WRAP size_t imageMaxBufferSize() const;
+    CV_WRAP size_t imageMaxArraySize() const;
 
     enum
     {
@@ -181,53 +181,53 @@ class CV_EXPORTS Device
         VENDOR_INTEL=2,
         VENDOR_NVIDIA=3
     };
-    int vendorID() const;
+    CV_WRAP int vendorID() const;
     // FIXIT
     // dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform.
     // This method should use platform name instead of vendor name.
     // After fix restore code in arithm.cpp: ocl_compare()
-    inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
-    inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }
-    inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; }
+    CV_WRAP inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
+    CV_WRAP inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }
+    CV_WRAP inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; }
 
-    int maxClockFrequency() const;
-    int maxComputeUnits() const;
-    int maxConstantArgs() const;
-    size_t maxConstantBufferSize() const;
+    CV_WRAP int maxClockFrequency() const;
+    CV_WRAP int maxComputeUnits() const;
+    CV_WRAP int maxConstantArgs() const;
+    CV_WRAP size_t maxConstantBufferSize() const;
 
-    size_t maxMemAllocSize() const;
-    size_t maxParameterSize() const;
+    CV_WRAP size_t maxMemAllocSize() const;
+    CV_WRAP size_t maxParameterSize() const;
 
-    int maxReadImageArgs() const;
-    int maxWriteImageArgs() const;
-    int maxSamplers() const;
+    CV_WRAP int maxReadImageArgs() const;
+    CV_WRAP int maxWriteImageArgs() const;
+    CV_WRAP int maxSamplers() const;
 
-    size_t maxWorkGroupSize() const;
-    int maxWorkItemDims() const;
+    CV_WRAP size_t maxWorkGroupSize() const;
+    CV_WRAP int maxWorkItemDims() const;
     void maxWorkItemSizes(size_t*) const;
 
-    int memBaseAddrAlign() const;
+    CV_WRAP int memBaseAddrAlign() const;
 
-    int nativeVectorWidthChar() const;
-    int nativeVectorWidthShort() const;
-    int nativeVectorWidthInt() const;
-    int nativeVectorWidthLong() const;
-    int nativeVectorWidthFloat() const;
-    int nativeVectorWidthDouble() const;
-    int nativeVectorWidthHalf() const;
+    CV_WRAP int nativeVectorWidthChar() const;
+    CV_WRAP int nativeVectorWidthShort() const;
+    CV_WRAP int nativeVectorWidthInt() const;
+    CV_WRAP int nativeVectorWidthLong() const;
+    CV_WRAP int nativeVectorWidthFloat() const;
+    CV_WRAP int nativeVectorWidthDouble() const;
+    CV_WRAP int nativeVectorWidthHalf() const;
 
-    int preferredVectorWidthChar() const;
-    int preferredVectorWidthShort() const;
-    int preferredVectorWidthInt() const;
-    int preferredVectorWidthLong() const;
-    int preferredVectorWidthFloat() const;
-    int preferredVectorWidthDouble() const;
-    int preferredVectorWidthHalf() const;
+    CV_WRAP int preferredVectorWidthChar() const;
+    CV_WRAP int preferredVectorWidthShort() const;
+    CV_WRAP int preferredVectorWidthInt() const;
+    CV_WRAP int preferredVectorWidthLong() const;
+    CV_WRAP int preferredVectorWidthFloat() const;
+    CV_WRAP int preferredVectorWidthDouble() const;
+    CV_WRAP int preferredVectorWidthHalf() const;
 
-    size_t printfBufferSize() const;
-    size_t profilingTimerResolution() const;
+    CV_WRAP size_t printfBufferSize() const;
+    CV_WRAP size_t profilingTimerResolution() const;
 
-    static const Device& getDefault();
+    CV_WRAP static const Device& getDefault();
 
 protected:
     struct Impl;

diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
@@ -3078,7 +3078,7 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
             dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
         CV_Assert( val > 0 );
         total *= _globalsize[i];
-        if (_globalsize[i] == 1)
+        if (_globalsize[i] == 1 && !_localsize)
 // Test for non-Intel GPUs to check CL_INVALID_WORK_GROUP_SIZE when localsize > globalsize 
 OCL_TEST(Gemm, small) 
 { 
     UMat A(2, 3, CV_32F), B(4, 3, CV_32F), uC(2, 4, CV_32F); 
     Mat C(2, 4, CV_32F); 
     randu(A, -1, 1); 
     randu(B, -1, 1); 
     OCL_OFF(cv::gemm(A, B, 1, noArray(), 0, C, GEMM_2_T)); 
     OCL_ON(cv::gemm(A, B, 1, noArray(), 0, uC, GEMM_2_T)); 
     EXPECT_LE(cvtest::norm(C, uC, cv::NORM_INF), 1e-5); 
 } 
 // Test for non-Intel GPUs to check CL_INVALID_WORK_GROUP_SIZE when localsize > globalsize 
 OCL_TEST(Gemm, small) 
 { 
     UMat A(2, 3, CV_32F), B(4, 3, CV_32F), uC(2, 4, CV_32F); 
     Mat C(2, 4, CV_32F); 
  
     randu(A, -1, 1); 
     randu(B, -1, 1); 
  
     OCL_OFF(cv::gemm(A, B, 1, noArray(), 0, C, GEMM_2_T)); 
     OCL_ON(cv::gemm(A, B, 1, noArray(), 0, uC, GEMM_2_T)); 
  
     EXPECT_LE(cvtest::norm(C, uC, cv::NORM_INF), 1e-5); 
 } 
             val = 1;
         globalsize[i] = divUp(_globalsize[i], (unsigned int)val) * val;
     }

diff --git a/modules/core/test/ocl/test_gemm.cpp b/modules/core/test/ocl/test_gemm.cpp
@@ -145,6 +145,21 @@ OCL_INSTANTIATE_TEST_CASE_P(Core, Gemm, ::testing::Combine(
                             testing::Values(CV_32FC1, CV_32FC2, CV_64FC1, CV_64FC2),
                             Bool(), Bool(), Bool(), Bool()));
 
+// Test for non-Intel GPUs to check CL_INVALID_WORK_GROUP_SIZE when localsize > globalsize
+OCL_TEST(Gemm, small)
+{
+    UMat A(2, 3, CV_32F), B(4, 3, CV_32F), uC(2, 4, CV_32F);
+    Mat C(2, 4, CV_32F);
+
+    randu(A, -1, 1);
+    randu(B, -1, 1);
+
+    OCL_OFF(cv::gemm(A, B, 1, noArray(), 0, C, GEMM_2_T));
+    OCL_ON(cv::gemm(A, B, 1, noArray(), 0, uC, GEMM_2_T));
+
+    EXPECT_LE(cvtest::norm(C, uC, cv::NORM_INF), 1e-5);
+}
+
 } } // namespace opencv_test::ocl
 
 #endif // HAVE_OPENCL
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
@@ -1078,12 +1078,22 @@ struct Net::Impl
             }
 #else
             {
-                if (!DNN_OPENCL_ALLOW_ALL_DEVICES
-                    && !(ocl::Device::getDefault().isIntel() && ocl::Device::getDefault().type() == ocl::Device::TYPE_GPU) // Current implementation is only valid for Intel GPU (#11494)
-                    )
+                if (!DNN_OPENCL_ALLOW_ALL_DEVICES)
                 {
-                    CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with Intel GPUs only), switching to CPU.");
-                    preferableTarget = DNN_TARGET_CPU;
+                    // Current implementation is only valid for GPU (#11494)
+                    if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU)
+                    {
+                        CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU.");
+                        preferableTarget = DNN_TARGET_CPU;
+                    }
+                    else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
+                    {
+                        CV_LOG_WARNING(NULL,
+                            "DNN: OpenCL target with fp16 precision is not supported "
+                            "with current OpenCL device (tested with Intel GPUs only), "
+                            "switching to OpenCL with fp32 precision.");
+                        preferableTarget = DNN_TARGET_OPENCL;
+                    }
                 }
             }
 #endif

diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp
@@ -230,8 +230,7 @@ class BatchNormLayerImpl CV_FINAL : public BatchNormLayer
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                    forward_ocl(inputs_arr, outputs_arr, internals_arr))
 
         if (inputs_arr.depth() == CV_16S)

diff --git a/modules/dnn/src/layers/blank_layer.cpp b/modules/dnn/src/layers/blank_layer.cpp
@@ -95,16 +95,9 @@ class BlankLayerImpl CV_FINAL : public BlankLayer
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                    forward_ocl(inputs_arr, outputs_arr, internals_arr))
 
-        if (inputs_arr.depth() == CV_16S)
-        {
-            forward_fallback(inputs_arr, outputs_arr, internals_arr);
-            return;
-        }
-
         std::vector<Mat> inputs, outputs;
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);

diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp
@@ -237,16 +237,9 @@ class ConcatLayerImpl CV_FINAL : public ConcatLayer
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                    forward_ocl(inputs_arr, outputs_arr, internals_arr))
 
-        if (inputs_arr.depth() == CV_16S)
-        {
-            forward_fallback(inputs_arr, outputs_arr, internals_arr);
-            return;
-        }
-
         std::vector<Mat> inputs, outputs;
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);

diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
@@ -1529,8 +1529,7 @@ class DeConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                    forward_ocl(inputs_arr, outputs_arr, internals_arr));
 
         if (inputs_arr.depth() == CV_16S)

diff --git a/modules/dnn/src/layers/crop_layer.cpp b/modules/dnn/src/layers/crop_layer.cpp
@@ -137,12 +137,6 @@ class CropLayerImpl CV_FINAL : public CropLayer
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-        if (inputs_arr.depth() == CV_16S)
-        {
-            forward_fallback(inputs_arr, outputs_arr, internals_arr);
-            return;
-        }
-
         std::vector<Mat> inputs, outputs;
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);

diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp
@@ -415,8 +415,7 @@ class DetectionOutputLayerImpl CV_FINAL : public DetectionOutputLayer
 
         if (_bboxesNormalized)
         {
-            CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                       OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+            CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                        forward_ocl(inputs_arr, outputs_arr, internals_arr))
         }
         if (inputs_arr.depth() == CV_16S)

diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp
@@ -354,8 +354,7 @@ class EltwiseLayerImpl CV_FINAL : public EltwiseLayer
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                    forward_ocl(inputs_arr, outputs_arr, internals_arr))
 
         if (inputs_arr.depth() == CV_16S)