Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove isIntel check from deep learning layers #12565

Merged
merged 3 commits into from Sep 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
150 changes: 75 additions & 75 deletions modules/core/include/opencv2/core/ocl.hpp
Expand Up @@ -59,22 +59,22 @@ CV_EXPORTS_W void finish();
CV_EXPORTS bool haveSVM();

class CV_EXPORTS Context;
class CV_EXPORTS Device;
class CV_EXPORTS_W_SIMPLE Device;
class CV_EXPORTS Kernel;
class CV_EXPORTS Program;
class CV_EXPORTS ProgramSource;
class CV_EXPORTS Queue;
class CV_EXPORTS PlatformInfo;
class CV_EXPORTS Image2D;

class CV_EXPORTS Device
class CV_EXPORTS_W_SIMPLE Device
{
public:
Device();
CV_WRAP Device();
explicit Device(void* d);
Device(const Device& d);
Device& operator = (const Device& d);
~Device();
CV_WRAP ~Device();

void set(void* d);

Expand All @@ -89,24 +89,24 @@ class CV_EXPORTS Device
TYPE_ALL = 0xFFFFFFFF
};

String name() const;
String extensions() const;
bool isExtensionSupported(const String& extensionName) const;
String version() const;
String vendorName() const;
String OpenCL_C_Version() const;
String OpenCLVersion() const;
int deviceVersionMajor() const;
int deviceVersionMinor() const;
String driverVersion() const;
CV_WRAP String name() const;
CV_WRAP String extensions() const;
CV_WRAP bool isExtensionSupported(const String& extensionName) const;
CV_WRAP String version() const;
CV_WRAP String vendorName() const;
CV_WRAP String OpenCL_C_Version() const;
CV_WRAP String OpenCLVersion() const;
CV_WRAP int deviceVersionMajor() const;
CV_WRAP int deviceVersionMinor() const;
CV_WRAP String driverVersion() const;
void* ptr() const;

int type() const;
CV_WRAP int type() const;

int addressBits() const;
bool available() const;
bool compilerAvailable() const;
bool linkerAvailable() const;
CV_WRAP int addressBits() const;
CV_WRAP bool available() const;
CV_WRAP bool compilerAvailable() const;
CV_WRAP bool linkerAvailable() const;

enum
{
Expand All @@ -119,60 +119,60 @@ class CV_EXPORTS Device
FP_SOFT_FLOAT=(1 << 6),
FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7)
};
int doubleFPConfig() const;
int singleFPConfig() const;
int halfFPConfig() const;
CV_WRAP int doubleFPConfig() const;
CV_WRAP int singleFPConfig() const;
CV_WRAP int halfFPConfig() const;

bool endianLittle() const;
bool errorCorrectionSupport() const;
CV_WRAP bool endianLittle() const;
CV_WRAP bool errorCorrectionSupport() const;

enum
{
EXEC_KERNEL=(1 << 0),
EXEC_NATIVE_KERNEL=(1 << 1)
};
int executionCapabilities() const;
CV_WRAP int executionCapabilities() const;

size_t globalMemCacheSize() const;
CV_WRAP size_t globalMemCacheSize() const;

enum
{
NO_CACHE=0,
READ_ONLY_CACHE=1,
READ_WRITE_CACHE=2
};
int globalMemCacheType() const;
int globalMemCacheLineSize() const;
size_t globalMemSize() const;
CV_WRAP int globalMemCacheType() const;
CV_WRAP int globalMemCacheLineSize() const;
CV_WRAP size_t globalMemSize() const;

size_t localMemSize() const;
CV_WRAP size_t localMemSize() const;
enum
{
NO_LOCAL_MEM=0,
LOCAL_IS_LOCAL=1,
LOCAL_IS_GLOBAL=2
};
int localMemType() const;
bool hostUnifiedMemory() const;
CV_WRAP int localMemType() const;
CV_WRAP bool hostUnifiedMemory() const;

bool imageSupport() const;
CV_WRAP bool imageSupport() const;

bool imageFromBufferSupport() const;
CV_WRAP bool imageFromBufferSupport() const;
uint imagePitchAlignment() const;
uint imageBaseAddressAlignment() const;

/// deprecated, use isExtensionSupported() method (probably with "cl_khr_subgroups" value)
bool intelSubgroupsSupport() const;
CV_WRAP bool intelSubgroupsSupport() const;

size_t image2DMaxWidth() const;
size_t image2DMaxHeight() const;
CV_WRAP size_t image2DMaxWidth() const;
CV_WRAP size_t image2DMaxHeight() const;

size_t image3DMaxWidth() const;
size_t image3DMaxHeight() const;
size_t image3DMaxDepth() const;
CV_WRAP size_t image3DMaxWidth() const;
CV_WRAP size_t image3DMaxHeight() const;
CV_WRAP size_t image3DMaxDepth() const;

size_t imageMaxBufferSize() const;
size_t imageMaxArraySize() const;
CV_WRAP size_t imageMaxBufferSize() const;
CV_WRAP size_t imageMaxArraySize() const;

enum
{
Expand All @@ -181,53 +181,53 @@ class CV_EXPORTS Device
VENDOR_INTEL=2,
VENDOR_NVIDIA=3
};
int vendorID() const;
CV_WRAP int vendorID() const;
// FIXIT
// dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform.
// This method should use platform name instead of vendor name.
// After fix restore code in arithm.cpp: ocl_compare()
inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }
inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; }
CV_WRAP inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
CV_WRAP inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }
CV_WRAP inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; }

int maxClockFrequency() const;
int maxComputeUnits() const;
int maxConstantArgs() const;
size_t maxConstantBufferSize() const;
CV_WRAP int maxClockFrequency() const;
CV_WRAP int maxComputeUnits() const;
CV_WRAP int maxConstantArgs() const;
CV_WRAP size_t maxConstantBufferSize() const;

size_t maxMemAllocSize() const;
size_t maxParameterSize() const;
CV_WRAP size_t maxMemAllocSize() const;
CV_WRAP size_t maxParameterSize() const;

int maxReadImageArgs() const;
int maxWriteImageArgs() const;
int maxSamplers() const;
CV_WRAP int maxReadImageArgs() const;
CV_WRAP int maxWriteImageArgs() const;
CV_WRAP int maxSamplers() const;

size_t maxWorkGroupSize() const;
int maxWorkItemDims() const;
CV_WRAP size_t maxWorkGroupSize() const;
CV_WRAP int maxWorkItemDims() const;
void maxWorkItemSizes(size_t*) const;

int memBaseAddrAlign() const;
CV_WRAP int memBaseAddrAlign() const;

int nativeVectorWidthChar() const;
int nativeVectorWidthShort() const;
int nativeVectorWidthInt() const;
int nativeVectorWidthLong() const;
int nativeVectorWidthFloat() const;
int nativeVectorWidthDouble() const;
int nativeVectorWidthHalf() const;
CV_WRAP int nativeVectorWidthChar() const;
CV_WRAP int nativeVectorWidthShort() const;
CV_WRAP int nativeVectorWidthInt() const;
CV_WRAP int nativeVectorWidthLong() const;
CV_WRAP int nativeVectorWidthFloat() const;
CV_WRAP int nativeVectorWidthDouble() const;
CV_WRAP int nativeVectorWidthHalf() const;

int preferredVectorWidthChar() const;
int preferredVectorWidthShort() const;
int preferredVectorWidthInt() const;
int preferredVectorWidthLong() const;
int preferredVectorWidthFloat() const;
int preferredVectorWidthDouble() const;
int preferredVectorWidthHalf() const;
CV_WRAP int preferredVectorWidthChar() const;
CV_WRAP int preferredVectorWidthShort() const;
CV_WRAP int preferredVectorWidthInt() const;
CV_WRAP int preferredVectorWidthLong() const;
CV_WRAP int preferredVectorWidthFloat() const;
CV_WRAP int preferredVectorWidthDouble() const;
CV_WRAP int preferredVectorWidthHalf() const;

size_t printfBufferSize() const;
size_t profilingTimerResolution() const;
CV_WRAP size_t printfBufferSize() const;
CV_WRAP size_t profilingTimerResolution() const;

static const Device& getDefault();
CV_WRAP static const Device& getDefault();

protected:
struct Impl;
Expand Down
2 changes: 1 addition & 1 deletion modules/core/src/ocl.cpp
Expand Up @@ -3078,7 +3078,7 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
CV_Assert( val > 0 );
total *= _globalsize[i];
if (_globalsize[i] == 1)
if (_globalsize[i] == 1 && !_localsize)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dkurt What test case requires this change?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alalek, With gemm for small matrices.

// Test for non-Intel GPUs to check CL_INVALID_WORK_GROUP_SIZE when localsize > globalsize
OCL_TEST(Gemm, small)
{
UMat A(2, 3, CV_32F), B(4, 3, CV_32F), uC(2, 4, CV_32F);
Mat C(2, 4, CV_32F);
randu(A, -1, 1);
randu(B, -1, 1);
OCL_OFF(cv::gemm(A, B, 1, noArray(), 0, C, GEMM_2_T));
OCL_ON(cv::gemm(A, B, 1, noArray(), 0, uC, GEMM_2_T));
EXPECT_LE(cvtest::norm(C, uC, cv::NORM_INF), 1e-5);
}

val = 1;
globalsize[i] = divUp(_globalsize[i], (unsigned int)val) * val;
}
Expand Down
15 changes: 15 additions & 0 deletions modules/core/test/ocl/test_gemm.cpp
Expand Up @@ -145,6 +145,21 @@ OCL_INSTANTIATE_TEST_CASE_P(Core, Gemm, ::testing::Combine(
testing::Values(CV_32FC1, CV_32FC2, CV_64FC1, CV_64FC2),
Bool(), Bool(), Bool(), Bool()));

// Small-matrix GEMM regression test, intended for non-Intel GPUs: checks for
// CL_INVALID_WORK_GROUP_SIZE, which shows up when localsize > globalsize.
OCL_TEST(Gemm, small)
{
    // A is rows x inner and B is cols x inner; B is passed with GEMM_2_T,
    // so both outputs are allocated as rows x cols.
    const int rows = 2;
    const int inner = 3;
    const int cols = 4;

    UMat A(rows, inner, CV_32F);
    UMat B(cols, inner, CV_32F);
    UMat uC(rows, cols, CV_32F);
    Mat C(rows, cols, CV_32F);

    // Fill both operands with uniform random values in [-1, 1).
    randu(A, -1, 1);
    randu(B, -1, 1);

    // Same product computed twice: into C under OCL_OFF, into uC under OCL_ON.
    OCL_OFF(cv::gemm(A, B, 1, noArray(), 0, C, GEMM_2_T));
    OCL_ON(cv::gemm(A, B, 1, noArray(), 0, uC, GEMM_2_T));

    // The two results must agree to within 1e-5 in the infinity norm.
    EXPECT_LE(cvtest::norm(C, uC, cv::NORM_INF), 1e-5);
}

} } // namespace opencv_test::ocl

#endif // HAVE_OPENCL
20 changes: 15 additions & 5 deletions modules/dnn/src/dnn.cpp
Expand Up @@ -1078,12 +1078,22 @@ struct Net::Impl
}
#else
{
if (!DNN_OPENCL_ALLOW_ALL_DEVICES
&& !(ocl::Device::getDefault().isIntel() && ocl::Device::getDefault().type() == ocl::Device::TYPE_GPU) // Current implementation is only valid for Intel GPU (#11494)
)
if (!DNN_OPENCL_ALLOW_ALL_DEVICES)
{
CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with Intel GPUs only), switching to CPU.");
preferableTarget = DNN_TARGET_CPU;
// Current implementation is only valid for GPU (#11494)
if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU)
{
CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU.");
preferableTarget = DNN_TARGET_CPU;
}
else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
{
CV_LOG_WARNING(NULL,
"DNN: OpenCL target with fp16 precision is not supported "
"with current OpenCL device (tested with Intel GPUs only), "
"switching to OpenCL with fp32 precision.");
preferableTarget = DNN_TARGET_OPENCL;
}
}
}
#endif
Expand Down
3 changes: 1 addition & 2 deletions modules/dnn/src/layers/batch_norm_layer.cpp
Expand Up @@ -230,8 +230,7 @@ class BatchNormLayerImpl CV_FINAL : public BatchNormLayer
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());

CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))

if (inputs_arr.depth() == CV_16S)
Expand Down
9 changes: 1 addition & 8 deletions modules/dnn/src/layers/blank_layer.cpp
Expand Up @@ -95,16 +95,9 @@ class BlankLayerImpl CV_FINAL : public BlankLayer
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());

CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))

if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}

std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
Expand Down
9 changes: 1 addition & 8 deletions modules/dnn/src/layers/concat_layer.cpp
Expand Up @@ -237,16 +237,9 @@ class ConcatLayerImpl CV_FINAL : public ConcatLayer
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());

CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))

if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}

std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
Expand Down
3 changes: 1 addition & 2 deletions modules/dnn/src/layers/convolution_layer.cpp
Expand Up @@ -1529,8 +1529,7 @@ class DeConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());

CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr));

if (inputs_arr.depth() == CV_16S)
Expand Down
6 changes: 0 additions & 6 deletions modules/dnn/src/layers/crop_layer.cpp
Expand Up @@ -137,12 +137,6 @@ class CropLayerImpl CV_FINAL : public CropLayer
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());

if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}

std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
Expand Down
3 changes: 1 addition & 2 deletions modules/dnn/src/layers/detection_output_layer.cpp
Expand Up @@ -415,8 +415,7 @@ class DetectionOutputLayerImpl CV_FINAL : public DetectionOutputLayer

if (_bboxesNormalized)
{
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
}
if (inputs_arr.depth() == CV_16S)
Expand Down
3 changes: 1 addition & 2 deletions modules/dnn/src/layers/eltwise_layer.cpp
Expand Up @@ -354,8 +354,7 @@ class EltwiseLayerImpl CV_FINAL : public EltwiseLayer
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());

CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))

if (inputs_arr.depth() == CV_16S)
Expand Down