Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
fengyuentau committed May 10, 2024
1 parent ba65d2e commit f06b90c
Show file tree
Hide file tree
Showing 13 changed files with 639 additions and 2 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,9 @@ OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" ON
OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON
VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT XROS AND NOT WINRT
VERIFY HAVE_CLAMDBLAS)
OCV_OPTION(WITH_CLBLAST "Include CLBlast library support" ON
VISIBLE_IF TRUE
VERIFY HAVE_CLBLAST)
OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON
VISIBLE_IF WIN32 AND NOT WINRT
VERIFY HAVE_DIRECTX)
Expand Down Expand Up @@ -1820,6 +1823,7 @@ if(WITH_OPENCL OR HAVE_OPENCL)
IF HAVE_OPENCL_SVM THEN "SVM"
IF HAVE_CLAMDFFT THEN "AMDFFT"
IF HAVE_CLAMDBLAS THEN "AMDBLAS"
IF HAVE_CLBLAST THEN "CLBlast"
IF HAVE_OPENCL_D3D11_NV THEN "NVD3D11"
IF HAVE_VA_INTEL THEN "INTELVA"
ELSE "no extra features")
Expand Down
13 changes: 13 additions & 0 deletions cmake/OpenCVDetectOpenCL.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,19 @@ if(OPENCL_FOUND)
endif()
endif()

# Optional CLBlast support: look for the CLBlast C API header (clblast_c.h).
# The CLBLAST_INSTALL_DIR environment variable may be used as a search hint;
# the header is expected in its "include" sub-directory.
if(WITH_CLBLAST)
find_path(CLBLAST_INCLUDE_DIR
NAMES clblast_c.h
HINTS ENV CLBLAST_INSTALL_DIR
PATH_SUFFIXES include
DOC "CLBlast include directory")

# Only the header is located here; this block does not search for or link a library.
# When found, enable HAVE_CLBLAST and expose the header through the OpenCL include dirs.
if(CLBLAST_INCLUDE_DIR)
set(HAVE_CLBLAST 1)
list(APPEND OPENCL_INCLUDE_DIRS "${CLBLAST_INCLUDE_DIR}")
endif()
endif()

# check WITH_OPENCL_D3D11_NV is located in OpenCVDetectDirectX.cmake file

if(WITH_VA_INTEL AND HAVE_VA)
Expand Down
3 changes: 3 additions & 0 deletions cmake/templates/cvconfig.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
/* Compile for 'virtual' NVIDIA PTX architectures */
#define CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX}"

/* CLBlast: The tuned OpenCL BLAS library https://github.com/CNugteren/CLBlast */
#cmakedefine HAVE_CLBLAST

/* AMD's Basic Linear Algebra Subprograms Library*/
#cmakedefine HAVE_CLAMDBLAS

Expand Down
1 change: 1 addition & 0 deletions modules/core/include/opencv2/core/ocl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ namespace cv { namespace ocl {

CV_EXPORTS_W bool haveOpenCL();
CV_EXPORTS_W bool useOpenCL();
/** @brief Returns true if OpenCV was built with CLBlast support (HAVE_CLBLAST defined at build time). */
CV_EXPORTS_W bool haveClblast();
CV_EXPORTS_W bool haveAmdBlas();
CV_EXPORTS_W bool haveAmdFft();
CV_EXPORTS_W void setUseOpenCL(bool flag);
Expand Down
2 changes: 1 addition & 1 deletion modules/core/include/opencv2/core/opencl/ocl_defs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ try \
} \
catch (const cv::Exception& e) \
{ \
CV_UNUSED(e); /* TODO: Add some logging here */ \
printf("%s\n", e.msg.c_str()); \
}
#endif

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_CORE_OCL_RUNTIME_CLBLAST_HPP
#define OPENCV_CORE_OCL_RUNTIME_CLBLAST_HPP

#ifdef HAVE_CLBLAST

#include "opencl_core.hpp"

#include "autogenerated/opencl_clblast.hpp"

#endif // HAVE_CLBLAST

#endif // OPENCV_CORE_OCL_RUNTIME_CLBLAST_HPP
138 changes: 138 additions & 0 deletions modules/core/src/matmul.dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include <opencv2/core/utils/logger.hpp>

#include "opencl_kernels_core.hpp"
#include "opencv2/core/opencl/runtime/opencl_clblast.hpp"
#include "opencv2/core/opencl/runtime/opencl_clblas.hpp"
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
#include "intel_gpu_gemm.inl.hpp"
Expand All @@ -59,6 +60,138 @@ namespace cv
* GEMM *
\****************************************************************************************/

#ifdef HAVE_CLBLAST

// matD = alpha * matA * matB + beta * matC
static bool ocl_gemm_clblast(InputArray matA, InputArray matB, double alpha,
InputArray matC, double beta, OutputArray matD, int flags) {
int type = matA.type(), depth = CV_MAT_DEPTH(type), esz = CV_ELEM_SIZE(type);
bool haveC = matC.kind() != cv::_InputArray::NONE;
CV_CheckEQ(matB.type(), type, "Type of matB does not match the type of matA");
if (haveC) {
CV_CheckEQ(matC.type(), type, "Type of matC does not match the type of matA");
}
auto &device = ocl::Device::getDefault();
bool SupportFP64 = device.hasFP64(),
SupportFP16 = device.hasFP16();
if (!SupportFP64 && depth == CV_64F) {
return false;
}
if (!SupportFP16 && depth == CV_16F) {
return false;
}

Size sizeA = matA.size(), sizeB = matB.size(), sizeC = haveC ? matC.size() : Size(0, 0);
bool transA_ = (flags & GEMM_1_T) != 0, transB_ = (flags & GEMM_2_T) != 0, transC_ = (flags & GEMM_3_T) != 0;

if (transA_) {
sizeA = Size(sizeA.height, sizeA.width);
}
if (transB_) {
sizeB = Size(sizeB.height, sizeB.width);
}
if (haveC && transC_) {
sizeC = Size(sizeC.height, sizeC.width);
}

Size sizeD(sizeB.width, sizeA.height);

CV_CheckEQ(sizeA.width, sizeB.height, "Invalid dimension for matrix multiplification");
if (haveC) { // TODO: support matC broadcasting
CV_CheckTrue(sizeC == sizeD, "Shape of matC is not equal to the shape of matD");
}

matD.create(sizeD, type);
if (matA.offset() % esz != 0 || matA.step() % esz != 0 ||
matB.offset() % esz != 0 || matB.step() % esz != 0 ||
(haveC && (matC.offset() % esz != 0 || matC.step() % esz != 0)) )
return false;

UMat A = matA.getUMat(), B = matB.getUMat(), D = matD.getUMat();
if (!ocl::internal::isCLBuffer(A) || !ocl::internal::isCLBuffer(B) || !ocl::internal::isCLBuffer(D)) {
return false;
}
if (haveC) {
UMat C = matC.getUMat();
if (!ocl::internal::isCLBuffer(C))
return false;

if (transC_) {
transpose(matC, D);
} else {
matC.copyTo(D);
}
} else {
D.setTo(Scalar::all(0));
}

int M = sizeD.height, N = sizeD.width, K = sizeA.width;
int lda = static_cast<int>(A.step / esz),
ldb = static_cast<int>(B.step / esz),
ldc = static_cast<int>(D.step / esz);
int offsetA = static_cast<int>(A.offset / esz),
offsetB = static_cast<int>(B.offset / esz),
offsetC = static_cast<int>(D.offset / esz);

cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
CLBlastTranspose transA = transA_ ? CLBlastTransposeYes : CLBlastTransposeNo,
transB = transB_ ? CLBlastTransposeYes : CLBlastTransposeNo;
CLBlastLayout layout = CLBlastLayoutRowMajor;
CLBlastStatusCode status = CLBlastUnknownError;

if (type == CV_32FC1) {
status = CLBlastSgemm(layout, transA, transB, M, N, K,
(float)alpha,
(const cl_mem)A.handle(ACCESS_READ), offsetA, lda,
(const cl_mem)B.handle(ACCESS_READ), offsetB, ldb,
(float)beta,
(cl_mem)D.handle(ACCESS_RW), offsetC, ldc,
&queue, NULL);
} else if (type == CV_64FC1) {
status = CLBlastDgemm(layout, transA, transB, M, N, K,
alpha,
(const cl_mem)A.handle(ACCESS_READ), offsetA, lda,
(const cl_mem)B.handle(ACCESS_READ), offsetB, ldb,
beta,
(cl_mem)D.handle(ACCESS_RW), offsetC, ldc,
&queue, NULL);
} else if (type == CV_32FC2) {
cl_float2 alpha2{{(cl_float)alpha, 0.f}};
cl_float2 beta2{{(cl_float)beta, 0.f}};
status = CLBlastCgemm(layout, transA, transB, M, N, K,
alpha2,
(const cl_mem)A.handle(ACCESS_READ), offsetA, lda,
(const cl_mem)B.handle(ACCESS_READ), offsetB, ldb,
beta2,
(cl_mem)D.handle(ACCESS_RW), offsetC, ldc,
&queue, NULL);
} else if (type == CV_64FC2) {
cl_double2 alpha2{{alpha, 0}};
cl_double2 beta2{{beta, 0}};
status = CLBlastZgemm(layout, transA, transB, M, N, K,
alpha2,
(const cl_mem)A.handle(ACCESS_READ), offsetA, lda,
(const cl_mem)B.handle(ACCESS_READ), offsetB, ldb,
beta2,
(cl_mem)D.handle(ACCESS_RW), offsetC, ldc,
&queue, NULL);
} else if (type == CV_16FC1) {
status = CLBlastHgemm(layout, transA, transB, M, N, K,
(cl_half)alpha,
(const cl_mem)A.handle(ACCESS_READ), offsetA, lda,
(const cl_mem)B.handle(ACCESS_READ), offsetB, ldb,
(cl_half)beta,
(cl_mem)D.handle(ACCESS_RW), offsetC, ldc,
&queue, NULL);
} else {
CV_Error(Error::StsUnsupportedFormat, "");
}

return status == CLBlastSuccess;
}

#endif

#ifdef HAVE_CLAMDBLAS

static bool ocl_gemm_amdblas( InputArray matA, InputArray matB, double alpha,
Expand Down Expand Up @@ -338,6 +471,11 @@ void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t s
void gemm(InputArray matA, InputArray matB, double alpha,
InputArray matC, double beta, OutputArray _matD, int flags)
{
#ifdef HAVE_CLBLAST
CV_OCL_RUN(ocl::haveClblast() && matA.dims() <= 2 && matB.dims() <= 2 && matC.dims() <= 2 && _matD.isUMat(),
ocl_gemm_clblast(matA, matB, alpha, matC, beta, _matD, flags))
#endif

#ifdef HAVE_CLAMDBLAS
CV_OCL_RUN(ocl::haveAmdBlas() && matA.dims() <= 2 && matB.dims() <= 2 && matC.dims() <= 2 && _matD.isUMat() &&
matA.cols() > 20 && matA.rows() > 20 && matB.cols() > 20, // since it works incorrect for small sizes
Expand Down
10 changes: 9 additions & 1 deletion modules/core/src/ocl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1248,7 +1248,15 @@ void setUseOpenCL(bool flag)
}
}


// Reports whether this build of OpenCV was compiled with CLBlast support.
// The result is fixed at compile time by the HAVE_CLBLAST macro.
bool haveClblast()
{
#ifdef HAVE_CLBLAST
    return true;
#else
    return false;
#endif
}

#ifdef HAVE_CLAMDBLAS

Expand Down
Loading

0 comments on commit f06b90c

Please sign in to comment.