Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix dynamic loading of clBLAS and clFFT (formerly, clAmdBlas and clAmdFft) #20203

Merged
merged 6 commits on Jun 7, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view

Large diffs are not rendered by default.

Large diffs are not rendered by default.

46 changes: 24 additions & 22 deletions modules/core/src/dxt.cpp
Expand Up @@ -2420,7 +2420,7 @@ namespace cv {

#define CLAMDDFT_Assert(func) \
{ \
clAmdFftStatus s = (func); \
clfftStatus s = (func); \
CV_Assert(s == CLFFT_SUCCESS); \
}

Expand All @@ -2437,8 +2437,8 @@ class PlanCache
bool dft_scale = (flags & DFT_SCALE) != 0;
bool dft_rows = (flags & DFT_ROWS) != 0;

clAmdFftLayout inLayout = CLFFT_REAL, outLayout = CLFFT_REAL;
clAmdFftDim dim = dft_size.height == 1 || dft_rows ? CLFFT_1D : CLFFT_2D;
clfftLayout inLayout = CLFFT_REAL, outLayout = CLFFT_REAL;
clfftDim dim = dft_size.height == 1 || dft_rows ? CLFFT_1D : CLFFT_2D;

size_t batchSize = dft_rows ? dft_size.height : 1;
size_t clLengthsIn[3] = { (size_t)dft_size.width, dft_rows ? 1 : (size_t)dft_size.height, 1 };
Expand Down Expand Up @@ -2475,28 +2475,30 @@ class PlanCache
clStridesIn[2] = dft_rows ? clStridesIn[1] : dft_size.width * clStridesIn[1];
clStridesOut[2] = dft_rows ? clStridesOut[1] : dft_size.width * clStridesOut[1];

CLAMDDFT_Assert(clAmdFftCreateDefaultPlan(&plHandle, (cl_context)ocl::Context::getDefault().ptr(), dim, clLengthsIn))
CLAMDDFT_Assert(clfftCreateDefaultPlan(&plHandle, (cl_context)ocl::Context::getDefault().ptr(), dim, clLengthsIn))

// setting plan properties
CLAMDDFT_Assert(clAmdFftSetPlanPrecision(plHandle, doubleFP ? CLFFT_DOUBLE : CLFFT_SINGLE));
CLAMDDFT_Assert(clAmdFftSetResultLocation(plHandle, inplace ? CLFFT_INPLACE : CLFFT_OUTOFPLACE))
CLAMDDFT_Assert(clAmdFftSetLayout(plHandle, inLayout, outLayout))
CLAMDDFT_Assert(clAmdFftSetPlanBatchSize(plHandle, batchSize))
CLAMDDFT_Assert(clAmdFftSetPlanInStride(plHandle, dim, clStridesIn))
CLAMDDFT_Assert(clAmdFftSetPlanOutStride(plHandle, dim, clStridesOut))
CLAMDDFT_Assert(clAmdFftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim]))
CLAMDDFT_Assert(clfftSetPlanPrecision(plHandle, doubleFP ? CLFFT_DOUBLE : CLFFT_SINGLE));
CLAMDDFT_Assert(clfftSetResultLocation(plHandle, inplace ? CLFFT_INPLACE : CLFFT_OUTOFPLACE))
CLAMDDFT_Assert(clfftSetLayout(plHandle, inLayout, outLayout))
CLAMDDFT_Assert(clfftSetPlanBatchSize(plHandle, batchSize))
CLAMDDFT_Assert(clfftSetPlanInStride(plHandle, dim, clStridesIn))
CLAMDDFT_Assert(clfftSetPlanOutStride(plHandle, dim, clStridesOut))
CLAMDDFT_Assert(clfftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim]))

float scale = dft_scale ? 1.0f / (dft_rows ? dft_size.width : dft_size.area()) : 1.0f;
CLAMDDFT_Assert(clAmdFftSetPlanScale(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale))
CLAMDDFT_Assert(clfftSetPlanScale(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale))

// ready to bake
cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
CLAMDDFT_Assert(clAmdFftBakePlan(plHandle, 1, &queue, NULL, NULL))
CLAMDDFT_Assert(clfftBakePlan(plHandle, 1, &queue, NULL, NULL))
}

~FftPlan()
{
// clAmdFftDestroyPlan(&plHandle);
// Do not tear down clFFT.
// The user application may still use clFFT even after OpenCV is unloaded.
/*clfftDestroyPlan(&plHandle);*/
}

friend class PlanCache;
Expand All @@ -2510,7 +2512,7 @@ class PlanCache
FftType fftType;

cl_context context;
clAmdFftPlanHandle plHandle;
clfftPlanHandle plHandle;
};

public:
Expand All @@ -2519,8 +2521,8 @@ class PlanCache
CV_SINGLETON_LAZY_INIT_REF(PlanCache, new PlanCache())
}

clAmdFftPlanHandle getPlanHandle(const Size & dft_size, int src_step, int dst_step, bool doubleFP,
bool inplace, int flags, FftType fftType)
clfftPlanHandle getPlanHandle(const Size & dft_size, int src_step, int dst_step, bool doubleFP,
bool inplace, int flags, FftType fftType)
{
cl_context currentContext = (cl_context)ocl::Context::getDefault().ptr();

Expand Down Expand Up @@ -2620,13 +2622,13 @@ static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags)
UMat src = _src.getUMat(), dst = _dst.getUMat();
bool inplace = src.u == dst.u;

clAmdFftPlanHandle plHandle = PlanCache::getInstance().
clfftPlanHandle plHandle = PlanCache::getInstance().
getPlanHandle(ssize, (int)src.step, (int)dst.step,
depth == CV_64F, inplace, flags, fftType);

// get the bufferSize
size_t bufferSize = 0;
CLAMDDFT_Assert(clAmdFftGetTmpBufSize(plHandle, &bufferSize))
CLAMDDFT_Assert(clfftGetTmpBufSize(plHandle, &bufferSize))
UMat tmpBuffer(1, (int)bufferSize, CV_8UC1);

cl_mem srcarg = (cl_mem)src.handle(ACCESS_READ);
Expand All @@ -2635,9 +2637,9 @@ static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags)
cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
cl_event e = 0;

CLAMDDFT_Assert(clAmdFftEnqueueTransform(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
1, &queue, 0, NULL, &e,
&srcarg, &dstarg, (cl_mem)tmpBuffer.handle(ACCESS_RW)))
CLAMDDFT_Assert(clfftEnqueueTransform(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
1, &queue, 0, NULL, &e,
&srcarg, &dstarg, (cl_mem)tmpBuffer.handle(ACCESS_RW)))

tmpBuffer.addref();
clSetEventCallback(e, CL_COMPLETE, oclCleanupCallback, tmpBuffer.u);
Expand Down
50 changes: 25 additions & 25 deletions modules/core/src/matmul.dispatch.cpp
Expand Up @@ -106,47 +106,47 @@ static bool ocl_gemm_amdblas( InputArray matA, InputArray matB, double alpha,
int offa = (int)A.offset / esz, offb = (int)B.offset / esz, offc = (int)D.offset / esz;

cl_command_queue clq = (cl_command_queue)ocl::Queue::getDefault().ptr();
clAmdBlasTranspose transA = atrans ? clAmdBlasTrans : clAmdBlasNoTrans;
clAmdBlasTranspose transB = btrans ? clAmdBlasTrans : clAmdBlasNoTrans;
clAmdBlasOrder order = clAmdBlasRowMajor;
clAmdBlasStatus status = clAmdBlasSuccess;
clblasTranspose transA = atrans ? clblasTrans : clblasNoTrans;
clblasTranspose transB = btrans ? clblasTrans : clblasNoTrans;
clblasOrder order = clblasRowMajor;
clblasStatus status = clblasSuccess;

if (type == CV_32FC1)
status = clAmdBlasSgemmEx(order, transA, transB, M, N, K,
(cl_float)alpha, (const cl_mem)A.handle(ACCESS_READ), offa, lda,
(const cl_mem)B.handle(ACCESS_READ), offb, ldb,
(cl_float)beta, (cl_mem)D.handle(ACCESS_RW), offc, ldc,
1, &clq, 0, NULL, NULL);
status = clblasSgemm(order, transA, transB, M, N, K,
(cl_float)alpha, (const cl_mem)A.handle(ACCESS_READ), offa, lda,
(const cl_mem)B.handle(ACCESS_READ), offb, ldb,
(cl_float)beta, (cl_mem)D.handle(ACCESS_RW), offc, ldc,
1, &clq, 0, NULL, NULL);
else if (type == CV_64FC1)
status = clAmdBlasDgemmEx(order, transA, transB, M, N, K,
alpha, (const cl_mem)A.handle(ACCESS_READ), offa, lda,
(const cl_mem)B.handle(ACCESS_READ), offb, ldb,
beta, (cl_mem)D.handle(ACCESS_RW), offc, ldc,
1, &clq, 0, NULL, NULL);
status = clblasDgemm(order, transA, transB, M, N, K,
alpha, (const cl_mem)A.handle(ACCESS_READ), offa, lda,
(const cl_mem)B.handle(ACCESS_READ), offb, ldb,
beta, (cl_mem)D.handle(ACCESS_RW), offc, ldc,
1, &clq, 0, NULL, NULL);
else if (type == CV_32FC2)
{
cl_float2 alpha_2 = { { (cl_float)alpha, 0 } };
cl_float2 beta_2 = { { (cl_float)beta, 0 } };
status = clAmdBlasCgemmEx(order, transA, transB, M, N, K,
alpha_2, (const cl_mem)A.handle(ACCESS_READ), offa, lda,
(const cl_mem)B.handle(ACCESS_READ), offb, ldb,
beta_2, (cl_mem)D.handle(ACCESS_RW), offc, ldc,
1, &clq, 0, NULL, NULL);
status = clblasCgemm(order, transA, transB, M, N, K,
alpha_2, (const cl_mem)A.handle(ACCESS_READ), offa, lda,
(const cl_mem)B.handle(ACCESS_READ), offb, ldb,
beta_2, (cl_mem)D.handle(ACCESS_RW), offc, ldc,
1, &clq, 0, NULL, NULL);
}
else if (type == CV_64FC2)
{
cl_double2 alpha_2 = { { alpha, 0 } };
cl_double2 beta_2 = { { beta, 0 } };
status = clAmdBlasZgemmEx(order, transA, transB, M, N, K,
alpha_2, (const cl_mem)A.handle(ACCESS_READ), offa, lda,
(const cl_mem)B.handle(ACCESS_READ), offb, ldb,
beta_2, (cl_mem)D.handle(ACCESS_RW), offc, ldc,
1, &clq, 0, NULL, NULL);
status = clblasZgemm(order, transA, transB, M, N, K,
alpha_2, (const cl_mem)A.handle(ACCESS_READ), offa, lda,
(const cl_mem)B.handle(ACCESS_READ), offb, ldb,
beta_2, (cl_mem)D.handle(ACCESS_RW), offc, ldc,
1, &clq, 0, NULL, NULL);
}
else
CV_Error(Error::StsUnsupportedFormat, "");

return status == clAmdBlasSuccess;
return status == clblasSuccess;
}

#endif
Expand Down
26 changes: 15 additions & 11 deletions modules/core/src/ocl.cpp
Expand Up @@ -1254,11 +1254,13 @@ class AmdBlasHelper

~AmdBlasHelper()
{
try
// Do not tear down clBLAS.
// The user application may still use clBLAS even after OpenCV is unloaded.
/*try
{
clAmdBlasTeardown();
clblasTeardown();
}
catch (...) { }
catch (...) { }*/
}

protected:
Expand All @@ -1274,7 +1276,7 @@ class AmdBlasHelper
{
try
{
g_isAmdBlasAvailable = clAmdBlasSetup() == clAmdBlasSuccess;
g_isAmdBlasAvailable = clblasSetup() == clblasSuccess;
}
catch (...)
{
Expand Down Expand Up @@ -1328,11 +1330,13 @@ class AmdFftHelper

~AmdFftHelper()
{
try
// Do not tear down clFFT.
// The user application may still use clFFT even after OpenCV is unloaded.
/*try
{
// clAmdFftTeardown();
clfftTeardown();
}
catch (...) { }
catch (...) { }*/
}

protected:
Expand All @@ -1349,10 +1353,10 @@ class AmdFftHelper
try
{
cl_uint major, minor, patch;
CV_Assert(clAmdFftInitSetupData(&setupData) == CLFFT_SUCCESS);
CV_Assert(clfftInitSetupData(&setupData) == CLFFT_SUCCESS);

// it throws exception in case AmdFft binaries are not found
CV_Assert(clAmdFftGetVersion(&major, &minor, &patch) == CLFFT_SUCCESS);
CV_Assert(clfftGetVersion(&major, &minor, &patch) == CLFFT_SUCCESS);
g_isAmdFftAvailable = true;
}
catch (const Exception &)
Expand All @@ -1369,12 +1373,12 @@ class AmdFftHelper
}

private:
static clAmdFftSetupData setupData;
static clfftSetupData setupData;
static bool g_isAmdFftInitialized;
static bool g_isAmdFftAvailable;
};

clAmdFftSetupData AmdFftHelper::setupData;
clfftSetupData AmdFftHelper::setupData;
bool AmdFftHelper::g_isAmdFftAvailable = false;
bool AmdFftHelper::g_isAmdFftInitialized = false;

Expand Down