Skip to content

Commit

Permalink
HAL added for sub8x32f
Browse files Browse the repository at this point in the history
  • Loading branch information
Rostislav Vasilikhin committed May 17, 2024
1 parent c3f9d4e commit eb783fe
Showing 1 changed file with 44 additions and 39 deletions.
83 changes: 44 additions & 39 deletions modules/core/src/arithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
}

#endif

typedef int (*ExtendedTypeFunc)(const uchar* src1, size_t step1,
const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height,
Expand Down Expand Up @@ -866,7 +867,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
const uchar* extSptr2 = sptr2;
// Exchange the two ext source pointers when swapped12 is set.
// (Swapping extSptr1 with itself, as before, had no effect at all.)
if( swapped12 )
    std::swap(extSptr1, extSptr2);

// try to perform operation with conversion in one call
// if fail, use converter functions
uchar* opconverted = haveMask ? maskbuf : dptr;
Expand Down Expand Up @@ -898,7 +899,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
copymask(opconverted, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz);
ptrs[2] += bsz;
}

ptrs[0] += bsz*esz1; ptrs[1] += bsz*dsz;
}
}
Expand All @@ -919,35 +920,33 @@ static BinaryFuncC* getAddTab()
return addTab;
}

static BinaryFuncC* getSubTab(bool extendSub);

static void sub8u32fWrapper(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height, void* )
static int sub8u32fWrapper(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height, void* )
{
CALL_HAL(sub8u32f, cv_hal_sub8u32f, src1, step1, src2, step2, (float*)dst, step, width, height);

// fallback if HAL does not work
Mat src1Arr(height, width, CV_8UC1, const_cast<uchar*>(src1), step1);
Mat src2Arr(height, width, CV_8UC1, const_cast<uchar*>(src2), step2);
Mat dstArr(height, width, CV_32FC1, dst, step);
arithm_op(src1Arr, src2Arr, dstArr, noArray(), CV_32F, getSubTab(false),
/* muldiv */ false, 0, OCL_OP_SUB, /* skipConversion */ false);
int res = cv_hal_sub8u32f(src1, step1, src2, step2, (float *)dst, step, width, height);
if (res == CV_HAL_ERROR_OK || res == CV_HAL_ERROR_NOT_IMPLEMENTED)
return res;
else
{
CV_Error_(cv::Error::StsInternal, ("HAL implementation sub8u32f ==> " CVAUX_STR(cv_hal_sub8u32f)
" returned %d (0x%08x)", res, res));
}
}

static void sub8s32fWrapper(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height, void* )
static int sub8s32fWrapper(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height, void* )
{
CALL_HAL(sub8s32f, cv_hal_sub8s32f, (schar*)src1, step1, (schar*)src2, step2, (float*)dst, step, width, height);

// fallback if HAL does not work
Mat src1Arr(height, width, CV_8SC1, const_cast<uchar*>(src1), step1);
Mat src2Arr(height, width, CV_8SC1, const_cast<uchar*>(src2), step2);
Mat dstArr(height, width, CV_32FC1, dst, step);
arithm_op(src1Arr, src2Arr, dstArr, noArray(), CV_32F, getSubTab(false),
/* muldiv */ false, 0, OCL_OP_SUB, /* skipConversion */ false);
int res = cv_hal_sub8s32f((schar*)src1, step1, (schar*)src2, step2, (float *)dst, step, width, height);
if (res == CV_HAL_ERROR_OK || res == CV_HAL_ERROR_NOT_IMPLEMENTED)
return res;
else
{
CV_Error_(cv::Error::StsInternal, ("HAL implementation sub8s32f ==> " CVAUX_STR(cv_hal_sub8s32f)
" returned %d (0x%08x)", res, res));
}
}

static BinaryFuncC* getSubTab(bool extendSub)
static BinaryFuncC* getSubTab()
{
static BinaryFuncC subTab[CV_DEPTH_MAX] =
{
Expand All @@ -958,12 +957,23 @@ static BinaryFuncC* getSubTab(bool extendSub)
0
};

static BinaryFuncC extendSubTab[] =
{
(BinaryFuncC)sub8u32fWrapper, (BinaryFuncC)sub8s32fWrapper,
};
return subTab;
}

return extendSub ? extendSubTab : subTab;
/**
 * Picks the extended-type subtraction wrapper for a source/destination combination.
 *
 * @return sub8u32fWrapper for (CV_8U, CV_8U) -> CV_32F,
 *         sub8s32fWrapper for (CV_8S, CV_8S) -> CV_32F,
 *         nullptr for every other combination.
 */
static ExtendedTypeFunc getSubExtFunc(int src1Type, int src2Type, int dstType)
{
    // Every supported combination writes CV_32F and reads two sources of the same type.
    if (dstType != CV_32F || src1Type != src2Type)
        return nullptr;

    switch (src1Type)
    {
    case CV_8U:
        return sub8u32fWrapper;
    case CV_8S:
        return sub8s32fWrapper;
    default:
        return nullptr;
    }
}

static BinaryFuncC* getAbsDiffTab()
Expand Down Expand Up @@ -991,18 +1001,13 @@ void cv::add( InputArray src1, InputArray src2, OutputArray dst,
}

/**
 * Subtraction entry point: delegates to arithm_op with the subtraction
 * function table and OCL_OP_SUB.
 *
 * An optional extended-type function is looked up with getSubExtFunc() from
 * the depths of both sources and the destination depth, and passed along to
 * arithm_op. The destination depth is taken from dtype, or from
 * _dst.depth() when dtype is negative.
 */
void cv::subtract( InputArray _src1, InputArray _src2, OutputArray _dst,
                   InputArray mask, int dtype )
{
    CV_INSTRUMENT_REGION();

    // NOTE(review): dtype is forwarded unchanged to getSubExtFunc, which compares it against
    // depth constants. Confirm callers never pass a multi-channel type here.
    ExtendedTypeFunc subExtFunc = getSubExtFunc(_src1.depth(), _src2.depth(), dtype < 0 ? _dst.depth() : dtype);
    arithm_op(_src1, _src2, _dst, mask, dtype, getSubTab(), false, 0, OCL_OP_SUB,
              /* extendedFunc */ subExtFunc);
}

void cv::absdiff( InputArray src1, InputArray src2, OutputArray dst )
Expand Down

0 comments on commit eb783fe

Please sign in to comment.