From eb783fea4dbcf1469b7672aafa96f3827ccdbe0d Mon Sep 17 00:00:00 2001 From: Rostislav Vasilikhin Date: Thu, 16 May 2024 18:11:58 +0200 Subject: [PATCH] HAL added for sub8x32f --- modules/core/src/arithm.cpp | 83 ++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 39 deletions(-) diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index bae8fd0cde90..fffffa3d251f 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -584,6 +584,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, } #endif + typedef int (*ExtendedTypeFunc)(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, @@ -866,7 +867,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, const uchar* extSptr2 = sptr2; if( swapped12 ) std::swap(extSptr1, extSptr1); - + // try to perform operation with conversion in one call // if fail, use converter functions uchar* opconverted = haveMask ? 
maskbuf : dptr; @@ -898,7 +899,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, copymask(opconverted, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz); ptrs[2] += bsz; } - + ptrs[0] += bsz*esz1; ptrs[1] += bsz*dsz; } } @@ -919,35 +920,33 @@ static BinaryFuncC* getAddTab() return addTab; } -static BinaryFuncC* getSubTab(bool extendSub); - -static void sub8u32fWrapper(const uchar* src1, size_t step1, const uchar* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* ) +static int sub8u32fWrapper(const uchar* src1, size_t step1, const uchar* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* ) { - CALL_HAL(sub8u32f, cv_hal_sub8u32f, src1, step1, src2, step2, (float*)dst, step, width, height); - - // fallback if HAL does not work - Mat src1Arr(height, width, CV_8UC1, const_cast<uchar*>(src1), step1); - Mat src2Arr(height, width, CV_8UC1, const_cast<uchar*>(src2), step2); - Mat dstArr(height, width, CV_32FC1, dst, step); - arithm_op(src1Arr, src2Arr, dstArr, noArray(), CV_32F, getSubTab(false), - /* muldiv */ false, 0, OCL_OP_SUB, /* skipConversion */ false); + int res = cv_hal_sub8u32f(src1, step1, src2, step2, (float *)dst, step, width, height); + if (res == CV_HAL_ERROR_OK || res == CV_HAL_ERROR_NOT_IMPLEMENTED) + return res; + else + { + CV_Error_(cv::Error::StsInternal, ("HAL implementation sub8u32f ==> " CVAUX_STR(cv_hal_sub8u32f) + " returned %d (0x%08x)", res, res)); + } } -static void sub8s32fWrapper(const uchar* src1, size_t step1, const uchar* src2, size_t step2, - uchar* dst, size_t step, int width, int height, void* ) +static int sub8s32fWrapper(const uchar* src1, size_t step1, const uchar* src2, size_t step2, + uchar* dst, size_t step, int width, int height, void* ) { - CALL_HAL(sub8s32f, cv_hal_sub8s32f, (schar*)src1, step1, (schar*)src2, step2, (float*)dst, step, width, height); - - // fallback if HAL does not work - Mat src1Arr(height, width, CV_8SC1, const_cast<uchar*>(src1), step1); - Mat 
src2Arr(height, width, CV_8SC1, const_cast<uchar*>(src2), step2); - Mat dstArr(height, width, CV_32FC1, dst, step); - arithm_op(src1Arr, src2Arr, dstArr, noArray(), CV_32F, getSubTab(false), - /* muldiv */ false, 0, OCL_OP_SUB, /* skipConversion */ false); + int res = cv_hal_sub8s32f((schar*)src1, step1, (schar*)src2, step2, (float *)dst, step, width, height); + if (res == CV_HAL_ERROR_OK || res == CV_HAL_ERROR_NOT_IMPLEMENTED) + return res; + else + { + CV_Error_(cv::Error::StsInternal, ("HAL implementation sub8s32f ==> " CVAUX_STR(cv_hal_sub8s32f) + " returned %d (0x%08x)", res, res)); + } } -static BinaryFuncC* getSubTab(bool extendSub) +static BinaryFuncC* getSubTab() { static BinaryFuncC subTab[CV_DEPTH_MAX] = { @@ -958,12 +957,23 @@ static BinaryFuncC* getSubTab(bool extendSub) 0 }; - static BinaryFuncC extendSubTab[] = - { - (BinaryFuncC)sub8u32fWrapper, (BinaryFuncC)sub8s32fWrapper, - }; + return subTab; +} - return extendSub ? extendSubTab : subTab; +static ExtendedTypeFunc getSubExtFunc(int src1Type, int src2Type, int dstType) +{ + if (src1Type == CV_8U && src2Type == CV_8U && dstType == CV_32F) + { + return sub8u32fWrapper; + } + else if (src1Type == CV_8S && src2Type == CV_8S && dstType == CV_32F) + { + return sub8s32fWrapper; + } + else + { + return nullptr; + } } static BinaryFuncC* getAbsDiffTab() @@ -991,18 +1001,13 @@ void cv::add( InputArray src1, InputArray src2, OutputArray dst, } void cv::subtract( InputArray _src1, InputArray _src2, OutputArray _dst, - InputArray mask, int dtype ) + InputArray mask, int dtype ) { CV_INSTRUMENT_REGION(); - static bool hal8u32fAvailable = cv_hal_sub8u32f != hal_ni_sub8u32f; - static bool hal8s32fAvailable = cv_hal_sub8s32f != hal_ni_sub8s32f; - - bool extendSub = (hal8u32fAvailable && (_src1.depth() == CV_8U) && (_src2.depth() == CV_8U) && (dtype == CV_32F)) || - (hal8s32fAvailable && (_src1.depth() == CV_8S) && (_src2.depth() == CV_8S) && (dtype == CV_32F)); - - arithm_op(_src1, _src2, _dst, mask, dtype, 
getSubTab(extendSub), false, 0, OCL_OP_SUB, - /* skipConversion */ extendSub); + ExtendedTypeFunc subExtFunc = getSubExtFunc(_src1.depth(), _src2.depth(), dtype < 0 ? _dst.depth() : dtype); + arithm_op(_src1, _src2, _dst, mask, dtype, getSubTab(), false, 0, OCL_OP_SUB, + /* extendedFunc */ subExtFunc); } void cv::absdiff( InputArray src1, InputArray src2, OutputArray dst )