From a5ee850095596a62bca0b0c075f150bdd89d548f Mon Sep 17 00:00:00 2001 From: Rostislav Vasilikhin Date: Thu, 16 May 2024 17:37:46 +0200 Subject: [PATCH] ported from mul8x8to16 --- modules/core/src/arithm.cpp | 145 ++++++++++++++++++++++-------------- 1 file changed, 91 insertions(+), 54 deletions(-) diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index f171b97524da..b1c0836b9296 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -584,10 +584,14 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, } #endif +typedef int (*ExtendedTypeFunc)(const uchar* src1, size_t step1, + const uchar* src2, size_t step2, + uchar* dst, size_t step, int width, int height, + void*); static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, int dtype, BinaryFuncC* tab, bool muldiv=false, - void* usrdata=0, int oclop=-1, bool skipConversion = false ) + void* usrdata=0, int oclop=-1, ExtendedTypeFunc extendedFunc = nullptr ) { const _InputArray *psrc1 = &_src1, *psrc2 = &_src2; _InputArray::KindFlag kind1 = psrc1->kind(), kind2 = psrc2->kind(); @@ -617,7 +621,11 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat(); Size sz = getContinuousSize2D(src1, src2, dst, src1.channels()); - tab[depth1](src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, usrdata); + if (!extendedFunc || extendedFunc(src1.ptr(), src1.step, src2.ptr(), src2.step, + dst.ptr(), dst.step, sz.width, sz.height, usrdata) != 0) + { + tab[depth1](src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, usrdata); + } return; } @@ -715,9 +723,9 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, ocl_arithm_op(*psrc1, *psrc2, _dst, _mask, wtype, usrdata, oclop, haveScalar)) - BinaryFunc cvtsrc1 = type1 == wtype ? 
0 : (skipConversion ? nullptr : getConvertFunc(type1, wtype)); - BinaryFunc cvtsrc2 = type2 == type1 ? cvtsrc1 : type2 == wtype ? 0 : (skipConversion ? nullptr : getConvertFunc(type2, wtype)); - BinaryFunc cvtdst = dtype == wtype ? 0 : (skipConversion ? nullptr : getConvertFunc(wtype, dtype)); + BinaryFunc cvtsrc1 = type1 == wtype ? 0 : getConvertFunc(type1, wtype); + BinaryFunc cvtsrc2 = type2 == type1 ? cvtsrc1 : type2 == wtype ? 0 : getConvertFunc(type2, wtype); + BinaryFunc cvtdst = dtype == wtype ? 0 : getConvertFunc(wtype, dtype); size_t esz1 = CV_ELEM_SIZE(type1), esz2 = CV_ELEM_SIZE(type2); size_t dsz = CV_ELEM_SIZE(dtype), wsz = CV_ELEM_SIZE(wtype); @@ -748,14 +756,22 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, _buf.allocate(bufesz*blocksize + 64); buf = _buf.data(); if( cvtsrc1 ) + { buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16); + } if( cvtsrc2 ) + { buf2 = buf, buf = alignPtr(buf + blocksize*wsz, 16); + } wbuf = maskbuf = buf; if( cvtdst ) + { buf = alignPtr(buf + blocksize*wsz, 16); + } if( haveMask ) + { maskbuf = buf; + } for( size_t i = 0; i < it.nplanes; i++, ++it ) { @@ -765,38 +781,44 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, Size bszn(bsz*cn, 1); const uchar *sptr1 = ptrs[0], *sptr2 = ptrs[1]; uchar* dptr = ptrs[2]; - if( cvtsrc1 ) - { - cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 ); - sptr1 = buf1; - } - if( ptrs[0] == ptrs[1] ) - sptr2 = sptr1; - else if( cvtsrc2 ) + // try to perform operation with conversion in one call + // if fail, use converter functions + uchar* opconverted = haveMask ? 
maskbuf : dptr; + if (!extendedFunc || extendedFunc(sptr1, 1, sptr2, 1, opconverted, (!haveMask), + bszn.width, bszn.height, usrdata) != 0) { - cvtsrc2( sptr2, 1, 0, 1, buf2, 1, bszn, 0 ); - sptr2 = buf2; - } - - if( !haveMask && !cvtdst ) - func( sptr1, 1, sptr2, 1, dptr, 1, bszn.width, bszn.height, usrdata ); - else - { - func( sptr1, 1, sptr2, 1, wbuf, 0, bszn.width, bszn.height, usrdata ); - if( !haveMask ) - cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 ); - else if( !cvtdst ) + if( cvtsrc1 ) + { + cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 ); + sptr1 = buf1; + } + if( ptrs[0] == ptrs[1] ) { - copymask( wbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz ); - ptrs[3] += bsz; + sptr2 = sptr1; } - else + else if( cvtsrc2 ) { - cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 ); - copymask( maskbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz ); - ptrs[3] += bsz; + cvtsrc2( sptr2, 1, 0, 1, buf2, 1, bszn, 0 ); + sptr2 = buf2; } + + uchar* fdst = (haveMask || cvtdst) ? wbuf : dptr; + func(sptr1, 1, sptr2, 1, fdst, (!haveMask && !cvtdst), bszn.width, bszn.height, usrdata); + + if (cvtdst) + { + uchar* cdst = haveMask ? maskbuf : dptr; + cvtdst(wbuf, 1, 0, 1, cdst, 1, bszn, 0); + } + opconverted = cvtdst ? 
maskbuf : wbuf; + } + + if (haveMask) + { + copymask(opconverted, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz); + ptrs[3] += bsz; + } + ptrs[0] += bsz*esz1; ptrs[1] += bsz*esz2; ptrs[2] += bsz*dsz; } } @@ -812,13 +834,19 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, _buf.allocate(bufesz*blocksize + 64); buf = _buf.data(); if( cvtsrc1 ) - buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16); + { + buf1 = buf, buf = alignPtr(buf + blocksize * wsz, 16); + } buf2 = buf; buf = alignPtr(buf + blocksize*wsz, 16); wbuf = maskbuf = buf; if( cvtdst ) - buf = alignPtr(buf + blocksize*wsz, 16); + { + buf = alignPtr(buf + blocksize * wsz, 16); + } if( haveMask ) + { maskbuf = buf; + } convertAndUnrollScalar( src2, wtype, buf2, blocksize); @@ -832,34 +860,43 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, const uchar* sptr2 = buf2; uchar* dptr = ptrs[1]; - if( cvtsrc1 ) - { - cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 ); - sptr1 = buf1; - } - + const uchar* extSptr1 = sptr1; + const uchar* extSptr2 = sptr2; if( swapped12 ) - std::swap(sptr1, sptr2); - - if( !haveMask && !cvtdst ) - func( sptr1, 1, sptr2, 1, dptr, 1, bszn.width, bszn.height, usrdata ); - else + std::swap(extSptr1, extSptr2); /* swap both operands, matching the sptr1/sptr2 swap done before func() in the fallback path */ + + // try to perform operation with conversion in one call + // if fail, use converter functions + uchar* opconverted = haveMask ? maskbuf : dptr; + if (!extendedFunc || extendedFunc(extSptr1, 1, extSptr2, 1, opconverted, 1, + bszn.width, bszn.height, usrdata) != 0) { - func( sptr1, 1, sptr2, 1, wbuf, 1, bszn.width, bszn.height, usrdata ); - if( !haveMask ) - cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 ); - else if( !cvtdst ) + if( cvtsrc1 ) { - copymask( wbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz ); - ptrs[2] += bsz; + cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 ); + sptr1 = buf1; } - else + + if( swapped12 ) + std::swap(sptr1, sptr2); + + uchar* fdst = ( haveMask || cvtdst ) ? 
wbuf : dptr; + func( sptr1, 1, sptr2, 1, fdst, 1, bszn.width, bszn.height, usrdata ); + + if (cvtdst) { - cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 ); - copymask( maskbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz ); - ptrs[2] += bsz; + uchar* cdst = haveMask ? maskbuf : dptr; + cvtdst(wbuf, 1, 0, 1, cdst, 1, bszn, 0); } + opconverted = cvtdst ? maskbuf : wbuf; + } + + if (haveMask) + { + copymask(opconverted, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz); + ptrs[2] += bsz; } + ptrs[0] += bsz*esz1; ptrs[1] += bsz*dsz; } }