Skip to content

Commit

Permalink
ported from mul8x8to16
Browse files Browse the repository at this point in the history
  • Loading branch information
Rostislav Vasilikhin committed May 16, 2024
1 parent 9ca857c commit a5ee850
Showing 1 changed file with 91 additions and 54 deletions.
145 changes: 91 additions & 54 deletions modules/core/src/arithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -584,10 +584,14 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
}

#endif
// Optional fused kernel that arithm_op() tries before the generic path.
// It takes the same argument list as the per-depth table functions
// (two source pointers with steps, destination pointer with step,
// width, height, opaque user data) but returns a status code:
// 0 means the operation was handled; any non-zero value makes
// arithm_op() fall back to the regular function plus converters.
using ExtendedTypeFunc = int (*)(const uchar* src1, size_t step1,
                                 const uchar* src2, size_t step2,
                                 uchar* dst, size_t step,
                                 int width, int height,
                                 void*);

static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
InputArray _mask, int dtype, BinaryFuncC* tab, bool muldiv=false,
void* usrdata=0, int oclop=-1, bool skipConversion = false )
void* usrdata=0, int oclop=-1, ExtendedTypeFunc extendedFunc = nullptr )
{
const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
_InputArray::KindFlag kind1 = psrc1->kind(), kind2 = psrc2->kind();
Expand Down Expand Up @@ -617,7 +621,11 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,

Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat();
Size sz = getContinuousSize2D(src1, src2, dst, src1.channels());
tab[depth1](src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, usrdata);
if (!extendedFunc || extendedFunc(src1.ptr(), src1.step, src2.ptr(), src2.step,
dst.ptr(), dst.step, sz.width, sz.height, usrdata) != 0)
{
tab[depth1](src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, usrdata);
}
return;
}

Expand Down Expand Up @@ -715,9 +723,9 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
ocl_arithm_op(*psrc1, *psrc2, _dst, _mask, wtype,
usrdata, oclop, haveScalar))

BinaryFunc cvtsrc1 = type1 == wtype ? 0 : (skipConversion ? nullptr : getConvertFunc(type1, wtype));
BinaryFunc cvtsrc2 = type2 == type1 ? cvtsrc1 : type2 == wtype ? 0 : (skipConversion ? nullptr : getConvertFunc(type2, wtype));
BinaryFunc cvtdst = dtype == wtype ? 0 : (skipConversion ? nullptr : getConvertFunc(wtype, dtype));
BinaryFunc cvtsrc1 = type1 == wtype ? 0 : getConvertFunc(type1, wtype);
BinaryFunc cvtsrc2 = type2 == type1 ? cvtsrc1 : type2 == wtype ? 0 : getConvertFunc(type2, wtype);
BinaryFunc cvtdst = dtype == wtype ? 0 : getConvertFunc(wtype, dtype);

size_t esz1 = CV_ELEM_SIZE(type1), esz2 = CV_ELEM_SIZE(type2);
size_t dsz = CV_ELEM_SIZE(dtype), wsz = CV_ELEM_SIZE(wtype);
Expand Down Expand Up @@ -748,14 +756,22 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
_buf.allocate(bufesz*blocksize + 64);
buf = _buf.data();
if( cvtsrc1 )
{
buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
}
if( cvtsrc2 )
{
buf2 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
}
wbuf = maskbuf = buf;
if( cvtdst )
{
buf = alignPtr(buf + blocksize*wsz, 16);
}
if( haveMask )
{
maskbuf = buf;
}

for( size_t i = 0; i < it.nplanes; i++, ++it )
{
Expand All @@ -765,38 +781,44 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
Size bszn(bsz*cn, 1);
const uchar *sptr1 = ptrs[0], *sptr2 = ptrs[1];
uchar* dptr = ptrs[2];
if( cvtsrc1 )
{
cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
sptr1 = buf1;
}
if( ptrs[0] == ptrs[1] )
sptr2 = sptr1;
else if( cvtsrc2 )
// try to perform operation with conversion in one call
// if fail, use converter functions
uchar* opconverted = haveMask ? maskbuf : dptr;
if (!extendedFunc || extendedFunc(sptr1, 1, sptr2, 1, opconverted, (!haveMask),
bszn.width, bszn.height, usrdata) != 0)
{
cvtsrc2( sptr2, 1, 0, 1, buf2, 1, bszn, 0 );
sptr2 = buf2;
}

if( !haveMask && !cvtdst )
func( sptr1, 1, sptr2, 1, dptr, 1, bszn.width, bszn.height, usrdata );
else
{
func( sptr1, 1, sptr2, 1, wbuf, 0, bszn.width, bszn.height, usrdata );
if( !haveMask )
cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 );
else if( !cvtdst )
if( cvtsrc1 )
{
cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
sptr1 = buf1;
}
if( ptrs[0] == ptrs[1] )
{
copymask( wbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz );
ptrs[3] += bsz;
sptr2 = sptr1;
}
else
else if( cvtsrc2 )
{
cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 );
copymask( maskbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz );
ptrs[3] += bsz;
cvtsrc2( sptr2, 1, 0, 1, buf2, 1, bszn, 0 );
sptr2 = buf2;
}

uchar* fdst = (haveMask || cvtdst) ? wbuf : dptr;
func(sptr1, 1, sptr2, 1, fdst, (!haveMask && !cvtdst), bszn.width, bszn.height, usrdata);

if (cvtdst)
{
uchar* cdst = haveMask ? maskbuf : dptr;
cvtdst(wbuf, 1, 0, 1, cdst, 1, bszn, 0);
}
opconverted = cvtdst ? maskbuf : wbuf;
}

if (haveMask)
{
copymask(opconverted, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz);
ptrs[3] += bsz;
}

ptrs[0] += bsz*esz1; ptrs[1] += bsz*esz2; ptrs[2] += bsz*dsz;
}
}
Expand All @@ -812,13 +834,19 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
_buf.allocate(bufesz*blocksize + 64);
buf = _buf.data();
if( cvtsrc1 )
buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
{
buf1 = buf, buf = alignPtr(buf + blocksize * wsz, 16);
}
buf2 = buf; buf = alignPtr(buf + blocksize*wsz, 16);
wbuf = maskbuf = buf;
if( cvtdst )
buf = alignPtr(buf + blocksize*wsz, 16);
{
buf = alignPtr(buf + blocksize * wsz, 16);
}
if( haveMask )
{
maskbuf = buf;
}

convertAndUnrollScalar( src2, wtype, buf2, blocksize);

Expand All @@ -832,34 +860,43 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
const uchar* sptr2 = buf2;
uchar* dptr = ptrs[1];

if( cvtsrc1 )
{
cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
sptr1 = buf1;
}

const uchar* extSptr1 = sptr1;
const uchar* extSptr2 = sptr2;
if( swapped12 )
std::swap(sptr1, sptr2);

if( !haveMask && !cvtdst )
func( sptr1, 1, sptr2, 1, dptr, 1, bszn.width, bszn.height, usrdata );
else
// keep operand order for the extended function in sync with the generic path (sptr1/sptr2 are swapped the same way below)
std::swap(extSptr1, extSptr2);

// try to perform operation with conversion in one call
// if fail, use converter functions
uchar* opconverted = haveMask ? maskbuf : dptr;
if (!extendedFunc || extendedFunc(extSptr1, 1, extSptr2, 1, opconverted, 1,
bszn.width, bszn.height, usrdata) != 0)
{
func( sptr1, 1, sptr2, 1, wbuf, 1, bszn.width, bszn.height, usrdata );
if( !haveMask )
cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 );
else if( !cvtdst )
if( cvtsrc1 )
{
copymask( wbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz );
ptrs[2] += bsz;
cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
sptr1 = buf1;
}
else

if( swapped12 )
std::swap(sptr1, sptr2);

uchar* fdst = ( haveMask || cvtdst ) ? wbuf : dptr;
func( sptr1, 1, sptr2, 1, fdst, 1, bszn.width, bszn.height, usrdata );

if (cvtdst)
{
cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 );
copymask( maskbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz );
ptrs[2] += bsz;
uchar* cdst = haveMask ? maskbuf : dptr;
cvtdst(wbuf, 1, 0, 1, cdst, 1, bszn, 0);
}
opconverted = cvtdst ? maskbuf : wbuf;
}

if (haveMask)
{
copymask(opconverted, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz);
ptrs[2] += bsz;
}

ptrs[0] += bsz*esz1; ptrs[1] += bsz*dsz;
}
}
Expand Down

0 comments on commit a5ee850

Please sign in to comment.