Skip to content

Commit

Permalink
ENH, SIMD: Implement intrinsic for mask division
Browse files Browse the repository at this point in the history
  • Loading branch information
seiko2plus committed Jan 29, 2023
1 parent 437c835 commit e9e8582
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 0 deletions.
13 changes: 13 additions & 0 deletions numpy/core/src/common/simd/avx512/maskop.h
Expand Up @@ -51,4 +51,17 @@ NPYV_IMPL_AVX512_MASK_ADDSUB(s64, b64, epi64)
NPYV_IMPL_AVX512_MASK_ADDSUB(f32, b32, ps)
NPYV_IMPL_AVX512_MASK_ADDSUB(f64, b64, pd)

// Masked division (single precision): per lane, m ? a / b : c.
NPY_FINLINE npyv_f32
npyv_ifdiv_f32(npyv_b32 m, npyv_f32 a, npyv_f32 b, npyv_f32 c)
{
    // `c` goes first: it is the fallback operand used for lanes
    // that are not selected by `m`.
    return _mm512_mask_div_ps(c, m, a, b);
}
// Zero-masked division (single precision): per lane, m ? a / b : 0.
NPY_FINLINE npyv_f32
npyv_ifdivz_f32(npyv_b32 m, npyv_f32 a, npyv_f32 b)
{
    return _mm512_maskz_div_ps(m, a, b);
}
// Masked division (double precision): per lane, m ? a / b : c.
// The mask is npyv_b64, the boolean type paired with f64 by the other
// mask operations in this header (e.g. NPYV_IMPL_AVX512_MASK_ADDSUB(f64, b64, pd)).
NPY_FINLINE npyv_f64
npyv_ifdiv_f64(npyv_b64 m, npyv_f64 a, npyv_f64 b, npyv_f64 c)
{
    // `c` goes first: it is the fallback operand used for lanes
    // that are not selected by `m`.
    return _mm512_mask_div_pd(c, m, a, b);
}
// Zero-masked division (double precision): per lane, m ? a / b : 0.
// The mask is npyv_b64, the boolean type paired with f64 by the other
// mask operations in this header (e.g. NPYV_IMPL_AVX512_MASK_ADDSUB(f64, b64, pd)).
NPY_FINLINE npyv_f64
npyv_ifdivz_f64(npyv_b64 m, npyv_f64 a, npyv_f64 b)
{
    return _mm512_maskz_div_pd(m, a, b);
}

#endif // _NPY_SIMD_AVX512_MASKOP_H
34 changes: 34 additions & 0 deletions numpy/core/src/common/simd/emulate_maskop.h
Expand Up @@ -42,5 +42,39 @@ NPYV_IMPL_EMULATE_MASK_ADDSUB(s64, b64)
#if NPY_SIMD_F64
NPYV_IMPL_EMULATE_MASK_ADDSUB(f64, b64)
#endif
#if NPY_SIMD_F32
// Emulated masked division (single precision): per lane, m ? a / b : c.
NPY_FINLINE npyv_f32
npyv_ifdiv_f32(npyv_b32 m, npyv_f32 a, npyv_f32 b, npyv_f32 c)
{
    // Unselected lanes divide by 1.0 instead of their `b` value, so whatever
    // `b` holds there never reaches the division (presumably to avoid
    // spurious floating-point exceptions such as divide-by-zero).
    const npyv_f32 safe_divisor = npyv_select_f32(m, b, npyv_setall_f32(1.0f));
    const npyv_f32 quotient = npyv_div_f32(a, safe_divisor);
    // Keep the quotient where the mask is set, fall back to `c` elsewhere.
    return npyv_select_f32(m, quotient, c);
}
// Emulated zero-masked division (single precision): per lane, m ? a / b : 0.
// Implemented as npyv_ifdiv_f32 with an all-zero fallback vector.
NPY_FINLINE npyv_f32
npyv_ifdivz_f32(npyv_b32 m, npyv_f32 a, npyv_f32 b)
{
    return npyv_ifdiv_f32(m, a, b, npyv_zero_f32());
}
#endif
#if NPY_SIMD_F64
// Emulated masked division (double precision): per lane, m ? a / b : c.
NPY_FINLINE npyv_f64
npyv_ifdiv_f64(npyv_b64 m, npyv_f64 a, npyv_f64 b, npyv_f64 c)
{
    // Unselected lanes divide by 1.0 instead of their `b` value, so whatever
    // `b` holds there never reaches the division (presumably to avoid
    // spurious floating-point exceptions such as divide-by-zero).
    const npyv_f64 safe_divisor = npyv_select_f64(m, b, npyv_setall_f64(1.0));
    const npyv_f64 quotient = npyv_div_f64(a, safe_divisor);
    // Keep the quotient where the mask is set, fall back to `c` elsewhere.
    return npyv_select_f64(m, quotient, c);
}
// Emulated zero-masked division (double precision): per lane, m ? a / b : 0.
// Implemented as npyv_ifdiv_f64 with an all-zero fallback vector.
NPY_FINLINE npyv_f64
npyv_ifdivz_f64(npyv_b64 m, npyv_f64 a, npyv_f64 b)
{
    return npyv_ifdiv_f64(m, a, b, npyv_zero_f64());
}
#endif

#endif // _NPY_SIMD_EMULATE_MASKOP_H

0 comments on commit e9e8582

Please sign in to comment.