Skip to content

Commit

Permalink
Lowering Mask Set1 intrinsics to LLVM IR
Browse files Browse the repository at this point in the history
This patch, together with a matching llvm patch (https://reviews.llvm.org/D37669), implements the lowering of X86 mask set1 intrinsics to IR.

Differential Revision: https://reviews.llvm.org/D37668

llvm-svn: 313624
  • Loading branch information
Jina Nahias committed Sep 19, 2017
1 parent cf0a22f commit 3ad702a
Show file tree
Hide file tree
Showing 10 changed files with 655 additions and 172 deletions.
9 changes: 0 additions & 9 deletions clang/include/clang/Basic/BuiltinsX86.def
Expand Up @@ -973,7 +973,6 @@ TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8LLiV16iV16i", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8LLiV16iV16i", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_ptestmd512, "UsV16iV16iUs", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_ptestmq512, "UcV8LLiV8LLiUc", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pbroadcastd512_gpr_mask, "V16iiV16iUs", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8LLiLLiC*V8LLiUc", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_loadups512_mask, "V16ffC*V16fUs", "", "avx512f")
Expand Down Expand Up @@ -1374,11 +1373,6 @@ TARGET_BUILTIN(__builtin_ia32_movdqa64load128_mask, "V2LLiV2LLiC*V2LLiUc","","av
TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4LLiV4LLiC*V4LLiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2LLi*V2LLiUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pbroadcastb512_gpr_mask, "V64ccV64cULLi","","avx512bw")
TARGET_BUILTIN(__builtin_ia32_pbroadcastb128_gpr_mask, "V16ccV16cUs","","avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_pbroadcastb256_gpr_mask, "V32ccV32cUi","","avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_pbroadcastd128_gpr_mask, "V4iiV4iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pbroadcastd256_gpr_mask, "V8iiV8iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512ifma")
TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc","","avx512ifma")
TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512ifma")
Expand Down Expand Up @@ -1589,9 +1583,6 @@ TARGET_BUILTIN(__builtin_ia32_broadcastmb128, "V2LLiUc","","avx512cd,avx512vl")
TARGET_BUILTIN(__builtin_ia32_broadcastmb256, "V4LLiUc","","avx512cd,avx512vl")
TARGET_BUILTIN(__builtin_ia32_broadcastmw128, "V4iUs","","avx512cd,avx512vl")
TARGET_BUILTIN(__builtin_ia32_broadcastmw256, "V8iUs","","avx512cd,avx512vl")
TARGET_BUILTIN(__builtin_ia32_pbroadcastw512_gpr_mask, "V32shV32sUi","","avx512bw")
TARGET_BUILTIN(__builtin_ia32_pbroadcastw256_gpr_mask, "V16shV16sUs","","avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_gpr_mask, "V8ssV8sUc","","avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovswb512mem_mask, "vV32c*V32sUi","","avx512bw")
Expand Down
3 changes: 0 additions & 3 deletions clang/include/clang/Basic/BuiltinsX86_64.def
Expand Up @@ -71,9 +71,6 @@ TARGET_BUILTIN(__builtin_ia32_pext_di, "ULLiULLiULLi", "", "bmi2")
TARGET_BUILTIN(__builtin_ia32_bextri_u64, "ULLiULLiIULLi", "", "tbm")
TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcULLiUiUi", "", "lwp")
TARGET_BUILTIN(__builtin_ia32_lwpval64, "vULLiUiUi", "", "lwp")
TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_gpr_mask, "V8LLiLLiV8LLiUc", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pbroadcastq128_gpr_mask, "V2LLiULLiV2LLiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pbroadcastq256_gpr_mask, "V4LLiULLiV4LLiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtsd2si64, "LLiV2dIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi64, "ULLiV2dIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvtss2si64, "LLiV4fIi","","avx512f")
Expand Down
25 changes: 12 additions & 13 deletions clang/lib/Headers/avx512bwintrin.h
Expand Up @@ -2026,18 +2026,17 @@ _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
{
return (__m512i) __builtin_ia32_pbroadcastb512_gpr_mask (__A,
(__v64qi) __O,
__M);
return (__m512i) __builtin_ia32_selectb_512(__M,
(__v64qi)_mm512_set1_epi8(__A),
(__v64qi) __O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
{
return (__m512i) __builtin_ia32_pbroadcastb512_gpr_mask (__A,
(__v64qi)
_mm512_setzero_qi(),
__M);
return (__m512i) __builtin_ia32_selectb_512(__M,
(__v64qi) _mm512_set1_epi8(__A),
(__v64qi) _mm512_setzero_si512());
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS
Expand Down Expand Up @@ -2217,17 +2216,17 @@ _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
{
return (__m512i) __builtin_ia32_pbroadcastw512_gpr_mask (__A,
(__v32hi) __O,
__M);
return (__m512i) __builtin_ia32_selectw_512(__M,
(__v32hi) _mm512_set1_epi16(__A),
(__v32hi) __O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
{
return (__m512i) __builtin_ia32_pbroadcastw512_gpr_mask (__A,
(__v32hi) _mm512_setzero_hi(),
__M);
return (__m512i) __builtin_ia32_selectw_512(__M,
(__v32hi) _mm512_set1_epi16(__A),
(__v32hi) _mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
Expand Down
47 changes: 24 additions & 23 deletions clang/lib/Headers/avx512fintrin.h
Expand Up @@ -258,25 +258,6 @@ _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
(__v8di) _mm512_setzero_si512());
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{
return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
(__v16si)
_mm512_setzero_si512 (),
__M);
}

#ifdef __x86_64__
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
{
return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
(__v8di)
_mm512_setzero_si512 (),
__M);
}
#endif

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_setzero_ps(void)
Expand Down Expand Up @@ -335,12 +316,30 @@ _mm512_set1_epi32(int __s)
__s, __s, __s, __s, __s, __s, __s, __s };
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{
return (__m512i)__builtin_ia32_selectd_512(__M,
(__v16si)_mm512_set1_epi32(__A),
(__v16si)_mm512_setzero_si512());
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi64(long long __d)
{
return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
}

#ifdef __x86_64__
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
{
return (__m512i)__builtin_ia32_selectq_512(__M,
(__v8di)_mm512_set1_epi64(__A),
(__v8di)_mm512_setzero_si512());
}
#endif

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcastss_ps(__m128 __A)
{
Expand Down Expand Up @@ -9737,16 +9736,18 @@ _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
{
return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
__M);
return (__m512i) __builtin_ia32_selectd_512(__M,
(__v16si) _mm512_set1_epi32(__A),
(__v16si) __O);
}

#ifdef __x86_64__
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
{
return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
__M);
return (__m512i) __builtin_ia32_selectq_512(__M,
(__v8di) _mm512_set1_epi64(__A),
(__v8di) __O);
}
#endif

Expand Down
50 changes: 24 additions & 26 deletions clang/lib/Headers/avx512vlbwintrin.h
Expand Up @@ -2660,35 +2660,33 @@ _mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
{
return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
(__v16qi) __O,
__M);
return (__m128i) __builtin_ia32_selectb_128(__M,
(__v16qi) _mm_set1_epi8(__A),
(__v16qi) __O);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_set1_epi8 (__mmask16 __M, char __A)
{
return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
(__v16qi)
_mm_setzero_si128 (),
__M);
return (__m128i) __builtin_ia32_selectb_128(__M,
(__v16qi) _mm_set1_epi8(__A),
(__v16qi) _mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
{
return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
(__v32qi) __O,
__M);
return (__m256i) __builtin_ia32_selectb_256(__M,
(__v32qi) _mm256_set1_epi8(__A),
(__v32qi) __O);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
{
return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
(__v32qi)
_mm256_setzero_si256 (),
__M);
return (__m256i) __builtin_ia32_selectb_256(__M,
(__v32qi) _mm256_set1_epi8(__A),
(__v32qi) _mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
Expand Down Expand Up @@ -3025,33 +3023,33 @@ _mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
{
return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
(__v16hi) __O,
__M);
return (__m256i) __builtin_ia32_selectw_256 (__M,
(__v16hi) _mm256_set1_epi16(__A),
(__v16hi) __O);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
{
return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
(__v16hi) _mm256_setzero_si256 (),
__M);
return (__m256i) __builtin_ia32_selectw_256(__M,
(__v16hi)_mm256_set1_epi16(__A),
(__v16hi) _mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
{
return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
(__v8hi) __O,
__M);
return (__m128i) __builtin_ia32_selectw_128(__M,
(__v8hi) _mm_set1_epi16(__A),
(__v8hi) __O);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_set1_epi16 (__mmask8 __M, short __A)
{
return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
(__v8hi) _mm_setzero_si128 (),
__M);
return (__m128i) __builtin_ia32_selectw_128(__M,
(__v8hi) _mm_set1_epi16(__A),
(__v8hi) _mm_setzero_si128());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
Expand Down
69 changes: 41 additions & 28 deletions clang/lib/Headers/avx512vlintrin.h
Expand Up @@ -5723,59 +5723,72 @@ _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
(__v4df)_mm256_setzero_pd());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
{
return (__m128i)__builtin_ia32_selectd_128(__M,
(__v4si) _mm_set1_epi32(__A),
(__v4si)__O);
}

#define _mm_mask_set1_epi32(O, M, A) __extension__ ({ \
(__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \
(__v4si)(__m128i)(O), \
(__mmask8)(M)); })
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_set1_epi32( __mmask8 __M, int __A)
{
return (__m128i)__builtin_ia32_selectd_128(__M,
(__v4si) _mm_set1_epi32(__A),
(__v4si)_mm_setzero_si128());
}

#define _mm_maskz_set1_epi32(M, A) __extension__ ({ \
(__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \
(__v4si)_mm_setzero_si128(), \
(__mmask8)(M)); })
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
{
return (__m256i)__builtin_ia32_selectd_256(__M,
(__v8si) _mm256_set1_epi32(__A),
(__v8si)__O);
}

#define _mm256_mask_set1_epi32(O, M, A) __extension__ ({ \
(__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \
(__v8si)(__m256i)(O), \
(__mmask8)(M)); })
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_set1_epi32( __mmask8 __M, int __A)
{
return (__m256i)__builtin_ia32_selectd_256(__M,
(__v8si) _mm256_set1_epi32(__A),
(__v8si)_mm256_setzero_si256());
}

#define _mm256_maskz_set1_epi32(M, A) __extension__ ({ \
(__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \
(__v8si)_mm256_setzero_si256(), \
(__mmask8)(M)); })

#ifdef __x86_64__
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
{
return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
__M);
return (__m128i) __builtin_ia32_selectq_128(__M,
(__v2di) _mm_set1_epi8(__A),
(__v2di) __O);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
{
return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
(__v2di)
_mm_setzero_si128 (),
__M);
return (__m128i) __builtin_ia32_selectq_128(__M,
(__v2di) _mm_set1_epi8(__A),
(__v2di) _mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
{
return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
__M);
return (__m256i) __builtin_ia32_selectq_256(__M,
(__v4di) _mm256_set1_epi64x(__A),
(__v4di) __O) ;
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
{
return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
(__v4di)
_mm256_setzero_si256 (),
__M);
return (__m256i) __builtin_ia32_selectq_256(__M,
(__v4di) _mm256_set1_epi64x(__A),
(__v4di) _mm256_setzero_si256());
}

#endif

#define _mm_fixupimm_pd(A, B, C, imm) __extension__ ({ \
Expand Down

0 comments on commit 3ad702a

Please sign in to comment.