Skip to content

Commit

Permalink
[CLANG] [AVX512] [BUILTIN] Adding prol{d|q|w}{128|256|512} builtin to…
Browse files Browse the repository at this point in the history
… clang .

Differential Revision: http://reviews.llvm.org/D16985

llvm-svn: 261516
  • Loading branch information
michaelz-eng committed Feb 22, 2016
1 parent 84f2f18 commit 38a2727
Show file tree
Hide file tree
Showing 5 changed files with 225 additions and 2 deletions.
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/BuiltinsX86.def
Expand Up @@ -1608,6 +1608,12 @@ TARGET_BUILTIN(__builtin_ia32_pmovzxwd128_mask, "V4iV8sV4iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovzxwd256_mask, "V8iV8sV8iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovzxwq128_mask, "V2LLiV8sV2LLiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovzxwq256_mask, "V4LLiV8sV4LLiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_prold512_mask, "V16iV16iIiV16iUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_prolq512_mask, "V8LLiV8LLiIiV8LLiUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_prold128_mask, "V4iV4iIiV4iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_prold256_mask, "V8iV8iIiV8iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_prolq128_mask, "V2LLiV2LLiIiV2LLiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_prolq256_mask, "V4LLiV4LLiIiV4LLiUc","","avx512vl")

#undef BUILTIN
#undef TARGET_BUILTIN
35 changes: 35 additions & 0 deletions clang/lib/Headers/avx512fintrin.h
Expand Up @@ -3330,6 +3330,41 @@ _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
(__v8di)(__m512i)(b), (p), \
(__mmask8)(m)); })

#define _mm512_rol_epi32(a, b) __extension__ ({ \
(__m512i) __builtin_ia32_prold512_mask ((__v16si) (a), (b),\
(__v16si)\
_mm512_setzero_si512 (),\
(__mmask16) -1); })

#define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \
(__m512i) __builtin_ia32_prold512_mask ((__v16si) (a), (b),\
(__v16si) (W),\
(__mmask16) (U)); })

#define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \
(__m512i) __builtin_ia32_prold512_mask ((__v16si) (a), (b),\
(__v16si)\
_mm512_setzero_si512 (),\
(__mmask16) (U)); })

#define _mm512_rol_epi64(a, b) __extension__ ({ \
(__m512i) __builtin_ia32_prolq512_mask ((__v8di) (a), (b),\
(__v8di)\
_mm512_setzero_si512 (),\
(__mmask8) -1); })

#define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \
(__m512i) __builtin_ia32_prolq512_mask ((__v8di) (a), (b),\
(__v8di) (W),\
(__mmask8) (U)); })

#define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \
(__m512i) __builtin_ia32_prolq512_mask ((__v8di) (a), (b),\
(__v8di)\
_mm512_setzero_si512 (),\
(__mmask8) (U)); })


#undef __DEFAULT_FN_ATTRS

#endif // __AVX512FINTRIN_H
73 changes: 73 additions & 0 deletions clang/lib/Headers/avx512vlintrin.h
Expand Up @@ -31,6 +31,11 @@
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
#define __DEFAULT_FN_ATTRS_BOTH __attribute__((__always_inline__, __nodebug__, __target__("avx512vl, avx512bw")))

static __inline __v2di __DEFAULT_FN_ATTRS
_mm_setzero_di(void) {
return (__v2di){ 0, 0};
}

/* Integer compare */

static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
Expand Down Expand Up @@ -4942,6 +4947,74 @@ _mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
}


#define _mm_rol_epi32(a, b) __extension__ ({\
(__m128i)__builtin_ia32_prold128_mask((__v4si) (a), (b),\
(__v4si)\
_mm_setzero_si128 (),\
(__mmask8) -1); })

#define _mm_mask_rol_epi32(w, u, a, b) __extension__ ({\
(__m128i)__builtin_ia32_prold128_mask((__v4si) (a), (b),\
(__v4si) (w),\
(__mmask8) (u)); })

#define _mm_maskz_rol_epi32(u, a, b) __extension__ ({\
(__m128i)__builtin_ia32_prold128_mask((__v4si) (a), (b),\
(__v4si)\
_mm_setzero_si128 (),\
(__mmask8) (u)); })

#define _mm256_rol_epi32(a, b) __extension__ ({\
(__m256i)__builtin_ia32_prold256_mask((__v8si) (a), (b),\
(__v8si)\
_mm256_setzero_si256 (),\
(__mmask8) -1); })

#define _mm256_mask_rol_epi32(w, u, a, b) __extension__ ({\
(__m256i)__builtin_ia32_prold256_mask((__v8si) (a), (b),\
(__v8si) (w),\
(__mmask8) (u)); })

#define _mm256_maskz_rol_epi32(u, a, b) __extension__ ({\
(__m256i)__builtin_ia32_prold256_mask((__v8si) (a), (b),\
(__v8si)\
_mm256_setzero_si256 (),\
(__mmask8) (u)); })

#define _mm_rol_epi64(a, b) _extension__ ({\
(__m128i)__builtin_ia32_prolq128_mask((__v2di) (a), (b),\
(__v2di)\
_mm_setzero_di (),\
(__mmask8) -1); })

#define _mm_mask_rol_epi64(w, u, a, b) __extension__ ({\
(__m128i)__builtin_ia32_prolq128_mask((__v2di) (a), (b),\
(__v2di) (w),\
(__mmask8) (u)); })

#define _mm_maskz_rol_epi64(u, a, b) __extension__ ({\
(__m128i)__builtin_ia32_prolq128_mask((__v2di) (a), (b),\
(__v2di)\
_mm_setzero_di(),\
(__mmask8) (u)); })

#define _mm256_rol_epi64(a, b) __extension__ ({\
(__m256i)__builtin_ia32_prolq256_mask((__v4di) (a), (b),\
(__v4di)\
_mm256_setzero_si256 (),\
(__mmask8) -1); })

#define _mm256_mask_rol_epi64(w, u, a, b) __extension__ ({\
(__m256i)__builtin_ia32_prolq256_mask((__v4di) (a), (b),\
(__v4di) (w),\
(__mmask8) (u)); })

#define _mm256_maskz_rol_epi64(u, a, b) __extension__ ({\
(__m256i)__builtin_ia32_prolq256_mask((__v4di) (a), (b),\
(__v4di)\
_mm256_setzero_si256 (),\
(__mmask8) (u)); })

#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_BOTH

Expand Down
39 changes: 38 additions & 1 deletion clang/test/CodeGen/avx512f-builtins.c
Expand Up @@ -2078,4 +2078,41 @@ __m512i test_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm512_maskz_cvtepu16_epi64
// CHECK: @llvm.x86.avx512.mask.pmovzxw.q.512
return _mm512_maskz_cvtepu16_epi64(__U, __A);
}
}


__m512i test_mm512_rol_epi32(__m512i __A) {
// CHECK-LABEL: @test_mm512_rol_epi32
// CHECK: @llvm.x86.avx512.mask.prol.d.512
return _mm512_rol_epi32(__A, 5);
}

__m512i test_mm512_mask_rol_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_rol_epi32
// CHECK: @llvm.x86.avx512.mask.prol.d.512
return _mm512_mask_rol_epi32(__W, __U, __A, 5);
}

__m512i test_mm512_maskz_rol_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_rol_epi32
// CHECK: @llvm.x86.avx512.mask.prol.d.512
return _mm512_maskz_rol_epi32(__U, __A, 5);
}

__m512i test_mm512_rol_epi64(__m512i __A) {
// CHECK-LABEL: @test_mm512_rol_epi64
// CHECK: @llvm.x86.avx512.mask.prol.q.512
return _mm512_rol_epi64(__A, 5);
}

__m512i test_mm512_mask_rol_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_rol_epi64
// CHECK: @llvm.x86.avx512.mask.prol.q.512
return _mm512_mask_rol_epi64(__W, __U, __A, 5);
}

__m512i test_mm512_maskz_rol_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_rol_epi64
// CHECK: @llvm.x86.avx512.mask.prol.q.512
return _mm512_maskz_rol_epi64(__U, __A, 5);
}
74 changes: 73 additions & 1 deletion clang/test/CodeGen/avx512vl-builtins.c
Expand Up @@ -3393,4 +3393,76 @@ __m256i test_mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm256_maskz_cvtepu16_epi64
// CHECK: @llvm.x86.avx512.mask.pmovzxw.q.256
return _mm256_maskz_cvtepu16_epi64(__U, __A);
}
}

__m128i test_mm_rol_epi32(__m128i __A) {
// CHECK-LABEL: @test_mm_rol_epi32
// CHECK: @llvm.x86.avx512.mask.prol.d.128
return _mm_rol_epi32(__A, 5);
}

__m128i test_mm_mask_rol_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_mask_rol_epi32
// CHECK: @llvm.x86.avx512.mask.prol.d.128
return _mm_mask_rol_epi32(__W, __U, __A, 5);
}

__m128i test_mm_maskz_rol_epi32(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_maskz_rol_epi32
// CHECK: @llvm.x86.avx512.mask.prol.d.128
return _mm_maskz_rol_epi32(__U, __A, 5);
}

__m256i test_mm256_rol_epi32(__m256i __A) {
// CHECK-LABEL: @test_mm256_rol_epi32
// CHECK: @llvm.x86.avx512.mask.prol.d.256
return _mm256_rol_epi32(__A, 5);
}

__m256i test_mm256_mask_rol_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_rol_epi32
// CHECK: @llvm.x86.avx512.mask.prol.d.256
return _mm256_mask_rol_epi32(__W, __U, __A, 5);
}

__m256i test_mm256_maskz_rol_epi32(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_rol_epi32
// CHECK: @llvm.x86.avx512.mask.prol.d.256
return _mm256_maskz_rol_epi32(__U, __A, 5);
}

__m128i test_mm_rol_epi64(__m128i __A) {
// CHECK-LABEL: @test_mm_rol_epi64
// CHECK: @llvm.x86.avx512.mask.prol.q.128
return _mm_rol_epi64(__A, 5);
}

__m128i test_mm_mask_rol_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_mask_rol_epi64
// CHECK: @llvm.x86.avx512.mask.prol.q.128
return _mm_mask_rol_epi64(__W, __U, __A, 5);
}

__m128i test_mm_maskz_rol_epi64(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_maskz_rol_epi64
// CHECK: @llvm.x86.avx512.mask.prol.q.128
return _mm_maskz_rol_epi64(__U, __A, 5);
}

__m256i test_mm256_rol_epi64(__m256i __A) {
// CHECK-LABEL: @test_mm256_rol_epi64
// CHECK: @llvm.x86.avx512.mask.prol.q.256
return _mm256_rol_epi64(__A, 5);
}

__m256i test_mm256_mask_rol_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_rol_epi64
// CHECK: @llvm.x86.avx512.mask.prol.q.256
return _mm256_mask_rol_epi64(__W, __U, __A, 5);
}

__m256i test_mm256_maskz_rol_epi64(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_rol_epi64
// CHECK: @llvm.x86.avx512.mask.prol.q.256
return _mm256_maskz_rol_epi64(__U, __A, 5);
}

0 comments on commit 38a2727

Please sign in to comment.