Skip to content

Commit

Permalink
[Clang][Intrinsics][avx512] Continue Adding round cvt to clang
Browse files Browse the repository at this point in the history
And remove trailing spaces in the avx512f intrinsics test
Differential Revision: http://reviews.llvm.org/D20810

llvm-svn: 271398
  • Loading branch information
michaelz-eng committed Jun 1, 2016
1 parent 47d1e37 commit 6170c15
Show file tree
Hide file tree
Showing 2 changed files with 151 additions and 23 deletions.
66 changes: 65 additions & 1 deletion clang/lib/Headers/avx512fintrin.h
Expand Up @@ -3585,6 +3585,27 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)

/* Conversion */

/* Unmasked form of the cvttps2udq conversion: forwards __A (as __v16sf) to
 * __builtin_ia32_cvttps2udq512_mask with _mm512_undefined_epi32() as the
 * second operand and an all-ones __mmask16.  __R is passed through as the
 * builtin's last argument (NOTE(review): presumably must be a compile-time
 * constant -- confirm against the builtin's signature).
 * The result is cast to __m512i explicitly so the macro yields the public
 * vector type rather than depending on implicit vector conversion. */
#define _mm512_cvtt_roundps_epu32(__A, __R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(__A), \
                                             (__v16si)_mm512_undefined_epi32(), \
                                             (__mmask16)-1, (int)(__R)); \
})

/* Masked form of the cvttps2udq conversion: __A is the float source, __W is
 * supplied as the builtin's second operand (cast to __v16si) and __U as its
 * __mmask16 operand; __R is forwarded as the last argument.
 * The result is cast to __m512i explicitly so the macro yields the public
 * vector type rather than depending on implicit vector conversion. */
#define _mm512_mask_cvtt_roundps_epu32(__W, __U, __A, __R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(__A), \
                                             (__v16si)(__m512i)(__W), \
                                             (__mmask16)(__U), (int)(__R)); \
})

/* Zero-masked form of the cvttps2udq conversion: same builtin as the masked
 * variant, but the second operand is _mm512_setzero_si512() instead of a
 * caller-supplied vector.  __U is the __mmask16 operand and __R is forwarded
 * as the last argument.
 * The result is cast to __m512i explicitly so the macro yields the public
 * vector type rather than depending on implicit vector conversion. */
#define _mm512_maskz_cvtt_roundps_epu32(__U, __A, __R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(__A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(__U), (int)(__R)); \
})


static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epu32(__m512 __A)
{
Expand Down Expand Up @@ -3795,6 +3816,28 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
_MM_FROUND_CUR_DIRECTION);
}

/* Unmasked vcvtps2ph wrapper: passes __A (as __v16sf) and the immediate __I
 * to __builtin_ia32_vcvtps2ph512_mask together with
 * _mm256_undefined_si256() and an all-ones __mmask16, and yields the result
 * as __m256i. */
#define _mm512_cvt_roundps_ph(__A, __I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__A), \
                                            (__I), \
                                            (__v16hi)_mm256_undefined_si256(), \
                                            (__mmask16)-1); \
})

/* Masked vcvtps2ph wrapper.  Argument order is (vector, mask, source,
 * immediate): the first argument is cast to __v16hi and handed to the builtin
 * as its vector operand, the second is cast to __mmask16.  The parameters are
 * named __W (vector) and __U (mask) to match the other masked conversion
 * macros in this header; the positional behaviour is unchanged. */
#define _mm512_mask_cvt_roundps_ph(__W, __U, __A, __I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__A), \
                                            (__I), \
                                            (__v16hi)(__W), \
                                            (__mmask16)(__U)); \
})

/* Zero-masked vcvtps2ph wrapper.  Argument order is (mask, source,
 * immediate): the first argument is cast to __mmask16, and
 * _mm256_setzero_si256() is supplied as the builtin's vector operand.  The
 * mask parameter is named __U to match the other maskz conversion macros in
 * this header; the positional behaviour is unchanged. */
#define _mm512_maskz_cvt_roundps_ph(__U, __A, __I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__A), \
                                            (__I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(__U)); \
})

#define _mm512_cvtps_ph(A, I) __extension__ ({ \
(__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
(__v16hi)_mm256_setzero_si256(), \
Expand All @@ -3809,7 +3852,28 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
(__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
(__v16hi)_mm256_setzero_si256(), \
(__mmask16)(W)); })


/* Unmasked vcvtph2ps wrapper: passes __A (as __v16hi) to
 * __builtin_ia32_vcvtph2ps512_mask with _mm512_undefined_ps() as the second
 * operand, an all-ones __mmask16, and __R as the final argument. */
#define _mm512_cvt_roundph_ps(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvtph2ps512_mask((__v16hi)(__A), \
                                   (__v16sf)_mm512_undefined_ps(), \
                                   (__mmask16)-1, (__R)); \
})

/* Masked vcvtph2ps wrapper: __A is the __v16hi source, __W is supplied as the
 * builtin's second operand (cast to __v16sf), __U as its __mmask16 operand,
 * and __R as the final argument. */
#define _mm512_mask_cvt_roundph_ps(__W, __U, __A, __R) __extension__ ({ \
  __builtin_ia32_vcvtph2ps512_mask((__v16hi)(__A), \
                                   (__v16sf)(__W), \
                                   (__mmask16)(__U), (__R)); \
})

/* Zero-masked vcvtph2ps wrapper: same builtin as the masked variant, with
 * _mm512_setzero_ps() as the second operand instead of a caller-supplied
 * vector.  __U is the __mmask16 operand and __R the final argument. */
#define _mm512_maskz_cvt_roundph_ps(__U, __A, __R) __extension__ ({ \
  __builtin_ia32_vcvtph2ps512_mask((__v16hi)(__A), \
                                   (__v16sf)_mm512_setzero_ps(), \
                                   (__mmask16)(__U), (__R)); \
})


static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_cvtph_ps(__m256i __A)
{
Expand Down
108 changes: 86 additions & 22 deletions clang/test/CodeGen/avx512f-builtins.c
Expand Up @@ -2948,19 +2948,19 @@ __m512 test_mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B) {
// Codegen test: _mm_cvt_roundsd_si32 must reference the vcvtsd2si32 intrinsic.
int test_mm_cvt_roundsd_si32(__m128d __A) {
  // CHECK-LABEL: @test_mm_cvt_roundsd_si32
  // CHECK: @llvm.x86.avx512.vcvtsd2si32
  return _mm_cvt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: _mm_cvt_roundsd_i32 must reference the vcvtsd2si32 intrinsic.
int test_mm_cvt_roundsd_i32(__m128d __A) {
  // CHECK-LABEL: @test_mm_cvt_roundsd_i32
  // CHECK: @llvm.x86.avx512.vcvtsd2si32
  return _mm_cvt_roundsd_i32(__A, _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: _mm_cvt_roundsd_u32 must reference the vcvtsd2usi32 intrinsic.
unsigned test_mm_cvt_roundsd_u32(__m128d __A) {
  // CHECK-LABEL: @test_mm_cvt_roundsd_u32
  // CHECK: @llvm.x86.avx512.vcvtsd2usi32
  return _mm_cvt_roundsd_u32(__A, _MM_FROUND_CUR_DIRECTION);
}

unsigned test_mm_cvtsd_u32(__m128d __A) {
Expand All @@ -2972,7 +2972,7 @@ unsigned test_mm_cvtsd_u32(__m128d __A) {
// Codegen test: _mm_cvt_roundsd_u64 must reference the vcvtsd2usi64 intrinsic.
unsigned long long test_mm_cvt_roundsd_u64(__m128d __A) {
  // CHECK-LABEL: @test_mm_cvt_roundsd_u64
  // CHECK: @llvm.x86.avx512.vcvtsd2usi64
  return _mm_cvt_roundsd_u64(__A, _MM_FROUND_CUR_DIRECTION);
}

unsigned long long test_mm_cvtsd_u64(__m128d __A) {
Expand All @@ -2984,31 +2984,31 @@ unsigned long long test_mm_cvtsd_u64(__m128d __A) {
// Codegen test: _mm_cvt_roundss_si32 must reference the vcvtss2si32 intrinsic.
int test_mm_cvt_roundss_si32(__m128 __A) {
  // CHECK-LABEL: @test_mm_cvt_roundss_si32
  // CHECK: @llvm.x86.avx512.vcvtss2si32
  return _mm_cvt_roundss_si32(__A, _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: _mm_cvt_roundss_i32 must reference the vcvtss2si32 intrinsic.
int test_mm_cvt_roundss_i32(__m128 __A) {
  // CHECK-LABEL: @test_mm_cvt_roundss_i32
  // CHECK: @llvm.x86.avx512.vcvtss2si32
  return _mm_cvt_roundss_i32(__A, _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: _mm_cvt_roundss_si64 must reference the vcvtss2si64 intrinsic.
// NOTE(review): the declared return type is int although the matched
// intrinsic is the 64-bit variant and the _i64 sibling test returns long long;
// the value is presumably narrowed on return -- confirm whether long long was
// intended.
int test_mm_cvt_roundss_si64(__m128 __A) {
  // CHECK-LABEL: @test_mm_cvt_roundss_si64
  // CHECK: @llvm.x86.avx512.vcvtss2si64
  return _mm_cvt_roundss_si64(__A, _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: _mm_cvt_roundss_i64 must reference the vcvtss2si64 intrinsic.
long long test_mm_cvt_roundss_i64(__m128 __A) {
  // CHECK-LABEL: @test_mm_cvt_roundss_i64
  // CHECK: @llvm.x86.avx512.vcvtss2si64
  return _mm_cvt_roundss_i64(__A, _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: _mm_cvt_roundss_u32 must reference the vcvtss2usi32 intrinsic.
unsigned test_mm_cvt_roundss_u32(__m128 __A) {
  // CHECK-LABEL: @test_mm_cvt_roundss_u32
  // CHECK: @llvm.x86.avx512.vcvtss2usi32
  return _mm_cvt_roundss_u32(__A, _MM_FROUND_CUR_DIRECTION);
}

unsigned test_mm_cvtss_u32(__m128 __A) {
Expand All @@ -3020,7 +3020,7 @@ unsigned test_mm_cvtss_u32(__m128 __A) {
// Codegen test: _mm_cvt_roundss_u64 must reference the vcvtss2usi64 intrinsic.
unsigned long long test_mm_cvt_roundss_u64(__m128 __A) {
  // CHECK-LABEL: @test_mm_cvt_roundss_u64
  // CHECK: @llvm.x86.avx512.vcvtss2usi64
  return _mm_cvt_roundss_u64(__A, _MM_FROUND_CUR_DIRECTION);
}

unsigned long long test_mm_cvtss_u64(__m128 __A) {
Expand All @@ -3032,13 +3032,13 @@ unsigned long long test_mm_cvtss_u64(__m128 __A) {
// Codegen test: _mm_cvtt_roundsd_i32 must reference the cvttsd2si intrinsic.
int test_mm_cvtt_roundsd_i32(__m128d __A) {
  // CHECK-LABEL: @test_mm_cvtt_roundsd_i32
  // CHECK: @llvm.x86.avx512.cvttsd2si
  return _mm_cvtt_roundsd_i32(__A, _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: _mm_cvtt_roundsd_si32 must reference the cvttsd2si intrinsic.
int test_mm_cvtt_roundsd_si32(__m128d __A) {
  // CHECK-LABEL: @test_mm_cvtt_roundsd_si32
  // CHECK: @llvm.x86.avx512.cvttsd2si
  return _mm_cvtt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION);
}

int test_mm_cvttsd_i32(__m128d __A) {
Expand All @@ -3050,13 +3050,13 @@ int test_mm_cvttsd_i32(__m128d __A) {
// Codegen test: _mm_cvtt_roundsd_si64 must reference the cvttsd2si64
// intrinsic.
// NOTE(review): the declared return type is unsigned long long while the _i64
// sibling test, which matches the same intrinsic, returns long long --
// confirm whether the signed type was intended.
unsigned long long test_mm_cvtt_roundsd_si64(__m128d __A) {
  // CHECK-LABEL: @test_mm_cvtt_roundsd_si64
  // CHECK: @llvm.x86.avx512.cvttsd2si64
  return _mm_cvtt_roundsd_si64(__A, _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: _mm_cvtt_roundsd_i64 must reference the cvttsd2si64 intrinsic.
long long test_mm_cvtt_roundsd_i64(__m128d __A) {
  // CHECK-LABEL: @test_mm_cvtt_roundsd_i64
  // CHECK: @llvm.x86.avx512.cvttsd2si64
  return _mm_cvtt_roundsd_i64(__A, _MM_FROUND_CUR_DIRECTION);
}

long long test_mm_cvttsd_i64(__m128d __A) {
Expand All @@ -3068,7 +3068,7 @@ long long test_mm_cvttsd_i64(__m128d __A) {
// Codegen test: _mm_cvtt_roundsd_u32 must reference the cvttsd2usi intrinsic.
unsigned test_mm_cvtt_roundsd_u32(__m128d __A) {
  // CHECK-LABEL: @test_mm_cvtt_roundsd_u32
  // CHECK: @llvm.x86.avx512.cvttsd2usi
  return _mm_cvtt_roundsd_u32(__A, _MM_FROUND_CUR_DIRECTION);
}

unsigned test_mm_cvttsd_u32(__m128d __A) {
Expand All @@ -3080,7 +3080,7 @@ unsigned test_mm_cvttsd_u32(__m128d __A) {
// Codegen test: _mm_cvtt_roundsd_u64 must reference the cvttsd2usi64
// intrinsic.
unsigned long long test_mm_cvtt_roundsd_u64(__m128d __A) {
  // CHECK-LABEL: @test_mm_cvtt_roundsd_u64
  // CHECK: @llvm.x86.avx512.cvttsd2usi64
  return _mm_cvtt_roundsd_u64(__A, _MM_FROUND_CUR_DIRECTION);
}

unsigned long long test_mm_cvttsd_u64(__m128d __A) {
Expand All @@ -3092,13 +3092,13 @@ unsigned long long test_mm_cvttsd_u64(__m128d __A) {
// Codegen test: _mm_cvtt_roundss_i32 must reference the cvttss2si intrinsic.
int test_mm_cvtt_roundss_i32(__m128 __A) {
  // CHECK-LABEL: @test_mm_cvtt_roundss_i32
  // CHECK: @llvm.x86.avx512.cvttss2si
  return _mm_cvtt_roundss_i32(__A, _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: _mm_cvtt_roundss_si32 must reference the cvttss2si intrinsic.
int test_mm_cvtt_roundss_si32(__m128 __A) {
  // CHECK-LABEL: @test_mm_cvtt_roundss_si32
  // CHECK: @llvm.x86.avx512.cvttss2si
  return _mm_cvtt_roundss_si32(__A, _MM_FROUND_CUR_DIRECTION);
}

int test_mm_cvttss_i32(__m128 __A) {
Expand All @@ -3110,13 +3110,13 @@ int test_mm_cvttss_i32(__m128 __A) {
// Codegen test: _mm_cvtt_roundss_i64 must reference the cvttss2si64 intrinsic.
// NOTE(review): the declared return type is float while the _si64 sibling
// test, which matches the same intrinsic, returns long long; the integer
// result is presumably converted to float on return -- confirm whether
// long long was intended.
float test_mm_cvtt_roundss_i64(__m128 __A) {
  // CHECK-LABEL: @test_mm_cvtt_roundss_i64
  // CHECK: @llvm.x86.avx512.cvttss2si64
  return _mm_cvtt_roundss_i64(__A, _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: _mm_cvtt_roundss_si64 must reference the cvttss2si64
// intrinsic.
long long test_mm_cvtt_roundss_si64(__m128 __A) {
  // CHECK-LABEL: @test_mm_cvtt_roundss_si64
  // CHECK: @llvm.x86.avx512.cvttss2si64
  return _mm_cvtt_roundss_si64(__A, _MM_FROUND_CUR_DIRECTION);
}

long long test_mm_cvttss_i64(__m128 __A) {
Expand All @@ -3128,7 +3128,7 @@ long long test_mm_cvttss_i64(__m128 __A) {
// Codegen test: _mm_cvtt_roundss_u32 must reference the cvttss2usi intrinsic.
unsigned test_mm_cvtt_roundss_u32(__m128 __A) {
  // CHECK-LABEL: @test_mm_cvtt_roundss_u32
  // CHECK: @llvm.x86.avx512.cvttss2usi
  return _mm_cvtt_roundss_u32(__A, _MM_FROUND_CUR_DIRECTION);
}

unsigned test_mm_cvttss_u32(__m128 __A) {
Expand All @@ -3140,7 +3140,7 @@ unsigned test_mm_cvttss_u32(__m128 __A) {
// Codegen test: _mm_cvtt_roundss_u64 must reference the cvttss2usi64
// intrinsic.
unsigned long long test_mm_cvtt_roundss_u64(__m128 __A) {
  // CHECK-LABEL: @test_mm_cvtt_roundss_u64
  // CHECK: @llvm.x86.avx512.cvttss2usi64
  return _mm_cvtt_roundss_u64(__A, _MM_FROUND_CUR_DIRECTION);
}

unsigned long long test_mm_cvttss_u64(__m128 __A) {
Expand All @@ -3149,6 +3149,70 @@ unsigned long long test_mm_cvttss_u64(__m128 __A) {
return _mm_cvttss_u64(__A);
}

// Codegen test: the unmasked macro must reference the mask.cvttps2udq.512
// intrinsic.
__m512i test_mm512_cvtt_roundps_epu32(__m512 __A) {
  // CHECK-LABEL: @test_mm512_cvtt_roundps_epu32
  // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512
  return _mm512_cvtt_roundps_epu32(__A,
                                   _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: the masked macro must reference the mask.cvttps2udq.512
// intrinsic.
__m512i test_mm512_mask_cvtt_roundps_epu32(__m512i __W, __mmask16 __U, __m512 __A) {
  // CHECK-LABEL: @test_mm512_mask_cvtt_roundps_epu32
  // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512
  return _mm512_mask_cvtt_roundps_epu32(__W, __U, __A,
                                        _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: the zero-masked macro must reference the mask.cvttps2udq.512
// intrinsic.
__m512i test_mm512_maskz_cvtt_roundps_epu32(__mmask16 __U, __m512 __A) {
  // CHECK-LABEL: @test_mm512_maskz_cvtt_roundps_epu32
  // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512
  return _mm512_maskz_cvtt_roundps_epu32(__U, __A,
                                         _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: the unmasked macro must reference the mask.vcvtps2ph.512
// intrinsic.
__m256i test_mm512_cvt_roundps_ph(__m512 __A) {
  // CHECK-LABEL: @test_mm512_cvt_roundps_ph
  // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512
  return _mm512_cvt_roundps_ph(__A,
                               _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: the masked macro must reference the mask.vcvtps2ph.512
// intrinsic.
__m256i test_mm512_mask_cvt_roundps_ph(__m256i __W, __mmask16 __U, __m512 __A) {
  // CHECK-LABEL: @test_mm512_mask_cvt_roundps_ph
  // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512
  return _mm512_mask_cvt_roundps_ph(__W, __U, __A,
                                    _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: the zero-masked macro must reference the mask.vcvtps2ph.512
// intrinsic.
__m256i test_mm512_maskz_cvt_roundps_ph(__mmask16 __U, __m512 __A) {
  // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_ph
  // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512
  return _mm512_maskz_cvt_roundps_ph(__U, __A,
                                     _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: the unmasked macro must reference the mask.vcvtph2ps.512
// intrinsic.
__m512 test_mm512_cvt_roundph_ps(__m256i __A) {
  // CHECK-LABEL: @test_mm512_cvt_roundph_ps
  // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512
  return _mm512_cvt_roundph_ps(__A,
                               _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: the masked macro must reference the mask.vcvtph2ps.512
// intrinsic.
__m512 test_mm512_mask_cvt_roundph_ps(__m512 __W, __mmask16 __U, __m256i __A) {
  // CHECK-LABEL: @test_mm512_mask_cvt_roundph_ps
  // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512
  return _mm512_mask_cvt_roundph_ps(__W, __U, __A,
                                    _MM_FROUND_CUR_DIRECTION);
}

// Codegen test: the zero-masked macro must reference the mask.vcvtph2ps.512
// intrinsic.
__m512 test_mm512_maskz_cvt_roundph_ps(__mmask16 __U, __m256i __A) {
  // CHECK-LABEL: @test_mm512_maskz_cvt_roundph_ps
  // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512
  return _mm512_maskz_cvt_roundph_ps(__U, __A,
                                     _MM_FROUND_CUR_DIRECTION);
}

__m512 test_mm512_mask_cvt_roundepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
{
// CHECK-LABEL: @test_mm512_mask_cvt_roundepi32_ps
Expand Down

0 comments on commit 6170c15

Please sign in to comment.