Skip to content

Commit

Permalink
[Clang][AVX512][INTRINSICS] adding round cvt and fix regular cvtps_ph
Browse files Browse the repository at this point in the history
Differential Revision: http://reviews.llvm.org/D20870

llvm-svn: 271498
  • Loading branch information
michaelz-eng committed Jun 2, 2016
1 parent 1c2cb1d commit 9e7d0a9
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 8 deletions.
40 changes: 36 additions & 4 deletions clang/lib/Headers/avx512vlintrin.h
Expand Up @@ -9383,26 +9383,58 @@ _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
(__mmask8) __U);
}

#define _mm_mask_cvtps_ph(W, U, A, I) __extension__ ({ \
/* Masked VCVTPS2PH wrapper for a 128-bit float source.
   Forwards __A (as __v4sf), the fixed immediate _MM_FROUND_CUR_DIRECTION,
   __W (as __v8hi) and __U (as __mmask8) to __builtin_ia32_vcvtps2ph_mask
   and returns the builtin's result as __m128i.
   NOTE(review): __W is presumably the pass-through vector for lanes whose
   mask bit in __U is clear -- confirm against the Intel documentation. */
static __inline __m128i __DEFAULT_FN_ATTRS
_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A)
{
  __m128i __R = (__m128i) __builtin_ia32_vcvtps2ph_mask (
      (__v4sf) __A,
      _MM_FROUND_CUR_DIRECTION,
      (__v8hi) __W,
      (__mmask8) __U);
  return __R;
}

/* Zero-source variant of the masked VCVTPS2PH wrapper for a 128-bit float
   source. Forwards __A (as __v4sf), the fixed immediate
   _MM_FROUND_CUR_DIRECTION, an all-zero vector from _mm_setzero_si128()
   (as __v8hi) and __U (as __mmask8) to __builtin_ia32_vcvtps2ph_mask.
   NOTE(review): lanes whose mask bit is clear presumably read as zero --
   confirm against the Intel documentation. */
static __inline __m128i __DEFAULT_FN_ATTRS
_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
{
  __m128i __R = (__m128i) __builtin_ia32_vcvtps2ph_mask (
      (__v4sf) __A,
      _MM_FROUND_CUR_DIRECTION,
      (__v8hi) _mm_setzero_si128 (),
      (__mmask8) __U);
  return __R;
}

/* Masked VCVTPS2PH on a 128-bit float source with a caller-supplied
   immediate. Expands to a call of __builtin_ia32_vcvtps2ph_mask with:
     A -> source, cast to __v4sf
     I -> immediate, cast to int
     W -> third operand, cast to __v8hi
     U -> mask, cast to __mmask8
   The result is cast to __m128i. */
#define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph_mask( \
      (__v4sf)(__m128)(A), \
      (int)(I), \
      (__v8hi)(__m128i)(W), \
      (__mmask8)(U)); })

#define _mm_maskz_cvtps_ph(U, A, I) __extension__ ({ \
/* Zero-source masked VCVTPS2PH on a 128-bit float source with a
   caller-supplied immediate. Expands to a call of
   __builtin_ia32_vcvtps2ph_mask with:
     A -> source, cast to __v4sf
     I -> immediate, cast to int
     _mm_setzero_si128() -> third operand, cast to __v8hi
     U -> mask, cast to __mmask8
   The result is cast to __m128i. */
#define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph_mask( \
      (__v4sf)(__m128)(A), \
      (int)(I), \
      (__v8hi)_mm_setzero_si128(), \
      (__mmask8)(U)); })

#define _mm256_mask_cvtps_ph(W, U, A, I) __extension__ ({ \
/* Masked VCVTPS2PH wrapper for a 256-bit float source.
   Forwards __A (as __v8sf), the fixed immediate _MM_FROUND_CUR_DIRECTION,
   __W (as __v8hi) and __U (as __mmask8) to
   __builtin_ia32_vcvtps2ph256_mask and returns the result as __m128i.
   NOTE(review): __W is presumably the pass-through vector for lanes whose
   mask bit in __U is clear -- confirm against the Intel documentation. */
static __inline __m128i __DEFAULT_FN_ATTRS
_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A)
{
  __m128i __R = (__m128i) __builtin_ia32_vcvtps2ph256_mask (
      (__v8sf) __A,
      _MM_FROUND_CUR_DIRECTION,
      (__v8hi) __W,
      (__mmask8) __U);
  return __R;
}

/* Zero-source variant of the masked VCVTPS2PH wrapper for a 256-bit float
   source. Forwards __A (as __v8sf), the fixed immediate
   _MM_FROUND_CUR_DIRECTION, an all-zero vector from _mm_setzero_si128()
   (as __v8hi) and __U (as __mmask8) to __builtin_ia32_vcvtps2ph256_mask.
   NOTE(review): lanes whose mask bit is clear presumably read as zero --
   confirm against the Intel documentation. */
static __inline __m128i __DEFAULT_FN_ATTRS
_mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
{
  __m128i __R = (__m128i) __builtin_ia32_vcvtps2ph256_mask (
      (__v8sf) __A,
      _MM_FROUND_CUR_DIRECTION,
      (__v8hi) _mm_setzero_si128(),
      (__mmask8) __U);
  return __R;
}
/* Masked VCVTPS2PH on a 256-bit float source with a caller-supplied
   immediate. Expands to a call of __builtin_ia32_vcvtps2ph256_mask with:
     A -> source, cast to __v8sf
     I -> immediate, cast to int
     W -> third operand, cast to __v8hi
     U -> mask, cast to __mmask8
   The result is cast to __m128i. */
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(I), \
      (__v8hi)(__m128i)(W), \
      (__mmask8)(U)); })

#define _mm256_maskz_cvtps_ph(U, A, I) __extension__ ({ \
/* Zero-source masked VCVTPS2PH on a 256-bit float source with a
   caller-supplied immediate. Expands to a call of
   __builtin_ia32_vcvtps2ph256_mask with:
     A -> source, cast to __v8sf
     I -> immediate, cast to int
     _mm_setzero_si128() -> third operand, cast to __v8hi
     U -> mask, cast to __mmask8
   The result is cast to __m128i. */
#define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(I), \
      (__v8hi)_mm_setzero_si128(), \
      (__mmask8)(U)); })


#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_BOTH

Expand Down
31 changes: 27 additions & 4 deletions clang/test/CodeGen/avx512vl-builtins.c
Expand Up @@ -6726,24 +6726,47 @@ __m256 test_mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A) {
// Codegen test for _mm_mask_cvtps_ph; the directives below look for the
// 128-bit masked vcvtps2ph intrinsic in the emitted IR.
// NOTE(review): two return statements follow -- a four-argument call and a
// three-argument call. This looks like the before/after pair of a diff hunk
// whose markers were lost; confirm which single line belongs in the file.
__m128i test_mm_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m128 __A) {
// CHECK-LABEL: @test_mm_mask_cvtps_ph
// CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128
return _mm_mask_cvtps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION);
return _mm_mask_cvtps_ph(__W, __U, __A);
}

// Codegen test for _mm_maskz_cvtps_ph; the directives below look for the
// 128-bit masked vcvtps2ph intrinsic in the emitted IR.
// NOTE(review): two return statements follow -- a three-argument call (with
// an immediate) and a two-argument call. This looks like the before/after
// pair of a diff hunk whose markers were lost; confirm which single line
// belongs in the file.
__m128i test_mm_maskz_cvtps_ph(__mmask8 __U, __m128 __A) {
// CHECK-LABEL: @test_mm_maskz_cvtps_ph
// CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128
return _mm_maskz_cvtps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION);
return _mm_maskz_cvtps_ph(__U, __A);
}

// Codegen test for _mm256_mask_cvtps_ph; the directives below look for the
// 256-bit masked vcvtps2ph intrinsic in the emitted IR.
// NOTE(review): two return statements follow -- a four-argument call and a
// three-argument call. This looks like the before/after pair of a diff hunk
// whose markers were lost; confirm which single line belongs in the file.
__m128i test_mm256_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m256 __A) {
// CHECK-LABEL: @test_mm256_mask_cvtps_ph
// CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256
return _mm256_mask_cvtps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION);
return _mm256_mask_cvtps_ph(__W, __U, __A);
}

// Codegen test for _mm256_maskz_cvtps_ph; the directives below look for the
// 256-bit masked vcvtps2ph intrinsic in the emitted IR.
// NOTE(review): two return statements follow -- a three-argument call (with
// an immediate) and a two-argument call. This looks like the before/after
// pair of a diff hunk whose markers were lost; confirm which single line
// belongs in the file.
__m128i test_mm256_maskz_cvtps_ph(__mmask8 __U, __m256 __A) {
// CHECK-LABEL: @test_mm256_maskz_cvtps_ph
// CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256
return _mm256_maskz_cvtps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION);
return _mm256_maskz_cvtps_ph(__U, __A);
}

// Drives the _mm_mask_cvt_roundps_ph macro with _MM_FROUND_CUR_DIRECTION as
// its immediate; the directives below look for the 128-bit masked
// vcvtps2ph intrinsic in the emitted IR.
__m128i test_mm_mask_cvt_roundps_ph(__m128i __W, __mmask8 __U, __m128 __A) {
  // CHECK-LABEL: @test_mm_mask_cvt_roundps_ph
  // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128
  __m128i result =
      _mm_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION);
  return result;
}

// Drives the _mm_maskz_cvt_roundps_ph macro with _MM_FROUND_CUR_DIRECTION as
// its immediate; the directives below look for the 128-bit masked
// vcvtps2ph intrinsic in the emitted IR.
__m128i test_mm_maskz_cvt_roundps_ph(__mmask8 __U, __m128 __A) {
  // CHECK-LABEL: @test_mm_maskz_cvt_roundps_ph
  // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128
  __m128i result =
      _mm_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION);
  return result;
}

// Drives the _mm256_mask_cvt_roundps_ph macro with _MM_FROUND_CUR_DIRECTION
// as its immediate; the directives below look for the 256-bit masked
// vcvtps2ph intrinsic in the emitted IR.
__m128i test_mm256_mask_cvt_roundps_ph(__m128i __W, __mmask8 __U, __m256 __A) {
  // CHECK-LABEL: @test_mm256_mask_cvt_roundps_ph
  // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256
  __m128i result =
      _mm256_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION);
  return result;
}

// Drives the _mm256_maskz_cvt_roundps_ph macro with _MM_FROUND_CUR_DIRECTION
// as its immediate; the directives below look for the 256-bit masked
// vcvtps2ph intrinsic in the emitted IR.
__m128i test_mm256_maskz_cvt_roundps_ph(__mmask8 __U, __m256 __A) {
  // CHECK-LABEL: @test_mm256_maskz_cvt_roundps_ph
  // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256
  __m128i result =
      _mm256_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION);
  return result;
}

0 comments on commit 9e7d0a9

Please sign in to comment.