Skip to content

Commit

Permalink
[CLANG][avx512][BUILTIN] Adding fixupimm{pd|ps|sd|ss}
Browse files Browse the repository at this point in the history
getexp{sd|ss} getmant{sd|ss} kunpck{di|si} loada{pd|ps} loaddqu{di|hi|qi|si} max{sd|ss} min{sd|ss} kmov16 builtins to clang


Differential Revision: http://reviews.llvm.org/D18215

llvm-svn: 264574
  • Loading branch information
michaelz-eng committed Mar 28, 2016
1 parent 5c83a09 commit def7875
Show file tree
Hide file tree
Showing 9 changed files with 1,234 additions and 0 deletions.
45 changes: 45 additions & 0 deletions clang/include/clang/Basic/BuiltinsX86.def
Original file line number Diff line number Diff line change
Expand Up @@ -1749,6 +1749,51 @@ TARGET_BUILTIN(__builtin_ia32_vpermt2varqi256_mask, "V32cV32cV32cV32cUi","","avx
// Tail of the vpermt2var/vcomi entries, followed by the kunpck, 512-bit masked
// unaligned load, fixupimm, getexp, getmant, kmov16 and scalar max/min
// (rounding) builtins.
TARGET_BUILTIN(__builtin_ia32_vpermt2varqi256_maskz, "V32cV32cV32cV32cUi","","avx512vbmi,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcomisd, "iV2dV2dIiIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_vcomiss, "iV4fV4fIiIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_kunpckdi, "ULLiULLiULLi","","avx512bw")
TARGET_BUILTIN(__builtin_ia32_kunpcksi, "UiUiUi","","avx512bw")
// The 512-bit word/byte masked loads are wrapped by avx512bwintrin.h; they are
// gated on AVX512BW (not plain AVX512F) so they cannot be used from a function
// that only enables avx512f.
TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32s*V32sUi","","avx512bw")
TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64c*V64cULLi","","avx512bw")
TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_mask, "V8dV8dV8dV8LLiIiUcIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_maskz, "V8dV8dV8dV8LLiIiUcIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_fixupimmps512_mask, "V16fV16fV16fV16iIiUsIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_fixupimmps512_maskz, "V16fV16fV16fV16iIiUsIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_fixupimmsd_mask, "V2dV2dV2dV2LLiIiUcIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_fixupimmsd_maskz, "V2dV2dV2dV2LLiIiUcIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_fixupimmss_mask, "V4fV4fV4fV4iIiUcIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_fixupimmss_maskz, "V4fV4fV4fV4iIiUcIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_getexpsd128_round_mask, "V2dV2dV2dV2dUcIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_getexpss128_round_mask, "V4fV4fV4fV4fUcIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_getmantsd_round_mask, "V2dV2dV2dIiV2dUcIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_getmantss_round_mask, "V4fV4fV4fIiV4fUcIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_kmov16, "UsUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_maxsd_round, "V2dV2dV2dIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_maxss_round, "V4fV4fV4fIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_minsd_round, "V2dV2dV2dIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_minss_round, "V4fV4fV4fIi","","avx512f")
// 128/256-bit masked loads and fixupimm builtins.  Every entry in this group
// operates on a sub-512-bit vector and is therefore gated on avx512vl
// (together with avx512bw for the word/byte element loads).
TARGET_BUILTIN(__builtin_ia32_loaddquhi128_mask, "V8sV8s*V8sUc","","avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_loaddquhi256_mask, "V16sV16s*V16sUs","","avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_loaddquqi128_mask, "V16cV16c*V16cUs","","avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_loaddquqi256_mask, "V32cV32c*V32cUi","","avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_mask, "V2dV2dV2dV2LLiIiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_maskz, "V2dV2dV2dV2LLiIiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_mask, "V4dV4dV4dV4LLiIiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_maskz, "V4dV4dV4dV4LLiIiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_fixupimmps128_mask, "V4fV4fV4fV4iIiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_fixupimmps128_maskz, "V4fV4fV4fV4iIiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_fixupimmps256_mask, "V8fV8fV8fV8iIiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_fixupimmps256_maskz, "V8fV8fV8fV8iIiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_loadapd128_mask, "V2dV2d*V2dUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_loadapd256_mask, "V4dV4d*V4dUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_loadaps128_mask, "V4fV4f*V4fUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8f*V8fUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2LLiV2LLi*V2LLiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4LLiV4LLi*V4LLiUc","","avx512vl")
// The dword loads were gated on plain avx512f; they are 128/256-bit forms and
// need avx512vl exactly like the qword loads directly above.
TARGET_BUILTIN(__builtin_ia32_loaddqusi128_mask, "V4iV4i*V4iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_loaddqusi256_mask, "V8iV8i*V8iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_loadupd128_mask, "V2dV2d*V2dUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_loadupd256_mask, "V4dV4d*V4dUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_loadups128_mask, "V4fV4f*V4fUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_loadups256_mask, "V8fV8f*V8fUc","","avx512vl")

#undef BUILTIN
#undef TARGET_BUILTIN
47 changes: 47 additions & 0 deletions clang/lib/Headers/avx512bwintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1934,6 +1934,53 @@ _mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
__M);
}

/* Forwards the two __mmask64 operands, in order, to the kunpckdi builtin and
   returns the builtin's result as a __mmask64. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
{
  __mmask64 __unpacked = (__mmask64) __builtin_ia32_kunpckdi (__A, __B);

  return __unpacked;
}

/* Forwards the two __mmask32 operands, in order, to the kunpcksi builtin and
   returns the builtin's result as a __mmask32. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
{
  __mmask32 __unpacked = (__mmask32) __builtin_ia32_kunpcksi (__A, __B);

  return __unpacked;
}

/* Masked load through the loaddquhi512 builtin.
   __W is handed over as a __v32hi (presumably the source for elements the
   mask does not select -- confirm against the builtin), __U is the mask and
   __P is the address to read from.  The result is returned as a __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
{
  /* The cast drops the const qualifier of __P. */
  __v32hi *__src = (__v32hi *) __P;
  __v32hi __fallback = (__v32hi) __W;

  return (__m512i) __builtin_ia32_loaddquhi512_mask (__src, __fallback, __U);
}

/* Masked load through the loaddquhi512 builtin with an all-zero vector
   (from _mm512_setzero_hi) in the position where the _mask_ variant passes
   __W.  __U is the mask and __P the address to read from. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
{
  /* The cast drops the const qualifier of __P. */
  __v32hi *__src = (__v32hi *) __P;
  __v32hi __zeros = (__v32hi) _mm512_setzero_hi ();

  return (__m512i) __builtin_ia32_loaddquhi512_mask (__src, __zeros, __U);
}

/* Masked load through the loaddquqi512 builtin.
   __W is handed over as a __v64qi (presumably the source for elements the
   mask does not select -- confirm against the builtin), __U is the mask and
   __P is the address to read from.  The result is returned as a __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
{
  /* The cast drops the const qualifier of __P. */
  __v64qi *__src = (__v64qi *) __P;
  __v64qi __fallback = (__v64qi) __W;

  return (__m512i) __builtin_ia32_loaddquqi512_mask (__src, __fallback, __U);
}

/* Masked load through the loaddquqi512 builtin with an all-zero vector in the
   position where the _mask_ variant passes __W.  The zero vector is obtained
   from _mm512_setzero_hi and reinterpreted as __v64qi.  __U is the mask and
   __P the address to read from. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
{
  /* The cast drops the const qualifier of __P. */
  __v64qi *__src = (__v64qi *) __P;
  __v64qi __zeros = (__v64qi) _mm512_setzero_hi ();

  return (__m512i) __builtin_ia32_loaddquqi512_mask (__src, __zeros, __U);
}
#undef __DEFAULT_FN_ATTRS

#endif
271 changes: 271 additions & 0 deletions clang/lib/Headers/avx512fintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,21 @@ typedef unsigned short __mmask16;
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

/* Mantissa normalization interval selector.  The numeric values are spelled
   out because the getmant macros in this header OR one of their arguments
   into the low bits of an immediate (presumably this enum -- confirm). */
typedef enum
{
  _MM_MANT_NORM_1_2     = 0, /* interval [1, 2) */
  _MM_MANT_NORM_p5_2    = 1, /* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1    = 2, /* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3  /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

/* Mantissa sign-control selector.  The numeric values are spelled out because
   the getmant macros in this header shift one of their arguments left by two
   when building an immediate (presumably this enum -- confirm). */
typedef enum
{
  _MM_MANT_SIGN_src  = 0, /* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1, /* sign = 0 */
  _MM_MANT_SIGN_nan  = 2  /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;

/* Define the default attributes for the functions in this file: each one is
   marked always_inline and nodebug, and is compiled with the "avx512f" target
   feature enabled. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))

Expand Down Expand Up @@ -3695,6 +3710,262 @@ _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
(__mmask8) __U);
}

/* Expands to a call of __builtin_ia32_fixupimmpd512_mask with __A and __B
   cast to __v8df, __C cast to __v8di, then __imm, an all-bits-set __mmask8
   and __R as the final argument.  The statement expression yields the
   builtin's result.  __imm and __R are presumably required to be integer
   constant expressions -- confirm against the builtin's declaration. */
#define _mm512_fixupimm_round_pd( __A, __B, __C, __imm, __R) __extension__ ({ \
__builtin_ia32_fixupimmpd512_mask ((__v8df)( __A),\
(__v8df)( __B),\
(__v8di)( __C),\
(__imm),\
(__mmask8) -1, (__R));\
})

/* Same builtin call as _mm512_fixupimm_round_pd, except that the mask
   argument is __U (cast to __mmask8) instead of all bits set.  Note the
   macro's parameter order: __U comes second, after __A. */
#define _mm512_mask_fixupimm_round_pd( __A, __U, __B, __C, __imm, __R) __extension__ ({ \
__builtin_ia32_fixupimmpd512_mask ((__v8df)( __A),\
(__v8df)( __B),\
(__v8di)( __C),\
(__imm),\
(__mmask8)( __U), (__R));\
})

/* Expands to a call of __builtin_ia32_fixupimmpd512_mask with __A and __B
   cast to __v8df, __C cast to __v8di, then __imm, an all-bits-set __mmask8
   and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_fixupimm_pd( __A, __B, __C, __imm) __extension__ ({ \
__builtin_ia32_fixupimmpd512_mask ((__v8df)( __A),\
(__v8df)( __B),\
(__v8di)( __C),\
( __imm),\
(__mmask8) -1,\
_MM_FROUND_CUR_DIRECTION);\
})

/* Expands to a call of __builtin_ia32_fixupimmpd512_mask with __A and __B
   cast to __v8df, __C cast to __v8di, then __imm, the mask __U (cast to
   __mmask8) and _MM_FROUND_CUR_DIRECTION as the final argument.  __U is the
   macro's second parameter. */
#define _mm512_mask_fixupimm_pd( __A, __U, __B, __C, __imm) __extension__ ({ \
__builtin_ia32_fixupimmpd512_mask ((__v8df)( __A),\
(__v8df)( __B),\
(__v8di)( __C),\
( __imm),\
(__mmask8)( __U),\
_MM_FROUND_CUR_DIRECTION);\
})

/* Expands to a call of __builtin_ia32_fixupimmpd512_maskz (the _maskz
   builtin, presumably the zero-masking form) with __A and __B cast to
   __v8df, __C cast to __v8di, then __imm, the mask __U (cast to __mmask8)
   and __R as the final argument.  __U is the macro's first parameter. */
#define _mm512_maskz_fixupimm_round_pd( __U, __A, __B, __C, __imm, __R) __extension__ ({ \
__builtin_ia32_fixupimmpd512_maskz ((__v8df)( __A),\
(__v8df)( __B),\
(__v8di)( __C),\
(__imm),\
(__mmask8)( __U), (__R));\
})

/* Expands to a call of __builtin_ia32_fixupimmpd512_maskz with __A and __B
   cast to __v8df, __C cast to __v8di, then __imm, the mask __U (cast to
   __mmask8) and _MM_FROUND_CUR_DIRECTION as the final argument.  __U is the
   macro's first parameter. */
#define _mm512_maskz_fixupimm_pd( __U, __A, __B, __C, __imm) __extension__ ({ \
__builtin_ia32_fixupimmpd512_maskz ((__v8df)( __A),\
(__v8df)( __B),\
(__v8di)( __C),\
( __imm),\
(__mmask8)( __U),\
_MM_FROUND_CUR_DIRECTION);\
})

/* Expands to a call of __builtin_ia32_fixupimmps512_mask with __A and __B
   cast to __v16sf, __C cast to __v16si, then __imm, an all-bits-set
   __mmask16 and __R as the final argument.  The statement expression yields
   the builtin's result. */
#define _mm512_fixupimm_round_ps( __A, __B, __C, __imm, __R) __extension__ ({ \
__builtin_ia32_fixupimmps512_mask ((__v16sf)( __A),\
(__v16sf)( __B),\
(__v16si)( __C),\
(__imm),\
(__mmask16) -1, (__R));\
})

/* Same builtin call as _mm512_fixupimm_round_ps, except that the mask
   argument is __U (cast to __mmask16) instead of all bits set.  __U is the
   macro's second parameter. */
#define _mm512_mask_fixupimm_round_ps( __A, __U, __B, __C, __imm, __R) __extension__ ({ \
__builtin_ia32_fixupimmps512_mask ((__v16sf)( __A),\
(__v16sf)( __B),\
(__v16si)( __C),\
(__imm),\
(__mmask16)( __U), (__R));\
})

/* Expands to a call of __builtin_ia32_fixupimmps512_mask with __A and __B
   cast to __v16sf, __C cast to __v16si, then __imm, an all-bits-set
   __mmask16 and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_fixupimm_ps( __A, __B, __C, __imm) __extension__ ({ \
__builtin_ia32_fixupimmps512_mask ((__v16sf)( __A),\
(__v16sf)( __B),\
(__v16si)( __C),\
( __imm),\
(__mmask16) -1,\
_MM_FROUND_CUR_DIRECTION);\
})

/* Expands to a call of __builtin_ia32_fixupimmps512_mask with __A and __B
   cast to __v16sf, __C cast to __v16si, then __imm, the mask __U (cast to
   __mmask16) and _MM_FROUND_CUR_DIRECTION as the final argument.  __U is the
   macro's second parameter. */
#define _mm512_mask_fixupimm_ps( __A, __U, __B, __C, __imm) __extension__ ({ \
__builtin_ia32_fixupimmps512_mask ((__v16sf)( __A),\
(__v16sf)( __B),\
(__v16si)( __C),\
( __imm),\
(__mmask16)( __U),\
_MM_FROUND_CUR_DIRECTION);\
})

/* Expands to a call of __builtin_ia32_fixupimmps512_maskz (the _maskz
   builtin, presumably the zero-masking form) with __A and __B cast to
   __v16sf, __C cast to __v16si, then __imm, the mask __U (cast to __mmask16)
   and __R as the final argument.  __U is the macro's first parameter. */
#define _mm512_maskz_fixupimm_round_ps( __U, __A, __B, __C, __imm, __R) __extension__ ({ \
__builtin_ia32_fixupimmps512_maskz ((__v16sf)( __A),\
(__v16sf)( __B),\
(__v16si)( __C),\
(__imm),\
(__mmask16)( __U), (__R));\
})

/* Expands to a call of __builtin_ia32_fixupimmps512_maskz with __A and __B
   cast to __v16sf, __C cast to __v16si, then __imm, the mask __U (cast to
   __mmask16) and _MM_FROUND_CUR_DIRECTION as the final argument.  __U is the
   macro's first parameter. */
#define _mm512_maskz_fixupimm_ps( __U, __A, __B, __C, __imm) __extension__ ({ \
__builtin_ia32_fixupimmps512_maskz ((__v16sf)( __A),\
(__v16sf)( __B),\
(__v16si)( __C),\
( __imm),\
(__mmask16)( __U),\
_MM_FROUND_CUR_DIRECTION);\
})

/* Expands to a call of __builtin_ia32_fixupimmsd_mask with __A and __B cast
   to __v2df, __C cast to __v2di, then __imm, an all-bits-set __mmask8 and
   __R as the final argument.  The statement expression yields the builtin's
   result.  Every macro parameter, including __imm, is parenthesized at its
   point of use, matching the other fixupimm macros in this header. */
#define _mm_fixupimm_round_sd( __A, __B, __C, __imm, __R) __extension__ ({ \
  __builtin_ia32_fixupimmsd_mask ((__v2df)( __A),\
                                  (__v2df)( __B),\
                                  (__v2di)( __C), (__imm),\
                                  (__mmask8) -1, (__R));\
})

/* Expands to a call of __builtin_ia32_fixupimmsd_mask with __A and __B cast
   to __v2df, __C cast to __v2di, then __imm, the mask __U (cast to __mmask8)
   and __R as the final argument.  __U is the macro's second parameter.
   Every macro parameter, including __imm, is parenthesized at its point of
   use, matching the other fixupimm macros in this header. */
#define _mm_mask_fixupimm_round_sd( __A, __U, __B, __C, __imm, __R) __extension__ ({ \
  __builtin_ia32_fixupimmsd_mask ((__v2df)( __A),\
                                  (__v2df)( __B),\
                                  (__v2di)( __C), (__imm),\
                                  (__mmask8)( __U), (__R));\
})

/* Expands to a call of __builtin_ia32_fixupimmsd_mask with __A and __B cast
   to __v2df, __C cast to __v2di, then __imm, an all-bits-set __mmask8 and
   _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_fixupimm_sd( __A, __B, __C, __imm) __extension__ ({ \
__builtin_ia32_fixupimmsd_mask ((__v2df)( __A),\
(__v2df)( __B),\
(__v2di)( __C),( __imm),\
(__mmask8) -1,\
_MM_FROUND_CUR_DIRECTION);\
})

/* Expands to a call of __builtin_ia32_fixupimmsd_mask with __A and __B cast
   to __v2df, __C cast to __v2di, then __imm, the mask __U (cast to __mmask8)
   and _MM_FROUND_CUR_DIRECTION as the final argument.  __U is the macro's
   second parameter. */
#define _mm_mask_fixupimm_sd( __A, __U, __B, __C, __imm) __extension__ ({ \
__builtin_ia32_fixupimmsd_mask ((__v2df)( __A),\
(__v2df)( __B),\
(__v2di)( __C),( __imm),\
(__mmask8)( __U),\
_MM_FROUND_CUR_DIRECTION);\
})

/* Expands to a call of __builtin_ia32_fixupimmsd_maskz (the _maskz builtin,
   presumably the zero-masking form) with __A and __B cast to __v2df, __C
   cast to __v2di, then __imm, the mask __U (cast to __mmask8) and __R as the
   final argument.  __U is the macro's first parameter.  Every macro
   parameter, including __imm, is parenthesized at its point of use, matching
   the other fixupimm macros in this header. */
#define _mm_maskz_fixupimm_round_sd( __U, __A, __B, __C, __imm, __R) __extension__ ({ \
  __builtin_ia32_fixupimmsd_maskz ((__v2df)( __A),\
                                   (__v2df)( __B),\
                                   (__v2di)( __C),\
                                   (__imm),\
                                   (__mmask8)( __U), (__R));\
})

/* Expands to a call of __builtin_ia32_fixupimmsd_maskz with __A and __B cast
   to __v2df, __C cast to __v2di, then __imm, the mask __U (cast to __mmask8)
   and _MM_FROUND_CUR_DIRECTION as the final argument.  __U is the macro's
   first parameter. */
#define _mm_maskz_fixupimm_sd( __U, __A, __B, __C, __imm) __extension__ ({ \
__builtin_ia32_fixupimmsd_maskz ((__v2df)( __A),\
(__v2df)( __B),\
(__v2di)( __C),\
( __imm),\
(__mmask8)( __U),\
_MM_FROUND_CUR_DIRECTION);\
})

/* Expands to a call of __builtin_ia32_fixupimmss_mask with __A and __B cast
   to __v4sf, __C cast to __v4si, then __imm, an all-bits-set __mmask8 and
   __R as the final argument.  The statement expression yields the builtin's
   result. */
#define _mm_fixupimm_round_ss( __A, __B, __C, __imm, __R) __extension__ ({ \
__builtin_ia32_fixupimmss_mask ((__v4sf)( __A),\
(__v4sf)( __B),\
(__v4si)( __C), (__imm),\
(__mmask8) -1, (__R));\
})

/* Same builtin call as _mm_fixupimm_round_ss, except that the mask argument
   is __U (cast to __mmask8) instead of all bits set.  __U is the macro's
   second parameter. */
#define _mm_mask_fixupimm_round_ss( __A, __U, __B, __C, __imm, __R) __extension__ ({ \
__builtin_ia32_fixupimmss_mask ((__v4sf)( __A),\
(__v4sf)( __B),\
(__v4si)( __C), (__imm),\
(__mmask8)( __U), (__R));\
})

/* Expands to a call of __builtin_ia32_fixupimmss_mask with __A and __B cast
   to __v4sf, __C cast to __v4si, then __imm, an all-bits-set __mmask8 and
   _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_fixupimm_ss( __A, __B, __C, __imm) __extension__ ({ \
__builtin_ia32_fixupimmss_mask ((__v4sf)( __A),\
(__v4sf)( __B),\
(__v4si)( __C),( __imm),\
(__mmask8) -1,\
_MM_FROUND_CUR_DIRECTION);\
})

/* Expands to a call of __builtin_ia32_fixupimmss_mask with __A and __B cast
   to __v4sf, __C cast to __v4si, then __imm, the mask __U (cast to __mmask8)
   and _MM_FROUND_CUR_DIRECTION as the final argument.  __U is the macro's
   second parameter. */
#define _mm_mask_fixupimm_ss( __A, __U, __B, __C, __imm) __extension__ ({ \
__builtin_ia32_fixupimmss_mask ((__v4sf)( __A),\
(__v4sf)( __B),\
(__v4si)( __C),( __imm),\
(__mmask8)( __U),\
_MM_FROUND_CUR_DIRECTION);\
})

/* Expands to a call of __builtin_ia32_fixupimmss_maskz (the _maskz builtin,
   presumably the zero-masking form) with __A and __B cast to __v4sf, __C
   cast to __v4si, then __imm, the mask __U (cast to __mmask8) and __R as the
   final argument.  __U is the macro's first parameter. */
#define _mm_maskz_fixupimm_round_ss( __U, __A, __B, __C, __imm, __R) __extension__ ({ \
__builtin_ia32_fixupimmss_maskz ((__v4sf)( __A),\
(__v4sf)( __B),\
(__v4si)( __C), (__imm),\
(__mmask8)( __U), (__R));\
})

/* Expands to a call of __builtin_ia32_fixupimmss_maskz with __A and __B cast
   to __v4sf, __C cast to __v4si, then __imm, the mask __U (cast to __mmask8)
   and _MM_FROUND_CUR_DIRECTION as the final argument.  __U is the macro's
   first parameter. */
#define _mm_maskz_fixupimm_ss( __U, __A, __B, __C, __imm) __extension__ ({ \
__builtin_ia32_fixupimmss_maskz ((__v4sf)( __A),\
(__v4sf)( __B),\
(__v4si)( __C),( __imm),\
(__mmask8)( __U),\
_MM_FROUND_CUR_DIRECTION);\
})

/* Expands to a call of __builtin_ia32_getexpsd128_round_mask with __A and
   __B cast to __v2df, a zero vector from _mm_setzero_pd() as the third
   argument, an all-bits-set __mmask8 and __R as the final argument.  The
   statement expression yields the builtin's result. */
#define _mm_getexp_round_sd( __A, __B ,__R) __extension__ ({ \
__builtin_ia32_getexpsd128_round_mask ((__v2df)(__A),\
(__v2df)( __B), (__v2df) _mm_setzero_pd(), (__mmask8) -1,\
( __R));\
})


/* Calls the getexpsd128_round_mask builtin on __A and __B with a zero vector
   as the third argument, an all-bits-set mask and _MM_FROUND_CUR_DIRECTION,
   and returns the result as a __m128d. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_getexp_sd (__m128d __A, __m128d __B)
{
  __v2df __zeros = (__v2df) _mm_setzero_pd ();
  __mmask8 __every_bit = (__mmask8) -1;

  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
                                                         (__v2df) __B,
                                                         __zeros,
                                                         __every_bit,
                                                         _MM_FROUND_CUR_DIRECTION);
}

/* Expands to a call of __builtin_ia32_getexpss128_round_mask with __A and
   __B cast to __v4sf, a zero vector from _mm_setzero_ps() as the third
   argument, an all-bits-set __mmask8 and __R as the final argument.  The
   statement expression yields the builtin's result. */
#define _mm_getexp_round_ss( __A, __B, __R) __extension__ ({ \
__builtin_ia32_getexpss128_round_mask ((__v4sf)( __A),\
(__v4sf)( __B), (__v4sf) _mm_setzero_ps(), (__mmask8) -1,\
( __R));\
})

/* Calls the getexpss128_round_mask builtin on __A and __B with a zero vector
   as the third argument, an all-bits-set mask and _MM_FROUND_CUR_DIRECTION,
   and returns the result as a __m128. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_getexp_ss (__m128 __A, __m128 __B)
{
  __v4sf __zeros = (__v4sf) _mm_setzero_ps ();
  __mmask8 __every_bit = (__mmask8) -1;

  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
                                                        (__v4sf) __B,
                                                        __zeros,
                                                        __every_bit,
                                                        _MM_FROUND_CUR_DIRECTION);
}

/* Expands to a call of __builtin_ia32_getmantsd_round_mask with __A and __B
   cast to __v2df, the combined value ((__D) << 2) | (__C) as the third
   argument, a zero vector from _mm_setzero_pd(), an all-bits-set __mmask8
   and __R as the final argument.  __C and __D are presumably values of
   _MM_MANTISSA_NORM_ENUM and _MM_MANTISSA_SIGN_ENUM respectively -- confirm
   against callers. */
#define _mm_getmant_round_sd( __A, __B, __C, __D, __R) __extension__ ({ \
__builtin_ia32_getmantsd_round_mask ((__v2df)( __A),\
(__v2df)( __B),\
(( __D) << 2) |( __C), (__v2df) _mm_setzero_pd(), (__mmask8) -1,\
( __R));\
})

/* Expands to a call of __builtin_ia32_getmantsd_round_mask with __A and __B
   cast to __v2df, the combined value ((__D) << 2) | (__C) as the third
   argument, a zero vector from _mm_setzero_pd(), an all-bits-set __mmask8
   and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_getmant_sd( __A, __B, __C, __D) __extension__ ({ \
__builtin_ia32_getmantsd_round_mask ((__v2df)( __A),\
(__v2df)( __B),\
(( __D) << 2) |( __C), (__v2df) _mm_setzero_pd(), (__mmask8) -1,\
_MM_FROUND_CUR_DIRECTION);\
})

/* Expands to a call of __builtin_ia32_getmantss_round_mask with __A and __B
   cast to __v4sf, the combined value ((__D) << 2) | (__C) as the third
   argument, a zero vector from _mm_setzero_ps(), an all-bits-set __mmask8
   and __R as the final argument. */
#define _mm_getmant_round_ss( __A, __B, __C, __D, __R) __extension__ ({ \
__builtin_ia32_getmantss_round_mask ((__v4sf)( __A),\
(__v4sf)( __B),\
((__D) << 2) |( __C), (__v4sf) _mm_setzero_ps(), (__mmask8) -1,\
( __R));\
})

/* Expands to a call of __builtin_ia32_getmantss_round_mask with __A and __B
   cast to __v4sf, the combined value ((__D) << 2) | (__C) as the third
   argument, a zero vector from _mm_setzero_ps(), an all-bits-set __mmask8
   and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_getmant_ss(__A, __B, __C, __D) __extension__ ({ \
__builtin_ia32_getmantss_round_mask ((__v4sf)( __A),\
(__v4sf)( __B),\
((__D) << 2) |( __C), (__v4sf) _mm_setzero_ps(), (__mmask8) -1,\
_MM_FROUND_CUR_DIRECTION);\
})


/* Returns the __mmask16 value __A unchanged. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kmov (__mmask16 __A)
{
  __mmask16 __copy = __A;

  return __copy;
}

/* Expands to a call of __builtin_ia32_vcomisd with __A and __B cast to
   __v2df, followed by __P and __R.  The statement expression yields the
   builtin's result.  __P is presumably a comparison predicate and __R a
   rounding/exception control -- confirm against the builtin. */
#define _mm_comi_round_sd(__A, __B, __P, __R) __extension__ ({\
__builtin_ia32_vcomisd ((__v2df) (__A), (__v2df) (__B), ( __P), ( __R));\
})
Expand Down

0 comments on commit def7875

Please sign in to comment.