Skip to content

Commit

Permalink
[Clang][AVX512][BuiltIn] Adding avx512 ( vperm{i|t}2var, vpermil{var}…
Browse files Browse the repository at this point in the history
…{ps|pd}{256|512} ) builtin to clang.

Differential Revision: http://reviews.llvm.org/D18933

llvm-svn: 265915
  • Loading branch information
michaelz-eng committed Apr 11, 2016
1 parent 05333fc commit d8d2f62
Show file tree
Hide file tree
Showing 5 changed files with 552 additions and 0 deletions.
20 changes: 20 additions & 0 deletions clang/include/clang/Basic/BuiltinsX86.def
Original file line number Diff line number Diff line change
Expand Up @@ -1848,6 +1848,26 @@ TARGET_BUILTIN(__builtin_ia32_vcvttss2si32, "iV4fIi","","avx512f")
// vcvttss2* builtins: each takes one V4f operand plus a constant-integer (Ii)
// operand and is gated on the avx512f feature.
TARGET_BUILTIN(__builtin_ia32_vcvttss2si64, "LLiV4fIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvttss2usi32, "UiV4fIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvttss2usi64, "ULLiV4fIi","","avx512f")
// 512-bit vpermi2var*_mask builtins (avx512f). The second parameter is always
// an integer vector (V16i / V8LLi) even for the pd/ps forms; the last
// parameter is the mask (Us for 16 elements, Uc for 8 elements).
TARGET_BUILTIN(__builtin_ia32_vpermi2vard512_mask, "V16iV16iV16iV16iUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_vpermi2varpd512_mask, "V8dV8dV8LLiV8dUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_vpermi2varps512_mask, "V16fV16fV16iV16fUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_vpermi2varq512_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512f")
// 512-bit vpermil{pd,ps}: the *_mask forms take a constant-integer (Ii)
// selector, the *var* forms take an integer-vector selector. Both are followed
// by a vector operand of the result type and the mask.
TARGET_BUILTIN(__builtin_ia32_vpermilpd512_mask, "V8dV8dIiV8dUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_vpermilps512_mask, "V16fV16fIiV16fUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_vpermilvarpd512_mask, "V8dV8dV8LLiV8dUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_vpermilvarps512_mask, "V16fV16fV16iV16fUs","","avx512f")
// 512-bit vpermt2var*_maskz builtins (avx512f). Unlike the vpermi2var forms
// above, the integer vector is the FIRST parameter for the pd/ps variants.
TARGET_BUILTIN(__builtin_ia32_vpermt2vard512_maskz, "V16iV16iV16iV16iUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_vpermt2varpd512_maskz, "V8dV8LLiV8dV8dUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_vpermt2varps512_maskz, "V16fV16iV16fV16fUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_vpermt2varq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc","","avx512f")
// 128-/256-bit vpermil{pd,ps} builtins, gated on avx512vl. Every width uses a
// Uc mask parameter here, including the 8-element ps256 forms.
TARGET_BUILTIN(__builtin_ia32_vpermilpd_mask, "V2dV2dIiV2dUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpermilpd256_mask, "V4dV4dIiV4dUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpermilps_mask, "V4fV4fIiV4fUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpermilps256_mask, "V8fV8fIiV8fUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpermilvarpd_mask, "V2dV2dV2LLiV2dUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpermilvarpd256_mask, "V4dV4dV4LLiV4dUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpermilvarps_mask, "V4fV4fV4iV4fUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpermilvarps256_mask, "V8fV8fV8iV8fUc","","avx512vl")

#undef BUILTIN
#undef TARGET_BUILTIN
185 changes: 185 additions & 0 deletions clang/lib/Headers/avx512fintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -3985,6 +3985,16 @@ _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
/* Expands to a call of __builtin_ia32_vcvtsd2si64 with __A viewed as __v2df
 * and __R forwarded unchanged; the call's value is the value of the
 * statement expression.
 * NOTE(review): presumably a macro because __R must be a compile-time
 * constant -- confirm against the builtin's declaration. */
#define _mm_cvt_roundsd_si64(__A, __R) __extension__ ({ \
  __builtin_ia32_vcvtsd2si64((__v2df)(__A), (__R)); \
})
/* Wrapper around __builtin_ia32_vpermi2vard512_mask.
 *
 * __A, __B  operands, reinterpreted as __v16si (1st and 3rd builtin args).
 * __I       index operand, reinterpreted as __v16si (2nd builtin arg).
 * __U       mask, forwarded as __mmask16.
 *
 * Returns the builtin's result reinterpreted as __m512i.
 * NOTE(review): the per-element selection/masking rules are those of the
 * builtin (presumably VPERMI2D) -- confirm against the Intel reference. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
                                __m512i __B)
{
  __v16si __idx = (__v16si)__I;

  return (__m512i)__builtin_ia32_vpermi2vard512_mask((__v16si)__A, __idx,
                                                     (__v16si)__B,
                                                     (__mmask16)__U);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
Expand Down Expand Up @@ -4233,6 +4243,181 @@ _mm_cvttss_u64 (__m128 __A)
_MM_FROUND_CUR_DIRECTION);
}

/* Wrapper around __builtin_ia32_vpermi2varpd512_mask.
 *
 * __A, __B  operands, reinterpreted as __v8df (1st and 3rd builtin args).
 * __I       index operand, reinterpreted as __v8di (2nd builtin arg).
 * __U       mask, forwarded as __mmask8.
 *
 * Returns the builtin's result reinterpreted as __m512d.
 * NOTE(review): the per-element selection/masking rules are those of the
 * builtin (presumably VPERMI2PD) -- confirm against the Intel reference. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
                             __m512d __B)
{
  __v8di __idx = (__v8di)__I;

  return (__m512d)__builtin_ia32_vpermi2varpd512_mask((__v8df)__A, __idx,
                                                      (__v8df)__B,
                                                      (__mmask8)__U);
}

/* Wrapper around __builtin_ia32_vpermi2varps512_mask.
 *
 * __A, __B  operands, reinterpreted as __v16sf (1st and 3rd builtin args).
 * __I       index operand, reinterpreted as __v16si (2nd builtin arg).
 * __U       mask, forwarded as __mmask16.
 *
 * Returns the builtin's result reinterpreted as __m512.
 * NOTE(review): the per-element selection/masking rules are those of the
 * builtin (presumably VPERMI2PS) -- confirm against the Intel reference. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U,
                             __m512 __B)
{
  __v16si __idx = (__v16si)__I;

  return (__m512)__builtin_ia32_vpermi2varps512_mask((__v16sf)__A, __idx,
                                                     (__v16sf)__B,
                                                     (__mmask16)__U);
}

/* Wrapper around __builtin_ia32_vpermi2varq512_mask.
 *
 * __A, __B  operands, reinterpreted as __v8di (1st and 3rd builtin args).
 * __I       index operand, reinterpreted as __v8di (2nd builtin arg).
 * __U       mask, forwarded as __mmask8.
 *
 * Returns the builtin's result reinterpreted as __m512i.
 * NOTE(review): the per-element selection/masking rules are those of the
 * builtin (presumably VPERMI2Q) -- confirm against the Intel reference. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
                                __m512i __B)
{
  __v8di __idx = (__v8di)__I;

  return (__m512i)__builtin_ia32_vpermi2varq512_mask((__v8di)__A, __idx,
                                                     (__v8di)__B,
                                                     (__mmask8)__U);
}

/* Unmasked form: calls __builtin_ia32_vpermilpd512_mask on __X (viewed as
 * __v8df) with selector __C, passing _mm512_undefined_pd() as the third
 * argument and (__mmask8)-1 as the mask.
 * NOTE(review): presumably a macro because the selector must be a
 * compile-time constant; element-selection semantics come from the builtin
 * (presumably VPERMILPD) -- confirm against the Intel reference. */
#define _mm512_permute_pd(__X, __C) __extension__ ({ \
  __builtin_ia32_vpermilpd512_mask((__v8df)(__X), (__C), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   (__mmask8)-1); \
})

/* Masked form: calls __builtin_ia32_vpermilpd512_mask on __X (viewed as
 * __v8df) with selector __C, passing __W (viewed as __v8df) as the third
 * argument and __U as the __mmask8 mask.
 * NOTE(review): presumably __W supplies the elements whose mask bit is
 * clear -- confirm against the Intel reference. */
#define _mm512_mask_permute_pd(__W, __U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilpd512_mask((__v8df)(__X), (__C), \
                                   (__v8df)(__W), \
                                   (__mmask8)(__U)); \
})

/* Zero-masked form: calls __builtin_ia32_vpermilpd512_mask on __X (viewed as
 * __v8df) with selector __C, passing _mm512_setzero_pd() as the third
 * argument and __U as the __mmask8 mask.
 * NOTE(review): presumably elements whose mask bit is clear come out as
 * zero -- confirm against the Intel reference. */
#define _mm512_maskz_permute_pd(__U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilpd512_mask((__v8df)(__X), (__C), \
                                   (__v8df)_mm512_setzero_pd(), \
                                   (__mmask8)(__U)); \
})

/* Unmasked form: calls __builtin_ia32_vpermilps512_mask on __X (viewed as
 * __v16sf) with selector __C, passing _mm512_undefined_ps() as the third
 * argument and (__mmask16)-1 as the mask.
 * NOTE(review): presumably a macro because the selector must be a
 * compile-time constant; element-selection semantics come from the builtin
 * (presumably VPERMILPS) -- confirm against the Intel reference. */
#define _mm512_permute_ps(__X, __C) __extension__ ({ \
  __builtin_ia32_vpermilps512_mask((__v16sf)(__X), (__C), \
                                   (__v16sf)_mm512_undefined_ps(), \
                                   (__mmask16)-1); \
})

/* Masked form: calls __builtin_ia32_vpermilps512_mask on __X (viewed as
 * __v16sf) with selector __C, passing __W (viewed as __v16sf) as the third
 * argument and __U as the __mmask16 mask.
 * NOTE(review): presumably __W supplies the elements whose mask bit is
 * clear -- confirm against the Intel reference. */
#define _mm512_mask_permute_ps(__W, __U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilps512_mask((__v16sf)(__X), (__C), \
                                   (__v16sf)(__W), \
                                   (__mmask16)(__U)); \
})

/* Zero-masked form: calls __builtin_ia32_vpermilps512_mask on __X (viewed as
 * __v16sf) with selector __C, passing _mm512_setzero_ps() as the third
 * argument and __U as the __mmask16 mask.
 * NOTE(review): presumably elements whose mask bit is clear come out as
 * zero -- confirm against the Intel reference. */
#define _mm512_maskz_permute_ps(__U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilps512_mask((__v16sf)(__X), (__C), \
                                   (__v16sf)_mm512_setzero_ps(), \
                                   (__mmask16)(__U)); \
})

/* Unmasked wrapper around __builtin_ia32_vpermilvarpd512_mask.
 *
 * __A  data operand, reinterpreted as __v8df.
 * __C  control operand, reinterpreted as __v8di.
 *
 * The third builtin argument is _mm512_undefined_pd() and the mask is
 * (__mmask8)-1. Returns the builtin's result reinterpreted as __m512d.
 * NOTE(review): element-selection semantics are the builtin's (presumably
 * VPERMILPD with a vector control) -- confirm against the Intel reference. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_permutevar_pd(__m512d __A, __m512i __C)
{
  __v8df __src = (__v8df)_mm512_undefined_pd();

  return (__m512d)__builtin_ia32_vpermilvarpd512_mask((__v8df)__A,
                                                      (__v8di)__C, __src,
                                                      (__mmask8)-1);
}

/* Masked wrapper around __builtin_ia32_vpermilvarpd512_mask.
 *
 * __W  third builtin argument, reinterpreted as __v8df.
 * __U  mask, forwarded as __mmask8.
 * __A  data operand, reinterpreted as __v8df.
 * __C  control operand, reinterpreted as __v8di.
 *
 * Returns the builtin's result reinterpreted as __m512d.
 * NOTE(review): presumably __W supplies the elements whose mask bit is
 * clear -- confirm against the Intel reference. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
{
  __v8di __ctrl = (__v8di)__C;

  return (__m512d)__builtin_ia32_vpermilvarpd512_mask((__v8df)__A, __ctrl,
                                                      (__v8df)__W,
                                                      (__mmask8)__U);
}

/* Zero-masked wrapper around __builtin_ia32_vpermilvarpd512_mask.
 *
 * __U  mask, forwarded as __mmask8.
 * __A  data operand, reinterpreted as __v8df.
 * __C  control operand, reinterpreted as __v8di.
 *
 * The third builtin argument is _mm512_setzero_pd(). Returns the builtin's
 * result reinterpreted as __m512d.
 * NOTE(review): presumably elements whose mask bit is clear come out as
 * zero -- confirm against the Intel reference. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
{
  __v8df __zero = (__v8df)_mm512_setzero_pd();

  return (__m512d)__builtin_ia32_vpermilvarpd512_mask((__v8df)__A,
                                                      (__v8di)__C, __zero,
                                                      (__mmask8)__U);
}

/* Unmasked wrapper around __builtin_ia32_vpermilvarps512_mask.
 *
 * __A  data operand, reinterpreted as __v16sf.
 * __C  control operand, reinterpreted as __v16si.
 *
 * The third builtin argument is _mm512_undefined_ps() and the mask is
 * (__mmask16)-1. Returns the builtin's result reinterpreted as __m512.
 * NOTE(review): element-selection semantics are the builtin's (presumably
 * VPERMILPS with a vector control) -- confirm against the Intel reference. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_permutevar_ps(__m512 __A, __m512i __C)
{
  __v16sf __src = (__v16sf)_mm512_undefined_ps();

  return (__m512)__builtin_ia32_vpermilvarps512_mask((__v16sf)__A,
                                                     (__v16si)__C, __src,
                                                     (__mmask16)-1);
}

/* Masked wrapper around __builtin_ia32_vpermilvarps512_mask.
 *
 * __W  third builtin argument, reinterpreted as __v16sf.
 * __U  mask, forwarded as __mmask16.
 * __A  data operand, reinterpreted as __v16sf.
 * __C  control operand, reinterpreted as __v16si.
 *
 * Returns the builtin's result reinterpreted as __m512.
 * NOTE(review): presumably __W supplies the elements whose mask bit is
 * clear -- confirm against the Intel reference. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
{
  __v16si __ctrl = (__v16si)__C;

  return (__m512)__builtin_ia32_vpermilvarps512_mask((__v16sf)__A, __ctrl,
                                                     (__v16sf)__W,
                                                     (__mmask16)__U);
}

/* Zero-masked wrapper around __builtin_ia32_vpermilvarps512_mask.
 *
 * __U  mask, forwarded as __mmask16.
 * __A  data operand, reinterpreted as __v16sf.
 * __C  control operand, reinterpreted as __v16si.
 *
 * The third builtin argument is _mm512_setzero_ps(). Returns the builtin's
 * result reinterpreted as __m512.
 * NOTE(review): presumably elements whose mask bit is clear come out as
 * zero -- confirm against the Intel reference. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
{
  __v16sf __zero = (__v16sf)_mm512_setzero_ps();

  return (__m512)__builtin_ia32_vpermilvarps512_mask((__v16sf)__A,
                                                     (__v16si)__C, __zero,
                                                     (__mmask16)__U);
}

/* Wrapper around __builtin_ia32_vpermt2vard512_maskz.
 *
 * __U       mask, forwarded as __mmask16.
 * __A, __B  operands, reinterpreted as __v16si (2nd and 3rd builtin args).
 * __I       index operand, reinterpreted as __v16si and passed FIRST to the
 *           builtin, ahead of __A and __B.
 *
 * Returns the builtin's result reinterpreted as __m512i.
 * NOTE(review): selection and zero-masking rules are the builtin's
 * (presumably VPERMT2D) -- confirm against the Intel reference. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
                                __m512i __B)
{
  __v16si __idx = (__v16si)__I;

  return (__m512i)__builtin_ia32_vpermt2vard512_maskz(__idx, (__v16si)__A,
                                                      (__v16si)__B,
                                                      (__mmask16)__U);
}

/* Wrapper around __builtin_ia32_vpermt2varpd512_maskz.
 *
 * __U       mask, forwarded as __mmask8.
 * __A, __B  operands, reinterpreted as __v8df (2nd and 3rd builtin args).
 * __I       index operand, reinterpreted as __v8di and passed FIRST to the
 *           builtin, ahead of __A and __B.
 *
 * Returns the builtin's result reinterpreted as __m512d.
 * NOTE(review): selection and zero-masking rules are the builtin's
 * (presumably VPERMT2PD) -- confirm against the Intel reference. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
                             __m512d __B)
{
  __v8di __idx = (__v8di)__I;

  return (__m512d)__builtin_ia32_vpermt2varpd512_maskz(__idx, (__v8df)__A,
                                                       (__v8df)__B,
                                                       (__mmask8)__U);
}

/* Wrapper around __builtin_ia32_vpermt2varps512_maskz.
 *
 * __U       mask, forwarded as __mmask16.
 * __A, __B  operands, reinterpreted as __v16sf (2nd and 3rd builtin args).
 * __I       index operand, reinterpreted as __v16si and passed FIRST to the
 *           builtin, ahead of __A and __B.
 *
 * Returns the builtin's result reinterpreted as __m512.
 * NOTE(review): selection and zero-masking rules are the builtin's
 * (presumably VPERMT2PS) -- confirm against the Intel reference. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I,
                             __m512 __B)
{
  __v16si __idx = (__v16si)__I;

  return (__m512)__builtin_ia32_vpermt2varps512_maskz(__idx, (__v16sf)__A,
                                                      (__v16sf)__B,
                                                      (__mmask16)__U);
}

/* Wrapper around __builtin_ia32_vpermt2varq512_maskz.
 *
 * __U       mask, forwarded as __mmask8.
 * __A, __B  operands, reinterpreted as __v8di (2nd and 3rd builtin args).
 * __I       index operand, reinterpreted as __v8di and passed FIRST to the
 *           builtin, ahead of __A and __B.
 *
 * Returns the builtin's result reinterpreted as __m512i.
 * NOTE(review): selection and zero-masking rules are the builtin's
 * (presumably VPERMT2Q) -- confirm against the Intel reference. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
                                __m512i __B)
{
  __v8di __idx = (__v8di)__I;

  return (__m512i)__builtin_ia32_vpermt2varq512_maskz(__idx, (__v8di)__A,
                                                      (__v8di)__B,
                                                      (__mmask8)__U);
}

#undef __DEFAULT_FN_ATTRS

#endif // __AVX512FINTRIN_H
135 changes: 135 additions & 0 deletions clang/lib/Headers/avx512vlintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -6824,6 +6824,141 @@ _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
(__mmask8) __U);
}


/* Masked form: calls __builtin_ia32_vpermilpd_mask on __X (viewed as
 * __v2df) with selector __C, passing __W (viewed as __v2df) as the third
 * argument and __U as the __mmask8 mask.
 * NOTE(review): presumably a macro because the selector must be a
 * compile-time constant, and __W presumably supplies the elements whose
 * mask bit is clear -- confirm against the Intel reference. */
#define _mm_mask_permute_pd(__W, __U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilpd_mask((__v2df)(__X), (__C), \
                                (__v2df)(__W), \
                                (__mmask8)(__U)); \
})

/* Zero-masked form: calls __builtin_ia32_vpermilpd_mask on __X (viewed as
 * __v2df) with selector __C, passing _mm_setzero_pd() as the third argument
 * and __U as the __mmask8 mask.
 * NOTE(review): presumably elements whose mask bit is clear come out as
 * zero -- confirm against the Intel reference. */
#define _mm_maskz_permute_pd(__U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilpd_mask((__v2df)(__X), (__C), \
                                (__v2df)_mm_setzero_pd(), \
                                (__mmask8)(__U)); \
})

/* Masked form: calls __builtin_ia32_vpermilpd256_mask on __X (viewed as
 * __v4df) with selector __C, passing __W (viewed as __v4df) as the third
 * argument and __U as the __mmask8 mask.
 * NOTE(review): presumably __W supplies the elements whose mask bit is
 * clear -- confirm against the Intel reference. */
#define _mm256_mask_permute_pd(__W, __U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilpd256_mask((__v4df)(__X), (__C), \
                                   (__v4df)(__W), \
                                   (__mmask8)(__U)); \
})

/* Zero-masked form: calls __builtin_ia32_vpermilpd256_mask on __X (viewed
 * as __v4df) with selector __C, passing _mm256_setzero_pd() as the third
 * argument and __U as the __mmask8 mask.
 * NOTE(review): presumably elements whose mask bit is clear come out as
 * zero -- confirm against the Intel reference. */
#define _mm256_maskz_permute_pd(__U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilpd256_mask((__v4df)(__X), (__C), \
                                   (__v4df)_mm256_setzero_pd(), \
                                   (__mmask8)(__U)); \
})

/* Masked form: calls __builtin_ia32_vpermilps_mask on __X (viewed as
 * __v4sf) with selector __C, passing __W (viewed as __v4sf) as the third
 * argument and __U as the __mmask8 mask.
 * NOTE(review): presumably __W supplies the elements whose mask bit is
 * clear -- confirm against the Intel reference. */
#define _mm_mask_permute_ps(__W, __U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilps_mask((__v4sf)(__X), (__C), \
                                (__v4sf)(__W), \
                                (__mmask8)(__U)); \
})

/* Zero-masked form: calls __builtin_ia32_vpermilps_mask on __X (viewed as
 * __v4sf) with selector __C, passing _mm_setzero_ps() as the third argument
 * and __U as the __mmask8 mask.
 * NOTE(review): presumably elements whose mask bit is clear come out as
 * zero -- confirm against the Intel reference. */
#define _mm_maskz_permute_ps(__U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilps_mask((__v4sf)(__X), (__C), \
                                (__v4sf)_mm_setzero_ps(), \
                                (__mmask8)(__U)); \
})

/* Masked form: calls __builtin_ia32_vpermilps256_mask on __X (viewed as
 * __v8sf) with selector __C, passing __W (viewed as __v8sf) as the third
 * argument and __U as the __mmask8 mask.
 * NOTE(review): presumably __W supplies the elements whose mask bit is
 * clear -- confirm against the Intel reference. */
#define _mm256_mask_permute_ps(__W, __U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilps256_mask((__v8sf)(__X), (__C), \
                                   (__v8sf)(__W), \
                                   (__mmask8)(__U)); \
})

/* Zero-masked form: calls __builtin_ia32_vpermilps256_mask on __X (viewed
 * as __v8sf) with selector __C, passing _mm256_setzero_ps() as the third
 * argument and __U as the __mmask8 mask.
 * NOTE(review): presumably elements whose mask bit is clear come out as
 * zero -- confirm against the Intel reference. */
#define _mm256_maskz_permute_ps(__U, __X, __C) __extension__ ({ \
  __builtin_ia32_vpermilps256_mask((__v8sf)(__X), (__C), \
                                   (__v8sf)_mm256_setzero_ps(), \
                                   (__mmask8)(__U)); \
})

/* Masked wrapper around __builtin_ia32_vpermilvarpd_mask.
 *
 * __W  third builtin argument, reinterpreted as __v2df.
 * __U  mask, forwarded as __mmask8.
 * __A  data operand, reinterpreted as __v2df.
 * __C  control operand, reinterpreted as __v2di.
 *
 * Returns the builtin's result reinterpreted as __m128d.
 * NOTE(review): presumably __W supplies the elements whose mask bit is
 * clear -- confirm against the Intel reference. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
{
  __v2di __ctrl = (__v2di)__C;

  return (__m128d)__builtin_ia32_vpermilvarpd_mask((__v2df)__A, __ctrl,
                                                   (__v2df)__W,
                                                   (__mmask8)__U);
}

/* Zero-masked wrapper around __builtin_ia32_vpermilvarpd_mask.
 *
 * __U  mask, forwarded as __mmask8.
 * __A  data operand, reinterpreted as __v2df.
 * __C  control operand, reinterpreted as __v2di.
 *
 * The third builtin argument is _mm_setzero_pd(). Returns the builtin's
 * result reinterpreted as __m128d.
 * NOTE(review): presumably elements whose mask bit is clear come out as
 * zero -- confirm against the Intel reference. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
{
  __v2df __zero = (__v2df)_mm_setzero_pd();

  return (__m128d)__builtin_ia32_vpermilvarpd_mask((__v2df)__A, (__v2di)__C,
                                                   __zero, (__mmask8)__U);
}

/* Masked wrapper around __builtin_ia32_vpermilvarpd256_mask.
 *
 * __W  third builtin argument, reinterpreted as __v4df.
 * __U  mask, forwarded as __mmask8.
 * __A  data operand, reinterpreted as __v4df.
 * __C  control operand, reinterpreted as __v4di.
 *
 * Returns the builtin's result reinterpreted as __m256d.
 * NOTE(review): presumably __W supplies the elements whose mask bit is
 * clear -- confirm against the Intel reference. */
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
{
  __v4di __ctrl = (__v4di)__C;

  return (__m256d)__builtin_ia32_vpermilvarpd256_mask((__v4df)__A, __ctrl,
                                                      (__v4df)__W,
                                                      (__mmask8)__U);
}

/* Zero-masked wrapper around __builtin_ia32_vpermilvarpd256_mask.
 *
 * __U  mask, forwarded as __mmask8.
 * __A  data operand, reinterpreted as __v4df.
 * __C  control operand, reinterpreted as __v4di.
 *
 * The third builtin argument is _mm256_setzero_pd(). Returns the builtin's
 * result reinterpreted as __m256d.
 * NOTE(review): presumably elements whose mask bit is clear come out as
 * zero -- confirm against the Intel reference. */
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
{
  __v4df __zero = (__v4df)_mm256_setzero_pd();

  return (__m256d)__builtin_ia32_vpermilvarpd256_mask((__v4df)__A,
                                                      (__v4di)__C, __zero,
                                                      (__mmask8)__U);
}

/* Masked wrapper around __builtin_ia32_vpermilvarps_mask.
 *
 * __W  third builtin argument, reinterpreted as __v4sf.
 * __U  mask, forwarded as __mmask8.
 * __A  data operand, reinterpreted as __v4sf.
 * __C  control operand, reinterpreted as __v4si.
 *
 * Returns the builtin's result reinterpreted as __m128.
 * NOTE(review): presumably __W supplies the elements whose mask bit is
 * clear -- confirm against the Intel reference. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
{
  __v4si __ctrl = (__v4si)__C;

  return (__m128)__builtin_ia32_vpermilvarps_mask((__v4sf)__A, __ctrl,
                                                  (__v4sf)__W,
                                                  (__mmask8)__U);
}

/* Zero-masked wrapper around __builtin_ia32_vpermilvarps_mask.
 *
 * __U  mask, forwarded as __mmask8.
 * __A  data operand, reinterpreted as __v4sf.
 * __C  control operand, reinterpreted as __v4si.
 *
 * The third builtin argument is _mm_setzero_ps(). Returns the builtin's
 * result reinterpreted as __m128.
 * NOTE(review): presumably elements whose mask bit is clear come out as
 * zero -- confirm against the Intel reference. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
{
  __v4sf __zero = (__v4sf)_mm_setzero_ps();

  return (__m128)__builtin_ia32_vpermilvarps_mask((__v4sf)__A, (__v4si)__C,
                                                  __zero, (__mmask8)__U);
}

/* Masked wrapper around __builtin_ia32_vpermilvarps256_mask.
 *
 * __W  third builtin argument, reinterpreted as __v8sf.
 * __U  mask, forwarded as __mmask8.
 * __A  data operand, reinterpreted as __v8sf.
 * __C  control operand, reinterpreted as __v8si.
 *
 * Returns the builtin's result reinterpreted as __m256.
 * NOTE(review): presumably __W supplies the elements whose mask bit is
 * clear -- confirm against the Intel reference. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
{
  __v8si __ctrl = (__v8si)__C;

  return (__m256)__builtin_ia32_vpermilvarps256_mask((__v8sf)__A, __ctrl,
                                                     (__v8sf)__W,
                                                     (__mmask8)__U);
}

/* Zero-masked wrapper around __builtin_ia32_vpermilvarps256_mask.
 *
 * __U  mask, forwarded as __mmask8.
 * __A  data operand, reinterpreted as __v8sf.
 * __C  control operand, reinterpreted as __v8si.
 *
 * The third builtin argument is _mm256_setzero_ps(). Returns the builtin's
 * result reinterpreted as __m256.
 * NOTE(review): presumably elements whose mask bit is clear come out as
 * zero -- confirm against the Intel reference. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
{
  __v8sf __zero = (__v8sf)_mm256_setzero_ps();

  return (__m256)__builtin_ia32_vpermilvarps256_mask((__v8sf)__A,
                                                     (__v8si)__C, __zero,
                                                     (__mmask8)__U);
}

#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_BOTH

Expand Down
Loading

0 comments on commit d8d2f62

Please sign in to comment.