-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[Clang] Add constexpr support for AVX512 permutexvar intrinsics #167802
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-clang
Author: NagaChaitanya Vellanki (chaitanyav)
Changes
Resolves: #167476
Patch is 55.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/167802.diff
14 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index cb08e2107f072..b261b681990e0 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -603,6 +603,11 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">;
}
+let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+ def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
+}
+
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
@@ -617,9 +622,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
- def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
- def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
}
@@ -3052,38 +3055,38 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
def permdi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def permvarhi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def permvardf512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">;
def permvardi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
def permvarsf512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">;
def permvarsi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">;
}
-let Features = "avx512vbmi", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def permvarqi512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
}
-let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def permvarqi128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
}
-let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def permvarqi256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def permvarhi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def permvarhi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def permvardf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
def permvardi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 6c7b2f502cc51..c72a3566681b1 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4414,6 +4414,50 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)};
}
});
+ case X86::BI__builtin_ia32_permvarsi256:
+ case X86::BI__builtin_ia32_permvarsf256:
+ case X86::BI__builtin_ia32_permvardf512:
+ case X86::BI__builtin_ia32_permvardi512:
+ case X86::BI__builtin_ia32_permvarhi128:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x7;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
+ case X86::BI__builtin_ia32_permvarqi128:
+ case X86::BI__builtin_ia32_permvarhi256:
+ case X86::BI__builtin_ia32_permvarsi512:
+ case X86::BI__builtin_ia32_permvarsf512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0xF;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
+ case X86::BI__builtin_ia32_permvardi256:
+ case X86::BI__builtin_ia32_permvardf256:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x3;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
+ case X86::BI__builtin_ia32_permvarqi256:
+ case X86::BI__builtin_ia32_permvarhi512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x1F;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
+ case X86::BI__builtin_ia32_permvarqi512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x3F;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
case X86::BI__builtin_ia32_vpermi2varq128:
case X86::BI__builtin_ia32_vpermi2varpd128:
return interp__builtin_ia32_shuffle_generic(
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 1bfea24b228e8..e9e448143477e 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13551,6 +13551,70 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return false;
return Success(R, E);
}
+ case X86::BI__builtin_ia32_permvarsi256:
+ case X86::BI__builtin_ia32_permvarsf256:
+ case X86::BI__builtin_ia32_permvardf512:
+ case X86::BI__builtin_ia32_permvardi512:
+ case X86::BI__builtin_ia32_permvarhi128: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x7;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_permvarqi128:
+ case X86::BI__builtin_ia32_permvarhi256:
+ case X86::BI__builtin_ia32_permvarsi512:
+ case X86::BI__builtin_ia32_permvarsf512: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0xF;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_permvardi256:
+ case X86::BI__builtin_ia32_permvardf256: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x3;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_permvarqi256:
+ case X86::BI__builtin_ia32_permvarhi512: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x1F;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_permvarqi512: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x3F;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
case X86::BI__builtin_ia32_vpermi2varq128:
case X86::BI__builtin_ia32_vpermi2varpd128: {
APValue R;
diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h
index 46ec12a63ef9c..3201307af4731 100644
--- a/clang/lib/Headers/avx10_2_512bf16intrin.h
+++ b/clang/lib/Headers/avx10_2_512bf16intrin.h
@@ -179,7 +179,7 @@ _mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) {
(__v32hi)__B);
}
-static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutexvar_pbh(__m512i __A, __m512bh __B) {
return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
}
diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h
index 8fb8cd7cd0865..9f5b726d7b789 100644
--- a/clang/lib/Headers/avx10_2bf16intrin.h
+++ b/clang/lib/Headers/avx10_2bf16intrin.h
@@ -307,12 +307,12 @@ _mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) {
(__v16hi)__B);
}
-static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutexvar_pbh(__m128i __A, __m128bh __B) {
return (__m128bh)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A);
}
-static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_permutexvar_pbh(__m256i __A, __m256bh __B) {
return (__m256bh)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A);
}
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 3cbaaece7b38e..3e3c13d8bd662 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -3214,9 +3214,8 @@ _mm_broadcastq_epi64(__m128i __X) {
/// A 256-bit vector of [8 x i32] containing indexes of values to use from
/// \a __a.
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);
}
@@ -3272,9 +3271,8 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
/// A 256-bit vector of [8 x i32] containing indexes of values to use from
/// \a __a.
/// \returns A 256-bit vector of [8 x float] containing the result.
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutevar8x32_ps(__m256 __a, __m256i __b) {
return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b);
}
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 4a02c96620335..3cfa32eb9e727 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1846,25 +1846,21 @@ _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
(__v32hi) _mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_epi16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
- __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_epi16(__mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_permutexvar_epi16(__A, __B),
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
- __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_epi16(__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_permutexvar_epi16(__A, __B),
(__v32hi)__W);
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 997e9608e112f..79c37173ac838 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -7959,93 +7959,82 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
(__v8di)_mm512_permutex_epi64((X), (C)), \
(__v8di)_mm512_setzero_si512()))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_pd(__m512i __X, __m512d __Y) {
return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X,
+ __m512d __Y) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_permutexvar_pd(__X, __Y),
(__v8df)__W);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_permutexvar_pd(__X, __Y),
(__v8df)_mm512_setzero_pd());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_epi64(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_permutexvar_epi64(__X, __Y),
(__v8di)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
- __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X,
+ __m512i __Y) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_permutexvar_epi64(__X, __Y),
(__v8di)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_ps(__m512i __X, __m512 __Y) {
return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_permutexvar_ps(__X, __Y),
(__v16sf)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_permutexvar_ps(__X, __Y),
(__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_epi32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
}
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_permutexvar_epi32(__X, __Y),
(__v16si)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
- __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X,
+ __m512i __Y) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_permutexvar_epi32(__...
[truncated]
|
|
@llvm/pr-subscribers-backend-x86
Author: NagaChaitanya Vellanki (chaitanyav)
Changes
Resolves: #167476
Patch is 55.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/167802.diff
14 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index cb08e2107f072..b261b681990e0 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -603,6 +603,11 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">;
}
+let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+ def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
+}
+
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
@@ -617,9 +622,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
- def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
- def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
}
@@ -3052,38 +3055,38 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
def permdi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def permvarhi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def permvardf512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">;
def permvardi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
def permvarsf512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">;
def permvarsi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">;
}
-let Features = "avx512vbmi", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def permvarqi512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
}
-let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def permvarqi128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
}
-let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def permvarqi256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def permvarhi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def permvarhi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def permvardf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
def permvardi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 6c7b2f502cc51..c72a3566681b1 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4414,6 +4414,50 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)};
}
});
+ case X86::BI__builtin_ia32_permvarsi256:
+ case X86::BI__builtin_ia32_permvarsf256:
+ case X86::BI__builtin_ia32_permvardf512:
+ case X86::BI__builtin_ia32_permvardi512:
+ case X86::BI__builtin_ia32_permvarhi128:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x7;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
+ case X86::BI__builtin_ia32_permvarqi128:
+ case X86::BI__builtin_ia32_permvarhi256:
+ case X86::BI__builtin_ia32_permvarsi512:
+ case X86::BI__builtin_ia32_permvarsf512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0xF;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
+ case X86::BI__builtin_ia32_permvardi256:
+ case X86::BI__builtin_ia32_permvardf256:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x3;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
+ case X86::BI__builtin_ia32_permvarqi256:
+ case X86::BI__builtin_ia32_permvarhi512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x1F;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
+ case X86::BI__builtin_ia32_permvarqi512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x3F;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ });
case X86::BI__builtin_ia32_vpermi2varq128:
case X86::BI__builtin_ia32_vpermi2varpd128:
return interp__builtin_ia32_shuffle_generic(
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 1bfea24b228e8..e9e448143477e 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13551,6 +13551,70 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return false;
return Success(R, E);
}
+ case X86::BI__builtin_ia32_permvarsi256:
+ case X86::BI__builtin_ia32_permvarsf256:
+ case X86::BI__builtin_ia32_permvardf512:
+ case X86::BI__builtin_ia32_permvardi512:
+ case X86::BI__builtin_ia32_permvarhi128: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x7;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_permvarqi128:
+ case X86::BI__builtin_ia32_permvarhi256:
+ case X86::BI__builtin_ia32_permvarsi512:
+ case X86::BI__builtin_ia32_permvarsf512: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0xF;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_permvardi256:
+ case X86::BI__builtin_ia32_permvardf256: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x3;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_permvarqi256:
+ case X86::BI__builtin_ia32_permvarhi512: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x1F;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_permvarqi512: {
+ APValue R;
+ if (!evalShuffleGeneric(Info, E, R,
+ [](unsigned DstIdx, unsigned ShuffleMask) {
+ int Offset = ShuffleMask & 0x3F;
+ unsigned SrcIdx = 0;
+ return std::pair<unsigned, int>{SrcIdx, Offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
case X86::BI__builtin_ia32_vpermi2varq128:
case X86::BI__builtin_ia32_vpermi2varpd128: {
APValue R;
diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h
index 46ec12a63ef9c..3201307af4731 100644
--- a/clang/lib/Headers/avx10_2_512bf16intrin.h
+++ b/clang/lib/Headers/avx10_2_512bf16intrin.h
@@ -179,7 +179,7 @@ _mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) {
(__v32hi)__B);
}
-static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutexvar_pbh(__m512i __A, __m512bh __B) {
return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
}
diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h
index 8fb8cd7cd0865..9f5b726d7b789 100644
--- a/clang/lib/Headers/avx10_2bf16intrin.h
+++ b/clang/lib/Headers/avx10_2bf16intrin.h
@@ -307,12 +307,12 @@ _mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) {
(__v16hi)__B);
}
-static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutexvar_pbh(__m128i __A, __m128bh __B) {
return (__m128bh)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A);
}
-static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_permutexvar_pbh(__m256i __A, __m256bh __B) {
return (__m256bh)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A);
}
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 3cbaaece7b38e..3e3c13d8bd662 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -3214,9 +3214,8 @@ _mm_broadcastq_epi64(__m128i __X) {
/// A 256-bit vector of [8 x i32] containing indexes of values to use from
/// \a __a.
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);
}
@@ -3272,9 +3271,8 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
/// A 256-bit vector of [8 x i32] containing indexes of values to use from
/// \a __a.
/// \returns A 256-bit vector of [8 x float] containing the result.
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutevar8x32_ps(__m256 __a, __m256i __b) {
return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b);
}
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 4a02c96620335..3cfa32eb9e727 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1846,25 +1846,21 @@ _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
(__v32hi) _mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_epi16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
- __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_epi16(__mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_permutexvar_epi16(__A, __B),
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
- __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_epi16(__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_permutexvar_epi16(__A, __B),
(__v32hi)__W);
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 997e9608e112f..79c37173ac838 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -7959,93 +7959,82 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
(__v8di)_mm512_permutex_epi64((X), (C)), \
(__v8di)_mm512_setzero_si512()))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_pd(__m512i __X, __m512d __Y) {
return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X,
+ __m512d __Y) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_permutexvar_pd(__X, __Y),
(__v8df)__W);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_permutexvar_pd(__X, __Y),
(__v8df)_mm512_setzero_pd());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_epi64(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_permutexvar_epi64(__X, __Y),
(__v8di)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
- __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X,
+ __m512i __Y) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_permutexvar_epi64(__X, __Y),
(__v8di)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_ps(__m512i __X, __m512 __Y) {
return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_permutexvar_ps(__X, __Y),
(__v16sf)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_permutexvar_ps(__X, __Y),
(__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_epi32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
}
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_permutexvar_epi32(__X, __Y),
(__v16si)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
- __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X,
+ __m512i __Y) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_permutexvar_epi32(__...
[truncated]
|
6f32ecc to
ceaae3c
Compare
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sorry - you missed the avx2 tests
-_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) {
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
test coverage?
… AVX512 permutexvar intrinsics to be used in constexpr
Resolves: llvm#167476
- Group permvarsi256/permvarsf256 with other AVX2 constexpr builtins
- Remove unnecessary SrcIdx variable and use zero directly in pair construction
ceaae3c to
3502a04
Compare
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
Resolves: #167476