-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow PSLL/PSRA/PSRL var intrinsics to be used in constexpr #169276
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang Author: NagaChaitanya Vellanki (chaitanyav) Changes: Resolves #169176. Patch is 88.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169276.diff 13 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 4aa3d51931980..253eb3cbd7ee9 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -214,17 +214,6 @@ let Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
def _mm_pause : X86LibBuiltin<"void()">;
}
-let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
- def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
- def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
- def psrld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
- def psrlq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
- def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
- def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
- def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def movmskpd : X86Builtin<"int(_Vector<2, double>)">;
def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">;
@@ -265,6 +254,15 @@ let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">;
def vec_ext_v8hi : X86Builtin<"short(_Vector<8, short>, _Constant int)">;
def vec_set_v8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, short, _Constant int)">;
+
+ def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+ def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+ def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+ def psrld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+ def psrlq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+ def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+ def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+ def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
}
let Features = "sse3", Attributes = [NoThrow] in {
@@ -585,14 +583,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def psadbw256
: X86Builtin<
"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
- def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
- def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
- def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
- def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
- def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
- def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
- def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
- def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
@@ -669,6 +659,15 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
+
+ def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
+ def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
+ def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
+ def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
+ def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
+ def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
+ def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
+ def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
}
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
@@ -1930,16 +1929,13 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect
def prorq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
-}
-
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pmaddubsw512 : X86Builtin<"_Vector<32, short>(_Vector<64, char>, _Vector<64, char>)">;
def pmaddwd512 : X86Builtin<"_Vector<16, int>(_Vector<32, short>, _Vector<32, short>)">;
def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
+ def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
}
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
@@ -1995,7 +1991,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect
def psravq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def psraw512
: X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
def psrlw512
@@ -2312,25 +2308,17 @@ let Features = "avx512f",
def psraqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psraq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def psraq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512vl",
- Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psraqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">;
}
-let Features = "avx512vl",
- Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def psraq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
def psraqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pslld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">;
def psllq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">;
def psrad512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 83e40f64fd979..1b1866034b50d 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3468,6 +3468,69 @@ static bool interp__builtin_ia32_shuffle_generic(
return true;
}
+/// Interprets an x86 PSLL/PSRA/PSRL "shift by count vector" builtin call.
+///
+/// Every element of the source vector (argument 0) is shifted by the same
+/// amount. That amount is the low 64 bits of the count vector (argument 1),
+/// assembled from its leading elements. The result is written through the
+/// pointer left on top of the stack.
+///
+/// \param S          Interpreter state; supplies the stack and type contexts.
+/// \param OpPC       Current code pointer (not read by this function).
+/// \param Call       The two-argument builtin call being interpreted.
+/// \param ShiftOp    Called as ShiftOp(Elt, Count) when Count is smaller than
+///                   the element bit width.
+/// \param OverflowOp Called as OverflowOp(Elt, EltWidth) when Count is greater
+///                   than or equal to the element bit width.
+/// \returns true; this function has no failure path of its own.
+static bool interp__builtin_ia32_shift_with_count(
+    InterpState &S, CodePtr OpPC, const CallExpr *Call,
+    llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp,
+    llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) {
+
+  assert(Call->getNumArgs() == 2);
+
+  // The count operand is popped before the source operand.
+  const Pointer &Count = S.Stk.pop<Pointer>();
+  const Pointer &Source = S.Stk.pop<Pointer>();
+
+  QualType SourceType = Call->getArg(0)->getType();
+  QualType CountType = Call->getArg(1)->getType();
+  assert(SourceType->isVectorType() && CountType->isVectorType());
+
+  const auto *SourceVecT = SourceType->castAs<VectorType>();
+  const auto *CountVecT = CountType->castAs<VectorType>();
+  PrimType SourceElemT = *S.getContext().classify(SourceVecT->getElementType());
+  PrimType CountElemT = *S.getContext().classify(CountVecT->getElementType());
+
+  // The destination stays on the stack; it is only peeked, not popped.
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  unsigned DestEltWidth =
+      S.getASTContext().getTypeSize(SourceVecT->getElementType());
+  bool IsDestUnsigned = SourceVecT->getElementType()->isUnsignedIntegerType();
+  unsigned DestLen = SourceVecT->getNumElements();
+  unsigned CountEltWidth =
+      S.getASTContext().getTypeSize(CountVecT->getElementType());
+  unsigned NumBitsInQWord = 64;
+  unsigned NumCountElts = NumBitsInQWord / CountEltWidth;
+
+  // Pack the leading count elements into one 64-bit value, lowest element in
+  // the lowest bits.
+  uint64_t CountLQWord = 0;
+  for (unsigned EltIdx = 0; EltIdx != NumCountElts; ++EltIdx) {
+    uint64_t Elt = 0;
+    // Zero-extend the raw lane bits. Converting a signed lane directly to
+    // uint64_t would sign-extend and set bits outside this lane's slot.
+    INT_TYPE_SWITCH(CountElemT, {
+      Elt = Count.elem<T>(EltIdx).toAPSInt().getZExtValue();
+    });
+    CountLQWord |= (Elt << (EltIdx * CountEltWidth));
+  }
+
+  // The count is shared by all elements, so decide once which callback applies.
+  const bool CountInRange = CountLQWord < DestEltWidth;
+
+  for (unsigned EltIdx = 0; EltIdx != DestLen; ++EltIdx) {
+    APSInt Elt;
+    INT_TYPE_SWITCH(SourceElemT, { Elt = Source.elem<T>(EltIdx).toAPSInt(); });
+
+    APInt Result = CountInRange ? ShiftOp(Elt, CountLQWord)
+                                : OverflowOp(Elt, DestEltWidth);
+
+    // Store with the extension that matches the destination signedness.
+    if (IsDestUnsigned) {
+      INT_TYPE_SWITCH(SourceElemT, {
+        Dst.elem<T>(EltIdx) = T::from(Result.getZExtValue());
+      });
+    } else {
+      INT_TYPE_SWITCH(SourceElemT, {
+        Dst.elem<T>(EltIdx) = T::from(Result.getSExtValue());
+      });
+    }
+  }
+
+  Dst.initializeAllElements();
+  return true;
+}
+
+
static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
const CallExpr *Call) {
@@ -4826,6 +4889,48 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_phminposuw128:
return interp__builtin_ia32_phminposuw(S, OpPC, Call);
+ case X86::BI__builtin_ia32_psraq128:
+ case X86::BI__builtin_ia32_psraq256:
+ case X86::BI__builtin_ia32_psraq512:
+ case X86::BI__builtin_ia32_psrad128:
+ case X86::BI__builtin_ia32_psrad256:
+ case X86::BI__builtin_ia32_psrad512:
+ case X86::BI__builtin_ia32_psraw128:
+ case X86::BI__builtin_ia32_psraw256:
+ case X86::BI__builtin_ia32_psraw512:
+ return interp__builtin_ia32_shift_with_count(
+ S, OpPC, Call,
+ [](const APInt &Elt, uint64_t Count) { return Elt.ashr(Count); },
+ [](const APInt &Elt, unsigned Width) { return Elt.ashr(Width - 1); });
+
+ case X86::BI__builtin_ia32_psllq128:
+ case X86::BI__builtin_ia32_psllq256:
+ case X86::BI__builtin_ia32_psllq512:
+ case X86::BI__builtin_ia32_pslld128:
+ case X86::BI__builtin_ia32_pslld256:
+ case X86::BI__builtin_ia32_pslld512:
+ case X86::BI__builtin_ia32_psllw128:
+ case X86::BI__builtin_ia32_psllw256:
+ case X86::BI__builtin_ia32_psllw512:
+ return interp__builtin_ia32_shift_with_count(
+ S, OpPC, Call,
+ [](const APInt &Elt, uint64_t Count) { return Elt.shl(Count); },
+ [](const APInt &Elt, unsigned Width) { return APInt::getZero(Width); });
+
+ case X86::BI__builtin_ia32_psrlq128:
+ case X86::BI__builtin_ia32_psrlq256:
+ case X86::BI__builtin_ia32_psrlq512:
+ case X86::BI__builtin_ia32_psrld128:
+ case X86::BI__builtin_ia32_psrld256:
+ case X86::BI__builtin_ia32_psrld512:
+ case X86::BI__builtin_ia32_psrlw128:
+ case X86::BI__builtin_ia32_psrlw256:
+ case X86::BI__builtin_ia32_psrlw512:
+ return interp__builtin_ia32_shift_with_count(
+ S, OpPC, Call,
+ [](const APInt &Elt, uint64_t Count) { return Elt.lshr(Count); },
+ [](const APInt &Elt, unsigned Width) { return APInt::getZero(Width); });
+
case X86::BI__builtin_ia32_pternlogd128_mask:
case X86::BI__builtin_ia32_pternlogd256_mask:
case X86::BI__builtin_ia32_pternlogd512_mask:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3b91678f7d400..7e86f1252a23d 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12166,6 +12166,52 @@ static bool evalShuffleGeneric(
return true;
}
+/// Evaluates an x86 PSLL/PSRA/PSRL "shift by count vector" builtin call.
+///
+/// Every element of the source vector (argument 0) is shifted by the same
+/// amount. That amount is the low 64 bits of the count vector (argument 1),
+/// assembled from its leading elements.
+///
+/// \param Info       Evaluation state forwarded to EvaluateAsRValue.
+/// \param Call       The two-argument builtin call being evaluated.
+/// \param Out        Receives the result vector on success.
+/// \param ShiftOp    Called as ShiftOp(Elt, Count) when Count is smaller than
+///                   the element bit width.
+/// \param OverflowOp Called as OverflowOp(Elt, EltWidth) when Count is greater
+///                   than or equal to the element bit width.
+/// \returns false if either operand fails to evaluate, true otherwise.
+static bool evalShiftWithCount(
+    EvalInfo &Info, const CallExpr *Call, APValue &Out,
+    llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp,
+    llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) {
+
+  // Check the arity before either argument is accessed.
+  assert(Call->getNumArgs() == 2);
+
+  APValue Source, Count;
+  if (!EvaluateAsRValue(Info, Call->getArg(0), Source) ||
+      !EvaluateAsRValue(Info, Call->getArg(1), Count))
+    return false;
+
+  QualType SourceTy = Call->getArg(0)->getType();
+  // The count type is queried inside the assert only, so no local variable is
+  // left unused when assertions are compiled out.
+  assert(SourceTy->isVectorType() &&
+         Call->getArg(1)->getType()->isVectorType());
+
+  QualType DestEltTy = SourceTy->castAs<VectorType>()->getElementType();
+  unsigned DestEltWidth = Source.getVectorElt(0).getInt().getBitWidth();
+  unsigned DestLen = Source.getVectorLength();
+  bool IsDestUnsigned = DestEltTy->isUnsignedIntegerType();
+  unsigned CountEltWidth = Count.getVectorElt(0).getInt().getBitWidth();
+  unsigned NumBitsInQWord = 64;
+  unsigned NumCountElts = NumBitsInQWord / CountEltWidth;
+  SmallVector<APValue, 64> Result;
+  Result.reserve(DestLen);
+
+  // Pack the leading count elements into one 64-bit value, lowest element in
+  // the lowest bits.
+  uint64_t CountLQWord = 0;
+  for (unsigned EltIdx = 0; EltIdx != NumCountElts; ++EltIdx) {
+    uint64_t Elt = Count.getVectorElt(EltIdx).getInt().getZExtValue();
+    CountLQWord |= (Elt << (EltIdx * CountEltWidth));
+  }
+
+  // The count is shared by all elements, so decide once which callback applies.
+  const bool CountInRange = CountLQWord < DestEltWidth;
+
+  for (unsigned EltIdx = 0; EltIdx != DestLen; ++EltIdx) {
+    APInt Elt = Source.getVectorElt(EltIdx).getInt();
+    APInt Shifted = CountInRange ? ShiftOp(Elt, CountLQWord)
+                                 : OverflowOp(Elt, DestEltWidth);
+    Result.push_back(APValue(APSInt(Shifted, IsDestUnsigned)));
+  }
+  Out = APValue(Result.data(), Result.size());
+  return true;
+}
+
+
bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!IsConstantEvaluatedBuiltinCall(E))
return ExprEvaluatorBaseTy::VisitCallExpr(E);
@@ -13130,6 +13176,66 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(Result.data(), Result.size()), E);
}
+ case X86::BI__builtin_ia32_psraq128:
+ case X86::BI__builtin_ia32_psraq256:
+ case X86::BI__builtin_ia32_psraq512:
+ case X86::BI__builtin_ia32_psrad128:
+ case X86::BI__builtin_ia32_psrad256:
+ case X86::BI__builtin_ia32_psrad512:
+ case X86::BI__builtin_ia32_psraw128:
+ case X86::BI__builtin_ia32_psraw256:
+ case X86::BI__builtin_ia32_psraw512: {
+ APValue R;
+ if (!evalShiftWithCount(
+ Info, E, R,
+ [](const APInt &Elt, uint64_t Count) { return Elt.ashr(Count); },
+ [](const APInt &Elt, unsigned Width) {
+ return Elt.ashr(Width - 1);
+ }))
+ return false;
+ return Success(R, E);
+ }
+
+ case X86::BI__builtin_ia32_psllq128:
+ case X86::BI__builtin_ia32_psllq256:
+ case X86::BI__builtin_ia32_psllq512:
+ case X86::BI__builtin_ia32_pslld128:
+ case X86::BI__builtin_ia32_pslld256:
+ case X86::BI__builtin_ia32_pslld512:
+ case X86::BI__builtin_ia32_psllw128:
+ case X86::BI__builtin_ia32_psllw256:
+ case X86::BI__builtin_ia32_psllw512: {
+ APValue R;
+ if (!evalShiftWithCount(
+ Info, E, R,
+ [](const APInt &Elt, uint64_t Count) { return Elt.shl(Count); },
+ [](const APInt &Elt, unsigned Width) {
+ return APInt::getZero(Width);
+ }))
+ return false;
+ return Success(R, E);
+ }
+
+ case X86::BI__builtin_ia32_psrlq128:
+ case X86::BI__builtin_ia32_psrlq256:
+ case X86::BI__builtin_ia32_psrlq512:
+ case X86::BI__builtin_ia32_psrld128:
+ case X86::BI__builtin_ia32_psrld256:
+ case X86::BI__builtin_ia32_psrld512:
+ case X86::BI__builtin_ia32_psrlw128:
+ case X86::BI__builtin_ia32_psrlw256:
+ case X86::BI__builtin_ia32_psrlw512: {
+ APValue R;
+ if (!evalShiftWithCount(
+ Info, E, R,
+ [](const APInt &Elt, uint64_t Count) { return Elt.lshr(Count); },
+ [](const APInt &Elt, unsigned Width) {
+ return APInt::getZero(Width);
+ }))
+ return false;
+ return Success(R, E);
+ }
+
case X86::BI__builtin_ia32_pternlogd128_mask:
case X86::BI__builtin_ia32_pternlogd256_mask:
case X86::BI__builtin_ia32_pternlogd512_mask:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 3e3c13d8bd662..d3ceb2327ac62 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -2095,9 +2095,8 @@ _mm256_slli_epi16(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sll_epi16(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sll_epi16(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);
}
@@ -2134,9 +2133,8 @@ _mm256_slli_epi32(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sll_epi32(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sll_epi32(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);
}
@@ -2173,9 +2171,8 @@ _mm256_slli_epi64(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sll_epi64(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sll_epi64(__m256i __a, __m128i __count) {
return __builtin_ia32_psllq256((__v4di)__a, __count);
}
@@ -2214,9 +2211,8 @@ _mm256_srai_epi16(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sra_epi16(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sra_epi16(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);
}
@@ -2255,9 +2251,8 @@ _mm256_srai_epi32(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-...
[truncated]
|
0f2f3ea to
65e2154
Compare
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove __anyext128 def from mmintrin.h?
65e2154 to
eb18c4e
Compare
eb18c4e to
3d7456e
Compare
… PSLL/PSRA/PSRL var intrinsics to be used in constexpr Resolves:llvm#169176
_mm_sll_pi16, _mm_sll_pi32, _mm_sll_si64, _mm_sra_pi16, _mm_sra_pi32, _mm_srl_pi16, _mm_srl_pi32, _mm_srl_si64. Also use __zext128 instead of __anyext128 to avoid negative indices.
3d7456e to
e0b578d
Compare
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…169822) This PR avoids a compiler warning, which turns into an error with `-Werror`, for a variable introduced in #169276 and only used in an assertion (which is, thus, unused if compiled without assertions). Signed-off-by: Ingo Müller <ingomueller@google.com> Co-authored-by: Simon Pilgrim <llvm-dev@redking.me.uk>
Resolves #169176