diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index a0181b7ae8f9d..5bb69cd98ad91 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -58,7 +58,7 @@ let Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<64>], Features
 }
 
 // SSE intrinsics
-let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
+let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<128>] in {
   foreach Cmp = ["eq", "lt", "le", "gt", "ge", "neq"] in {
     let Features = "sse" in {
       def comi#Cmp : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
@@ -70,6 +70,18 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
     }
   }
 
+  let Features = "sse" in {
+    def cmpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
+    def cmpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
+  }
+
+  let Features = "sse2" in {
+    def cmppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
+    def cmpsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
+  }
+}
+
+let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
   foreach Cmp = ["cmpeq", "cmplt", "cmple", "cmpunord", "cmpneq", "cmpnlt",
                  "cmpnle", "cmpord", "min", "max"] in {
     let Features = "sse" in {
@@ -82,17 +94,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
     }
   }
 
-  let Features = "sse" in {
-    def cmpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
-    def cmpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
-  }
-
-  let Features = "sse2" in {
-    def cmppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
-    def cmpsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
-  }
-
-
 let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def pavgb128 : X86Builtin<"_Vector<16, unsigned char>(_Vector<16, unsigned char>, _Vector<16, unsigned char>)">;
   def pavgw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned short>, _Vector<8, unsigned short>)">;
@@ -467,6 +468,11 @@ let Features = "avx512f,vpclmulqdq", Attributes = [NoThrow, Const, RequiredVecto
   def pclmulqdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant char)">;
 }
 
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+  def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
+  def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
+}
+
 let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
   def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>)">;
   def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">;
@@ -475,8 +481,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
   def shufpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
   def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
   def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
-  def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
-  def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
   def vextractf128_pd256 : X86Builtin<"_Vector<2, double>(_Vector<4, double>, _Constant int)">;
   def vextractf128_ps256 : X86Builtin<"_Vector<4, float>(_Vector<8, float>, _Constant int)">;
   def vextractf128_si256 : X86Builtin<"_Vector<4, int>(_Vector<8, int>, _Constant int)">;
@@ -3426,7 +3430,7 @@ let Features = "avx512vp2intersect,avx512vl", Attributes = [NoThrow, RequiredVec
   def vp2intersect_d_128 : X86Builtin<"void(_Vector<4, int>, _Vector<4, int>, unsigned char *, unsigned char *)">;
 }
 
-let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512fp16", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def vcomish : X86Builtin<"int(_Vector<8, _Float16>, _Vector<8, _Float16>, _Constant int, _Constant int)">;
 }
 
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 68ebfdf27ba43..23d95e1da6cd5 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2920,6 +2920,222 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
   return true;
 }
 
+/// Mapping for COMI/UCOMI/CMPS/CMPP
+static inline bool evalCmpImm(uint32_t imm, llvm::APFloatBase::cmpResult cmp) {
+  using CmpResult = llvm::APFloatBase::cmpResult;
+
+  bool result = false;
+  bool isUnordered = (cmp == llvm::APFloatBase::cmpUnordered);
+  bool isEq = (cmp == CmpResult::cmpEqual);
+  bool isGt = (cmp == CmpResult::cmpGreaterThan);
+  bool isLt = (cmp == CmpResult::cmpLessThan);
+
+  switch (imm & 0x1F) {
+  case 0x00: /* _CMP_EQ_OQ */
+  case 0x10: /* _CMP_EQ_OS */
+    result = isEq && !isUnordered;
+    break;
+  case 0x01: /* _CMP_LT_OS */
+  case 0x11: /* _CMP_LT_OQ */
+    result = isLt && !isUnordered;
+    break;
+  case 0x02: /* _CMP_LE_OS */
+  case 0x12: /* _CMP_LE_OQ */
+    result = !isGt && !isUnordered;
+    break;
+  case 0x03: /* _CMP_UNORD_Q */
+  case 0x13: /* _CMP_UNORD_S */
+    result = isUnordered;
+    break;
+  case 0x04: /* _CMP_NEQ_UQ */
+  case 0x14: /* _CMP_NEQ_US */
+    result = !isEq || isUnordered;
+    break;
+  case 0x05: /* _CMP_NLT_US */
+  case 0x15: /* _CMP_NLT_UQ */
+    result = !isLt || isUnordered;
+    break;
+  case 0x06: /* _CMP_NLE_US */
+  case 0x16: /* _CMP_NLE_UQ */
+    result = isGt || isUnordered;
+    break;
+  case 0x07: /* _CMP_ORD_Q */
+  case 0x17: /* _CMP_ORD_S */
+    result = !isUnordered;
+    break;
+  case 0x08: /* _CMP_EQ_UQ */
+  case 0x18: /* _CMP_EQ_US */
+    result = isEq || isUnordered;
+    break;
+  case 0x09: /* _CMP_NGE_US */
+  case 0x19: /* _CMP_NGE_UQ */
+    result = isLt || isUnordered;
+    break;
+  case 0x0a: /* _CMP_NGT_US */
+  case 0x1a: /* _CMP_NGT_UQ */
+    result = !isGt || isUnordered;
+    break;
+  case 0x0b: /* _CMP_FALSE_OQ */
+  case 0x1b: /* _CMP_FALSE_OS */
+    result = false;
+    break;
+  case 0x0c: /* _CMP_NEQ_OQ */
+  case 0x1c: /* _CMP_NEQ_OS */
+    result = !isEq && !isUnordered;
+    break;
+  case 0x0d: /* _CMP_GE_OS */
+  case 0x1d: /* _CMP_GE_OQ */
+    result = !isLt && !isUnordered;
+    break;
+  case 0x0e: /* _CMP_GT_OS */
+  case 0x1e: /* _CMP_GT_OQ */
+    result = isGt && !isUnordered;
+    break;
+  case 0x0f: /* _CMP_TRUE_UQ */
+  case 0x1f: /* _CMP_TRUE_US */
+    result = true;
+    break;
+  }
+  return result;
+}
+
+static inline void writeMaskFloat(Pointer &Vec, unsigned lane, bool truth,
+                                  bool isF64) {
+  if (isF64) {
+    llvm::APInt bits(64, truth ? ~0ULL : 0ULL);
+    llvm::APFloat F(llvm::APFloat::IEEEdouble(), bits);
+    Vec.elem<Floating>(lane) = Floating(F);
+  } else {
+    llvm::APInt bits(32, truth ? 0xFFFFFFFFu : 0u);
+    llvm::APFloat F(llvm::APFloat::IEEEsingle(), bits);
+    Vec.elem<Floating>(lane) = Floating(F);
+  }
+}
+
+static inline bool laneCompareToBool(const Pointer &A, const Pointer &B,
+                                     int Lane, uint32_t Imm) {
+  llvm::APFloat A0 = A.elem<Floating>(Lane).getAPFloat();
+  llvm::APFloat B0 = B.elem<Floating>(Lane).getAPFloat();
+  auto CR = A0.compare(B0);
+  return evalCmpImm(Imm, CR);
+}
+
+static bool interp__builtin_x86_cmp(InterpState &S, CodePtr OpPC,
+                                    const InterpFrame *Frame,
+                                    const CallExpr *Call, unsigned ID) {
+  llvm::APSInt ImmAPS =
+      popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(2)));
+  uint32_t imm = ImmAPS.getZExtValue();
+  const Pointer &VB = S.Stk.pop<Pointer>();
+  const Pointer &VA = S.Stk.pop<Pointer>();
+  Pointer &Dst = S.Stk.peek<Pointer>();
+
+  bool isScalar = (ID == X86::BI__builtin_ia32_cmpss) ||
+                  (ID == X86::BI__builtin_ia32_cmpsd);
+  bool isF64 = (ID == X86::BI__builtin_ia32_cmppd) ||
+               (ID == X86::BI__builtin_ia32_cmpsd) ||
+               (ID == X86::BI__builtin_ia32_cmppd256);
+  int lanes = VA.getNumElems();
+
+  if (isScalar) {
+    bool Result = laneCompareToBool(VA, VB, /*lane*/ 0, imm);
+    writeMaskFloat(Dst, /*lane*/ 0, Result, isF64);
+    for (int i = 1; i < lanes; ++i)
+      Dst.elem<Floating>(i) = VA.elem<Floating>(i);
+  } else {
+    for (int i = 0; i < lanes; i++) {
+      bool Result = laneCompareToBool(VA, VB, i, imm);
+      writeMaskFloat(Dst, i, Result, isF64);
+    }
+  }
+
+  Dst.initializeAllElements();
+  return true;
+}
+
+static bool interp__builtin_x86_vcomish(InterpState &S, CodePtr OpPC,
+                                        const InterpFrame *Frame,
+                                        const CallExpr *Call) {
+  using CmpResult = llvm::APFloatBase::cmpResult;
+
+  llvm::APSInt R =
+      popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(3)));
+  llvm::APSInt P =
+      popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(2)));
+  const Pointer &VB = S.Stk.pop<Pointer>();
+  const Pointer &VA = S.Stk.pop<Pointer>();
+
+  llvm::APFloat A0 = VA.elem<Floating>(0).getAPFloat();
+  llvm::APFloat B0 = VB.elem<Floating>(0).getAPFloat();
+  CmpResult cmp = A0.compare(B0);
+  bool result = evalCmpImm(static_cast<uint32_t>(P.getZExtValue()), cmp);
+
+  pushInteger(S, result ? 1 : 0, Call->getType());
+  return true;
+}
+
+static bool interp__builtin_x86_compare_scalar(InterpState &S, CodePtr OpPC,
+                                               const InterpFrame *Frame,
+                                               const CallExpr *Call,
+                                               unsigned ID) {
+  using CmpResult = llvm::APFloatBase::cmpResult;
+
+  const Pointer &VB = S.Stk.pop<Pointer>();
+  const Pointer &VA = S.Stk.pop<Pointer>();
+
+  llvm::APFloat A0 = VA.elem<Floating>(0).getAPFloat();
+  llvm::APFloat B0 = VB.elem<Floating>(0).getAPFloat();
+  CmpResult cmp = A0.compare(B0);
+
+  bool isEq = (cmp == CmpResult::cmpEqual);
+  bool isGt = (cmp == CmpResult::cmpGreaterThan);
+  bool isLt = (cmp == CmpResult::cmpLessThan);
+  bool result = false;
+
+  switch (ID) {
+  case X86::BI__builtin_ia32_comieq:
+  case X86::BI__builtin_ia32_ucomieq:
+  case X86::BI__builtin_ia32_comisdeq:
+  case X86::BI__builtin_ia32_ucomisdeq:
+    result = isEq && !A0.isNaN() && !B0.isNaN();
+    break;
+  case X86::BI__builtin_ia32_comineq:
+  case X86::BI__builtin_ia32_ucomineq:
+  case X86::BI__builtin_ia32_comisdneq:
+  case X86::BI__builtin_ia32_ucomisdneq:
+    result = !isEq || A0.isNaN() || B0.isNaN();
+    break;
+  case X86::BI__builtin_ia32_comige:
+  case X86::BI__builtin_ia32_ucomige:
+  case X86::BI__builtin_ia32_comisdge:
+  case X86::BI__builtin_ia32_ucomisdge:
+    result = !isLt && !A0.isNaN() && !B0.isNaN();
+    break;
+  case X86::BI__builtin_ia32_comilt:
+  case X86::BI__builtin_ia32_ucomilt:
+  case X86::BI__builtin_ia32_comisdlt:
+  case X86::BI__builtin_ia32_ucomisdlt:
+    result = isLt && !A0.isNaN() && !B0.isNaN();
+    break;
+  case X86::BI__builtin_ia32_comigt:
+  case X86::BI__builtin_ia32_ucomigt:
+  case X86::BI__builtin_ia32_comisdgt:
+  case X86::BI__builtin_ia32_ucomisdgt:
+    result = isGt && !A0.isNaN() && !B0.isNaN();
+    break;
+  case X86::BI__builtin_ia32_comile:
+  case X86::BI__builtin_ia32_ucomile:
+  case X86::BI__builtin_ia32_comisdle:
+  case X86::BI__builtin_ia32_ucomisdle:
+    result = !isGt && !A0.isNaN() && !B0.isNaN();
+    break;
+  default:
+    return false;
+  }
+  pushInteger(S, result ? 1 : 0, S.getASTContext().IntTy);
+  return true;
+}
+
 static bool interp__builtin_vec_ext(InterpState &S, CodePtr OpPC,
                                     const CallExpr *Call, unsigned ID) {
   assert(Call->getNumArgs() == 2);
@@ -3798,6 +4014,41 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_insert128i256:
     return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID);
 
+  case X86::BI__builtin_ia32_vcomish:
+    return interp__builtin_x86_vcomish(S, OpPC, Frame, Call);
+  case X86::BI__builtin_ia32_comieq:
+  case X86::BI__builtin_ia32_ucomieq:
+  case X86::BI__builtin_ia32_comisdeq:
+  case X86::BI__builtin_ia32_ucomisdeq:
+  case X86::BI__builtin_ia32_comineq:
+  case X86::BI__builtin_ia32_ucomineq:
+  case X86::BI__builtin_ia32_comisdneq:
+  case X86::BI__builtin_ia32_ucomisdneq:
+  case X86::BI__builtin_ia32_comige:
+  case X86::BI__builtin_ia32_ucomige:
+  case X86::BI__builtin_ia32_comisdge:
+  case X86::BI__builtin_ia32_ucomisdge:
+  case X86::BI__builtin_ia32_comilt:
+  case X86::BI__builtin_ia32_ucomilt:
+  case X86::BI__builtin_ia32_comisdlt:
+  case X86::BI__builtin_ia32_ucomisdlt:
+  case X86::BI__builtin_ia32_comile:
+  case X86::BI__builtin_ia32_ucomile:
+  case X86::BI__builtin_ia32_comisdle:
+  case X86::BI__builtin_ia32_ucomisdle:
+  case X86::BI__builtin_ia32_comigt:
+  case X86::BI__builtin_ia32_ucomigt:
+  case X86::BI__builtin_ia32_comisdgt:
+  case X86::BI__builtin_ia32_ucomisdgt:
+    return interp__builtin_x86_compare_scalar(S, OpPC, Frame, Call, BuiltinID);
+
+  case X86::BI__builtin_ia32_cmpps:
+  case X86::BI__builtin_ia32_cmppd:
+  case X86::BI__builtin_ia32_cmpss:
+  case X86::BI__builtin_ia32_cmpsd:
+  case X86::BI__builtin_ia32_cmpps256:
+  case X86::BI__builtin_ia32_cmppd256:
+    return interp__builtin_x86_cmp(S, OpPC, Frame, Call, BuiltinID);
   case X86::BI__builtin_ia32_vec_ext_v4hi:
   case X86::BI__builtin_ia32_vec_ext_v16qi:
   case X86::BI__builtin_ia32_vec_ext_v8hi:
@@ -3857,8 +4108,8 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E,
       break;
     }
     case OffsetOfNode::Array: {
-      // When generating bytecode, we put all the index expressions as Sint64 on
-      // the stack.
+      // When generating bytecode, we put all the index expressions as Sint64
+      // on the stack.
       int64_t Index = ArrayIndices[ArrayIndex];
       const ArrayType *AT = S.getASTContext().getAsArrayType(CurrentType);
       if (!AT)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 7bf28d988f405..b2a9a6a944e24 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14939,6 +14939,340 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
         [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
   }
 
+  case X86::BI__builtin_ia32_cmpps:
+  case X86::BI__builtin_ia32_cmppd:
+  case X86::BI__builtin_ia32_cmpss:
+  case X86::BI__builtin_ia32_cmpsd:
+  case X86::BI__builtin_ia32_cmpps256:
+  case X86::BI__builtin_ia32_cmppd256: {
+    using CmpResult = llvm::APFloatBase::cmpResult;
+
+    const Expr *A = E->getArg(0);
+    const Expr *B = E->getArg(1);
+    const Expr *Imm = E->getArg(2);
+
+    APValue AV, BV;
+    APSInt ImmVal;
+    if (!EvaluateVector(A, AV, Info) || !EvaluateVector(B, BV, Info))
+      return false;
+    if (!EvaluateInteger(Imm, ImmVal, Info))
+      return false;
+
+    if (!AV.isVector() || !BV.isVector())
+      return false;
+    unsigned Lanes = AV.getVectorLength();
+    if (Lanes == 0 || BV.getVectorLength() != Lanes)
+      return false;
+
+    QualType RetTy = E->getType();
+    const auto *VT = RetTy->getAs<VectorType>();
+    if (!VT)
+      return false;
+    bool IsF64 =
+        VT->getElementType()->isSpecificBuiltinType(BuiltinType::Double);
+    const bool IsScalar = (BuiltinOp == X86::BI__builtin_ia32_cmpss) ||
+                          (BuiltinOp == X86::BI__builtin_ia32_cmpsd);
+    const uint32_t imm = ImmVal.getZExtValue();
+
+    auto evalCmpImm = [](uint32_t imm,
+                         llvm::APFloatBase::cmpResult cmp) -> bool {
+      bool isUnordered = (cmp == llvm::APFloatBase::cmpUnordered);
+      bool isEq = (cmp == CmpResult::cmpEqual);
+      bool isGt = (cmp == CmpResult::cmpGreaterThan);
+      bool isLt = (cmp == CmpResult::cmpLessThan);
+      bool result = false;
+
+      switch (imm & 0x1F) {
+      case 0x00: /* _CMP_EQ_OQ */
+      case 0x10: /* _CMP_EQ_OS */
+        result = isEq && !isUnordered;
+        break;
+      case 0x01: /* _CMP_LT_OS */
+      case 0x11: /* _CMP_LT_OQ */
+        result = isLt && !isUnordered;
+        break;
+      case 0x02: /* _CMP_LE_OS */
+      case 0x12: /* _CMP_LE_OQ */
+        result = !isGt && !isUnordered;
+        break;
+      case 0x03: /* _CMP_UNORD_Q */
+      case 0x13: /* _CMP_UNORD_S */
+        result = isUnordered;
+        break;
+      case 0x04: /* _CMP_NEQ_UQ */
+      case 0x14: /* _CMP_NEQ_US */
+        result = !isEq || isUnordered;
+        break;
+      case 0x05: /* _CMP_NLT_US */
+      case 0x15: /* _CMP_NLT_UQ */
+        result = !isLt || isUnordered;
+        break;
+      case 0x06: /* _CMP_NLE_US */
+      case 0x16: /* _CMP_NLE_UQ */
+        result = isGt || isUnordered;
+        break;
+      case 0x07: /* _CMP_ORD_Q */
+      case 0x17: /* _CMP_ORD_S */
+        result = !isUnordered;
+        break;
+      case 0x08: /* _CMP_EQ_UQ */
+      case 0x18: /* _CMP_EQ_US */
+        result = isEq || isUnordered;
+        break;
+      case 0x09: /* _CMP_NGE_US */
+      case 0x19: /* _CMP_NGE_UQ */
+        result = isLt || isUnordered;
+        break;
+      case 0x0a: /* _CMP_NGT_US */
+      case 0x1a: /* _CMP_NGT_UQ */
+        result = !isGt || isUnordered;
+        break;
+      case 0x0b: /* _CMP_FALSE_OQ */
+      case 0x1b: /* _CMP_FALSE_OS */
+        result = false;
+        break;
+      case 0x0c: /* _CMP_NEQ_OQ */
+      case 0x1c: /* _CMP_NEQ_OS */
+        result = !isEq && !isUnordered;
+        break;
+      case 0x0d: /* _CMP_GE_OS */
+      case 0x1d: /* _CMP_GE_OQ */
+        result = !isLt && !isUnordered;
+        break;
+      case 0x0e: /* _CMP_GT_OS */
+      case 0x1e: /* _CMP_GT_OQ */
+        result = isGt && !isUnordered;
+        break;
+      case 0x0f: /* _CMP_TRUE_UQ */
+      case 0x1f: /* _CMP_TRUE_US */
+        result = true;
+        break;
+      }
+      return result;
+    };
+
+    auto writeMaskFloat = [&](APValue &Dst, unsigned Lane, bool Bit,
+                              bool IsF64Elt) {
+      if (IsF64Elt) {
+        llvm::APInt I(64, Bit ? ~0ULL : 0ULL);
+        llvm::APFloat F(Info.Ctx.getFloatTypeSemantics(Info.Ctx.DoubleTy), I);
+        Dst.getVectorElt(Lane) = APValue(F);
+      } else {
+        llvm::APInt I(32, Bit ? 0xFFFFFFFFu : 0u);
+        llvm::APFloat F(Info.Ctx.getFloatTypeSemantics(Info.Ctx.FloatTy), I);
+        Dst.getVectorElt(Lane) = APValue(F);
+      }
+    };
+
+    auto cmpLaneToBit = [&](unsigned Lane) -> bool {
+      const APValue &AE = AV.getVectorElt(Lane);
+      const APValue &BE = BV.getVectorElt(Lane);
+      if (!AE.isFloat() || !BE.isFloat())
+        return false;
+      llvm::APFloat A0 = AE.getFloat();
+      llvm::APFloat B0 = BE.getFloat();
+      return evalCmpImm(imm, A0.compare(B0));
+    };
+
+    APValue Res((const APValue *)nullptr, Lanes);
+    for (unsigned i = 0; i < Lanes; ++i)
+      Res.getVectorElt(i) = AV.getVectorElt(i);
+
+    if (IsScalar) {
+      bool bit = cmpLaneToBit(0);
+      writeMaskFloat(Res, 0, bit, IsF64);
+    } else {
+      for (unsigned i = 0; i < Lanes; ++i) {
+        bool bit = cmpLaneToBit(i);
+        writeMaskFloat(Res, i, bit, IsF64);
+      }
+    }
+    return Success(Res, E);
+  }
+  case X86::BI__builtin_ia32_vcomish: {
+    APSInt R, P;
+    if (!EvaluateInteger(E->getArg(3), R, Info))
+      return false;
+    if (!EvaluateInteger(E->getArg(2), P, Info))
+      return false;
+    APValue AV, BV;
+    if (!EvaluateVector(E->getArg(0), AV, Info) ||
+        !EvaluateVector(E->getArg(1), BV, Info))
+      return false;
+    if (!AV.isVector() || !BV.isVector() || AV.getVectorLength() == 0 ||
+        BV.getVectorLength() == 0)
+      return false;
+    const APValue &A0V = AV.getVectorElt(0);
+    const APValue &B0V = BV.getVectorElt(0);
+    if (!A0V.isFloat() || !B0V.isFloat())
+      return false;
+    const llvm::APFloat &A0 = A0V.getFloat();
+    const llvm::APFloat &B0 = B0V.getFloat();
+    auto Cmp = A0.compare(B0);
+
+    const bool IsEq = (Cmp == llvm::APFloatBase::cmpEqual);
+    const bool IsLt = (Cmp == llvm::APFloatBase::cmpLessThan);
+    const bool IsGt = (Cmp == llvm::APFloatBase::cmpGreaterThan);
+    bool Result = false;
+
+    switch (P.getExtValue()) {
+    case 0x00: /* _CMP_EQ_OQ */
+    case 0x10: /* _CMP_EQ_OS */
+      Result = IsEq && !A0.isNaN() && !B0.isNaN();
+      break;
+    case 0x01: /* _CMP_LT_OS */
+    case 0x11: /* _CMP_LT_OQ */
+      Result = IsLt && !A0.isNaN() && !B0.isNaN();
+      break;
+    case 0x02: /* _CMP_LE_OS */
+    case 0x12: /* _CMP_LE_OQ */
+      Result = !IsGt && !A0.isNaN() && !B0.isNaN();
+      break;
+    case 0x03: /* _CMP_UNORD_Q */
+    case 0x13: /* _CMP_UNORD_S */
+      Result = A0.isNaN() || B0.isNaN();
+      break;
+    case 0x04: /* _CMP_NEQ_UQ */
+    case 0x14: /* _CMP_NEQ_US */
+      Result = !IsEq || A0.isNaN() || B0.isNaN();
+      break;
+    case 0x05: /* _CMP_NLT_US */
+    case 0x15: /* _CMP_NLT_UQ */
+      Result = !IsLt || A0.isNaN() || B0.isNaN();
+      break;
+    case 0x06: /* _CMP_NLE_US */
+    case 0x16: /* _CMP_NLE_UQ */
+      Result = IsGt || A0.isNaN() || B0.isNaN();
+      break;
+    case 0x07: /* _CMP_ORD_Q */
+    case 0x17: /* _CMP_ORD_S */
+      Result = !A0.isNaN() && !B0.isNaN();
+      break;
+    case 0x08: /* _CMP_EQ_UQ */
+    case 0x18: /* _CMP_EQ_US */
+      Result = IsEq || A0.isNaN() || B0.isNaN();
+      break;
+    case 0x09: /* _CMP_NGE_US */
+    case 0x19: /* _CMP_NGE_UQ */
+      Result = IsLt || A0.isNaN() || B0.isNaN();
+      break;
+    case 0x0a: /* _CMP_NGT_US */
+    case 0x1a: /* _CMP_NGT_UQ */
+      Result = !IsGt || A0.isNaN() || B0.isNaN();
+      break;
+    case 0x0b: /* _CMP_FALSE_OQ */
+    case 0x1b: /* _CMP_FALSE_OS */
+      Result = false;
+      break;
+    case 0x0c: /* _CMP_NEQ_OQ */
+    case 0x1c: /* _CMP_NEQ_OS */
+      Result = !IsEq && !A0.isNaN() && !B0.isNaN();
+      break;
+    case 0x0d: /* _CMP_GE_OS */
+    case 0x1d: /* _CMP_GE_OQ */
+      Result = !IsLt && !A0.isNaN() && !B0.isNaN();
+      break;
+    case 0x0e: /* _CMP_GT_OS */
+    case 0x1e: /* _CMP_GT_OQ */
+      Result = IsGt && !A0.isNaN() && !B0.isNaN();
+      break;
+    case 0x0f: /* _CMP_TRUE_UQ */
+    case 0x1f: /* _CMP_TRUE_US */
+      Result = true;
+      break;
+
+    default:
+      return false;
+    }
+    return Success(Result ? 1 : 0, E);
+  }
+  case X86::BI__builtin_ia32_comieq:
+  case X86::BI__builtin_ia32_ucomieq:
+  case X86::BI__builtin_ia32_comisdeq:
+  case X86::BI__builtin_ia32_ucomisdeq:
+  case X86::BI__builtin_ia32_comineq:
+  case X86::BI__builtin_ia32_ucomineq:
+  case X86::BI__builtin_ia32_comisdneq:
+  case X86::BI__builtin_ia32_ucomisdneq:
+  case X86::BI__builtin_ia32_comige:
+  case X86::BI__builtin_ia32_ucomige:
+  case X86::BI__builtin_ia32_comisdge:
+  case X86::BI__builtin_ia32_ucomisdge:
+  case X86::BI__builtin_ia32_comilt:
+  case X86::BI__builtin_ia32_ucomilt:
+  case X86::BI__builtin_ia32_comisdlt:
+  case X86::BI__builtin_ia32_ucomisdlt:
+  case X86::BI__builtin_ia32_comigt:
+  case X86::BI__builtin_ia32_ucomigt:
+  case X86::BI__builtin_ia32_comisdgt:
+  case X86::BI__builtin_ia32_ucomisdgt:
+  case X86::BI__builtin_ia32_comile:
+  case X86::BI__builtin_ia32_ucomile:
+  case X86::BI__builtin_ia32_comisdle:
+  case X86::BI__builtin_ia32_ucomisdle: {
+    APValue AV, BV;
+    if (!EvaluateVector(E->getArg(0), AV, Info) ||
+        !EvaluateVector(E->getArg(1), BV, Info))
+      return false;
+    if (!AV.isVector() || !BV.isVector() || AV.getVectorLength() == 0 ||
+        BV.getVectorLength() == 0)
+      return false;
+    const APValue &A0V = AV.getVectorElt(0);
+    const APValue &B0V = BV.getVectorElt(0);
+    if (!A0V.isFloat() || !B0V.isFloat())
+      return false;
+    const llvm::APFloat &A0 = A0V.getFloat();
+    const llvm::APFloat &B0 = B0V.getFloat();
+    auto Cmp = A0.compare(B0);
+
+    const bool IsEq = (Cmp == llvm::APFloatBase::cmpEqual);
+    const bool IsLt = (Cmp == llvm::APFloatBase::cmpLessThan);
+    const bool IsGt = (Cmp == llvm::APFloatBase::cmpGreaterThan);
+    bool Result = false;
+
+    switch (BuiltinOp) {
+    case X86::BI__builtin_ia32_comieq:
+    case X86::BI__builtin_ia32_ucomieq:
+    case X86::BI__builtin_ia32_comisdeq:
+    case X86::BI__builtin_ia32_ucomisdeq:
+      Result = IsEq && !A0.isNaN() && !B0.isNaN();
+      break;
+    case X86::BI__builtin_ia32_comineq:
+    case X86::BI__builtin_ia32_ucomineq:
+    case X86::BI__builtin_ia32_comisdneq:
+    case X86::BI__builtin_ia32_ucomisdneq:
+      Result = !IsEq || A0.isNaN() || B0.isNaN();
+      break;
+    case X86::BI__builtin_ia32_comige:
+    case X86::BI__builtin_ia32_ucomige:
+    case X86::BI__builtin_ia32_comisdge:
+    case X86::BI__builtin_ia32_ucomisdge:
+      Result = !IsLt && !A0.isNaN() && !B0.isNaN();
+      break;
+    case X86::BI__builtin_ia32_comilt:
+    case X86::BI__builtin_ia32_ucomilt:
+    case X86::BI__builtin_ia32_comisdlt:
+    case X86::BI__builtin_ia32_ucomisdlt:
+      Result = IsLt && !A0.isNaN() && !B0.isNaN();
+      break;
+    case X86::BI__builtin_ia32_comigt:
+    case X86::BI__builtin_ia32_ucomigt:
+    case X86::BI__builtin_ia32_comisdgt:
+    case X86::BI__builtin_ia32_ucomisdgt:
+      Result = IsGt && !A0.isNaN() && !B0.isNaN();
+      break;
+    case X86::BI__builtin_ia32_comile:
+    case X86::BI__builtin_ia32_ucomile:
+    case X86::BI__builtin_ia32_comisdle:
+    case X86::BI__builtin_ia32_ucomisdle:
+      Result = !IsGt && !A0.isNaN() && !B0.isNaN();
+      break;
+    default:
+      return false;
+    }
+    return Success(Result ? 1 : 0, E);
+  }
+
   case clang::X86::BI__builtin_ia32_vec_ext_v4hi:
   case clang::X86::BI__builtin_ia32_vec_ext_v16qi:
   case clang::X86::BI__builtin_ia32_vec_ext_v8hi:
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index 4bd798129a25d..d7e54d3d86ea5 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -298,74 +298,74 @@ _mm512_zextph256_ph512(__m256h __a) {
 #define _mm_comi_sh(A, B, pred)                                               \
   _mm_comi_round_sh((A), (B), (pred), _MM_FROUND_CUR_DIRECTION)
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sh(__m128h __A,
-                                                          __m128h __B) {
+static __inline__ int
+    __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_comieq_sh(__m128h __A, __m128h __B) {
   return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_EQ_OS,
                                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sh(__m128h __A,
-                                                          __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_comilt_sh(__m128h __A, __m128h __B) {
   return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LT_OS,
                                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sh(__m128h __A,
-                                                          __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_comile_sh(__m128h __A, __m128h __B) {
  return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LE_OS,
                                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sh(__m128h __A,
-                                                          __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_comigt_sh(__m128h __A, __m128h __B) {
   return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GT_OS,
                                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sh(__m128h __A,
-                                                          __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_comige_sh(__m128h __A, __m128h __B) {
   return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GE_OS,
                                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sh(__m128h __A,
-                                                           __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_comineq_sh(__m128h __A, __m128h __B) {
   return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_NEQ_US,
                                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomieq_sh(__m128h __A,
-                                                           __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomieq_sh(__m128h __A, __m128h __B) {
   return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_EQ_OQ,
                                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomilt_sh(__m128h __A,
-                                                           __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomilt_sh(__m128h __A, __m128h __B) {
   return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LT_OQ,
                                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomile_sh(__m128h __A,
-                                                           __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomile_sh(__m128h __A, __m128h __B) {
   return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LE_OQ,
                                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomigt_sh(__m128h __A,
-                                                           __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomigt_sh(__m128h __A, __m128h __B) {
   return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GT_OQ,
                                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomige_sh(__m128h __A,
-                                                           __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomige_sh(__m128h __A, __m128h __B) {
   return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GE_OQ,
                                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomineq_sh(__m128h __A,
-                                                            __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomineq_sh(__m128h __A, __m128h __B) {
   return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_NEQ_UQ,
                                 _MM_FROUND_CUR_DIRECTION);
 }
 
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 6597e7e7d4030..69833bb230d1e 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -999,8 +999,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a,
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a,
-                                                       __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comieq_sd(__m128d __a,
+                                                                 __m128d __b) {
   return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);
 }
 
@@ -1023,8 +1023,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a,
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a,
-                                                       __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comilt_sd(__m128d __a,
+                                                                 __m128d __b) {
   return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b);
 }
 
@@ -1047,8 +1047,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a,
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a,
-                                                       __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comile_sd(__m128d __a,
+                                                                 __m128d __b) {
   return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b);
 }
 
@@ -1071,8 +1071,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a,
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a,
-                                                       __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comigt_sd(__m128d __a,
+                                                                 __m128d __b) {
   return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b);
 }
 
@@ -1095,8 +1095,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a,
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a,
-                                                       __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comige_sd(__m128d __a,
+                                                                 __m128d __b) {
   return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b);
 }
 
@@ -1119,8 +1119,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a,
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a,
-                                                        __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comineq_sd(__m128d __a,
+                                                                  __m128d __b) {
   return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b);
 }
 
@@ -1141,8 +1141,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a,
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a,
-                                                        __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomieq_sd(__m128d __a,
+                                                                  __m128d __b) {
   return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b);
 }
 
@@ -1165,8 +1165,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a,
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a,
-                                                        __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomilt_sd(__m128d __a,
+                                                                  __m128d __b) {
   return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b);
 }
 
@@ -1189,8 +1189,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a,
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a,
-                                                        __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomile_sd(__m128d __a,
+                                                                  __m128d __b) {
   return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b);
 }
 
@@ -1213,8 +1213,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a,
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a,
-                                                        __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomigt_sd(__m128d __a,
+                                                                  __m128d __b) {
   return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b);
 }
 
@@ -1237,8 +1237,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a,
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a,
-                                                        __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomige_sd(__m128d __a,
+                                                                  __m128d __b) {
   return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b);
 }
 
@@ -1261,8 +1261,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a,
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
 ///    compared to the lower double-precision value of \a __a.
 /// \returns An integer containing the comparison result.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a,
-                                                         __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_ucomineq_sd(__m128d __a, __m128d __b) {
   return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);
 }
 
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index d876b4735a7d2..f964952f80f36 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -1104,9 +1104,8 @@ _mm_cmpunord_ps(__m128 __a, __m128 __b)
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_comieq_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comieq_ss(__m128 __a,
+                                                                 __m128 __b) {
   return __builtin_ia32_comieq((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -1129,9 +1128,8 @@ _mm_comieq_ss(__m128 __a, __m128 __b)
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_comilt_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comilt_ss(__m128 __a,
+                                                                 __m128 __b) {
   return __builtin_ia32_comilt((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -1153,9 +1151,8 @@ _mm_comilt_ss(__m128 __a, __m128 __b)
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_comile_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comile_ss(__m128 __a,
+                                                                 __m128 __b) {
   return __builtin_ia32_comile((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -1177,9 +1174,8 @@ _mm_comile_ss(__m128 __a, __m128 __b)
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_comigt_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comigt_ss(__m128 __a,
+                                                                 __m128 __b) {
  return __builtin_ia32_comigt((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -1201,9 +1197,8 @@ _mm_comigt_ss(__m128 __a, __m128 __b)
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_comige_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comige_ss(__m128 __a,
+                                                                 __m128 __b) {
   return __builtin_ia32_comige((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -1225,9 +1220,8 @@ _mm_comige_ss(__m128 __a, __m128 __b)
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_comineq_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comineq_ss(__m128 __a,
+                                                                  __m128 __b) {
   return __builtin_ia32_comineq((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -1248,9 +1242,8 @@ _mm_comineq_ss(__m128 __a, __m128 __b)
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_ucomieq_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomieq_ss(__m128 __a,
+                                                                  __m128 __b) {
   return __builtin_ia32_ucomieq((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -1272,9 +1265,8 @@ _mm_ucomieq_ss(__m128 __a, __m128 __b)
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_ucomilt_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomilt_ss(__m128 __a,
+                                                                  __m128 __b) {
   return __builtin_ia32_ucomilt((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -1296,9 +1288,8 @@ _mm_ucomilt_ss(__m128 __a, __m128 __b)
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_ucomile_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomile_ss(__m128 __a,
+                                                                  __m128 __b) {
   return __builtin_ia32_ucomile((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -1320,9 +1311,8 @@ _mm_ucomile_ss(__m128 __a, __m128 __b)
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_ucomigt_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomigt_ss(__m128 __a,
+                                                                  __m128 __b) {
   return __builtin_ia32_ucomigt((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -1344,9 +1334,8 @@ _mm_ucomigt_ss(__m128 __a, __m128 __b)
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_ucomige_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomige_ss(__m128 __a,
+                                                                  __m128 __b) {
   return __builtin_ia32_ucomige((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -1367,9 +1356,8 @@ _mm_ucomige_ss(__m128 __a, __m128 __b)
 ///    A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
 ///    used in the comparison.
 /// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_ucomineq_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomineq_ss(__m128 __a,
+                                                                   __m128 __b) {
   return __builtin_ia32_ucomineq((__v4sf)__a, (__v4sf)__b);
 }
 
diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c
index 37443d584614d..46750e9efba82 100644
--- a/clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -354,7 +354,7 @@ __m256h test_mm512_castph512_ph256(__m512h __a) {
 
 __m256h test_mm256_castph128_ph256(__m128h __a) {
   // CHECK-LABEL: test_mm256_castph128_ph256
-  // CHECK: [[A:%.*]] = freeze <8 x half> poison
+  // CHECK: [[A:%.*]] = freeze <8 x half> poison
   // CHECK: shufflevector <8 x half> %{{.*}}, <8 x half> [[A]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   return _mm256_castph128_ph256(__a);
 }
@@ -372,7 +372,7 @@ __m512h test_mm512_castph128_ph512(__m128h __a) {
 
 __m512h test_mm512_castph256_ph512(__m256h __a) {
   // CHECK-LABEL: test_mm512_castph256_ph512
-  // CHECK: [[A:%.*]] = freeze <16 x half> poison
+  // CHECK: [[A:%.*]] = freeze <16 x half> poison
   // CHECK: shufflevector <16 x half> %{{.*}}, <16 x half> [[A]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   return _mm512_castph256_ph512(__a);
 }
@@ -409,78 +409,194 @@ int test_mm_comi_sh(__m128h __A, __m128h __B) {
   // CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 0, i32 4)
   return _mm_comi_sh(__A, __B, 0);
 }
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(1.0f16), 0) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 0) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 1) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16), 1) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(1.0f16), 2) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 2) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 2) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 3) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16), 3) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 4) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 4) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 5) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16), 5) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 5) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16), 6) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 6) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16), 6) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 7) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16), 7) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(1.0f16), 8) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 8) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 9) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16), 9) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16), 9) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 10) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16), 10) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 10) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(1.0f16), 11) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 11) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 12) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 12) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16), 13) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 13) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16), 14) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 14) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 15) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16), 15) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(1.0f16), 16) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 16) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 17) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16), 17) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(1.0f16), 18) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 18) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16), 18) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 19) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 19) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 20) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 20) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 21) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16), 21) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16), 21) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16), 22) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 22) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 22) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 23) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16), 23) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(1.0f16), 24) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 24) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 25) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16), 25) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 25) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 26) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16), 26) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16), 26) == 1);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16), 27) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 27) == 0);
+TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16), 28) == 1); +TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16), 28) == 0); +TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16), 29) == 1); +TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 29) == 0); +TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16), 30) == 1); +TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16), 30) == 0); +TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(1.0f16), 31) == 1); +TEST_CONSTEXPR(_mm_comi_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16), 31) == 1); int test_mm_comieq_sh(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_comieq_sh // CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 16, i32 4) return _mm_comieq_sh(__A, __B); } +TEST_CONSTEXPR(_mm_comieq_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(1.0f16)) == 1); +TEST_CONSTEXPR(_mm_comieq_sh(_mm_set1_ph(__builtin_nanf("")), + _mm_set1_ph(1.0f16)) == 0); int test_mm_comilt_sh(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_comilt_sh // CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 1, i32 4) return _mm_comilt_sh(__A, __B); } +TEST_CONSTEXPR(_mm_comilt_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16)) == 1); +TEST_CONSTEXPR(_mm_comilt_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16)) == 0); +TEST_CONSTEXPR(_mm_comilt_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(1.0f16)) == 0); +TEST_CONSTEXPR(_mm_comilt_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16)) == 0); int test_mm_comile_sh(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_comile_sh // CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 2, i32 4) return _mm_comile_sh(__A, __B); } +TEST_CONSTEXPR(_mm_comile_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(1.0f16)) == 1); +TEST_CONSTEXPR(_mm_comile_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16)) == 1); +TEST_CONSTEXPR(_mm_comile_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16)) == 0); +TEST_CONSTEXPR(_mm_comile_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16)) == 0); int test_mm_comigt_sh(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_comigt_sh // CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 14, i32 4) return _mm_comigt_sh(__A, __B); } +TEST_CONSTEXPR(_mm_comigt_sh(_mm_set1_ph(3.0f16), _mm_set1_ph(2.0f16)) == 1); +TEST_CONSTEXPR(_mm_comigt_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(3.0f16)) == 0); +TEST_CONSTEXPR(_mm_comigt_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(2.0f16)) == 0); +TEST_CONSTEXPR(_mm_comigt_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16)) == 0); int test_mm_comige_sh(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_comige_sh // CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 13, i32 4) return _mm_comige_sh(__A, __B); } +TEST_CONSTEXPR(_mm_comige_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(2.0f16)) == 1); +TEST_CONSTEXPR(_mm_comige_sh(_mm_set1_ph(3.0f16), _mm_set1_ph(2.0f16)) == 1); +TEST_CONSTEXPR(_mm_comige_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16)) == 0); +TEST_CONSTEXPR(_mm_comige_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16)) == 0); int test_mm_comineq_sh(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_comineq_sh // CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 20, i32 4) return _mm_comineq_sh(__A, __B); } +TEST_CONSTEXPR(_mm_comineq_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(3.0f16)) == 1); 
+TEST_CONSTEXPR(_mm_comineq_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(3.0f16)) == 1); int test_mm_ucomieq_sh(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_ucomieq_sh // CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 0, i32 4) return _mm_ucomieq_sh(__A, __B); } +TEST_CONSTEXPR(_mm_ucomieq_sh(_mm_set1_ph(4.0f16), _mm_set1_ph(4.0f16)) == 1); +TEST_CONSTEXPR(_mm_ucomieq_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(4.0f16)) == 0); int test_mm_ucomilt_sh(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_ucomilt_sh // CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 17, i32 4) return _mm_ucomilt_sh(__A, __B); } +TEST_CONSTEXPR(_mm_ucomilt_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16)) == 1); +TEST_CONSTEXPR(_mm_ucomilt_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16)) == 0); +TEST_CONSTEXPR(_mm_ucomilt_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(1.0f16)) == 0); +TEST_CONSTEXPR(_mm_ucomilt_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16)) == 0); +TEST_CONSTEXPR(_mm_ucomilt_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(__builtin_nanf(""))) == 0); int test_mm_ucomile_sh(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_ucomile_sh // CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 18, i32 4) return _mm_ucomile_sh(__A, __B); } +TEST_CONSTEXPR(_mm_ucomile_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(1.0f16)) == 1); +TEST_CONSTEXPR(_mm_ucomile_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16)) == 1); +TEST_CONSTEXPR(_mm_ucomile_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16)) == 0); +TEST_CONSTEXPR(_mm_ucomile_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(2.0f16)) == 0); int test_mm_ucomigt_sh(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_ucomigt_sh // CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 30, i32 4) return _mm_ucomigt_sh(__A, __B); } +TEST_CONSTEXPR(_mm_ucomigt_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16)) == 1); +TEST_CONSTEXPR(_mm_ucomigt_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16)) == 0); +TEST_CONSTEXPR(_mm_ucomigt_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(1.0f16)) == 0); +TEST_CONSTEXPR(_mm_ucomigt_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16)) == 0); int test_mm_ucomige_sh(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_ucomige_sh // CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 29, i32 4) return _mm_ucomige_sh(__A, __B); } +TEST_CONSTEXPR(_mm_ucomige_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(1.0f16)) == 1); +TEST_CONSTEXPR(_mm_ucomige_sh(_mm_set1_ph(2.0f16), _mm_set1_ph(1.0f16)) == 1); +TEST_CONSTEXPR(_mm_ucomige_sh(_mm_set1_ph(1.0f16), _mm_set1_ph(2.0f16)) == 0); +TEST_CONSTEXPR(_mm_ucomige_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(1.0f16)) == 0); int test_mm_ucomineq_sh(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_ucomineq_sh // CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 4, i32 4) return _mm_ucomineq_sh(__A, __B); } +TEST_CONSTEXPR(_mm_ucomineq_sh(_mm_set1_ph(5.0f16), _mm_set1_ph(5.0f16)) == 0); +TEST_CONSTEXPR(_mm_ucomineq_sh(_mm_set1_ph(__builtin_nanf("")), _mm_set1_ph(5.0f16)) == 1); __m512h test_mm512_add_ph(__m512h __A, __m512h __B) { // CHECK-LABEL: test_mm512_add_ph diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c index 3bad3426b1586..12b14b1d7efe8 100644 --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -289,36 +289,56 @@ int test_mm_comieq_ss(__m128 A, __m128 B) { // CHECK: call {{.*}}i32 
diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c
index 3bad3426b1586..12b14b1d7efe8 100644
--- a/clang/test/CodeGen/X86/sse-builtins.c
+++ b/clang/test/CodeGen/X86/sse-builtins.c
@@ -289,36 +289,56 @@ int test_mm_comieq_ss(__m128 A, __m128 B) {
   // CHECK: call {{.*}}i32 @llvm.x86.sse.comieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
   return _mm_comieq_ss(A, B);
 }
+TEST_CONSTEXPR(_mm_comieq_ss(_mm_set1_ps(1.0f), _mm_set1_ps(1.0f)) == 1);
+TEST_CONSTEXPR(_mm_comieq_ss(_mm_set1_ps(__builtin_nanf("")), _mm_set1_ps(1.0f)) == 0);

 int test_mm_comige_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_comige_ss
   // CHECK: call {{.*}}i32 @llvm.x86.sse.comige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
   return _mm_comige_ss(A, B);
 }
+TEST_CONSTEXPR(_mm_comige_ss(_mm_set1_ps(2.0f), _mm_set1_ps(2.0f)) == 1);
+TEST_CONSTEXPR(_mm_comige_ss(_mm_set1_ps(3.0f), _mm_set1_ps(2.0f)) == 1);
+TEST_CONSTEXPR(_mm_comige_ss(_mm_set1_ps(1.0f), _mm_set1_ps(2.0f)) == 0);
+TEST_CONSTEXPR(_mm_comige_ss(_mm_set1_ps(__builtin_nanf("")), _mm_set1_ps(2.0f)) == 0);

 int test_mm_comigt_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_comigt_ss
   // CHECK: call {{.*}}i32 @llvm.x86.sse.comigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
   return _mm_comigt_ss(A, B);
 }
+TEST_CONSTEXPR(_mm_comigt_ss(_mm_set1_ps(3.0f), _mm_set1_ps(2.0f)) == 1);
+TEST_CONSTEXPR(_mm_comigt_ss(_mm_set1_ps(2.0f), _mm_set1_ps(3.0f)) == 0);
+TEST_CONSTEXPR(_mm_comigt_ss(_mm_set1_ps(2.0f), _mm_set1_ps(2.0f)) == 0);
+TEST_CONSTEXPR(_mm_comigt_ss(_mm_set1_ps(__builtin_nanf("")), _mm_set1_ps(2.0f)) == 0);

 int test_mm_comile_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_comile_ss
   // CHECK: call {{.*}}i32 @llvm.x86.sse.comile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
   return _mm_comile_ss(A, B);
 }
+TEST_CONSTEXPR(_mm_comile_ss(_mm_set1_ps(1.0f), _mm_set1_ps(1.0f)) == 1);
+TEST_CONSTEXPR(_mm_comile_ss(_mm_set1_ps(1.0f), _mm_set1_ps(2.0f)) == 1);
+TEST_CONSTEXPR(_mm_comile_ss(_mm_set1_ps(2.0f), _mm_set1_ps(1.0f)) == 0);
+TEST_CONSTEXPR(_mm_comile_ss(_mm_set1_ps(__builtin_nanf("")), _mm_set1_ps(2.0f)) == 0);

 int test_mm_comilt_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_comilt_ss
   // CHECK: call {{.*}}i32 @llvm.x86.sse.comilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
   return _mm_comilt_ss(A, B);
 }
+TEST_CONSTEXPR(_mm_comilt_ss(_mm_set1_ps(1.0f), _mm_set1_ps(2.0f)) == 1);
+TEST_CONSTEXPR(_mm_comilt_ss(_mm_set1_ps(2.0f), _mm_set1_ps(1.0f)) == 0);
+TEST_CONSTEXPR(_mm_comilt_ss(_mm_set1_ps(1.0f), _mm_set1_ps(1.0f)) == 0);
+TEST_CONSTEXPR(_mm_comilt_ss(_mm_set1_ps(__builtin_nanf("")), _mm_set1_ps(2.0f)) == 0);

 int test_mm_comineq_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_comineq_ss
   // CHECK: call {{.*}}i32 @llvm.x86.sse.comineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
   return _mm_comineq_ss(A, B);
 }
+TEST_CONSTEXPR(_mm_comineq_ss(_mm_set1_ps(2.0f), _mm_set1_ps(3.0f)) == 1);
+TEST_CONSTEXPR(_mm_comineq_ss(_mm_set1_ps(__builtin_nanf("")), _mm_set1_ps(3.0f)) == 1);

 int test_mm_cvt_ss2si(__m128 A) {
   // CHECK-LABEL: test_mm_cvt_ss2si
@@ -852,36 +872,57 @@ int test_mm_ucomieq_ss(__m128 A, __m128 B) {
   // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
   return _mm_ucomieq_ss(A, B);
 }
+TEST_CONSTEXPR(_mm_ucomieq_ss(_mm_set1_ps(1.0f), _mm_set1_ps(1.0f)) == 1);
+TEST_CONSTEXPR(_mm_ucomieq_ss(_mm_set1_ps(__builtin_nanf("")), _mm_set1_ps(1.0f)) == 0);

 int test_mm_ucomige_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_ucomige_ss
   // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
   return _mm_ucomige_ss(A, B);
 }
+TEST_CONSTEXPR(_mm_ucomige_ss(_mm_set1_ps(1.0f), _mm_set1_ps(1.0f)) == 1);
+TEST_CONSTEXPR(_mm_ucomige_ss(_mm_set1_ps(2.0f), _mm_set1_ps(1.0f)) == 1);
+TEST_CONSTEXPR(_mm_ucomige_ss(_mm_set1_ps(1.0f), _mm_set1_ps(2.0f)) == 0);
+TEST_CONSTEXPR(_mm_ucomige_ss(_mm_set1_ps(__builtin_nanf("")), _mm_set1_ps(1.0f)) == 0);

 int test_mm_ucomigt_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_ucomigt_ss
   // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
   return _mm_ucomigt_ss(A, B);
 }
+TEST_CONSTEXPR(_mm_ucomigt_ss(_mm_set1_ps(2.0f), _mm_set1_ps(1.0f)) == 1);
+TEST_CONSTEXPR(_mm_ucomigt_ss(_mm_set1_ps(1.0f), _mm_set1_ps(2.0f)) == 0);
+TEST_CONSTEXPR(_mm_ucomigt_ss(_mm_set1_ps(1.0f), _mm_set1_ps(1.0f)) == 0);
+TEST_CONSTEXPR(_mm_ucomigt_ss(_mm_set1_ps(__builtin_nanf("")), _mm_set1_ps(1.0f)) == 0);

 int test_mm_ucomile_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_ucomile_ss
   // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
   return _mm_ucomile_ss(A, B);
 }
+TEST_CONSTEXPR(_mm_ucomile_ss(_mm_set1_ps(1.0f), _mm_set1_ps(1.0f)) == 1);
+TEST_CONSTEXPR(_mm_ucomile_ss(_mm_set1_ps(1.0f), _mm_set1_ps(2.0f)) == 1);
+TEST_CONSTEXPR(_mm_ucomile_ss(_mm_set1_ps(2.0f), _mm_set1_ps(1.0f)) == 0);
+TEST_CONSTEXPR(_mm_ucomile_ss(_mm_set1_ps(__builtin_nanf("")), _mm_set1_ps(2.0f)) == 0);

 int test_mm_ucomilt_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_ucomilt_ss
   // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
   return _mm_ucomilt_ss(A, B);
 }
+TEST_CONSTEXPR(_mm_ucomilt_ss(_mm_set1_ps(1.0f), _mm_set1_ps(2.0f)) == 1);
+TEST_CONSTEXPR(_mm_ucomilt_ss(_mm_set1_ps(2.0f), _mm_set1_ps(1.0f)) == 0);
+TEST_CONSTEXPR(_mm_ucomilt_ss(_mm_set1_ps(1.0f), _mm_set1_ps(1.0f)) == 0);
+TEST_CONSTEXPR(_mm_ucomilt_ss(_mm_set1_ps(__builtin_nanf("")), _mm_set1_ps(2.0f)) == 0);
+TEST_CONSTEXPR(_mm_ucomilt_ss(_mm_set1_ps(2.0f), _mm_set1_ps(__builtin_nanf(""))) == 0);

 int test_mm_ucomineq_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_ucomineq_ss
   // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
   return _mm_ucomineq_ss(A, B);
 }
+TEST_CONSTEXPR(_mm_ucomineq_ss(_mm_set1_ps(5.0f), _mm_set1_ps(5.0f)) == 0);
+TEST_CONSTEXPR(_mm_ucomineq_ss(_mm_set1_ps(__builtin_nanf("")), _mm_set1_ps(5.0f)) == 1);

 __m128 test_mm_undefined_ps(void) {
   // CHECK-LABEL: test_mm_undefined_ps
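For readers skimming the tests: TEST_CONSTEXPR comes from the builtin_test_helpers.h header these files include, and when constexpr evaluation is available it reduces to roughly a static_assert, so each of the lines above is verified at compile time. A sketch of its shape, under that assumption (the real guard lives in the helper header):

    #if defined(__cplusplus) && __cplusplus >= 201103L
    #define TEST_CONSTEXPR(...) static_assert((__VA_ARGS__), "")
    #else
    #define TEST_CONSTEXPR(...) /* folded away when constexpr is unavailable */
    #endif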
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 8428fd6540ac9..0e34fe1b61deb 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -515,36 +515,56 @@ int test_mm_comieq_sd(__m128d A, __m128d B) {
   // CHECK: call {{.*}}i32 @llvm.x86.sse2.comieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
   return _mm_comieq_sd(A, B);
 }
+TEST_CONSTEXPR(_mm_comieq_sd(_mm_set1_pd(1.0), _mm_set1_pd(1.0)) == 1);
+TEST_CONSTEXPR(_mm_comieq_sd(_mm_set1_pd(__builtin_nan("")), _mm_set1_pd(1.0)) == 0);

 int test_mm_comige_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_comige_sd
   // CHECK: call {{.*}}i32 @llvm.x86.sse2.comige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
   return _mm_comige_sd(A, B);
 }
+TEST_CONSTEXPR(_mm_comige_sd(_mm_set1_pd(2.0), _mm_set1_pd(2.0)) == 1);
+TEST_CONSTEXPR(_mm_comige_sd(_mm_set1_pd(3.0), _mm_set1_pd(2.0)) == 1);
+TEST_CONSTEXPR(_mm_comige_sd(_mm_set1_pd(1.0), _mm_set1_pd(2.0)) == 0);
+TEST_CONSTEXPR(_mm_comige_sd(_mm_set1_pd(__builtin_nan("")), _mm_set1_pd(2.0)) == 0);

 int test_mm_comigt_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_comigt_sd
   // CHECK: call {{.*}}i32 @llvm.x86.sse2.comigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
   return _mm_comigt_sd(A, B);
 }
+TEST_CONSTEXPR(_mm_comigt_sd(_mm_set1_pd(3.0), _mm_set1_pd(2.0)) == 1);
+TEST_CONSTEXPR(_mm_comigt_sd(_mm_set1_pd(2.0), _mm_set1_pd(3.0)) == 0);
+TEST_CONSTEXPR(_mm_comigt_sd(_mm_set1_pd(2.0), _mm_set1_pd(2.0)) == 0);
+TEST_CONSTEXPR(_mm_comigt_sd(_mm_set1_pd(__builtin_nan("")), _mm_set1_pd(2.0)) == 0);

 int test_mm_comile_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_comile_sd
   // CHECK: call {{.*}}i32 @llvm.x86.sse2.comile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
   return _mm_comile_sd(A, B);
 }
+TEST_CONSTEXPR(_mm_comile_sd(_mm_set1_pd(1.0), _mm_set1_pd(1.0)) == 1);
+TEST_CONSTEXPR(_mm_comile_sd(_mm_set1_pd(1.0), _mm_set1_pd(2.0)) == 1);
+TEST_CONSTEXPR(_mm_comile_sd(_mm_set1_pd(2.0), _mm_set1_pd(1.0)) == 0);
+TEST_CONSTEXPR(_mm_comile_sd(_mm_set1_pd(__builtin_nan("")), _mm_set1_pd(2.0)) == 0);

 int test_mm_comilt_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_comilt_sd
   // CHECK: call {{.*}}i32 @llvm.x86.sse2.comilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
   return _mm_comilt_sd(A, B);
 }
+TEST_CONSTEXPR(_mm_comilt_sd(_mm_set1_pd(1.0), _mm_set1_pd(2.0)) == 1);
+TEST_CONSTEXPR(_mm_comilt_sd(_mm_set1_pd(2.0), _mm_set1_pd(1.0)) == 0);
+TEST_CONSTEXPR(_mm_comilt_sd(_mm_set1_pd(1.0), _mm_set1_pd(1.0)) == 0);
+TEST_CONSTEXPR(_mm_comilt_sd(_mm_set1_pd(__builtin_nan("")), _mm_set1_pd(2.0)) == 0);

 int test_mm_comineq_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_comineq_sd
   // CHECK: call {{.*}}i32 @llvm.x86.sse2.comineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
   return _mm_comineq_sd(A, B);
 }
+TEST_CONSTEXPR(_mm_comineq_sd(_mm_set1_pd(2.0), _mm_set1_pd(3.0)) == 1);
+TEST_CONSTEXPR(_mm_comineq_sd(_mm_set1_pd(__builtin_nan("")), _mm_set1_pd(3.0)) == 1);

 __m128d test_mm_cvtepi32_pd(__m128i A) {
   // CHECK-LABEL: test_mm_cvtepi32_pd
@@ -1804,36 +1824,57 @@ int test_mm_ucomieq_sd(__m128d A, __m128d B) {
   // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
   return _mm_ucomieq_sd(A, B);
 }
+TEST_CONSTEXPR(_mm_ucomieq_sd(_mm_set1_pd(1.0), _mm_set1_pd(1.0)) == 1);
+TEST_CONSTEXPR(_mm_ucomieq_sd(_mm_set1_pd(__builtin_nan("")), _mm_set1_pd(1.0)) == 0);

 int test_mm_ucomige_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_ucomige_sd
   // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
   return _mm_ucomige_sd(A, B);
 }
+TEST_CONSTEXPR(_mm_ucomige_sd(_mm_set1_pd(1.0), _mm_set1_pd(1.0)) == 1);
+TEST_CONSTEXPR(_mm_ucomige_sd(_mm_set1_pd(2.0), _mm_set1_pd(1.0)) == 1);
+TEST_CONSTEXPR(_mm_ucomige_sd(_mm_set1_pd(1.0), _mm_set1_pd(2.0)) == 0);
+TEST_CONSTEXPR(_mm_ucomige_sd(_mm_set1_pd(__builtin_nan("")), _mm_set1_pd(1.0)) == 0);

 int test_mm_ucomigt_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_ucomigt_sd
   // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
   return _mm_ucomigt_sd(A, B);
 }
+TEST_CONSTEXPR(_mm_ucomigt_sd(_mm_set1_pd(2.0), _mm_set1_pd(1.0)) == 1);
+TEST_CONSTEXPR(_mm_ucomigt_sd(_mm_set1_pd(1.0), _mm_set1_pd(2.0)) == 0);
+TEST_CONSTEXPR(_mm_ucomigt_sd(_mm_set1_pd(1.0), _mm_set1_pd(1.0)) == 0);
+TEST_CONSTEXPR(_mm_ucomigt_sd(_mm_set1_pd(__builtin_nan("")), _mm_set1_pd(1.0)) == 0);

 int test_mm_ucomile_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_ucomile_sd
   // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
   return _mm_ucomile_sd(A, B);
 }
+TEST_CONSTEXPR(_mm_ucomile_sd(_mm_set1_pd(1.0), _mm_set1_pd(1.0)) == 1);
+TEST_CONSTEXPR(_mm_ucomile_sd(_mm_set1_pd(1.0), _mm_set1_pd(2.0)) == 1);
+TEST_CONSTEXPR(_mm_ucomile_sd(_mm_set1_pd(2.0), _mm_set1_pd(1.0)) == 0);
+TEST_CONSTEXPR(_mm_ucomile_sd(_mm_set1_pd(__builtin_nan("")), _mm_set1_pd(2.0)) == 0);

 int test_mm_ucomilt_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_ucomilt_sd
   // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
   return _mm_ucomilt_sd(A, B);
 }
+TEST_CONSTEXPR(_mm_ucomilt_sd(_mm_set1_pd(1.0), _mm_set1_pd(2.0)) == 1);
+TEST_CONSTEXPR(_mm_ucomilt_sd(_mm_set1_pd(2.0), _mm_set1_pd(1.0)) == 0);
+TEST_CONSTEXPR(_mm_ucomilt_sd(_mm_set1_pd(1.0), _mm_set1_pd(1.0)) == 0);
+TEST_CONSTEXPR(_mm_ucomilt_sd(_mm_set1_pd(__builtin_nan("")), _mm_set1_pd(2.0)) == 0);
+TEST_CONSTEXPR(_mm_ucomilt_sd(_mm_set1_pd(2.0), _mm_set1_pd(__builtin_nan(""))) == 0);

 int test_mm_ucomineq_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_ucomineq_sd
   // CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
   return _mm_ucomineq_sd(A, B);
 }
+TEST_CONSTEXPR(_mm_ucomineq_sd(_mm_set1_pd(5.0), _mm_set1_pd(5.0)) == 0);
+TEST_CONSTEXPR(_mm_ucomineq_sd(_mm_set1_pd(__builtin_nan("")), _mm_set1_pd(5.0)) == 1);

 __m128d test_mm_undefined_pd(void) {
   // X64-LABEL: test_mm_undefined_pd
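The sd variants mirror the ss ones with double lanes. Note that the COMI and UCOMI forms compare only lane 0 and return identical values; on hardware they differ only in which NaNs raise the invalid exception, which constant evaluation does not model. A hypothetical lane-0 value model (not part of the patch):

    typedef double v2df __attribute__((vector_size(16)));

    /* comige-style contract: 0 whenever either lane 0 is NaN,
       since >= is an ordered comparison in C as well. */
    static inline int comige_sd_model(v2df a, v2df b) { return a[0] >= b[0]; }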
diff --git a/clang/test/CodeGen/X86/x86-intrinsics-imm.c b/clang/test/CodeGen/X86/x86-intrinsics-imm.c
index 76c0d2f01dc8b..5d3e0a4776b30 100644
--- a/clang/test/CodeGen/X86/x86-intrinsics-imm.c
+++ b/clang/test/CodeGen/X86/x86-intrinsics-imm.c
@@ -10,6 +10,7 @@
 #define __MM_MALLOC_H
 #include <x86intrin.h>
+#include "builtin_test_helpers.h"

 unsigned short check__cvtss_sh(float val, const int I) {
   return _cvtss_sh(val, I); // expected-error {{argument to '__builtin_ia32_vcvtps2ph' must be a constant integer}}
 }
@@ -126,6 +127,166 @@ void check__mm_cmp_sd(__m128d a, __m128d b, const int c) {
 void check__mm_cmp_ss(__m128 a, __m128 b, const int c) {
   _mm_cmp_ss(a, b, c); // expected-error {{argument to '__builtin_ia32_cmpss' must be a constant integer}}
 }
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 0))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 0))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 0))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 0))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 1))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 1))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 1))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 1))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 2))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 2))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 2))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 2))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){__builtin_nanf(""),10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 3))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){__builtin_nanf(""),10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 3))[1] == ((__v4si)(__m128)(__v4sf){__builtin_nanf(""),10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){__builtin_nanf(""),10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 3))[2] == ((__v4si)(__m128)(__v4sf){__builtin_nanf(""),10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){__builtin_nanf(""),10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 3))[3] == ((__v4si)(__m128)(__v4sf){__builtin_nanf(""),10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 4))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 4))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 4))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 4))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 5))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 5))[1] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 5))[2] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 5))[3] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 6))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 6))[1] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 6))[2] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 6))[3] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 7))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 7))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 7))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 7))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 8))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 8))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 8))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 8))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 9))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 9))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 9))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 9))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 10))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 10))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 10))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 10))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 11))[0] == 0) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 11))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 11))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 11))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 12))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 12))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 12))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 12))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 13))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 13))[1] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 13))[2] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 13))[3] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 14))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 14))[1] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 14))[2] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 14))[3] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 15))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 15))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 15))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 15))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 16))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 16))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 16))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 16))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 17))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 17))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 17))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 17))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 18))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 18))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 18))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 18))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){__builtin_nanf(""),10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 19))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){__builtin_nanf(""),10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 19))[1] == ((__v4si)(__m128)(__v4sf){__builtin_nanf(""),10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){__builtin_nanf(""),10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 19))[2] == ((__v4si)(__m128)(__v4sf){__builtin_nanf(""),10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){__builtin_nanf(""),10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 19))[3] == ((__v4si)(__m128)(__v4sf){__builtin_nanf(""),10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 20))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 20))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 20))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 20))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 21))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 21))[1] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 21))[2] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 21))[3] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 22))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 22))[1] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 22))[2] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 22))[3] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 23))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 23))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 23))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 23))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){__builtin_nanf(""),10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 24))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){__builtin_nanf(""),10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 24))[1] == ((__v4si)(__m128)(__v4sf){__builtin_nanf(""),10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){__builtin_nanf(""),10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 24))[2] == ((__v4si)(__m128)(__v4sf){__builtin_nanf(""),10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){__builtin_nanf(""),10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 24))[3] == ((__v4si)(__m128)(__v4sf){__builtin_nanf(""),10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 25))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 25))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 25))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 25))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 26))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 26))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 26))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 26))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 27))[0] == 0) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 27))[1] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 27))[2] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 27))[3] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 28))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 28))[1] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 28))[2] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){1.0f,10,20,30}, (__m128)(__v4sf){2.0f,40,50,60}, 28))[3] == ((__v4si)(__m128)(__v4sf){1.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 29))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 29))[1] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 29))[2] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 29))[3] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 30))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 30))[1] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 30))[2] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){2.0f,10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 30))[3] == ((__v4si)(__m128)(__v4sf){2.0f,10,20,30})[3]));
+TEST_CONSTEXPR(
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){__builtin_nanf(""),10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 31))[0] == -1) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){__builtin_nanf(""),10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 31))[1] == ((__v4si)(__m128)(__v4sf){__builtin_nanf(""),10,20,30})[1]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){__builtin_nanf(""),10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 31))[2] == ((__v4si)(__m128)(__v4sf){__builtin_nanf(""),10,20,30})[2]) &&
+  (((__v4si)_mm_cmp_ss((__m128)(__v4sf){__builtin_nanf(""),10,20,30}, (__m128)(__v4sf){1.0f,40,50,60}, 31))[3] == ((__v4si)(__m128)(__v4sf){__builtin_nanf(""),10,20,30})[3]));

 void check__mm256_extractf128_pd(__m256d a, const int o) {
   _mm256_extractf128_pd(a, o); // expected-error {{argument to '__builtin_ia32_vextractf128_pd256' must be a constant integer}}
@@ -382,4 +543,3 @@ void check__mm_extracti_si64(__m128i a, const char len, const char id) {
 void check__insert_si64(__m128 a, __m128 b, const char len, const char id) {
   _mm_inserti_si64(a, b, len, id); // expected-error {{argument to '__builtin_ia32_insertqi' must be a constant integer}}
 }
-
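For reference, the immediates 0..31 exercised above follow the AVX _CMP_* encoding: the low four bits select the predicate, bit 4 only selects the signalling flavour (which does not change the value), and the ss form writes an all-ones or all-zeros mask into lane 0 while passing lanes 1..3 of the first operand through untouched. A sketch of one lane's evaluation under those assumptions, illustrative only and not the in-tree evaluator:

    #include <math.h>

    /* Returns the 32-bit lane mask (-1 or 0) produced by a CMPSS/CMPPS
       lane for immediate `imm`; `u` flags the unordered (NaN) case. */
    static int cmp_lane(float a, float b, int imm) {
        int u = isnan(a) || isnan(b);
        int r;
        switch (imm & 0xF) {               /* bit 4 = signalling only */
        case 0:  r = !u && a == b;   break; /* EQ_OQ    */
        case 1:  r = !u && a <  b;   break; /* LT_OS    */
        case 2:  r = !u && a <= b;   break; /* LE_OS    */
        case 3:  r = u;              break; /* UNORD_Q  */
        case 4:  r = u || a != b;    break; /* NEQ_UQ   */
        case 5:  r = u || !(a <  b); break; /* NLT_US   */
        case 6:  r = u || !(a <= b); break; /* NLE_US   */
        case 7:  r = !u;             break; /* ORD_Q    */
        case 8:  r = u || a == b;    break; /* EQ_UQ    */
        case 9:  r = u || !(a >= b); break; /* NGE_US   */
        case 10: r = u || !(a >  b); break; /* NGT_US   */
        case 11: r = 0;              break; /* FALSE_OQ */
        case 12: r = !u && a != b;   break; /* NEQ_OQ   */
        case 13: r = !u && a >= b;   break; /* GE_OS    */
        case 14: r = !u && a >  b;   break; /* GT_OS    */
        default: r = 1;              break; /* TRUE_UQ  */
        }
        return r ? -1 : 0;
    }

This matches the spot checks above, e.g. imm 11 (FALSE_OQ) and 27 (FALSE_OS) yield 0 for any inputs, and the NaN rows at imm 3, 19, 24, and 31 all yield -1.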