Skip to content
Open
16 changes: 8 additions & 8 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ def undef128 : X86Builtin<"_Vector<2, double>()"> {
let Attributes = [Const, NoThrow, RequiredVectorWidth<128>];
}

def undef256 : X86Builtin<"_Vector<4, double>()"> {
let Attributes = [Const, NoThrow, RequiredVectorWidth<256>];
def undef256 : X86Builtin<"_Vector<4, double>()" > {
let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<256>];
}

def undef512 : X86Builtin<"_Vector<8, double>()"> {
let Attributes = [Const, NoThrow, RequiredVectorWidth<512>];
let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<512>];
}

// FLAGS
Expand Down Expand Up @@ -166,7 +166,7 @@ let Features = "sse2", Attributes = [NoThrow] in {
def movnti : X86Builtin<"void(int *, int)">;
}

let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
def cvtpd2dq : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>)">;
def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
Expand Down Expand Up @@ -462,7 +462,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
}

let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
Expand Down Expand Up @@ -1004,7 +1004,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128
def cmppd128_mask : X86Builtin<"unsigned char(_Vector<2, double>, _Vector<2, double>, _Constant int, unsigned char)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def rndscaleps_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
def rndscalepd_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int, _Vector<8, double>, unsigned char, _Constant int)">;
def cvtps2dq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">;
Expand Down Expand Up @@ -1452,7 +1452,7 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
def compressstoresi256_mask : X86Builtin<"void(_Vector<8, int *>, _Vector<8, int>, unsigned char)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtpd2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
def cvtpd2ps_mask : X86Builtin<"_Vector<4, float>(_Vector<2, double>, _Vector<4, float>, unsigned char)">;
def cvtpd2udq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
Expand Down Expand Up @@ -3287,7 +3287,7 @@ let Features = "avx512bw,avx512vl",
def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
def cvtsi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, int, _Constant int)">;
def cvtss2sd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<4, float>, _Vector<2, double>, unsigned char, _Constant int)">;
Expand Down
143 changes: 143 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12165,7 +12165,36 @@ static bool evalShuffleGeneric(
Out = APValue(ResultElements.data(), ResultElements.size());
return true;
}
/// Narrow \p OrigVal to IEEE single precision for constant evaluation,
/// accepting only conversions that are exact.
///
/// \param Info    Evaluation state used to emit constant-expression notes.
/// \param E       Expression the notes are attached to.
/// \param OrigVal Value to narrow (taken by value; the caller's copy is
///                untouched).
/// \param Result  Receives the narrowed value; written only on success.
/// \returns true on success. Returns false, after emitting a note, when the
///          input is an infinity or a NaN, when the conversion loses
///          information or produces a denormal, or when the conversion
///          reports any status other than opOK.
static bool ConvertDoubleToFloatStrict(EvalInfo &Info, const Expr *E,
                                       APFloat OrigVal, APValue &Result) {
  // Non-finite inputs are rejected before any conversion is attempted. The
  // note receives 0 for an infinity and 1 for a NaN.
  if (OrigVal.isInfinity() || OrigVal.isNaN()) {
    Info.CCEDiag(E, diag::note_constexpr_float_arithmetic)
        << (OrigVal.isNaN() ? 1 : 0);
    return false;
  }

  APFloat Narrowed(OrigVal);
  bool Inexact = false;
  const APFloat::opStatus ConvStatus = Narrowed.convert(
      APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Inexact);

  // Any loss of information, or a denormal result, is treated as a strict
  // floating-point evaluation failure.
  const bool StrictViolation = Inexact || Narrowed.isDenormal();
  if (StrictViolation) {
    Info.CCEDiag(E, diag::note_constexpr_float_arithmetic_strict);
    return false;
  }

  // Whatever non-OK status remains is reported as an invalid subexpression.
  if (ConvStatus != APFloat::opOK) {
    Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
    return false;
  }

  Result = APValue(Narrowed);
  return true;
}
static bool evalShiftWithCount(
EvalInfo &Info, const CallExpr *Call, APValue &Out,
llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp,
Expand Down Expand Up @@ -12924,6 +12953,120 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {

return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}

case X86::BI__builtin_ia32_cvtsd2ss: {
  // Result lane 0 is lane 0 of argument 1 narrowed to float; every other
  // result lane is copied from the same lane of argument 0.
  APValue UpperSrc, DoubleSrc;
  if (!EvaluateAsRValue(Info, E->getArg(0), UpperSrc))
    return false;
  if (!EvaluateAsRValue(Info, E->getArg(1), DoubleSrc))
    return false;

  // The narrowing must be exact; otherwise the helper has already emitted a
  // note and evaluation fails.
  APValue Narrowed;
  const bool Converted = ConvertDoubleToFloatStrict(
      Info, E, DoubleSrc.getVectorElt(0).getFloat(), Narrowed);
  if (!Converted)
    return false;

  SmallVector<APValue, 4> Lanes;
  Lanes.push_back(Narrowed);

  const unsigned UpperCount = UpperSrc.getVectorLength();
  for (unsigned Lane = 1; Lane < UpperCount; ++Lane)
    Lanes.push_back(UpperSrc.getVectorElt(Lane));

  return Success(Lanes, E);
}
case X86::BI__builtin_ia32_cvtsd2ss_round_mask: {
  // Arguments read here, in order: 0 = source of the upper result lanes,
  // 1 = double vector whose lane 0 is narrowed, 2 = pass-through vector,
  // 3 = write mask. The trailing constant argument is not consulted.
  APValue Operands[4];
  for (unsigned ArgNo = 0; ArgNo != 4; ++ArgNo)
    if (!EvaluateAsRValue(Info, E->getArg(ArgNo), Operands[ArgNo]))
      return false;

  const APValue &UpperSrc = Operands[0];
  const APValue &DoubleSrc = Operands[1];
  const APValue &PassThru = Operands[2];

  // Only bit 0 of the mask matters: it selects between the converted value
  // and the pass-through lane for result lane 0.
  const unsigned MaskBits = Operands[3].getInt().getZExtValue();
  const bool ConvertLane0 = (MaskBits & 1) != 0;

  SmallVector<APValue, 4> Lanes;
  if (!ConvertLane0) {
    Lanes.push_back(PassThru.getVectorElt(0));
  } else {
    APValue Narrowed;
    if (!ConvertDoubleToFloatStrict(
            Info, E, DoubleSrc.getVectorElt(0).getFloat(), Narrowed))
      return false;
    Lanes.push_back(Narrowed);
  }

  // Remaining lanes always come from argument 0.
  const unsigned UpperCount = UpperSrc.getVectorLength();
  for (unsigned Lane = 1; Lane < UpperCount; ++Lane)
    Lanes.push_back(UpperSrc.getVectorElt(Lane));

  return Success(Lanes, E);
}
case X86::BI__builtin_ia32_cvtpd2ps:
case X86::BI__builtin_ia32_cvtpd2ps256:
case X86::BI__builtin_ia32_cvtpd2ps_mask:
case X86::BI__builtin_ia32_cvtpd2ps512_mask: {
  // Packed double -> float narrowing.
  //
  // Arguments: 0 = source vector of doubles. For the *_mask forms,
  // 1 = merge (pass-through) vector and 2 = write mask.
  //
  // Result lane I, for I below the number of source elements, is either the
  // exactly-narrowed source element or, when the form is masked and mask bit
  // I is clear, lane I of the merge vector. Result lanes beyond the source
  // element count (the 128-bit forms produce 4 floats from 2 doubles) are
  // always zero: the instruction zeroes them regardless of the write mask,
  // so the mask must not be consulted for those lanes.
  const auto BuiltinID = E->getBuiltinCallee();
  const bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask ||
                         BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);

  APValue InputValue;
  if (!EvaluateAsRValue(Info, E->getArg(0), InputValue))
    return false;

  const unsigned NumEltsResult =
      E->getType()->getAs<VectorType>()->getNumElements();
  const unsigned NumEltsInput = InputValue.getVectorLength();

  APValue MergeValue;
  unsigned Mask = 0xFFFFFFFF;
  if (IsMasked) {
    APValue MaskValue;
    if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue))
      return false;
    Mask = MaskValue.getInt().getZExtValue();

    // The merge operand is only evaluated when some lane that is actually
    // backed by a source element is masked off. This keeps an unused
    // pass-through operand from making the whole call non-constant.
    bool NeedsMerge = false;
    for (unsigned I = 0; I < NumEltsResult && I < NumEltsInput; ++I) {
      if (!((Mask >> I) & 1)) {
        NeedsMerge = true;
        break;
      }
    }
    if (NeedsMerge && !EvaluateAsRValue(Info, E->getArg(1), MergeValue))
      return false;
  }

  SmallVector<APValue, 8> Elements;
  for (unsigned I = 0; I < NumEltsResult; ++I) {
    // Lanes with no corresponding source element are zeroed unconditionally;
    // this check must precede the mask test.
    if (I >= NumEltsInput) {
      Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle())));
      continue;
    }

    // Masked-off lane: MergeValue is guaranteed to have been evaluated
    // above, because this lane is within the scanned range.
    if (IsMasked && !((Mask >> I) & 1)) {
      Elements.push_back(MergeValue.getVectorElt(I));
      continue;
    }

    APValue ResultVal;
    if (!ConvertDoubleToFloatStrict(
            Info, E, InputValue.getVectorElt(I).getFloat(), ResultVal))
      return false;

    Elements.push_back(ResultVal);
  }
  return Success(Elements, E);
}

case X86::BI__builtin_ia32_shufps:
case X86::BI__builtin_ia32_shufps256:
case X86::BI__builtin_ia32_shufps512: {
Expand Down
44 changes: 18 additions & 26 deletions clang/lib/Headers/avx512fintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,8 @@ _mm512_undefined(void)
return (__m512)__builtin_ia32_undef512();
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_undefined_ps(void)
{
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_undefined_ps(void) {
return (__m512)__builtin_ia32_undef512();
}

Expand Down Expand Up @@ -3489,44 +3488,39 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
(__v8sf)_mm256_setzero_ps(), \
(__mmask8)(U), (int)(R)))

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtpd_ps (__m512d __A)
{
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_cvtpd_ps(__m512d __A) {
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
(__v8sf) _mm256_undefined_ps (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
{
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A) {
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
(__v8sf) __W,
(__mmask8) __U,
_MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
{
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A) {
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
(__v8sf) _mm256_setzero_ps (),
(__mmask8) __U,
_MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_cvtpd_pslo (__m512d __A)
{
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_cvtpd_pslo(__m512d __A) {
return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
(__v8sf) _mm256_setzero_ps (),
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
{
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) {
return (__m512) __builtin_shufflevector (
(__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
__U, __A),
Expand Down Expand Up @@ -5382,8 +5376,8 @@ _mm512_kmov (__mmask16 __A)
((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif

static __inline__ __m512i
__DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B) {
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sll_epi32(__m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
}

Expand Down Expand Up @@ -8654,18 +8648,16 @@ _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) {
return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
(__v2df)__B,
(__v4sf)__W,
(__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B) {
return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
(__v2df)__B,
(__v4sf)_mm_setzero_ps(),
Expand Down
16 changes: 8 additions & 8 deletions clang/lib/Headers/avx512vlintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1791,30 +1791,30 @@ _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
(__v4si)_mm_setzero_si128());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A) {
return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
(__v4sf) __W,
(__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A) {
return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
(__v4sf)
_mm_setzero_ps (),
(__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm256_cvtpd_ps(__A),
(__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm256_cvtpd_ps(__A),
(__v4sf)_mm_setzero_ps());
Expand Down
10 changes: 4 additions & 6 deletions clang/lib/Headers/avxintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -2186,9 +2186,8 @@ _mm256_cvtepi32_ps(__m256i __a) {
/// \param __a
/// A 256-bit vector of [4 x double].
/// \returns A 128-bit vector of [4 x float] containing the converted values.
static __inline __m128 __DEFAULT_FN_ATTRS
_mm256_cvtpd_ps(__m256d __a)
{
static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_cvtpd_ps(__m256d __a) {
return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
}

Expand Down Expand Up @@ -3606,9 +3605,8 @@ _mm256_undefined_pd(void)
/// This intrinsic has no corresponding instruction.
///
/// \returns A 256-bit vector of [8 x float] containing undefined values.
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_undefined_ps(void)
{
static __inline__ __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_undefined_ps(void) {
return (__m256)__builtin_ia32_undef256();
}

Expand Down
Loading