Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,15 @@ let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def rcpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
def rsqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
def rsqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
def sqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
def sqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
def shufps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
}

let Features = "sse",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def sqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
}

let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
def maskmovdqu : X86Builtin<"void(_Vector<16, char>, _Vector<16, char>, char *)">;
}
Expand All @@ -221,8 +225,9 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i
def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
def psadbw128
: X86Builtin<
"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
def shufpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
def cvtpd2dq : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>)">;
Expand Down Expand Up @@ -295,6 +300,8 @@ let Features = "sse2",

def psrawi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
def psradi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;

def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
}

let Features = "sse3", Attributes = [NoThrow] in {
Expand Down Expand Up @@ -500,13 +507,14 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">;
def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">;
def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">;
def sqrtpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">;
def sqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
}

let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
def sqrtpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">;
def sqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
def vpermilps256
: X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
def rsqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
def rcpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
def roundpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
Expand Down Expand Up @@ -946,7 +954,8 @@ let Features = "pku", Attributes = [NoThrow] in {
def wrpkru : X86Builtin<"void(unsigned int)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def sqrtpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">;
def sqrtps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">;
}
Expand Down
87 changes: 87 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SipHash.h"
#include <cmath>

namespace clang {
namespace interp {
Expand Down Expand Up @@ -2994,6 +2995,85 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
return true;
}

static llvm::APFloat apply_x86_sqrt(llvm::APFloat Val,
const llvm::fltSemantics &Semantics) {
if (Val.isNegative() && !Val.isZero()) {
return llvm::APFloat::getQNaN(Semantics);
} else {
double DoubleValue = Val.convertToDouble();
double SqrtValue = ::sqrt(DoubleValue);

llvm::APFloat TempValue(SqrtValue);

bool LosesInfo;
TempValue.convert(Semantics, llvm::APFloat::rmNearestTiesToEven,
&LosesInfo);
return TempValue;
}
}

static bool interp__builtin_x86_sqrt(InterpState &S, CodePtr OpPC,
const CallExpr *Call, unsigned ID) {
unsigned NumArgs = Call->getNumArgs();
assert(NumArgs == 1 || NumArgs == 2);
const Expr *ArgExpr = Call->getArg(0);
QualType ArgTy = ArgExpr->getType();

if (!(ArgTy->isRealFloatingType() ||
(ArgTy->isVectorType() &&
ArgTy->castAs<VectorType>()->getElementType()->isRealFloatingType())))
return false;

const llvm::fltSemantics *SemanticsPtr;
if (ArgTy->isVectorType())
SemanticsPtr = &S.getContext().getFloatSemantics(
ArgTy->castAs<VectorType>()->getElementType());
else
SemanticsPtr = &S.getContext().getFloatSemantics(ArgTy);
const llvm::fltSemantics &Semantics = *SemanticsPtr;

if (NumArgs == 2) {
if (!Call->getArg(1)->getType()->isIntegerType()) {
return false;
}
APSInt RoundingMode = popToAPSInt(S, Call->getArg(1));
if (RoundingMode.getZExtValue() != 4) {
return false;
}
}

// Scalar case
if (!ArgTy->isVectorType()) {
llvm::APFloat Val = S.Stk.pop<Floating>().getAPFloat();
Val = apply_x86_sqrt(Val, Semantics);
S.Stk.push<Floating>(Val);
return true;
}

// Vector case
assert(ArgTy->isVectorType());
const auto *VT = ArgTy->castAs<VectorType>();

const Pointer &Arg = S.Stk.pop<Pointer>();
const Pointer &Dst = S.Stk.peek<Pointer>();

assert(Arg.getFieldDesc()->isPrimitiveArray());
assert(Dst.getFieldDesc()->isPrimitiveArray());
assert(Arg.getFieldDesc()->getNumElems() ==
Dst.getFieldDesc()->getNumElems());

unsigned NumElems = VT->getNumElements();

for (unsigned I = 0; I != NumElems; ++I) {
llvm::APFloat Val = Arg.elem<Floating>(I).getAPFloat();
Val = apply_x86_sqrt(Val, Semantics);
Dst.elem<Floating>(I) = Val;
}

Dst.initializeAllElements();
return true;
}

bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
Expand Down Expand Up @@ -3753,6 +3833,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_vinsertf128_si256:
case X86::BI__builtin_ia32_insert128i256:
return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID);
case X86::BI__builtin_ia32_sqrtpd:
case X86::BI__builtin_ia32_sqrtps:
case X86::BI__builtin_ia32_sqrtpd256:
case X86::BI__builtin_ia32_sqrtps256:
case X86::BI__builtin_ia32_sqrtps512:
case X86::BI__builtin_ia32_sqrtpd512:
return interp__builtin_x86_sqrt(S, OpPC, Call, BuiltinID);

default:
S.FFDiag(S.Current->getLocation(OpPC),
Expand Down
40 changes: 40 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
#include "llvm/Support/SipHash.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"
#include <cmath>
#include <cstring>
#include <functional>
#include <limits>
Expand Down Expand Up @@ -12235,6 +12236,45 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {

return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
case X86::BI__builtin_ia32_sqrtpd:
case X86::BI__builtin_ia32_sqrtps:
case X86::BI__builtin_ia32_sqrtpd256:
case X86::BI__builtin_ia32_sqrtps256:
case X86::BI__builtin_ia32_sqrtps512:
case X86::BI__builtin_ia32_sqrtpd512: {
APValue Source;
if (!EvaluateAsRValue(Info, E->getArg(0), Source))
return false;

QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
const llvm::fltSemantics &Semantics =
Info.Ctx.getFloatTypeSemantics(DestEltTy);
unsigned SourceLen = Source.getVectorLength();
SmallVector<APValue, 4> ResultElements;
ResultElements.reserve(SourceLen);

for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
APValue CurrentEle = Source.getVectorElt(EltNum);
if (DestEltTy->isFloatingType()) {
llvm::APFloat Value = CurrentEle.getFloat();
if (Value.isNegative() && !Value.isZero()) {
Value = llvm::APFloat::getQNaN(Value.getSemantics());
} else {
double DoubleValue = Value.convertToDouble();
double SqrtValue = sqrt(DoubleValue);
llvm::APFloat TempValue(SqrtValue);
bool LosesInfo;
TempValue.convert(Semantics, llvm::RoundingMode::NearestTiesToEven,
&LosesInfo);
Value = TempValue;
}
ResultElements.push_back(APValue(Value));
} else {
return false;
}
}
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
}
}

Expand Down
30 changes: 12 additions & 18 deletions clang/lib/Headers/avx512fintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1458,24 +1458,21 @@ _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
(__v8df)_mm512_sqrt_round_pd((A), (R)), \
(__v8df)_mm512_setzero_pd()))

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_sqrt_pd(__m512d __A)
{
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sqrt_pd(__m512d __A) {
return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
_MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A) {
return (__m512d)__builtin_ia32_selectpd_512(__U,
(__v8df)_mm512_sqrt_pd(__A),
(__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
{
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A) {
return (__m512d)__builtin_ia32_selectpd_512(__U,
(__v8df)_mm512_sqrt_pd(__A),
(__v8df)_mm512_setzero_pd());
Expand All @@ -1494,24 +1491,21 @@ _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
(__v16sf)_mm512_sqrt_round_ps((A), (R)), \
(__v16sf)_mm512_setzero_ps()))

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_sqrt_ps(__m512 __A)
{
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sqrt_ps(__m512 __A) {
return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
_MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
{
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) {
return (__m512)__builtin_ia32_selectps_512(__U,
(__v16sf)_mm512_sqrt_ps(__A),
(__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
{
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A) {
return (__m512)__builtin_ia32_selectps_512(__U,
(__v16sf)_mm512_sqrt_ps(__A),
(__v16sf)_mm512_setzero_ps());
Expand Down
88 changes: 40 additions & 48 deletions clang/lib/Headers/avx512vlintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -3444,61 +3444,53 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v8si)(__m256i)(index), \
(__v8si)(__m256i)(v1), (int)(scale))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
(__v2df)_mm_sqrt_pd(__A),
(__v2df)__W);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
return (__m128d)__builtin_ia32_selectpd_128(
(__mmask8)__U, (__v2df)_mm_sqrt_pd(__A), (__v2df)__W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
(__v2df)_mm_sqrt_pd(__A),
(__v2df)_mm_setzero_pd());
}
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
return (__m128d)__builtin_ia32_selectpd_128(
(__mmask8)__U, (__v2df)_mm_sqrt_pd(__A), (__v2df)_mm_setzero_pd());
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
(__v4df)_mm256_sqrt_pd(__A),
(__v4df)__W);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
return (__m256d)__builtin_ia32_selectpd_256(
(__mmask8)__U, (__v4df)_mm256_sqrt_pd(__A), (__v4df)__W);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
(__v4df)_mm256_sqrt_pd(__A),
(__v4df)_mm256_setzero_pd());
}
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
return (__m256d)__builtin_ia32_selectpd_256(
(__mmask8)__U, (__v4df)_mm256_sqrt_pd(__A), (__v4df)_mm256_setzero_pd());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_sqrt_ps(__A),
(__v4sf)__W);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
return (__m128)__builtin_ia32_selectps_128(
(__mmask8)__U, (__v4sf)_mm_sqrt_ps(__A), (__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_sqrt_ps(__A),
(__v4sf)_mm_setzero_ps());
}
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
return (__m128)__builtin_ia32_selectps_128(
(__mmask8)__U, (__v4sf)_mm_sqrt_ps(__A), (__v4sf)_mm_setzero_ps());
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_sqrt_ps(__A),
(__v8sf)__W);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
return (__m256)__builtin_ia32_selectps_256(
(__mmask8)__U, (__v8sf)_mm256_sqrt_ps(__A), (__v8sf)__W);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_sqrt_ps(__A),
(__v8sf)_mm256_setzero_ps());
}
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
return (__m256)__builtin_ia32_selectps_256(
(__mmask8)__U, (__v8sf)_mm256_sqrt_ps(__A), (__v8sf)_mm256_setzero_ps());
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
Expand Down
Loading
Loading